diff --git a/.gitattributes b/.gitattributes
new file mode 100644
index 0000000000..6c01695fd2
--- /dev/null
+++ b/.gitattributes
@@ -0,0 +1,16 @@
+# Use a whitelist: only the file types listed below have the text attribute set.
+# "text" means line endings are converted to LF on check-in.
+# "eol=lf" means line endings are converted to LF on checkout.
+*.cpp text eol=lf
+*.cc text eol=lf
+*.c text eol=lf
+*.h text eol=lf
+*.java text eol=lf
+*.py text eol=lf
+*.js text eol=lf
+*.md text eol=lf
+*.txt text eol=lf
+*.sh text eol=lf
+*.thrift text eol=lf
+*.proto text eol=lf
+*.conf text eol=lf
diff --git a/be/src/exec/es_scan_node.cpp b/be/src/exec/es_scan_node.cpp
index d70fbfc008..c3e426c695 100644
--- a/be/src/exec/es_scan_node.cpp
+++ b/be/src/exec/es_scan_node.cpp
@@ -1,877 +1,877 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "es_scan_node.h"
-
-#include <string>
-#include <boost/algorithm/string.hpp>
-#include <gutil/strings/substitute.h>
-
-#include "gen_cpp/PlanNodes_types.h"
-#include "gen_cpp/Exprs_types.h"
-#include "runtime/runtime_state.h"
-#include "runtime/row_batch.h"
-#include "runtime/string_value.h"
-#include "runtime/tuple_row.h"
-#include "runtime/client_cache.h"
-#include "util/runtime_profile.h"
-#include "util/debug_util.h"
-#include "service/backend_options.h"
-#include "olap/olap_common.h"
-#include "olap/utils.h"
-#include "exprs/expr_context.h"
-#include "exprs/expr.h"
-#include "exprs/in_predicate.h"
-#include "exprs/slot_ref.h"
-
-namespace doris {
-
-// $0 = column type (e.g. INT)
-const string ERROR_INVALID_COL_DATA = "Data source returned inconsistent column data. "
-    "Expected value of type $0 based on column metadata. This likely indicates a "
-    "problem with the data source library.";
-const string ERROR_MEM_LIMIT_EXCEEDED = "DataSourceScanNode::$0() failed to allocate "
-    "$1 bytes for $2.";
-
-EsScanNode::EsScanNode(
-        ObjectPool* pool,
-        const TPlanNode& tnode,
-        const DescriptorTbl& descs) :
-            ScanNode(pool, tnode, descs),
-            _tuple_id(tnode.es_scan_node.tuple_id),
-            _scan_range_idx(0) {
-    if (tnode.es_scan_node.__isset.properties) {
-        _properties = tnode.es_scan_node.properties;
-    }
-}
-
-EsScanNode::~EsScanNode() {
-}
-
-Status EsScanNode::prepare(RuntimeState* state) {
-    VLOG(1) << "EsScanNode::Prepare";
-
-    RETURN_IF_ERROR(ScanNode::prepare(state));
-    _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id);
-    if (_tuple_desc == nullptr) {
-        std::stringstream ss;
-        ss << "es tuple descriptor is null, _tuple_id=" << _tuple_id;
-        LOG(WARNING) << ss.str();
-        return Status::InternalError(ss.str());
-    }
-    _env = state->exec_env();
-
-    return Status::OK();
-}
-
-Status EsScanNode::open(RuntimeState* state) {
-    VLOG(1) << "EsScanNode::Open";
-
-    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::OPEN));
-    RETURN_IF_CANCELLED(state);
-    SCOPED_TIMER(_runtime_profile->total_time_counter());
-    RETURN_IF_ERROR(ExecNode::open(state));
-
-    // TExtOpenParams.row_schema
-    vector<TExtColumnDesc> cols;
-    for (const SlotDescriptor* slot : _tuple_desc->slots()) {
-        TExtColumnDesc col;
-        col.__set_name(slot->col_name());
-        col.__set_type(slot->type().to_thrift());
-        cols.emplace_back(std::move(col));
-    }
-    TExtTableSchema row_schema;
-    row_schema.cols = std::move(cols);
-    row_schema.__isset.cols = true;
-
-    // TExtOpenParams.predicates
-    vector<vector<TExtPredicate> > predicates;
-    vector<int> predicate_to_conjunct;
-    for (int i = 0; i < _conjunct_ctxs.size(); ++i) {
-        VLOG(1) << "conjunct: " << _conjunct_ctxs[i]->root()->debug_string();
-        vector<TExtPredicate> disjuncts;
-        if (get_disjuncts(_conjunct_ctxs[i], _conjunct_ctxs[i]->root(), disjuncts)) {
-            predicates.emplace_back(std::move(disjuncts));
-            predicate_to_conjunct.push_back(i);
-        }
-    }
-
-    // open every scan range
-    vector<int> conjunct_accepted_times(_conjunct_ctxs.size(), 0); 
-    for (int i = 0; i < _scan_ranges.size(); ++i) {
-        TEsScanRange& es_scan_range = _scan_ranges[i];
-
-        if (es_scan_range.es_hosts.empty()) {
-            std::stringstream ss;
-            ss << "es fail to open: hosts empty";
-            LOG(WARNING) << ss.str();
-            return Status::InternalError(ss.str());
-        }
-
-
-        // TExtOpenParams
-        TExtOpenParams params;
-        params.__set_query_id(state->query_id());
-        _properties["index"] = es_scan_range.index;
-        if (es_scan_range.__isset.type) {
-            _properties["type"] = es_scan_range.type;
-        }
-        _properties["shard_id"] = std::to_string(es_scan_range.shard_id);
-        params.__set_properties(_properties);
-        params.__set_row_schema(row_schema);
-        params.__set_batch_size(state->batch_size());
-        params.__set_predicates(predicates);
-        TExtOpenResult result;
-
-        // choose an es node, local is the first choice
-        std::string localhost = BackendOptions::get_localhost();
-        bool is_success = false;
-        for (int j = 0; j < 2; ++j) {
-            for (auto& es_host : es_scan_range.es_hosts) {
-                if ((j == 0 && es_host.hostname != localhost)
-                    || (j == 1 && es_host.hostname == localhost)) {
-                    continue;
-                }
-                Status status = open_es(es_host, result, params);
-                if (status.ok()) {
-                   is_success = true;
-                   _addresses.push_back(es_host);
-                   _scan_handles.push_back(result.scan_handle);
-                   if (result.__isset.accepted_conjuncts) {
-                       for (int index : result.accepted_conjuncts) {
-                           conjunct_accepted_times[predicate_to_conjunct[index]]++;
-                       }
-                   }
-                   break;
-                } else if (status.code() == TStatusCode::ES_SHARD_NOT_FOUND) {
-                    // if shard not found, try other nodes
-                    LOG(WARNING) << "shard not found on es node: "
-                                 << ", address=" << es_host
-                                 << ", scan_range_idx=" << i << ", try other nodes";
-                } else {
-                    LOG(WARNING) << "es open error: scan_range_idx=" << i
-                                 << ", address=" << es_host
-                                 << ", msg=" << status.get_error_msg();
-                    return status;
-                } 
-            }
-            if (is_success) {
-                break;
-            }
-        }
-
-        if (!is_success) {
-            std::stringstream ss;
-            ss << "es open error: scan_range_idx=" << i
-               << ", can't find shard on any node";
-            return Status::InternalError(ss.str());
-        }
-    }
-
-    // remove those conjuncts that accepted by all scan ranges
-    for (int i = predicate_to_conjunct.size() - 1; i >= 0; i--) {
-        int conjunct_index = predicate_to_conjunct[i];
-        if (conjunct_accepted_times[conjunct_index] == _scan_ranges.size()) {
-            _pushdown_conjunct_ctxs.push_back(*(_conjunct_ctxs.begin() + conjunct_index));
-            _conjunct_ctxs.erase(_conjunct_ctxs.begin() + conjunct_index);
-        }
-    }
-
-    for (int i = 0; i < _conjunct_ctxs.size(); ++i) {
-        if (!check_left_conjuncts(_conjunct_ctxs[i]->root())) {
-            return Status::InternalError("esquery could only be executed on es, but could not push down to es");
-        }
-    }
-
-    return Status::OK();
-}
-
-Status EsScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
-    VLOG(1) << "EsScanNode::GetNext";
-
-    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT));
-    RETURN_IF_CANCELLED(state);
-    SCOPED_TIMER(_runtime_profile->total_time_counter());
-    SCOPED_TIMER(materialize_tuple_timer());
-
-    // create tuple
-    MemPool* tuple_pool = row_batch->tuple_data_pool();
-    int64_t tuple_buffer_size;
-    uint8_t* tuple_buffer = nullptr;
-    RETURN_IF_ERROR(row_batch->resize_and_allocate_tuple_buffer(state, &tuple_buffer_size, &tuple_buffer));
-    Tuple* tuple = reinterpret_cast<Tuple*>(tuple_buffer);
-    
-    // get batch
-    TExtGetNextResult result;
-    RETURN_IF_ERROR(get_next_from_es(result));
-    _offsets[_scan_range_idx] += result.rows.num_rows;
-
-    // convert
-    VLOG(1) << "begin to convert: scan_range_idx=" << _scan_range_idx
-            << ", num_rows=" << result.rows.num_rows;
-    vector<TExtColumnData>& cols = result.rows.cols;
-    // indexes of the next non-null value in the row batch, per column. 
-    vector<int> cols_next_val_idx(_tuple_desc->slots().size(), 0);
-    for (int row_idx = 0; row_idx < result.rows.num_rows; row_idx++) {
-        if (reached_limit()) {
-            *eos = true;
-            break;
-        }
-        RETURN_IF_ERROR(materialize_row(tuple_pool, tuple, cols, row_idx, cols_next_val_idx));
-        TupleRow* tuple_row = row_batch->get_row(row_batch->add_row());
-        tuple_row->set_tuple(0, tuple);
-        if (ExecNode::eval_conjuncts(_conjunct_ctxs.data(), _conjunct_ctxs.size(), tuple_row)) {
-            row_batch->commit_last_row();
-            tuple = reinterpret_cast<Tuple*>(
-                reinterpret_cast<uint8_t*>(tuple) + _tuple_desc->byte_size());
-            ++_num_rows_returned;
-        }
-    }
-
-    VLOG(1) << "finish one batch: num_rows=" << row_batch->num_rows();
-    COUNTER_SET(_rows_returned_counter, _num_rows_returned);
-    if (result.__isset.eos && result.eos) {
-        VLOG(1) << "es finish one scan_range: scan_range_idx=" << _scan_range_idx;
-        ++_scan_range_idx;
-    }
-    if (_scan_range_idx == _scan_ranges.size()) {
-        *eos = true;
-    }
-
-    return Status::OK();
-}
-
-Status EsScanNode::close(RuntimeState* state) {
-    if (is_closed()) return Status::OK();
-    VLOG(1) << "EsScanNode::Close";
-    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::CLOSE));
-    SCOPED_TIMER(_runtime_profile->total_time_counter());
-    Expr::close(_pushdown_conjunct_ctxs, state);
-    RETURN_IF_ERROR(ExecNode::close(state));
-    for (int i = 0; i < _addresses.size(); ++i) {
-        TExtCloseParams params;
-        params.__set_scan_handle(_scan_handles[i]);
-        TExtCloseResult result;
-
-#ifndef BE_TEST
-        const TNetworkAddress& address = _addresses[i];
-        try {
-            Status status;
-            ExtDataSourceServiceClientCache* client_cache = _env->extdatasource_client_cache();
-            ExtDataSourceServiceConnection client(client_cache, address, 10000, &status);
-            if (!status.ok()) {
-                LOG(WARNING) << "es create client error: scan_range_idx=" << i
-                             << ", address=" << address
-                             << ", msg=" << status.get_error_msg();
-                return status;
-            }
-
-            try {
-                VLOG(1) << "es close param=" << apache::thrift::ThriftDebugString(params);
-                client->close(result, params);
-            } catch (apache::thrift::transport::TTransportException& e) {
-                LOG(WARNING) << "es close retrying, because: " << e.what();
-                RETURN_IF_ERROR(client.reopen());
-                client->close(result, params);
-            }
-        } catch (apache::thrift::TException &e) {
-            std::stringstream ss;
-            ss << "es close error: scan_range_idx=" << i
-               << ", msg=" << e.what();
-            LOG(WARNING) << ss.str();
-            return Status::ThriftRpcError(ss.str());
-        }
-
-        VLOG(1) << "es close result=" << apache::thrift::ThriftDebugString(result);
-        Status status(result.status);
-        if (!status.ok()) {
-            LOG(WARNING) << "es close error: : scan_range_idx=" << i
-                         << ", msg=" << status.get_error_msg();
-            return status;
-        }
-#else
-        TStatus status;
-        result.__set_status(status);
-#endif
-    }
-
-    return Status::OK();
-}
-
-void EsScanNode::debug_string(int indentation_level, stringstream* out) const {
-    *out << string(indentation_level * 2, ' ');
-    *out << "EsScanNode(tupleid=" << _tuple_id;
-    *out << ")" << std::endl;
-
-    for (int i = 0; i < _children.size(); ++i) {
-        _children[i]->debug_string(indentation_level + 1, out);
-    }
-}
-
-Status EsScanNode::set_scan_ranges(const vector<TScanRangeParams>& scan_ranges) {
-    for (int i = 0; i < scan_ranges.size(); ++i) {
-        TScanRangeParams scan_range = scan_ranges[i];
-        DCHECK(scan_range.scan_range.__isset.es_scan_range);
-        TEsScanRange es_scan_range = scan_range.scan_range.es_scan_range;
-        _scan_ranges.push_back(es_scan_range);
-    }
-
-    _offsets.resize(scan_ranges.size(), 0);
-    return Status::OK();
-}
-
-Status EsScanNode::open_es(TNetworkAddress& address, TExtOpenResult& result, TExtOpenParams& params) {
-
-    VLOG(1) << "es open param=" << apache::thrift::ThriftDebugString(params);
-#ifndef BE_TEST
-    try {
-        ExtDataSourceServiceClientCache* client_cache = _env->extdatasource_client_cache();
-        Status status;
-        ExtDataSourceServiceConnection client(client_cache, address, 10000, &status);
-        if (!status.ok()) {
-            std::stringstream ss;
-            ss << "es create client error: address=" << address
-               << ", msg=" << status.get_error_msg();
-            return Status::InternalError(ss.str());
-        }
-
-        try {
-            client->open(result, params);
-        } catch (apache::thrift::transport::TTransportException& e) {
-            LOG(WARNING) << "es open retrying, because: " << e.what();
-            RETURN_IF_ERROR(client.reopen());
-            client->open(result, params);
-        }
-        VLOG(1) << "es open result=" << apache::thrift::ThriftDebugString(result);
-        return Status(result.status);
-    } catch (apache::thrift::TException &e) {
-        std::stringstream ss;
-        ss << "es open error: address=" << address << ", msg=" << e.what();
-        return Status::InternalError(ss.str());
-    }
-#else
-    TStatus status;
-    result.__set_status(status);
-    result.__set_scan_handle("0");
-    return Status(status);
-#endif
-}
-
-// legacy conjuncts must not contain match function
-bool EsScanNode::check_left_conjuncts(Expr* conjunct) {
-    if (is_match_func(conjunct)) {
-        return false;
-    } else {
-        int num_children = conjunct->get_num_children();
-        for (int child_idx = 0; child_idx < num_children; ++child_idx) {
-            if (!check_left_conjuncts(conjunct->get_child(child_idx))) {
-                return false;
-            }
-        }
-        return true;
-    }
-}
-
-bool EsScanNode::ignore_cast(SlotDescriptor* slot, Expr* expr) {
-    if (slot->type().is_date_type() && expr->type().is_date_type()) {
-        return true;
-    }
-    if (slot->type().is_string_type() && expr->type().is_string_type()) {
-        return true;
-    }
-    return false;
-}
-
-bool EsScanNode::get_disjuncts(ExprContext* context, Expr* conjunct,
-                               vector<TExtPredicate>& disjuncts) {
-    if (TExprNodeType::BINARY_PRED == conjunct->node_type()) {
-        if (conjunct->children().size() != 2) {
-            VLOG(1) << "get disjuncts fail: number of childs is not 2";
-            return false;
-        }
-        SlotRef* slotRef;
-        TExprOpcode::type op;
-        Expr* expr;
-        if (TExprNodeType::SLOT_REF == conjunct->get_child(0)->node_type()) {
-            expr = conjunct->get_child(1);
-            slotRef = (SlotRef*)(conjunct->get_child(0));
-            op = conjunct->op();
-        } else if (TExprNodeType::SLOT_REF == conjunct->get_child(1)->node_type()) {
-            expr = conjunct->get_child(0);
-            slotRef = (SlotRef*)(conjunct->get_child(1));
-            op = conjunct->op();
-        } else {
-            VLOG(1) << "get disjuncts fail: no SLOT_REF child";
-            return false;
-        }
-
-        SlotDescriptor* slot_desc = get_slot_desc(slotRef);
-        if (slot_desc == nullptr) {
-            VLOG(1) << "get disjuncts fail: slot_desc is null";
-            return false;
-        }
-
-        TExtLiteral literal;
-        if (!to_ext_literal(context, expr, &literal)) {
-            VLOG(1) << "get disjuncts fail: can't get literal, node_type="
-                    << expr->node_type();
-            return false;
-        }
-
-        TExtColumnDesc columnDesc;
-        columnDesc.__set_name(slot_desc->col_name());
-        columnDesc.__set_type(slot_desc->type().to_thrift());
-        TExtBinaryPredicate binaryPredicate;
-        binaryPredicate.__set_col(columnDesc);
-        binaryPredicate.__set_op(op);
-        binaryPredicate.__set_value(std::move(literal));
-        TExtPredicate predicate;
-        predicate.__set_node_type(TExprNodeType::BINARY_PRED);
-        predicate.__set_binary_predicate(binaryPredicate);
-        disjuncts.push_back(std::move(predicate));
-        return true;
-    } else if (is_match_func(conjunct)) {
-        // if this is a function call expr and function name is match, then push 
-        // down it to es
-        TExtFunction match_function;
-        match_function.__set_func_name(conjunct->fn().name.function_name);
-        vector<TExtLiteral> query_conditions;
-
-
-        TExtLiteral literal;
-        if (!to_ext_literal(context, conjunct->get_child(1), &literal)) {
-            VLOG(1) << "get disjuncts fail: can't get literal, node_type="
-                    << conjunct->get_child(1)->node_type();
-            return false;
-        }
-
-        query_conditions.push_back(std::move(literal));
-        match_function.__set_values(query_conditions);
-        TExtPredicate predicate;
-        predicate.__set_node_type(TExprNodeType::FUNCTION_CALL);
-        predicate.__set_ext_function(match_function);
-        disjuncts.push_back(std::move(predicate));
-        return true;
-    } else if (TExprNodeType::IN_PRED == conjunct->node_type()) {
-        // the op code maybe FILTER_NEW_IN, it means there is function in list
-        // like col_a in (abs(1))
-        if (TExprOpcode::FILTER_IN != conjunct->op() 
-            && TExprOpcode::FILTER_NOT_IN != conjunct->op()) {
-            return false;
-        }
-        TExtInPredicate ext_in_predicate;
-        vector<TExtLiteral> in_pred_values;
-        InPredicate* pred = dynamic_cast<InPredicate*>(conjunct);
-        ext_in_predicate.__set_is_not_in(pred->is_not_in());
-        if (Expr::type_without_cast(pred->get_child(0)) != TExprNodeType::SLOT_REF) {
-            return false;
-        }
-
-        SlotRef* slot_ref = (SlotRef*)(conjunct->get_child(0));
-        SlotDescriptor* slot_desc = get_slot_desc(slot_ref);
-        if (slot_desc == nullptr) {
-            return false;
-        }
-        TExtColumnDesc columnDesc;
-        columnDesc.__set_name(slot_desc->col_name());
-        columnDesc.__set_type(slot_desc->type().to_thrift());
-        ext_in_predicate.__set_col(columnDesc);
-
-        if (pred->get_child(0)->type().type != slot_desc->type().type) {
-            if (!ignore_cast(slot_desc, pred->get_child(0))) {
-                return false;
-            }
-        }
-
-        HybirdSetBase::IteratorBase* iter = pred->hybird_set()->begin();
-        while (iter->has_next()) {
-            if (nullptr == iter->get_value()) {
-                return false;
-            }
-            TExtLiteral literal;
-            if (!to_ext_literal(slot_desc->type().type, const_cast<void *>(iter->get_value()), &literal)) {
-                VLOG(1) << "get disjuncts fail: can't get literal, node_type="
-                        << slot_desc->type().type;
-                return false;
-            }
-            in_pred_values.push_back(literal);
-            iter->next();
-        }
-        ext_in_predicate.__set_values(in_pred_values);
-        TExtPredicate predicate;
-        predicate.__set_node_type(TExprNodeType::IN_PRED);
-        predicate.__set_in_predicate(ext_in_predicate);
-        disjuncts.push_back(std::move(predicate));
-        return true;
-    } else if (TExprNodeType::COMPOUND_PRED == conjunct->node_type()) {
-        if (TExprOpcode::COMPOUND_OR != conjunct->op()) {
-            VLOG(1) << "get disjuncts fail: op is not COMPOUND_OR";
-            return false;
-        }
-        if (!get_disjuncts(context, conjunct->get_child(0), disjuncts)) {
-            return false;
-        }
-        if (!get_disjuncts(context, conjunct->get_child(1), disjuncts)) {
-            return false;
-        }
-        return true;
-    } else {
-        VLOG(1) << "get disjuncts fail: node type is " << conjunct->node_type()
-                << ", should be BINARY_PRED or COMPOUND_PRED";
-        return false;
-    }
-}
-
-bool EsScanNode::is_match_func(Expr* conjunct) {
-    if (TExprNodeType::FUNCTION_CALL == conjunct->node_type()
-        && conjunct->fn().name.function_name == "esquery") {
-            return true;
-    }
-    return false;
-}
-
-SlotDescriptor* EsScanNode::get_slot_desc(SlotRef* slotRef) {
-    std::vector<SlotId> slot_ids;
-    slotRef->get_slot_ids(&slot_ids);
-    SlotDescriptor* slot_desc = nullptr;
-    for (SlotDescriptor* slot : _tuple_desc->slots()) {
-        if (slot->id() == slot_ids[0]) {
-            slot_desc = slot;
-            break;
-        }
-    }
-    return slot_desc;
-}
-
-bool EsScanNode::to_ext_literal(ExprContext* context, Expr* expr, TExtLiteral* literal) {
-    switch (expr->node_type()) {
-    case TExprNodeType::BOOL_LITERAL:
-    case TExprNodeType::INT_LITERAL:
-    case TExprNodeType::LARGE_INT_LITERAL:
-    case TExprNodeType::FLOAT_LITERAL:
-    case TExprNodeType::DECIMAL_LITERAL:
-    case TExprNodeType::STRING_LITERAL:
-    case TExprNodeType::DATE_LITERAL:
-        return to_ext_literal(expr->type().type, context->get_value(expr, NULL), literal);
-    default:
-        return false;
-    }
-}
-
-bool EsScanNode::to_ext_literal(PrimitiveType slot_type, void* value, TExtLiteral* literal) {
-    TExprNodeType::type node_type;
-    switch (slot_type) {
-    case TYPE_BOOLEAN: {
-        node_type = (TExprNodeType::BOOL_LITERAL);
-        TBoolLiteral bool_literal;
-        bool_literal.__set_value(*reinterpret_cast<bool*>(value));
-        literal->__set_bool_literal(bool_literal);
-        break;
-    }
-
-    case TYPE_TINYINT: {
-        node_type = (TExprNodeType::INT_LITERAL);
-        TIntLiteral int_literal;
-        int_literal.__set_value(*reinterpret_cast<int8_t*>(value));
-        literal->__set_int_literal(int_literal);
-        break;
-    }
-    case TYPE_SMALLINT: {
-        node_type = (TExprNodeType::INT_LITERAL);
-        TIntLiteral int_literal;
-        int_literal.__set_value(*reinterpret_cast<int16_t*>(value));
-        literal->__set_int_literal(int_literal);
-        break;
-    }
-    case TYPE_INT: {
-        node_type = (TExprNodeType::INT_LITERAL);
-        TIntLiteral int_literal;
-        int_literal.__set_value(*reinterpret_cast<int32_t*>(value));
-        literal->__set_int_literal(int_literal);
-        break;
-    }
-    case TYPE_BIGINT: {
-        node_type = (TExprNodeType::INT_LITERAL);
-        TIntLiteral int_literal;
-        int_literal.__set_value(*reinterpret_cast<int64_t*>(value));
-        literal->__set_int_literal(int_literal);
-        break;
-    }
-
-    case TYPE_LARGEINT: {
-        node_type = (TExprNodeType::LARGE_INT_LITERAL);
-        char buf[48];
-        int len = 48;
-        char* v = LargeIntValue::to_string(*reinterpret_cast<__int128*>(value), buf, &len);
-        TLargeIntLiteral large_int_literal;
-        large_int_literal.__set_value(v);
-        literal->__set_large_int_literal(large_int_literal);
-        break;
-    }
-
-    case TYPE_FLOAT: {
-        node_type = (TExprNodeType::FLOAT_LITERAL);
-        TFloatLiteral float_literal;
-        float_literal.__set_value(*reinterpret_cast<float*>(value));
-        literal->__set_float_literal(float_literal);
-        break;
-    }
-    case TYPE_DOUBLE: {
-        node_type = (TExprNodeType::FLOAT_LITERAL);
-        TFloatLiteral float_literal;
-        float_literal.__set_value(*reinterpret_cast<double*>(value));
-        literal->__set_float_literal(float_literal);
-        break;
-    }
-
-    case TYPE_DECIMAL: {
-        node_type = (TExprNodeType::DECIMAL_LITERAL);
-        TDecimalLiteral decimal_literal;
-        decimal_literal.__set_value(reinterpret_cast<DecimalValue*>(value)->to_string());
-        literal->__set_decimal_literal(decimal_literal);
-        break;
-    }
-
-    case TYPE_DATE:
-    case TYPE_DATETIME: {
-        node_type = (TExprNodeType::DATE_LITERAL);
-        const DateTimeValue date_value = *reinterpret_cast<DateTimeValue*>(value);
-        char str[MAX_DTVALUE_STR_LEN];
-        date_value.to_string(str);
-        TDateLiteral date_literal;
-        date_literal.__set_value(str);
-        literal->__set_date_literal(date_literal);
-        break;
-    }
-
-    case TYPE_CHAR:
-    case TYPE_VARCHAR: {
-        node_type = (TExprNodeType::STRING_LITERAL);
-        TStringLiteral string_literal;
-        string_literal.__set_value((reinterpret_cast<StringValue*>(value))->debug_string());
-        literal->__set_string_literal(string_literal);
-        break;
-    }
-
-    default: {
-        DCHECK(false) << "Invalid type.";
-        return false;
-    }
-    }
-    literal->__set_node_type(node_type);
-    return true;
-}
-
-Status EsScanNode::get_next_from_es(TExtGetNextResult& result) {
-    TExtGetNextParams params;
-    params.__set_scan_handle(_scan_handles[_scan_range_idx]);
-    params.__set_offset(_offsets[_scan_range_idx]);
-
-    // getNext
-    const TNetworkAddress &address = _addresses[_scan_range_idx];
-#ifndef BE_TEST
-    try {
-        Status create_client_status;
-        ExtDataSourceServiceClientCache *client_cache = _env->extdatasource_client_cache();
-        ExtDataSourceServiceConnection client(client_cache, address, 10000, &create_client_status);
-        if (!create_client_status.ok()) {
-            LOG(WARNING) << "es create client error: scan_range_idx=" << _scan_range_idx
-                         << ", address=" << address
-                         << ", msg=" << create_client_status.get_error_msg();
-            return create_client_status;
-        }
-
-        try {
-            VLOG(1) << "es get_next param=" << apache::thrift::ThriftDebugString(params);
-            client->getNext(result, params);
-        } catch (apache::thrift::transport::TTransportException& e) {
-            std::stringstream ss;
-            ss << "es get_next error: scan_range_idx=" << _scan_range_idx
-               << ", msg=" << e.what();
-            LOG(WARNING) << ss.str();
-            RETURN_IF_ERROR(client.reopen());
-            return Status::ThriftRpcError(ss.str());
-        }
-    } catch (apache::thrift::TException &e) {
-        std::stringstream ss;
-        ss << "es get_next error: scan_range_idx=" << _scan_range_idx
-           << ", msg=" << e.what();
-        LOG(WARNING) << ss.str();
-        return Status::ThriftRpcError(ss.str());
-    }
-#else
-    TStatus status;
-    result.__set_status(status);
-    result.__set_eos(true);
-    TExtColumnData col_data;
-    std::vector<bool> is_null;
-    is_null.push_back(false);
-    col_data.__set_is_null(is_null);
-    std::vector<int32_t> int_vals;
-    int_vals.push_back(1);
-    int_vals.push_back(2);
-    col_data.__set_int_vals(int_vals);
-    std::vector<TExtColumnData> cols;
-    cols.push_back(col_data);
-    TExtRowBatch rows;
-    rows.__set_cols(cols);
-    rows.__set_num_rows(2);
-    result.__set_rows(rows);
-    return Status(status);
-#endif
-
-    // check result
-    VLOG(1) << "es get_next result=" << apache::thrift::ThriftDebugString(result);
-    Status get_next_status(result.status);
-    if (!get_next_status.ok()) {
-        LOG(WARNING) << "es get_next error: scan_range_idx=" << _scan_range_idx
-                     << ", address=" << address
-                     << ", msg=" << get_next_status.get_error_msg();
-        return get_next_status;
-    }
-    if (!result.__isset.rows || !result.rows.__isset.num_rows) {
-        std::stringstream ss;
-        ss << "es get_next error: scan_range_idx=" << _scan_range_idx
-           << ", msg=rows or num_rows not in result";
-        LOG(WARNING) << ss.str();
-        return Status::InternalError(ss.str());
-    }
-
-    return Status::OK();
-}
-
-Status EsScanNode::materialize_row(MemPool* tuple_pool, Tuple* tuple,
-                                   const vector<TExtColumnData>& cols, int row_idx,
-                                   vector<int>& cols_next_val_idx) {
-  tuple->init(_tuple_desc->byte_size());
-
-  for (int i = 0; i < _tuple_desc->slots().size(); ++i) {
-    const SlotDescriptor* slot_desc = _tuple_desc->slots()[i];
-
-    if (!slot_desc->is_materialized()) {
-        continue;
-    }
-
-    void* slot = tuple->get_slot(slot_desc->tuple_offset());
-    const TExtColumnData& col = cols[i];
-
-    if (col.is_null[row_idx]) {
-      tuple->set_null(slot_desc->null_indicator_offset());
-      continue;
-    } else {
-      tuple->set_not_null(slot_desc->null_indicator_offset());
-    }
-
-    int val_idx = cols_next_val_idx[i]++;
-    switch (slot_desc->type().type) {
-      case TYPE_CHAR:
-      case TYPE_VARCHAR: {
-          if (val_idx >= col.string_vals.size()) {
-            return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "STRING"));
-          }
-          const string& val = col.string_vals[val_idx];
-          size_t val_size = val.size();
-          char* buffer = reinterpret_cast<char*>(tuple_pool->try_allocate_unaligned(val_size));
-          if (UNLIKELY(buffer == NULL)) {
-            string details = strings::Substitute(ERROR_MEM_LIMIT_EXCEEDED, "MaterializeNextRow",
-                val_size, "string slot");
-            return tuple_pool->mem_tracker()->MemLimitExceeded(NULL, details, val_size);
-          }
-          memcpy(buffer, val.data(), val_size);
-          reinterpret_cast<StringValue*>(slot)->ptr = buffer;
-          reinterpret_cast<StringValue*>(slot)->len = val_size;
-          break;
-        }
-      case TYPE_TINYINT:
-        if (val_idx >= col.byte_vals.size()) {
-          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "TINYINT"));
-        }
-        *reinterpret_cast<int8_t*>(slot) = col.byte_vals[val_idx];
-        break;
-      case TYPE_SMALLINT:
-        if (val_idx >= col.short_vals.size()) {
-          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "SMALLINT"));
-        }
-        *reinterpret_cast<int16_t*>(slot) = col.short_vals[val_idx];
-        break;
-      case TYPE_INT:
-        if (val_idx >= col.int_vals.size()) {
-          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "INT"));
-        }
-        *reinterpret_cast<int32_t*>(slot) = col.int_vals[val_idx];
-        break;
-      case TYPE_BIGINT:
-        if (val_idx >= col.long_vals.size()) {
-          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "BIGINT"));
-        }
-        *reinterpret_cast<int64_t*>(slot) = col.long_vals[val_idx];
-        break;
-      case TYPE_LARGEINT:
-        if (val_idx >= col.long_vals.size()) {
-          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "LARGEINT"));
-        }
-        *reinterpret_cast<int128_t*>(slot) = col.long_vals[val_idx];
-        break;
-      case TYPE_DOUBLE:
-        if (val_idx >= col.double_vals.size()) {
-          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "DOUBLE"));
-        }
-        *reinterpret_cast<double*>(slot) = col.double_vals[val_idx];
-        break;
-      case TYPE_FLOAT:
-        if (val_idx >= col.double_vals.size()) {
-          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "FLOAT"));
-        }
-        *reinterpret_cast<float*>(slot) = col.double_vals[val_idx];
-        break;
-      case TYPE_BOOLEAN:
-        if (val_idx >= col.bool_vals.size()) {
-          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN"));
-        }
-        *reinterpret_cast<int8_t*>(slot) = col.bool_vals[val_idx];
-        break;
-      case TYPE_DATE:
-        if (val_idx >= col.long_vals.size() ||
-            !reinterpret_cast<DateTimeValue*>(slot)->from_unixtime(col.long_vals[val_idx], "+08:00")) {
-          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATE"));
-        }
-        reinterpret_cast<DateTimeValue*>(slot)->cast_to_date();
-        break;
-      case TYPE_DATETIME: {
-        if (val_idx >= col.long_vals.size() ||
-            !reinterpret_cast<DateTimeValue*>(slot)->from_unixtime(col.long_vals[val_idx], "+08:00")) {
-          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATETIME"));
-        }
-        reinterpret_cast<DateTimeValue*>(slot)->set_type(TIME_DATETIME);
-        break;
-      }
-      case TYPE_DECIMAL: {
-        if (val_idx >= col.binary_vals.size()) {
-          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "DECIMAL"));
-        }
-        const string& val = col.binary_vals[val_idx];
-        *reinterpret_cast<DecimalValue*>(slot) = *reinterpret_cast<const DecimalValue*>(&val);
-        break;
-      }
-      default:
-        DCHECK(false);
-    }
-  }
-  return Status::OK();
-}
-
-}
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "es_scan_node.h"
+
+#include <string>
+#include <boost/algorithm/string.hpp>
+#include <gutil/strings/substitute.h>
+
+#include "gen_cpp/PlanNodes_types.h"
+#include "gen_cpp/Exprs_types.h"
+#include "runtime/runtime_state.h"
+#include "runtime/row_batch.h"
+#include "runtime/string_value.h"
+#include "runtime/tuple_row.h"
+#include "runtime/client_cache.h"
+#include "util/runtime_profile.h"
+#include "util/debug_util.h"
+#include "service/backend_options.h"
+#include "olap/olap_common.h"
+#include "olap/utils.h"
+#include "exprs/expr_context.h"
+#include "exprs/expr.h"
+#include "exprs/in_predicate.h"
+#include "exprs/slot_ref.h"
+
+namespace doris {
+
+// Message templates for strings::Substitute(). Here $0 = column type (e.g. INT).
+const string ERROR_INVALID_COL_DATA = "Data source returned inconsistent column data. "
+    "Expected value of type $0 based on column metadata. This likely indicates a "
+    "problem with the data source library.";
+const string ERROR_MEM_LIMIT_EXCEEDED = "DataSourceScanNode::$0() failed to allocate "
+    "$1 bytes for $2."; // $0 = method name, $1 = byte count, $2 = what was being allocated
+
+// Builds the node from its thrift description: remembers the output tuple id,
+// starts at scan range 0 and copies the optional ES properties when present.
+EsScanNode::EsScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs)
+        : ScanNode(pool, tnode, descs),
+          _tuple_id(tnode.es_scan_node.tuple_id),
+          _scan_range_idx(0) {
+    const auto& es_node = tnode.es_scan_node;
+    if (es_node.__isset.properties) {
+        _properties = es_node.properties;
+    }
+}
+
+// Nothing to release here; the ES scan handles are closed in close().
+EsScanNode::~EsScanNode() {}
+
+// Prepares the base ScanNode, then looks up the tuple descriptor for _tuple_id and
+// caches the exec env. Returns InternalError when the descriptor does not exist.
+Status EsScanNode::prepare(RuntimeState* state) {
+    VLOG(1) << "EsScanNode::Prepare";
+    RETURN_IF_ERROR(ScanNode::prepare(state));
+    _tuple_desc = state->desc_tbl().get_tuple_descriptor(_tuple_id);
+    if (_tuple_desc != nullptr) {
+        _env = state->exec_env();
+        return Status::OK();
+    }
+    std::stringstream err;
+    err << "es tuple descriptor is null, _tuple_id=" << _tuple_id;
+    LOG(WARNING) << err.str();
+    return Status::InternalError(err.str());
+}
+
+// Opens an ES scan for every scan range. Builds the row schema and the predicates
+// that can be pushed down, asks one ES host per range (local host first) to open
+// it, then moves conjuncts accepted by all ranges into _pushdown_conjunct_ctxs.
+Status EsScanNode::open(RuntimeState* state) {
+    VLOG(1) << "EsScanNode::Open";
+    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::OPEN));
+    RETURN_IF_CANCELLED(state);
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    RETURN_IF_ERROR(ExecNode::open(state));
+    // TExtOpenParams.row_schema
+    vector<TExtColumnDesc> cols;
+    for (const SlotDescriptor* slot : _tuple_desc->slots()) {
+        TExtColumnDesc col;
+        col.__set_name(slot->col_name());
+        col.__set_type(slot->type().to_thrift());
+        cols.emplace_back(std::move(col));
+    }
+    TExtTableSchema row_schema;
+    row_schema.cols = std::move(cols);
+    row_schema.__isset.cols = true;
+
+    // TExtOpenParams.predicates; predicate_to_conjunct[k] is the conjunct index of predicates[k]
+    vector<vector<TExtPredicate> > predicates;
+    vector<int> predicate_to_conjunct;
+    for (int i = 0; i < _conjunct_ctxs.size(); ++i) {
+        VLOG(1) << "conjunct: " << _conjunct_ctxs[i]->root()->debug_string();
+        vector<TExtPredicate> disjuncts;
+        if (get_disjuncts(_conjunct_ctxs[i], _conjunct_ctxs[i]->root(), disjuncts)) {
+            predicates.emplace_back(std::move(disjuncts));
+            predicate_to_conjunct.push_back(i);
+        }
+    }
+
+    // open every scan range
+    vector<int> conjunct_accepted_times(_conjunct_ctxs.size(), 0);
+    for (int i = 0; i < _scan_ranges.size(); ++i) {
+        TEsScanRange& es_scan_range = _scan_ranges[i];
+        if (es_scan_range.es_hosts.empty()) {
+            std::stringstream ss;
+            ss << "es fail to open: hosts empty";
+            LOG(WARNING) << ss.str();
+            return Status::InternalError(ss.str());
+        }
+        // TExtOpenParams. NOTE(review): a "type" set by an earlier range stays in _properties if this one has none.
+        TExtOpenParams params;
+        params.__set_query_id(state->query_id());
+        _properties["index"] = es_scan_range.index;
+        if (es_scan_range.__isset.type) {
+            _properties["type"] = es_scan_range.type;
+        }
+        _properties["shard_id"] = std::to_string(es_scan_range.shard_id);
+        params.__set_properties(_properties);
+        params.__set_row_schema(row_schema);
+        params.__set_batch_size(state->batch_size());
+        params.__set_predicates(predicates);
+        TExtOpenResult result;
+        // choose an es node, local is the first choice (pass 0 = local host, pass 1 = the others)
+        std::string localhost = BackendOptions::get_localhost();
+        bool is_success = false;
+        for (int j = 0; j < 2; ++j) {
+            for (auto& es_host : es_scan_range.es_hosts) {
+                if ((j == 0 && es_host.hostname != localhost)
+                    || (j == 1 && es_host.hostname == localhost)) {
+                    continue;
+                }
+                Status status = open_es(es_host, result, params);
+                if (status.ok()) {
+                    is_success = true;
+                    _addresses.push_back(es_host);
+                    _scan_handles.push_back(result.scan_handle);
+                    if (result.__isset.accepted_conjuncts) {
+                        for (int index : result.accepted_conjuncts) {
+                            // The index comes from the ES reply: skip it unless it names a sent predicate.
+                            if (index < 0 || index >= static_cast<int>(predicate_to_conjunct.size())) {
+                                LOG(WARNING) << "es open: ignore invalid accepted conjunct index=" << index;
+                                continue;
+                            }
+                            conjunct_accepted_times[predicate_to_conjunct[index]]++;
+                        }
+                    }
+                    break;
+                } else if (status.code() == TStatusCode::ES_SHARD_NOT_FOUND) {
+                    // if shard not found, try other nodes
+                    LOG(WARNING) << "shard not found on es node: "
+                                 << ", address=" << es_host
+                                 << ", scan_range_idx=" << i << ", try other nodes";
+                } else {
+                    LOG(WARNING) << "es open error: scan_range_idx=" << i
+                                 << ", address=" << es_host
+                                 << ", msg=" << status.get_error_msg();
+                    return status;
+                }
+            }
+            if (is_success) {
+                break;
+            }
+        }
+        if (!is_success) {
+            std::stringstream ss;
+            ss << "es open error: scan_range_idx=" << i
+               << ", can't find shard on any node";
+            return Status::InternalError(ss.str());
+        }
+    }
+
+    // remove those conjuncts that accepted by all scan ranges (back to front, so indexes stay valid)
+    for (int i = predicate_to_conjunct.size() - 1; i >= 0; i--) {
+        int conjunct_index = predicate_to_conjunct[i];
+        if (conjunct_accepted_times[conjunct_index] == _scan_ranges.size()) {
+            _pushdown_conjunct_ctxs.push_back(*(_conjunct_ctxs.begin() + conjunct_index));
+            _conjunct_ctxs.erase(_conjunct_ctxs.begin() + conjunct_index);
+        }
+    }
+    // Conjuncts still left here are evaluated locally, which is not possible for esquery.
+    for (int i = 0; i < _conjunct_ctxs.size(); ++i) {
+        if (!check_left_conjuncts(_conjunct_ctxs[i]->root())) {
+            return Status::InternalError("esquery could only be executed on es, but could not push down to es");
+        }
+    }
+    return Status::OK();
+}
+
+// Fetches the next batch of the current scan range from ES and materializes its rows into
+// row_batch, keeping rows that pass the remaining conjuncts. Sets *eos on limit or last range.
+Status EsScanNode::get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) {
+    VLOG(1) << "EsScanNode::GetNext";
+    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::GETNEXT));
+    RETURN_IF_CANCELLED(state);
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    SCOPED_TIMER(materialize_tuple_timer());
+    // No range left (none assigned, or all finished): do not index past _scan_handles/_offsets.
+    if (_scan_range_idx >= _scan_ranges.size()) {
+        *eos = true;
+        return Status::OK();
+    }
+    // create tuple
+    MemPool* tuple_pool = row_batch->tuple_data_pool();
+    int64_t tuple_buffer_size;
+    uint8_t* tuple_buffer = nullptr;
+    RETURN_IF_ERROR(row_batch->resize_and_allocate_tuple_buffer(state, &tuple_buffer_size, &tuple_buffer));
+    Tuple* tuple = reinterpret_cast<Tuple*>(tuple_buffer);
+    // get batch
+    TExtGetNextResult result;
+    RETURN_IF_ERROR(get_next_from_es(result));
+    _offsets[_scan_range_idx] += result.rows.num_rows;
+    // convert
+    VLOG(1) << "begin to convert: scan_range_idx=" << _scan_range_idx
+            << ", num_rows=" << result.rows.num_rows;
+    vector<TExtColumnData>& cols = result.rows.cols;
+    // indexes of the next non-null value in the row batch, per column.
+    vector<int> cols_next_val_idx(_tuple_desc->slots().size(), 0);
+    for (int row_idx = 0; row_idx < result.rows.num_rows; row_idx++) {
+        if (reached_limit()) {
+            *eos = true;
+            break;
+        }
+        RETURN_IF_ERROR(materialize_row(tuple_pool, tuple, cols, row_idx, cols_next_val_idx));
+        TupleRow* tuple_row = row_batch->get_row(row_batch->add_row());
+        tuple_row->set_tuple(0, tuple);
+        if (ExecNode::eval_conjuncts(_conjunct_ctxs.data(), _conjunct_ctxs.size(), tuple_row)) {
+            row_batch->commit_last_row();
+            tuple = reinterpret_cast<Tuple*>(reinterpret_cast<uint8_t*>(tuple) + _tuple_desc->byte_size());
+            ++_num_rows_returned;
+        }
+    }
+    VLOG(1) << "finish one batch: num_rows=" << row_batch->num_rows();
+    COUNTER_SET(_rows_returned_counter, _num_rows_returned);
+    if (result.__isset.eos && result.eos) {
+        VLOG(1) << "es finish one scan_range: scan_range_idx=" << _scan_range_idx;
+        ++_scan_range_idx;
+    }
+    if (_scan_range_idx == _scan_ranges.size()) {
+        *eos = true;
+    }
+    return Status::OK();
+}
+
+// Closes the pushed-down conjuncts and the base node, then the ES scan of every opened range.
+Status EsScanNode::close(RuntimeState* state) {
+    if (is_closed()) return Status::OK();
+    VLOG(1) << "EsScanNode::Close";
+    RETURN_IF_ERROR(exec_debug_action(TExecNodePhase::CLOSE));
+    SCOPED_TIMER(_runtime_profile->total_time_counter());
+    Expr::close(_pushdown_conjunct_ctxs, state);
+    RETURN_IF_ERROR(ExecNode::close(state));
+    // Try every handle even after a failure so no ES scan is left open; report the first error.
+    Status first_error = Status::OK();
+    for (int i = 0; i < _addresses.size(); ++i) {
+        Status close_status = [&]() -> Status {
+            TExtCloseParams params;
+            params.__set_scan_handle(_scan_handles[i]);
+            TExtCloseResult result;
+#ifndef BE_TEST
+            const TNetworkAddress& address = _addresses[i];
+            try {
+                Status status;
+                ExtDataSourceServiceClientCache* client_cache = _env->extdatasource_client_cache();
+                ExtDataSourceServiceConnection client(client_cache, address, 10000, &status);
+                if (!status.ok()) {
+                    LOG(WARNING) << "es create client error: scan_range_idx=" << i
+                                 << ", address=" << address << ", msg=" << status.get_error_msg();
+                    return status;
+                }
+                try {
+                    VLOG(1) << "es close param=" << apache::thrift::ThriftDebugString(params);
+                    client->close(result, params);
+                } catch (apache::thrift::transport::TTransportException& e) {
+                    LOG(WARNING) << "es close retrying, because: " << e.what();
+                    RETURN_IF_ERROR(client.reopen());
+                    client->close(result, params);
+                }
+            } catch (apache::thrift::TException &e) {
+                std::stringstream ss;
+                ss << "es close error: scan_range_idx=" << i << ", msg=" << e.what();
+                LOG(WARNING) << ss.str();
+                return Status::ThriftRpcError(ss.str());
+            }
+            VLOG(1) << "es close result=" << apache::thrift::ThriftDebugString(result);
+            Status status(result.status);
+            if (!status.ok()) {
+                LOG(WARNING) << "es close error: : scan_range_idx=" << i << ", msg=" << status.get_error_msg();
+                return status;
+            }
+#else
+            TStatus status;
+            result.__set_status(status);
+#endif
+            return Status::OK();
+        }();
+        if (!close_status.ok() && first_error.ok()) first_error = close_status;
+    }
+    return first_error;
+}
+
+// Writes "EsScanNode(tupleid=<id>)" indented by two spaces per level, followed by
+// the debug strings of all children one level deeper.
+void EsScanNode::debug_string(int indentation_level, stringstream* out) const {
+    *out << string(indentation_level * 2, ' ') << "EsScanNode(tupleid=" << _tuple_id << ")"
+         << std::endl;
+    for (auto* child : _children) {
+        child->debug_string(indentation_level + 1, out);
+    }
+}
+
+// Appends the ES scan range of every entry in scan_ranges to _scan_ranges and keeps
+// _offsets (the offset sent with the next fetch of each range, initially 0) the same length.
+Status EsScanNode::set_scan_ranges(const vector<TScanRangeParams>& scan_ranges) {
+    for (const TScanRangeParams& range_params : scan_ranges) {
+        DCHECK(range_params.scan_range.__isset.es_scan_range);
+        _scan_ranges.push_back(range_params.scan_range.es_scan_range);
+    }
+    // Size by _scan_ranges, not by the argument, so both stay equally long across repeated calls.
+    _offsets.resize(_scan_ranges.size(), 0);
+    return Status::OK();
+}
+
+// Sends the open request in `params` to the ES node at `address` and stores the reply in
+// `result`. Returns the status carried by the reply, or InternalError on client/thrift failure.
+Status EsScanNode::open_es(TNetworkAddress& address, TExtOpenResult& result, TExtOpenParams& params) {
+    VLOG(1) << "es open param=" << apache::thrift::ThriftDebugString(params);
+#ifndef BE_TEST
+    try {
+        Status connect_status;
+        ExtDataSourceServiceClientCache* cache = _env->extdatasource_client_cache();
+        ExtDataSourceServiceConnection client(cache, address, 10000, &connect_status);
+        if (!connect_status.ok()) {
+            std::stringstream err_msg;
+            err_msg << "es create client error: address=" << address
+                    << ", msg=" << connect_status.get_error_msg();
+            return Status::InternalError(err_msg.str());
+        }
+        try {
+            client->open(result, params);
+        } catch (apache::thrift::transport::TTransportException& transport_err) {
+            // one retry on a freshly reopened client
+            LOG(WARNING) << "es open retrying, because: " << transport_err.what();
+            RETURN_IF_ERROR(client.reopen());
+            client->open(result, params);
+        }
+        VLOG(1) << "es open result=" << apache::thrift::ThriftDebugString(result);
+        return Status(result.status);
+    } catch (apache::thrift::TException& thrift_err) {
+        std::stringstream err_msg;
+        err_msg << "es open error: address=" << address << ", msg=" << thrift_err.what();
+        return Status::InternalError(err_msg.str());
+    }
+#else
+    TStatus ok_status;  // test build: no RPC, canned handle "0"
+    result.__set_status(ok_status);
+    result.__set_scan_handle("0");
+    return Status(ok_status);
+#endif
+}
+
+// Returns false when `conjunct` or any expression below it is an esquery
+// call; conjuncts left for local evaluation must not contain one.
+bool EsScanNode::check_left_conjuncts(Expr* conjunct) {
+    if (is_match_func(conjunct)) {
+        return false;
+    }
+    const int child_count = conjunct->get_num_children();
+    for (int idx = 0; idx < child_count; ++idx) {
+        if (!check_left_conjuncts(conjunct->get_child(idx))) {
+            return false;
+        }
+    }
+    return true;
+}
+
+// True when the slot and the expression are both date types or both string
+// types; get_disjuncts() uses this to accept a type mismatch between them.
+bool EsScanNode::ignore_cast(SlotDescriptor* slot, Expr* expr) {
+    const bool both_dates = slot->type().is_date_type() && expr->type().is_date_type();
+    if (both_dates) {
+        return true;
+    }
+    return slot->type().is_string_type() && expr->type().is_string_type();
+}
+
+// Translates `conjunct` into external predicates appended to `disjuncts`. Handles a
+// binary predicate, an esquery call, an IN predicate, or an OR of those; returns
+// false (open() then does not push the conjunct down) for anything else.
+bool EsScanNode::get_disjuncts(ExprContext* context, Expr* conjunct,
+                               vector<TExtPredicate>& disjuncts) {
+    if (TExprNodeType::BINARY_PRED == conjunct->node_type()) {
+        if (conjunct->children().size() != 2) {
+            VLOG(1) << "get disjuncts fail: number of childs is not 2";
+            return false;
+        }
+        SlotRef* slotRef;
+        TExprOpcode::type op;
+        Expr* expr;
+        if (TExprNodeType::SLOT_REF == conjunct->get_child(0)->node_type()) {
+            expr = conjunct->get_child(1);
+            slotRef = (SlotRef*)(conjunct->get_child(0));
+            op = conjunct->op();
+        } else if (TExprNodeType::SLOT_REF == conjunct->get_child(1)->node_type()) {
+            // NOTE(review): the slot is the right operand here but `op` is kept as-is, so
+            // `5 < col` would be sent as `col < 5` - confirm the planner always puts the slot first.
+            expr = conjunct->get_child(0);
+            slotRef = (SlotRef*)(conjunct->get_child(1));
+            op = conjunct->op();
+        } else {
+            VLOG(1) << "get disjuncts fail: no SLOT_REF child";
+            return false;
+        }
+        SlotDescriptor* slot_desc = get_slot_desc(slotRef);
+        if (slot_desc == nullptr) {
+            VLOG(1) << "get disjuncts fail: slot_desc is null";
+            return false;
+        }
+        TExtLiteral literal;
+        if (!to_ext_literal(context, expr, &literal)) {
+            VLOG(1) << "get disjuncts fail: can't get literal, node_type="
+                    << expr->node_type();
+            return false;
+        }
+        TExtColumnDesc columnDesc;
+        columnDesc.__set_name(slot_desc->col_name());
+        columnDesc.__set_type(slot_desc->type().to_thrift());
+        TExtBinaryPredicate binaryPredicate;
+        binaryPredicate.__set_col(columnDesc);
+        binaryPredicate.__set_op(op);
+        binaryPredicate.__set_value(std::move(literal));
+        TExtPredicate predicate;
+        predicate.__set_node_type(TExprNodeType::BINARY_PRED);
+        predicate.__set_binary_predicate(binaryPredicate);
+        disjuncts.push_back(std::move(predicate));
+        return true;
+    } else if (is_match_func(conjunct)) {
+        // An esquery function call is pushed down to es as an external function whose
+        // only value is the literal in its second argument.
+        if (conjunct->get_num_children() < 2) {
+            VLOG(1) << "get disjuncts fail: esquery needs 2 arguments";
+            return false;
+        }
+        TExtFunction match_function;
+        match_function.__set_func_name(conjunct->fn().name.function_name);
+        vector<TExtLiteral> query_conditions;
+        TExtLiteral literal;
+        if (!to_ext_literal(context, conjunct->get_child(1), &literal)) {
+            VLOG(1) << "get disjuncts fail: can't get literal, node_type="
+                    << conjunct->get_child(1)->node_type();
+            return false;
+        }
+        query_conditions.push_back(std::move(literal));
+        match_function.__set_values(query_conditions);
+        TExtPredicate predicate;
+        predicate.__set_node_type(TExprNodeType::FUNCTION_CALL);
+        predicate.__set_ext_function(match_function);
+        disjuncts.push_back(std::move(predicate));
+        return true;
+    } else if (TExprNodeType::IN_PRED == conjunct->node_type()) {
+        // the op code maybe FILTER_NEW_IN, it means there is function in list
+        // like col_a in (abs(1))
+        if (TExprOpcode::FILTER_IN != conjunct->op() && TExprOpcode::FILTER_NOT_IN != conjunct->op()) {
+            return false;
+        }
+        TExtInPredicate ext_in_predicate;
+        vector<TExtLiteral> in_pred_values;
+        InPredicate* pred = dynamic_cast<InPredicate*>(conjunct);
+        if (pred == nullptr) return false;  // node type says IN_PRED but the object is not an InPredicate
+        ext_in_predicate.__set_is_not_in(pred->is_not_in());
+        if (Expr::type_without_cast(pred->get_child(0)) != TExprNodeType::SLOT_REF) {
+            return false;
+        }
+        SlotRef* slot_ref = (SlotRef*)(conjunct->get_child(0));
+        SlotDescriptor* slot_desc = get_slot_desc(slot_ref);
+        if (slot_desc == nullptr) {
+            return false;
+        }
+        TExtColumnDesc columnDesc;
+        columnDesc.__set_name(slot_desc->col_name());
+        columnDesc.__set_type(slot_desc->type().to_thrift());
+        ext_in_predicate.__set_col(columnDesc);
+        if (pred->get_child(0)->type().type != slot_desc->type().type) {
+            if (!ignore_cast(slot_desc, pred->get_child(0))) {
+                return false;
+            }
+        }
+        HybirdSetBase::IteratorBase* iter = pred->hybird_set()->begin();
+        while (iter->has_next()) {
+            if (nullptr == iter->get_value()) {
+                return false;
+            }
+            TExtLiteral literal;
+            if (!to_ext_literal(slot_desc->type().type, const_cast<void *>(iter->get_value()), &literal)) {
+                VLOG(1) << "get disjuncts fail: can't get literal, node_type="
+                        << slot_desc->type().type;
+                return false;
+            }
+            in_pred_values.push_back(literal);
+            iter->next();
+        }
+        ext_in_predicate.__set_values(in_pred_values);
+        TExtPredicate predicate;
+        predicate.__set_node_type(TExprNodeType::IN_PRED);
+        predicate.__set_in_predicate(ext_in_predicate);
+        disjuncts.push_back(std::move(predicate));
+        return true;
+    } else if (TExprNodeType::COMPOUND_PRED == conjunct->node_type()) {
+        if (TExprOpcode::COMPOUND_OR != conjunct->op()) {
+            VLOG(1) << "get disjuncts fail: op is not COMPOUND_OR";
+            return false;
+        }
+        if (!get_disjuncts(context, conjunct->get_child(0), disjuncts)) {
+            return false;
+        }
+        if (!get_disjuncts(context, conjunct->get_child(1), disjuncts)) {
+            return false;
+        }
+        return true;
+    } else {
+        VLOG(1) << "get disjuncts fail: node type is " << conjunct->node_type()
+                << ", should be BINARY_PRED or COMPOUND_PRED";
+        return false;
+    }
+}
+
+// True when `conjunct` is a function call whose function name is "esquery".
+bool EsScanNode::is_match_func(Expr* conjunct) {
+    if (TExprNodeType::FUNCTION_CALL != conjunct->node_type()) {
+        return false;
+    }
+    return conjunct->fn().name.function_name == "esquery";
+}
+
+// Returns the descriptor in _tuple_desc whose id equals the first slot id
+// reported by `slotRef`, or nullptr when no slot id is reported or none of
+// the tuple's slots matches. Callers treat nullptr as "cannot push down".
+SlotDescriptor* EsScanNode::get_slot_desc(SlotRef* slotRef) {
+    std::vector<SlotId> slot_ids;
+    slotRef->get_slot_ids(&slot_ids);
+    if (slot_ids.empty()) return nullptr;  // avoids reading slot_ids[0] of an empty vector
+    for (SlotDescriptor* slot : _tuple_desc->slots()) {
+        if (slot->id() == slot_ids[0]) return slot;
+    }
+    return nullptr;
+}
+
+// Converts a literal expression to a TExtLiteral by evaluating it through `context`.
+// Returns false when the node type of `expr` is not one of the literal kinds below.
+bool EsScanNode::to_ext_literal(ExprContext* context, Expr* expr, TExtLiteral* literal) {
+    const auto kind = expr->node_type();
+    const bool is_literal =
+            kind == TExprNodeType::BOOL_LITERAL || kind == TExprNodeType::INT_LITERAL ||
+            kind == TExprNodeType::LARGE_INT_LITERAL || kind == TExprNodeType::FLOAT_LITERAL ||
+            kind == TExprNodeType::DECIMAL_LITERAL || kind == TExprNodeType::STRING_LITERAL ||
+            kind == TExprNodeType::DATE_LITERAL;
+    if (!is_literal) {
+        return false;
+    }
+    return to_ext_literal(expr->type().type, context->get_value(expr, NULL), literal);
+}
+
+// Fills `literal` from the raw value at `value`, interpreted according to `slot_type`,
+// and sets the matching literal node type. Returns false when `value` is null or the
+// type is not handled below.
+bool EsScanNode::to_ext_literal(PrimitiveType slot_type, void* value, TExtLiteral* literal) {
+    if (value == nullptr) return false;  // every case below dereferences `value`
+    TExprNodeType::type node_type;
+    switch (slot_type) {
+    case TYPE_BOOLEAN: {
+        node_type = (TExprNodeType::BOOL_LITERAL);
+        TBoolLiteral bool_literal;
+        bool_literal.__set_value(*reinterpret_cast<bool*>(value));
+        literal->__set_bool_literal(bool_literal);
+        break;
+    }
+    case TYPE_TINYINT: {
+        node_type = (TExprNodeType::INT_LITERAL);
+        TIntLiteral int_literal;
+        int_literal.__set_value(*reinterpret_cast<int8_t*>(value));
+        literal->__set_int_literal(int_literal);
+        break;
+    }
+    case TYPE_SMALLINT: {
+        node_type = (TExprNodeType::INT_LITERAL);
+        TIntLiteral int_literal;
+        int_literal.__set_value(*reinterpret_cast<int16_t*>(value));
+        literal->__set_int_literal(int_literal);
+        break;
+    }
+    case TYPE_INT: {
+        node_type = (TExprNodeType::INT_LITERAL);
+        TIntLiteral int_literal;
+        int_literal.__set_value(*reinterpret_cast<int32_t*>(value));
+        literal->__set_int_literal(int_literal);
+        break;
+    }
+    case TYPE_BIGINT: {
+        node_type = (TExprNodeType::INT_LITERAL);
+        TIntLiteral int_literal;
+        int_literal.__set_value(*reinterpret_cast<int64_t*>(value));
+        literal->__set_int_literal(int_literal);
+        break;
+    }
+    case TYPE_LARGEINT: {
+        node_type = (TExprNodeType::LARGE_INT_LITERAL);
+        // Copy the 16 bytes out: `value` is not guaranteed to be aligned for __int128.
+        __int128 large_value;
+        memcpy(&large_value, value, sizeof(large_value));
+        char buf[48];
+        int len = 48;
+        char* v = LargeIntValue::to_string(large_value, buf, &len);
+        TLargeIntLiteral large_int_literal;
+        large_int_literal.__set_value(std::string(v, len));  // presumably len = text length on return - TODO confirm
+        literal->__set_large_int_literal(large_int_literal);
+        break;
+    }
+    case TYPE_FLOAT: {
+        node_type = (TExprNodeType::FLOAT_LITERAL);
+        TFloatLiteral float_literal;
+        float_literal.__set_value(*reinterpret_cast<float*>(value));
+        literal->__set_float_literal(float_literal);
+        break;
+    }
+    case TYPE_DOUBLE: {
+        node_type = (TExprNodeType::FLOAT_LITERAL);
+        TFloatLiteral float_literal;
+        float_literal.__set_value(*reinterpret_cast<double*>(value));
+        literal->__set_float_literal(float_literal);
+        break;
+    }
+    case TYPE_DECIMAL: {
+        node_type = (TExprNodeType::DECIMAL_LITERAL);
+        TDecimalLiteral decimal_literal;
+        decimal_literal.__set_value(reinterpret_cast<DecimalValue*>(value)->to_string());
+        literal->__set_decimal_literal(decimal_literal);
+        break;
+    }
+    case TYPE_DATE:
+    case TYPE_DATETIME: {
+        node_type = (TExprNodeType::DATE_LITERAL);
+        const DateTimeValue date_value = *reinterpret_cast<DateTimeValue*>(value);
+        char str[MAX_DTVALUE_STR_LEN];
+        date_value.to_string(str);
+        TDateLiteral date_literal;
+        date_literal.__set_value(str);
+        literal->__set_date_literal(date_literal);
+        break;
+    }
+    case TYPE_CHAR:
+    case TYPE_VARCHAR: {
+        node_type = (TExprNodeType::STRING_LITERAL);
+        TStringLiteral string_literal;
+        string_literal.__set_value((reinterpret_cast<StringValue*>(value))->debug_string());
+        literal->__set_string_literal(string_literal);
+        break;
+    }
+    default: {
+        DCHECK(false) << "Invalid type.";
+        return false;
+    }
+    }
+    literal->__set_node_type(node_type);
+    return true;
+}
+
+// Asks the ES node of the current scan range for its next batch, starting at the
+// stored offset, and validates the reply (status ok, rows and num_rows present).
+Status EsScanNode::get_next_from_es(TExtGetNextResult& result) {
+    TExtGetNextParams params;
+    params.__set_scan_handle(_scan_handles[_scan_range_idx]);
+    params.__set_offset(_offsets[_scan_range_idx]);
+    // getNext is sent to the address stored for this range by open()
+    const TNetworkAddress &address = _addresses[_scan_range_idx];
+#ifndef BE_TEST
+    try {
+        Status create_client_status;
+        ExtDataSourceServiceClientCache *client_cache = _env->extdatasource_client_cache();
+        ExtDataSourceServiceConnection client(client_cache, address, 10000, &create_client_status);
+        if (!create_client_status.ok()) {
+            LOG(WARNING) << "es create client error: scan_range_idx=" << _scan_range_idx
+                         << ", address=" << address
+                         << ", msg=" << create_client_status.get_error_msg();
+            return create_client_status;
+        }
+        try {
+            VLOG(1) << "es get_next param=" << apache::thrift::ThriftDebugString(params);
+            client->getNext(result, params);
+        } catch (apache::thrift::transport::TTransportException& e) {
+            std::stringstream ss;
+            ss << "es get_next error: scan_range_idx=" << _scan_range_idx
+               << ", msg=" << e.what();
+            LOG(WARNING) << ss.str();
+            RETURN_IF_ERROR(client.reopen()); // reopen the client; the failed call itself is not retried
+            return Status::ThriftRpcError(ss.str());
+        }
+    } catch (apache::thrift::TException &e) {
+        std::stringstream ss;
+        ss << "es get_next error: scan_range_idx=" << _scan_range_idx
+           << ", msg=" << e.what();
+        LOG(WARNING) << ss.str();
+        return Status::ThriftRpcError(ss.str());
+    }
+#else
+    TStatus status; // test build: no RPC, a canned reply is returned from this branch
+    result.__set_status(status);
+    result.__set_eos(true);
+    TExtColumnData col_data;
+    std::vector<bool> is_null;
+    is_null.push_back(false);
+    col_data.__set_is_null(is_null);
+    std::vector<int32_t> int_vals;
+    int_vals.push_back(1);
+    int_vals.push_back(2);
+    col_data.__set_int_vals(int_vals);
+    std::vector<TExtColumnData> cols;
+    cols.push_back(col_data);
+    TExtRowBatch rows;
+    rows.__set_cols(cols);
+    rows.__set_num_rows(2);
+    result.__set_rows(rows);
+    return Status(status);
+#endif
+
+    // check result: the status carried in the reply, then the presence of rows and num_rows
+    VLOG(1) << "es get_next result=" << apache::thrift::ThriftDebugString(result);
+    Status get_next_status(result.status);
+    if (!get_next_status.ok()) {
+        LOG(WARNING) << "es get_next error: scan_range_idx=" << _scan_range_idx
+                     << ", address=" << address
+                     << ", msg=" << get_next_status.get_error_msg();
+        return get_next_status;
+    }
+    if (!result.__isset.rows || !result.rows.__isset.num_rows) {
+        std::stringstream ss;
+        ss << "es get_next error: scan_range_idx=" << _scan_range_idx
+           << ", msg=rows or num_rows not in result";
+        LOG(WARNING) << ss.str();
+        return Status::InternalError(ss.str());
+    }
+
+    return Status::OK();
+}
+
+Status EsScanNode::materialize_row(MemPool* tuple_pool, Tuple* tuple,
+                                   const vector<TExtColumnData>& cols, int row_idx,
+                                   vector<int>& cols_next_val_idx) {
+  tuple->init(_tuple_desc->byte_size());
+
+  for (int i = 0; i < _tuple_desc->slots().size(); ++i) {
+    const SlotDescriptor* slot_desc = _tuple_desc->slots()[i];
+
+    if (!slot_desc->is_materialized()) {
+        continue;
+    }
+
+    void* slot = tuple->get_slot(slot_desc->tuple_offset());
+    const TExtColumnData& col = cols[i];
+
+    if (col.is_null[row_idx]) {
+      tuple->set_null(slot_desc->null_indicator_offset());
+      continue;
+    } else {
+      tuple->set_not_null(slot_desc->null_indicator_offset());
+    }
+
+    int val_idx = cols_next_val_idx[i]++;
+    switch (slot_desc->type().type) {
+      case TYPE_CHAR:
+      case TYPE_VARCHAR: {
+          if (val_idx >= col.string_vals.size()) {
+            return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "STRING"));
+          }
+          const string& val = col.string_vals[val_idx];
+          size_t val_size = val.size();
+          char* buffer = reinterpret_cast<char*>(tuple_pool->try_allocate_unaligned(val_size));
+          if (UNLIKELY(buffer == NULL)) {
+            string details = strings::Substitute(ERROR_MEM_LIMIT_EXCEEDED, "MaterializeNextRow",
+                val_size, "string slot");
+            return tuple_pool->mem_tracker()->MemLimitExceeded(NULL, details, val_size);
+          }
+          memcpy(buffer, val.data(), val_size);
+          reinterpret_cast<StringValue*>(slot)->ptr = buffer;
+          reinterpret_cast<StringValue*>(slot)->len = val_size;
+          break;
+        }
+      case TYPE_TINYINT:
+        if (val_idx >= col.byte_vals.size()) {
+          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "TINYINT"));
+        }
+        *reinterpret_cast<int8_t*>(slot) = col.byte_vals[val_idx];
+        break;
+      case TYPE_SMALLINT:
+        if (val_idx >= col.short_vals.size()) {
+          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "SMALLINT"));
+        }
+        *reinterpret_cast<int16_t*>(slot) = col.short_vals[val_idx];
+        break;
+      case TYPE_INT:
+        if (val_idx >= col.int_vals.size()) {
+          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "INT"));
+        }
+        *reinterpret_cast<int32_t*>(slot) = col.int_vals[val_idx];
+        break;
+      case TYPE_BIGINT:
+        if (val_idx >= col.long_vals.size()) {
+          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "BIGINT"));
+        }
+        *reinterpret_cast<int64_t*>(slot) = col.long_vals[val_idx];
+        break;
+      case TYPE_LARGEINT:
+        if (val_idx >= col.long_vals.size()) {
+          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "LARGEINT"));
+        }
+        *reinterpret_cast<int128_t*>(slot) = col.long_vals[val_idx];
+        break;
+      case TYPE_DOUBLE:
+        if (val_idx >= col.double_vals.size()) {
+          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "DOUBLE"));
+        }
+        *reinterpret_cast<double*>(slot) = col.double_vals[val_idx];
+        break;
+      case TYPE_FLOAT:
+        if (val_idx >= col.double_vals.size()) {
+          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "FLOAT"));
+        }
+        *reinterpret_cast<float*>(slot) = col.double_vals[val_idx];
+        break;
+      case TYPE_BOOLEAN:
+        if (val_idx >= col.bool_vals.size()) {
+          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "BOOLEAN"));
+        }
+        *reinterpret_cast<int8_t*>(slot) = col.bool_vals[val_idx];
+        break;
+      case TYPE_DATE:
+        if (val_idx >= col.long_vals.size() ||
+            !reinterpret_cast<DateTimeValue*>(slot)->from_unixtime(col.long_vals[val_idx], "+08:00")) {
+          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATE"));
+        }
+        reinterpret_cast<DateTimeValue*>(slot)->cast_to_date();
+        break;
+      case TYPE_DATETIME: {
+        if (val_idx >= col.long_vals.size() ||
+            !reinterpret_cast<DateTimeValue*>(slot)->from_unixtime(col.long_vals[val_idx], "+08:00")) {
+          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "TYPE_DATETIME"));
+        }
+        reinterpret_cast<DateTimeValue*>(slot)->set_type(TIME_DATETIME);
+        break;
+      }
+      case TYPE_DECIMAL: {
+        if (val_idx >= col.binary_vals.size()) {
+          return Status::InternalError(strings::Substitute(ERROR_INVALID_COL_DATA, "DECIMAL"));
+        }
+        const string& val = col.binary_vals[val_idx];
+        *reinterpret_cast<DecimalValue*>(slot) = *reinterpret_cast<const DecimalValue*>(&val);
+        break;
+      }
+      default:
+        DCHECK(false);
+    }
+  }
+  return Status::OK();
+}
+
+}
diff --git a/be/src/exec/es_scan_node.h b/be/src/exec/es_scan_node.h
index 810917d9ff..de871a8731 100644
--- a/be/src/exec/es_scan_node.h
+++ b/be/src/exec/es_scan_node.h
@@ -1,92 +1,92 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef  BDG_PALO_BE_SRC_QUERY_EXEC_ES_SCAN_NODE_H
-#define  BDG_PALO_BE_SRC_QUERY_EXEC_ES_SCAN_NODE_H
-
-#include <memory>
-#include <vector>
-
-#include "runtime/descriptors.h"
-#include "runtime/tuple.h"
-#include "exec/scan_node.h"
-#include "exprs/slot_ref.h"
-#include "runtime/exec_env.h"
-#include "gen_cpp/TExtDataSourceService.h"
-#include "gen_cpp/PaloExternalDataSourceService_types.h"
-
-namespace doris {
-
-class TupleDescriptor;
-class RuntimeState;
-class Status;
-
-class EsScanNode : public ScanNode {
-public:
-    EsScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
-    ~EsScanNode();
-
-    virtual Status prepare(RuntimeState* state) override;
-    virtual Status open(RuntimeState* state) override;
-    virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) override;
-    virtual Status close(RuntimeState* state) override;
-    virtual Status set_scan_ranges(const std::vector<TScanRangeParams>& scan_ranges) override;
-
-protected:
-    // Write debug string of this into out.
-    virtual void debug_string(int indentation_level, std::stringstream* out) const;
-
-private:
-    Status open_es(TNetworkAddress& address, TExtOpenResult& result, TExtOpenParams& params);
-    Status materialize_row(MemPool* tuple_pool, Tuple* tuple,
-                           const vector<TExtColumnData>& cols, int next_row_idx,
-                           vector<int>& cols_next_val_idx);
-    Status get_next_from_es(TExtGetNextResult& result);
-
-    bool get_disjuncts(ExprContext* context, Expr* conjunct, vector<TExtPredicate>& disjuncts);
-    bool to_ext_literal(ExprContext* context, Expr* expr, TExtLiteral* literal);
-    bool to_ext_literal(PrimitiveType node_type, void* value, TExtLiteral* literal);
-    bool ignore_cast(SlotDescriptor* slot, Expr* expr);
-
-    bool is_match_func(Expr* conjunct);
-
-    SlotDescriptor* get_slot_desc(SlotRef* slotRef);
-
-    // check if open result meets condition
-    // 1. check if left conjuncts contain "match" function, since match function could only be executed on es
-    bool check_left_conjuncts(Expr* conjunct);
-
-private:
-    TupleId _tuple_id;
-    std::map<std::string, std::string> _properties;
-    const TupleDescriptor* _tuple_desc;
-    ExecEnv* _env;
-    std::vector<TEsScanRange> _scan_ranges;
-
-    // scan range's iterator, used in get_next()
-    int _scan_range_idx;
-
-    // store every scan range's netaddress/handle/offset
-    std::vector<TNetworkAddress> _addresses;
-    std::vector<std::string> _scan_handles;
-    std::vector<int> _offsets;
-    std::vector<ExprContext*> _pushdown_conjunct_ctxs;
-};
-
-}
-
-#endif
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef  BDG_PALO_BE_SRC_QUERY_EXEC_ES_SCAN_NODE_H
+#define  BDG_PALO_BE_SRC_QUERY_EXEC_ES_SCAN_NODE_H
+
+#include <memory>
+#include <vector>
+
+#include "runtime/descriptors.h"
+#include "runtime/tuple.h"
+#include "exec/scan_node.h"
+#include "exprs/slot_ref.h"
+#include "runtime/exec_env.h"
+#include "gen_cpp/TExtDataSourceService.h"
+#include "gen_cpp/PaloExternalDataSourceService_types.h"
+
+namespace doris {
+
+class TupleDescriptor;
+class RuntimeState;
+class Status;
+
+class EsScanNode : public ScanNode {
+public:
+    EsScanNode(ObjectPool* pool, const TPlanNode& tnode, const DescriptorTbl& descs);
+    ~EsScanNode();
+
+    virtual Status prepare(RuntimeState* state) override;
+    virtual Status open(RuntimeState* state) override;
+    virtual Status get_next(RuntimeState* state, RowBatch* row_batch, bool* eos) override;
+    virtual Status close(RuntimeState* state) override;
+    virtual Status set_scan_ranges(const std::vector<TScanRangeParams>& scan_ranges) override;
+
+protected:
+    // Write debug string of this into out.
+    virtual void debug_string(int indentation_level, std::stringstream* out) const;
+
+private:
+    Status open_es(TNetworkAddress& address, TExtOpenResult& result, TExtOpenParams& params);
+    Status materialize_row(MemPool* tuple_pool, Tuple* tuple,
+                           const vector<TExtColumnData>& cols, int next_row_idx,
+                           vector<int>& cols_next_val_idx);
+    Status get_next_from_es(TExtGetNextResult& result);
+
+    bool get_disjuncts(ExprContext* context, Expr* conjunct, vector<TExtPredicate>& disjuncts);
+    bool to_ext_literal(ExprContext* context, Expr* expr, TExtLiteral* literal);
+    bool to_ext_literal(PrimitiveType node_type, void* value, TExtLiteral* literal);
+    bool ignore_cast(SlotDescriptor* slot, Expr* expr);
+
+    bool is_match_func(Expr* conjunct);
+
+    SlotDescriptor* get_slot_desc(SlotRef* slotRef);
+
+    // check if open result meets condition
+    // 1. check if left conjuncts contain "match" function, since match function could only be executed on es
+    bool check_left_conjuncts(Expr* conjunct);
+
+private:
+    TupleId _tuple_id;
+    std::map<std::string, std::string> _properties;
+    const TupleDescriptor* _tuple_desc;
+    ExecEnv* _env;
+    std::vector<TEsScanRange> _scan_ranges;
+
+    // scan range's iterator, used in get_next()
+    int _scan_range_idx;
+
+    // store every scan range's netaddress/handle/offset
+    std::vector<TNetworkAddress> _addresses;
+    std::vector<std::string> _scan_handles;
+    std::vector<int> _offsets;
+    std::vector<ExprContext*> _pushdown_conjunct_ctxs;
+};
+
+}
+
+#endif
diff --git a/be/src/gutil/cpu.cc b/be/src/gutil/cpu.cc
index f43664aee7..c02f5e5949 100644
--- a/be/src/gutil/cpu.cc
+++ b/be/src/gutil/cpu.cc
@@ -1,286 +1,286 @@
-// Copyright (c) 2012 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#include "gutil/cpu.h"
-
-#include <cstring>
-#include <utility>
-
-#include "gutil/integral_types.h"
-
-#if defined(__x86_64__)
-#if defined(_MSC_VER)
-#include <intrin.h>
-#include <immintrin.h>  // For _xgetbv()
-#endif
-#endif
-
-namespace base {
-
-CPU::CPU()
-  : signature_(0),
-    type_(0),
-    family_(0),
-    model_(0),
-    stepping_(0),
-    ext_model_(0),
-    ext_family_(0),
-    has_mmx_(false),
-    has_sse_(false),
-    has_sse2_(false),
-    has_sse3_(false),
-    has_ssse3_(false),
-    has_sse41_(false),
-    has_sse42_(false),
-    has_avx_(false),
-    has_avx2_(false),
-    has_aesni_(false),
-    has_non_stop_time_stamp_counter_(false),
-    has_broken_neon_(false),
-    cpu_vendor_("unknown") {
-  Initialize();
-}
-
-namespace {
-
-#if defined(__x86_64__)
-#ifndef _MSC_VER
-
-#if defined(__pic__) && defined(__i386__)
-
-void __cpuid(int cpu_info[4], int info_type) {
-  __asm__ volatile (
-    "mov %%ebx, %%edi\n"
-    "cpuid\n"
-    "xchg %%edi, %%ebx\n"
-    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
-    : "a"(info_type)
-  );
-}
-
-#else
-
-void __cpuid(int cpu_info[4], int info_type) {
-  __asm__ volatile (
-    "cpuid\n"
-    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
-    : "a"(info_type), "c"(0)
-  );
-}
-
-#endif
-
-// _xgetbv returns the value of an Intel Extended Control Register (XCR).
-// Currently only XCR0 is defined by Intel so |xcr| should always be zero.
-uint64 _xgetbv(uint32 xcr) {
-  uint32 eax, edx;
-
-  __asm__ volatile (
-    "xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
-  return (static_cast<uint64>(edx) << 32) | eax;
-}
-
-#endif  // !_MSC_VER
-#endif  // __x86_64__
-
-#if defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(OS_LINUX))
-class LazyCpuInfoValue {
- public:
-  LazyCpuInfoValue() : has_broken_neon_(false) {
-    // This function finds the value from /proc/cpuinfo under the key "model
-    // name" or "Processor". "model name" is used in Linux 3.8 and later (3.7
-    // and later for arm64) and is shown once per CPU. "Processor" is used in
-    // earler versions and is shown only once at the top of /proc/cpuinfo
-    // regardless of the number CPUs.
-    const char kModelNamePrefix[] = "model name\t: ";
-    const char kProcessorPrefix[] = "Processor\t: ";
-
-    // This function also calculates whether we believe that this CPU has a
-    // broken NEON unit based on these fields from cpuinfo:
-    unsigned implementer = 0, architecture = 0, variant = 0, part = 0,
-             revision = 0;
-    const struct {
-      const char key[17];
-      unsigned int* result;
-    } kUnsignedValues[] = {
-      {"CPU implementer", &implementer},
-      {"CPU architecture", &architecture},
-      {"CPU variant", &variant},
-      {"CPU part", &part},
-      {"CPU revision", &revision},
-    };
-
-    std::string contents;
-    ReadFileToString(FilePath("/proc/cpuinfo"), &contents);
-    DCHECK(!contents.empty());
-    if (contents.empty()) {
-      return;
-    }
-
-    std::istringstream iss(contents);
-    std::string line;
-    while (std::getline(iss, line)) {
-      if (brand_.empty() &&
-          (line.compare(0, strlen(kModelNamePrefix), kModelNamePrefix) == 0 ||
-           line.compare(0, strlen(kProcessorPrefix), kProcessorPrefix) == 0)) {
-        brand_.assign(line.substr(strlen(kModelNamePrefix)));
-      }
-
-      for (size_t i = 0; i < arraysize(kUnsignedValues); i++) {
-        const char *key = kUnsignedValues[i].key;
-        const size_t len = strlen(key);
-
-        if (line.compare(0, len, key) == 0 &&
-            line.size() >= len + 1 &&
-            (line[len] == '\t' || line[len] == ' ' || line[len] == ':')) {
-          size_t colon_pos = line.find(':', len);
-          if (colon_pos == std::string::npos) {
-            continue;
-          }
-
-          const StringPiece line_sp(line);
-          StringPiece value_sp = line_sp.substr(colon_pos + 1);
-          while (!value_sp.empty() &&
-                 (value_sp[0] == ' ' || value_sp[0] == '\t')) {
-            value_sp = value_sp.substr(1);
-          }
-
-          // The string may have leading "0x" or not, so we use strtoul to
-          // handle that.
-          char* endptr;
-          std::string value(value_sp.as_string());
-          unsigned long int result = strtoul(value.c_str(), &endptr, 0);
-          if (*endptr == 0 && result <= UINT_MAX) {
-            *kUnsignedValues[i].result = result;
-          }
-        }
-      }
-    }
-
-    has_broken_neon_ =
-      implementer == 0x51 &&
-      architecture == 7 &&
-      variant == 1 &&
-      part == 0x4d &&
-      revision == 0;
-  }
-
-  const std::string& brand() const { return brand_; }
-  bool has_broken_neon() const { return has_broken_neon_; }
-
- private:
-  std::string brand_;
-  bool has_broken_neon_;
-  DISALLOW_COPY_AND_ASSIGN(LazyCpuInfoValue);
-};
-
-base::LazyInstance<LazyCpuInfoValue>::Leaky g_lazy_cpuinfo =
-    LAZY_INSTANCE_INITIALIZER;
-
-#endif  // defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) ||
-        // defined(OS_LINUX))
-
-}  // anonymous namespace
-
-void CPU::Initialize() {
-#if defined(__x86_64__)
-  int cpu_info[4] = {-1};
-  char cpu_string[48];
-
-  // __cpuid with an InfoType argument of 0 returns the number of
-  // valid Ids in CPUInfo[0] and the CPU identification string in
-  // the other three array elements. The CPU identification string is
-  // not in linear order. The code below arranges the information
-  // in a human readable form. The human readable order is CPUInfo[1] |
-  // CPUInfo[3] | CPUInfo[2]. CPUInfo[2] and CPUInfo[3] are swapped
-  // before using memcpy to copy these three array elements to cpu_string.
-  __cpuid(cpu_info, 0);
-  int num_ids = cpu_info[0];
-  std::swap(cpu_info[2], cpu_info[3]);
-  memcpy(cpu_string, &cpu_info[1], 3 * sizeof(cpu_info[1]));
-  cpu_vendor_.assign(cpu_string, 3 * sizeof(cpu_info[1]));
-
-  // Interpret CPU feature information.
-  if (num_ids > 0) {
-    int cpu_info7[4] = {0};
-    __cpuid(cpu_info, 1);
-    if (num_ids >= 7) {
-      __cpuid(cpu_info7, 7);
-    }
-    signature_ = cpu_info[0];
-    stepping_ = cpu_info[0] & 0xf;
-    model_ = ((cpu_info[0] >> 4) & 0xf) + ((cpu_info[0] >> 12) & 0xf0);
-    family_ = (cpu_info[0] >> 8) & 0xf;
-    type_ = (cpu_info[0] >> 12) & 0x3;
-    ext_model_ = (cpu_info[0] >> 16) & 0xf;
-    ext_family_ = (cpu_info[0] >> 20) & 0xff;
-    has_mmx_ =   (cpu_info[3] & 0x00800000) != 0;
-    has_sse_ =   (cpu_info[3] & 0x02000000) != 0;
-    has_sse2_ =  (cpu_info[3] & 0x04000000) != 0;
-    has_sse3_ =  (cpu_info[2] & 0x00000001) != 0;
-    has_ssse3_ = (cpu_info[2] & 0x00000200) != 0;
-    has_sse41_ = (cpu_info[2] & 0x00080000) != 0;
-    has_sse42_ = (cpu_info[2] & 0x00100000) != 0;
-    // AVX instructions will generate an illegal instruction exception unless
-    //   a) they are supported by the CPU,
-    //   b) XSAVE is supported by the CPU and
-    //   c) XSAVE is enabled by the kernel.
-    // See http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
-    //
-    // In addition, we have observed some crashes with the xgetbv instruction
-    // even after following Intel's example code. (See crbug.com/375968.)
-    // Because of that, we also test the XSAVE bit because its description in
-    // the CPUID documentation suggests that it signals xgetbv support.
-    has_avx_ =
-        (cpu_info[2] & 0x10000000) != 0 &&
-        (cpu_info[2] & 0x04000000) != 0 /* XSAVE */ &&
-        (cpu_info[2] & 0x08000000) != 0 /* OSXSAVE */ &&
-        (_xgetbv(0) & 6) == 6 /* XSAVE enabled by kernel */;
-    has_aesni_ = (cpu_info[2] & 0x02000000) != 0;
-    has_avx2_ = has_avx_ && (cpu_info7[1] & 0x00000020) != 0;
-  }
-
-  // Get the brand string of the cpu.
-  __cpuid(cpu_info, 0x80000000);
-  const int parameter_end = 0x80000004;
-  int max_parameter = cpu_info[0];
-
-  if (cpu_info[0] >= parameter_end) {
-    char* cpu_string_ptr = cpu_string;
-
-    for (int parameter = 0x80000002; parameter <= parameter_end &&
-         cpu_string_ptr < &cpu_string[sizeof(cpu_string)]; parameter++) {
-      __cpuid(cpu_info, parameter);
-      memcpy(cpu_string_ptr, cpu_info, sizeof(cpu_info));
-      cpu_string_ptr += sizeof(cpu_info);
-    }
-    cpu_brand_.assign(cpu_string, cpu_string_ptr - cpu_string);
-  }
-
-  const int parameter_containing_non_stop_time_stamp_counter = 0x80000007;
-  if (max_parameter >= parameter_containing_non_stop_time_stamp_counter) {
-    __cpuid(cpu_info, parameter_containing_non_stop_time_stamp_counter);
-    has_non_stop_time_stamp_counter_ = (cpu_info[3] & (1 << 8)) != 0;
-  }
-#elif defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(OS_LINUX))
-  cpu_brand_.assign(g_lazy_cpuinfo.Get().brand());
-  has_broken_neon_ = g_lazy_cpuinfo.Get().has_broken_neon();
-#else
-  #error unknown architecture
-#endif
-}
-
-CPU::IntelMicroArchitecture CPU::GetIntelMicroArchitecture() const {
-  if (has_avx2()) return AVX2;
-  if (has_avx()) return AVX;
-  if (has_sse42()) return SSE42;
-  if (has_sse41()) return SSE41;
-  if (has_ssse3()) return SSSE3;
-  if (has_sse3()) return SSE3;
-  if (has_sse2()) return SSE2;
-  if (has_sse()) return SSE;
-  return PENTIUM;
-}
-
-}  // namespace base
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#include "gutil/cpu.h"
+
+#include <cstring>
+#include <utility>
+
+#include "gutil/integral_types.h"
+
+#if defined(__x86_64__)
+#if defined(_MSC_VER)
+#include <intrin.h>
+#include <immintrin.h>  // For _xgetbv()
+#endif
+#endif
+
+namespace base {
+
+CPU::CPU()
+  : signature_(0),
+    type_(0),
+    family_(0),
+    model_(0),
+    stepping_(0),
+    ext_model_(0),
+    ext_family_(0),
+    has_mmx_(false),
+    has_sse_(false),
+    has_sse2_(false),
+    has_sse3_(false),
+    has_ssse3_(false),
+    has_sse41_(false),
+    has_sse42_(false),
+    has_avx_(false),
+    has_avx2_(false),
+    has_aesni_(false),
+    has_non_stop_time_stamp_counter_(false),
+    has_broken_neon_(false),
+    cpu_vendor_("unknown") {
+  Initialize();
+}
+
+namespace {
+
+#if defined(__x86_64__)
+#ifndef _MSC_VER
+
+#if defined(__pic__) && defined(__i386__)
+
+void __cpuid(int cpu_info[4], int info_type) {
+  __asm__ volatile (
+    "mov %%ebx, %%edi\n"
+    "cpuid\n"
+    "xchg %%edi, %%ebx\n"
+    : "=a"(cpu_info[0]), "=D"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
+    : "a"(info_type)
+  );
+}
+
+#else
+
+void __cpuid(int cpu_info[4], int info_type) {
+  __asm__ volatile (
+    "cpuid\n"
+    : "=a"(cpu_info[0]), "=b"(cpu_info[1]), "=c"(cpu_info[2]), "=d"(cpu_info[3])
+    : "a"(info_type), "c"(0)
+  );
+}
+
+#endif
+
+// _xgetbv returns the value of an Intel Extended Control Register (XCR).
+// Currently only XCR0 is defined by Intel so |xcr| should always be zero.
+uint64 _xgetbv(uint32 xcr) {
+  uint32 eax, edx;
+
+  __asm__ volatile (
+    "xgetbv" : "=a"(eax), "=d"(edx) : "c"(xcr));
+  return (static_cast<uint64>(edx) << 32) | eax;
+}
+
+#endif  // !_MSC_VER
+#endif  // __x86_64__
+
+#if defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(OS_LINUX))
+class LazyCpuInfoValue {
+ public:
+  LazyCpuInfoValue() : has_broken_neon_(false) {
+    // This function finds the value from /proc/cpuinfo under the key "model
+    // name" or "Processor". "model name" is used in Linux 3.8 and later (3.7
+    // and later for arm64) and is shown once per CPU. "Processor" is used in
+    // earler versions and is shown only once at the top of /proc/cpuinfo
+    // regardless of the number CPUs.
+    const char kModelNamePrefix[] = "model name\t: ";
+    const char kProcessorPrefix[] = "Processor\t: ";
+
+    // This function also calculates whether we believe that this CPU has a
+    // broken NEON unit based on these fields from cpuinfo:
+    unsigned implementer = 0, architecture = 0, variant = 0, part = 0,
+             revision = 0;
+    const struct {
+      const char key[17];
+      unsigned int* result;
+    } kUnsignedValues[] = {
+      {"CPU implementer", &implementer},
+      {"CPU architecture", &architecture},
+      {"CPU variant", &variant},
+      {"CPU part", &part},
+      {"CPU revision", &revision},
+    };
+
+    std::string contents;
+    ReadFileToString(FilePath("/proc/cpuinfo"), &contents);
+    DCHECK(!contents.empty());
+    if (contents.empty()) {
+      return;
+    }
+
+    std::istringstream iss(contents);
+    std::string line;
+    while (std::getline(iss, line)) {
+      if (brand_.empty() &&
+          (line.compare(0, strlen(kModelNamePrefix), kModelNamePrefix) == 0 ||
+           line.compare(0, strlen(kProcessorPrefix), kProcessorPrefix) == 0)) {
+        brand_.assign(line.substr(strlen(kModelNamePrefix)));
+      }
+
+      for (size_t i = 0; i < arraysize(kUnsignedValues); i++) {
+        const char *key = kUnsignedValues[i].key;
+        const size_t len = strlen(key);
+
+        if (line.compare(0, len, key) == 0 &&
+            line.size() >= len + 1 &&
+            (line[len] == '\t' || line[len] == ' ' || line[len] == ':')) {
+          size_t colon_pos = line.find(':', len);
+          if (colon_pos == std::string::npos) {
+            continue;
+          }
+
+          const StringPiece line_sp(line);
+          StringPiece value_sp = line_sp.substr(colon_pos + 1);
+          while (!value_sp.empty() &&
+                 (value_sp[0] == ' ' || value_sp[0] == '\t')) {
+            value_sp = value_sp.substr(1);
+          }
+
+          // The string may have leading "0x" or not, so we use strtoul to
+          // handle that.
+          char* endptr;
+          std::string value(value_sp.as_string());
+          unsigned long int result = strtoul(value.c_str(), &endptr, 0);
+          if (*endptr == 0 && result <= UINT_MAX) {
+            *kUnsignedValues[i].result = result;
+          }
+        }
+      }
+    }
+
+    has_broken_neon_ =
+      implementer == 0x51 &&
+      architecture == 7 &&
+      variant == 1 &&
+      part == 0x4d &&
+      revision == 0;
+  }
+
+  const std::string& brand() const { return brand_; }
+  bool has_broken_neon() const { return has_broken_neon_; }
+
+ private:
+  std::string brand_;
+  bool has_broken_neon_;
+  DISALLOW_COPY_AND_ASSIGN(LazyCpuInfoValue);
+};
+
+base::LazyInstance<LazyCpuInfoValue>::Leaky g_lazy_cpuinfo =
+    LAZY_INSTANCE_INITIALIZER;
+
+#endif  // defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) ||
+        // defined(OS_LINUX))
+
+}  // anonymous namespace
+
+void CPU::Initialize() {
+#if defined(__x86_64__)
+  int cpu_info[4] = {-1};
+  char cpu_string[48];
+
+  // __cpuid with an InfoType argument of 0 returns the number of
+  // valid Ids in CPUInfo[0] and the CPU identification string in
+  // the other three array elements. The CPU identification string is
+  // not in linear order. The code below arranges the information
+  // in a human readable form. The human readable order is CPUInfo[1] |
+  // CPUInfo[3] | CPUInfo[2]. CPUInfo[2] and CPUInfo[3] are swapped
+  // before using memcpy to copy these three array elements to cpu_string.
+  __cpuid(cpu_info, 0);
+  int num_ids = cpu_info[0];
+  std::swap(cpu_info[2], cpu_info[3]);
+  memcpy(cpu_string, &cpu_info[1], 3 * sizeof(cpu_info[1]));
+  cpu_vendor_.assign(cpu_string, 3 * sizeof(cpu_info[1]));
+
+  // Interpret CPU feature information.
+  if (num_ids > 0) {
+    int cpu_info7[4] = {0};
+    __cpuid(cpu_info, 1);
+    if (num_ids >= 7) {
+      __cpuid(cpu_info7, 7);
+    }
+    signature_ = cpu_info[0];
+    stepping_ = cpu_info[0] & 0xf;
+    model_ = ((cpu_info[0] >> 4) & 0xf) + ((cpu_info[0] >> 12) & 0xf0);
+    family_ = (cpu_info[0] >> 8) & 0xf;
+    type_ = (cpu_info[0] >> 12) & 0x3;
+    ext_model_ = (cpu_info[0] >> 16) & 0xf;
+    ext_family_ = (cpu_info[0] >> 20) & 0xff;
+    has_mmx_ =   (cpu_info[3] & 0x00800000) != 0;
+    has_sse_ =   (cpu_info[3] & 0x02000000) != 0;
+    has_sse2_ =  (cpu_info[3] & 0x04000000) != 0;
+    has_sse3_ =  (cpu_info[2] & 0x00000001) != 0;
+    has_ssse3_ = (cpu_info[2] & 0x00000200) != 0;
+    has_sse41_ = (cpu_info[2] & 0x00080000) != 0;
+    has_sse42_ = (cpu_info[2] & 0x00100000) != 0;
+    // AVX instructions will generate an illegal instruction exception unless
+    //   a) they are supported by the CPU,
+    //   b) XSAVE is supported by the CPU and
+    //   c) XSAVE is enabled by the kernel.
+    // See http://software.intel.com/en-us/blogs/2011/04/14/is-avx-enabled
+    //
+    // In addition, we have observed some crashes with the xgetbv instruction
+    // even after following Intel's example code. (See crbug.com/375968.)
+    // Because of that, we also test the XSAVE bit because its description in
+    // the CPUID documentation suggests that it signals xgetbv support.
+    has_avx_ =
+        (cpu_info[2] & 0x10000000) != 0 &&
+        (cpu_info[2] & 0x04000000) != 0 /* XSAVE */ &&
+        (cpu_info[2] & 0x08000000) != 0 /* OSXSAVE */ &&
+        (_xgetbv(0) & 6) == 6 /* XSAVE enabled by kernel */;
+    has_aesni_ = (cpu_info[2] & 0x02000000) != 0;
+    has_avx2_ = has_avx_ && (cpu_info7[1] & 0x00000020) != 0;
+  }
+
+  // Get the brand string of the cpu.
+  __cpuid(cpu_info, 0x80000000);
+  const int parameter_end = 0x80000004;
+  int max_parameter = cpu_info[0];
+
+  if (cpu_info[0] >= parameter_end) {
+    char* cpu_string_ptr = cpu_string;
+
+    for (int parameter = 0x80000002; parameter <= parameter_end &&
+         cpu_string_ptr < &cpu_string[sizeof(cpu_string)]; parameter++) {
+      __cpuid(cpu_info, parameter);
+      memcpy(cpu_string_ptr, cpu_info, sizeof(cpu_info));
+      cpu_string_ptr += sizeof(cpu_info);
+    }
+    cpu_brand_.assign(cpu_string, cpu_string_ptr - cpu_string);
+  }
+
+  const int parameter_containing_non_stop_time_stamp_counter = 0x80000007;
+  if (max_parameter >= parameter_containing_non_stop_time_stamp_counter) {
+    __cpuid(cpu_info, parameter_containing_non_stop_time_stamp_counter);
+    has_non_stop_time_stamp_counter_ = (cpu_info[3] & (1 << 8)) != 0;
+  }
+#elif defined(ARCH_CPU_ARM_FAMILY) && (defined(OS_ANDROID) || defined(OS_LINUX))
+  cpu_brand_.assign(g_lazy_cpuinfo.Get().brand());
+  has_broken_neon_ = g_lazy_cpuinfo.Get().has_broken_neon();
+#else
+  #error unknown architecture
+#endif
+}
+
+CPU::IntelMicroArchitecture CPU::GetIntelMicroArchitecture() const {
+  if (has_avx2()) return AVX2;
+  if (has_avx()) return AVX;
+  if (has_sse42()) return SSE42;
+  if (has_sse41()) return SSE41;
+  if (has_ssse3()) return SSSE3;
+  if (has_sse3()) return SSE3;
+  if (has_sse2()) return SSE2;
+  if (has_sse()) return SSE;
+  return PENTIUM;
+}
+
+}  // namespace base
diff --git a/be/src/gutil/cpu.h b/be/src/gutil/cpu.h
index b401867c3c..65498140d1 100644
--- a/be/src/gutil/cpu.h
+++ b/be/src/gutil/cpu.h
@@ -1,90 +1,90 @@
-// Copyright (c) 2012 The Chromium Authors. All rights reserved.
-// Use of this source code is governed by a BSD-style license that can be
-// found in the LICENSE file.
-
-#ifndef BASE_CPU_H_
-#define BASE_CPU_H_
-
-#include <string>
-
-namespace base {
-
-// Query information about the processor.
-class CPU {
- public:
-  // Constructor
-  CPU();
-
-  enum IntelMicroArchitecture {
-    PENTIUM,
-    SSE,
-    SSE2,
-    SSE3,
-    SSSE3,
-    SSE41,
-    SSE42,
-    AVX,
-    AVX2,
-    MAX_INTEL_MICRO_ARCHITECTURE
-  };
-
-  // Accessors for CPU information.
-  const std::string& vendor_name() const { return cpu_vendor_; }
-  int signature() const { return signature_; }
-  int stepping() const { return stepping_; }
-  int model() const { return model_; }
-  int family() const { return family_; }
-  int type() const { return type_; }
-  int extended_model() const { return ext_model_; }
-  int extended_family() const { return ext_family_; }
-  bool has_mmx() const { return has_mmx_; }
-  bool has_sse() const { return has_sse_; }
-  bool has_sse2() const { return has_sse2_; }
-  bool has_sse3() const { return has_sse3_; }
-  bool has_ssse3() const { return has_ssse3_; }
-  bool has_sse41() const { return has_sse41_; }
-  bool has_sse42() const { return has_sse42_; }
-  bool has_avx() const { return has_avx_; }
-  bool has_avx2() const { return has_avx2_; }
-  bool has_aesni() const { return has_aesni_; }
-  bool has_non_stop_time_stamp_counter() const {
-    return has_non_stop_time_stamp_counter_;
-  }
-  // has_broken_neon is only valid on ARM chips. If true, it indicates that we
-  // believe that the NEON unit on the current CPU is flawed and cannot execute
-  // some code. See https://code.google.com/p/chromium/issues/detail?id=341598
-  bool has_broken_neon() const { return has_broken_neon_; }
-
-  IntelMicroArchitecture GetIntelMicroArchitecture() const;
-  const std::string& cpu_brand() const { return cpu_brand_; }
-
- private:
-  // Query the processor for CPUID information.
-  void Initialize();
-
-  int signature_;  // raw form of type, family, model, and stepping
-  int type_;  // process type
-  int family_;  // family of the processor
-  int model_;  // model of processor
-  int stepping_;  // processor revision number
-  int ext_model_;
-  int ext_family_;
-  bool has_mmx_;
-  bool has_sse_;
-  bool has_sse2_;
-  bool has_sse3_;
-  bool has_ssse3_;
-  bool has_sse41_;
-  bool has_sse42_;
-  bool has_avx_;
-  bool has_avx2_;
-  bool has_aesni_;
-  bool has_non_stop_time_stamp_counter_;
-  bool has_broken_neon_;
-  std::string cpu_vendor_;
-  std::string cpu_brand_;
-};
-
-}  // namespace base
-
-#endif  // BASE_CPU_H_
+// Copyright (c) 2012 The Chromium Authors. All rights reserved.
+// Use of this source code is governed by a BSD-style license that can be
+// found in the LICENSE file.
+
+#ifndef BASE_CPU_H_
+#define BASE_CPU_H_
+
+#include <string>
+
+namespace base {
+
+// Query information about the processor.
+class CPU {
+ public:
+  // Constructor
+  CPU();
+
+  enum IntelMicroArchitecture {
+    PENTIUM,
+    SSE,
+    SSE2,
+    SSE3,
+    SSSE3,
+    SSE41,
+    SSE42,
+    AVX,
+    AVX2,
+    MAX_INTEL_MICRO_ARCHITECTURE
+  };
+
+  // Accessors for CPU information.
+  const std::string& vendor_name() const { return cpu_vendor_; }
+  int signature() const { return signature_; }
+  int stepping() const { return stepping_; }
+  int model() const { return model_; }
+  int family() const { return family_; }
+  int type() const { return type_; }
+  int extended_model() const { return ext_model_; }
+  int extended_family() const { return ext_family_; }
+  bool has_mmx() const { return has_mmx_; }
+  bool has_sse() const { return has_sse_; }
+  bool has_sse2() const { return has_sse2_; }
+  bool has_sse3() const { return has_sse3_; }
+  bool has_ssse3() const { return has_ssse3_; }
+  bool has_sse41() const { return has_sse41_; }
+  bool has_sse42() const { return has_sse42_; }
+  bool has_avx() const { return has_avx_; }
+  bool has_avx2() const { return has_avx2_; }
+  bool has_aesni() const { return has_aesni_; }
+  bool has_non_stop_time_stamp_counter() const {
+    return has_non_stop_time_stamp_counter_;
+  }
+  // has_broken_neon is only valid on ARM chips. If true, it indicates that we
+  // believe that the NEON unit on the current CPU is flawed and cannot execute
+  // some code. See https://code.google.com/p/chromium/issues/detail?id=341598
+  bool has_broken_neon() const { return has_broken_neon_; }
+
+  IntelMicroArchitecture GetIntelMicroArchitecture() const;
+  const std::string& cpu_brand() const { return cpu_brand_; }
+
+ private:
+  // Query the processor for CPUID information.
+  void Initialize();
+
+  int signature_;  // raw form of type, family, model, and stepping
+  int type_;  // processor type
+  int family_;  // family of the processor
+  int model_;  // model of processor
+  int stepping_;  // processor revision number
+  int ext_model_;
+  int ext_family_;
+  bool has_mmx_;
+  bool has_sse_;
+  bool has_sse2_;
+  bool has_sse3_;
+  bool has_ssse3_;
+  bool has_sse41_;
+  bool has_sse42_;
+  bool has_avx_;
+  bool has_avx2_;
+  bool has_aesni_;
+  bool has_non_stop_time_stamp_counter_;
+  bool has_broken_neon_;
+  std::string cpu_vendor_;
+  std::string cpu_brand_;
+};
+
+}  // namespace base
+
+#endif  // BASE_CPU_H_
diff --git a/be/src/olap/rowset/rowset_writer_context.h b/be/src/olap/rowset/rowset_writer_context.h
index e4737e402b..5bfa28c348 100644
--- a/be/src/olap/rowset/rowset_writer_context.h
+++ b/be/src/olap/rowset/rowset_writer_context.h
@@ -1,74 +1,74 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#ifndef DORIS_BE_SRC_OLAP_ROWSET_ROWSET_WRITER_CONTEXT_H
-#define DORIS_BE_SRC_OLAP_ROWSET_ROWSET_WRITER_CONTEXT_H
-
-#include "gen_cpp/olap_file.pb.h"
-#include "olap/data_dir.h"
-#include "olap/tablet_schema.h"
-
-namespace doris {
-
-class RowsetWriterContextBuilder;
-using RowsetWriterContextBuilderSharedPtr = std::shared_ptr<RowsetWriterContextBuilder>;
-
-struct RowsetWriterContext {
-    RowsetWriterContext() :
-        tablet_id(0),
-        tablet_schema_hash(0),
-        partition_id(0),
-        rowset_type(ALPHA_ROWSET),
-        rowset_path_prefix(""),
-        tablet_schema(nullptr),
-        rowset_state(PREPARED),
-        data_dir(nullptr),
-        version(Version(0, 0)),
-        version_hash(0),
-        txn_id(0),
-        tablet_uid(0, 0) {
-        load_id.set_hi(0);
-        load_id.set_lo(0);
-    }
-    RowsetId rowset_id;
-    int64_t tablet_id;
-    int64_t tablet_schema_hash;
-    int64_t partition_id;
-    RowsetTypePB rowset_type;
-    std::string rowset_path_prefix;
-    const TabletSchema* tablet_schema;
-    // PREPARED/COMMITTED for pending rowset
-    // VISIBLE for non-pending rowset
-    RowsetStatePB rowset_state;
-    DataDir* data_dir;
-    // properties for non-pending rowset
-    Version version;
-    VersionHash version_hash;
-
-    // properties for pending rowset
-    int64_t txn_id;
-    PUniqueId load_id;
-    TabletUid tablet_uid;
-    // segment file use uint32 to represent row number, therefore the maximum is UINT32_MAX.
-    // the default is set to INT32_MAX to avoid overflow issue when casting from uint32_t to int.
-    // test cases can change this value to control flush timing
-    uint32_t max_rows_per_segment = INT32_MAX;
-};
-
-} // namespace doris
-
-#endif // DORIS_BE_SRC_OLAP_ROWSET_ROWSET_WRITER_CONTEXT_H
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#ifndef DORIS_BE_SRC_OLAP_ROWSET_ROWSET_WRITER_CONTEXT_H
+#define DORIS_BE_SRC_OLAP_ROWSET_ROWSET_WRITER_CONTEXT_H
+
+#include "gen_cpp/olap_file.pb.h"
+#include "olap/data_dir.h"
+#include "olap/tablet_schema.h"
+
+namespace doris {
+
+class RowsetWriterContextBuilder;
+using RowsetWriterContextBuilderSharedPtr = std::shared_ptr<RowsetWriterContextBuilder>;
+
+struct RowsetWriterContext {
+    RowsetWriterContext() :
+        tablet_id(0),
+        tablet_schema_hash(0),
+        partition_id(0),
+        rowset_type(ALPHA_ROWSET),
+        rowset_path_prefix(""),
+        tablet_schema(nullptr),
+        rowset_state(PREPARED),
+        data_dir(nullptr),
+        version(Version(0, 0)),
+        version_hash(0),
+        txn_id(0),
+        tablet_uid(0, 0) {
+        load_id.set_hi(0);
+        load_id.set_lo(0);
+    }
+    RowsetId rowset_id;
+    int64_t tablet_id;
+    int64_t tablet_schema_hash;
+    int64_t partition_id;
+    RowsetTypePB rowset_type;
+    std::string rowset_path_prefix;
+    const TabletSchema* tablet_schema;
+    // PREPARED/COMMITTED for pending rowset
+    // VISIBLE for non-pending rowset
+    RowsetStatePB rowset_state;
+    DataDir* data_dir;
+    // properties for non-pending rowset
+    Version version;
+    VersionHash version_hash;
+
+    // properties for pending rowset
+    int64_t txn_id;
+    PUniqueId load_id;
+    TabletUid tablet_uid;
+    // Segment files use uint32 to represent row numbers, therefore the maximum is UINT32_MAX.
+    // The default is set to INT32_MAX to avoid overflow issues when casting from uint32_t to int.
+    // Test cases can change this value to control flush timing.
+    uint32_t max_rows_per_segment = INT32_MAX;
+};
+
+} // namespace doris
+
+#endif // DORIS_BE_SRC_OLAP_ROWSET_ROWSET_WRITER_CONTEXT_H
diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_page.h b/be/src/olap/rowset/segment_v2/bitshuffle_page.h
index 6a9bd1ae98..1573d681f0 100644
--- a/be/src/olap/rowset/segment_v2/bitshuffle_page.h
+++ b/be/src/olap/rowset/segment_v2/bitshuffle_page.h
@@ -1,342 +1,342 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <sys/types.h>
-#include <algorithm>
-#include <cstring>
-#include <cstdint>
-#include <ostream>
-#include <glog/logging.h>
-
-#include "util/coding.h"
-#include "util/faststring.h"
-#include "gutil/port.h"
-#include "olap/olap_common.h"
-#include "olap/types.h"
-#include "olap/rowset/segment_v2/options.h"
-#include "olap/rowset/segment_v2/page_builder.h"
-#include "olap/rowset/segment_v2/page_decoder.h"
-#include "olap/rowset/segment_v2/common.h"
-#include "olap/rowset/segment_v2/bitshuffle_wrapper.h"
-
-namespace doris {
-namespace segment_v2 {
-
-enum {
-    BITSHUFFLE_PAGE_HEADER_SIZE = 16
-};
-
-void warn_with_bitshuffle_error(int64_t val);
-
-// BitshufflePageBuilder bitshuffles and compresses the bits of fixed
-// size type blocks with lz4.
-//
-// The page format is as follows:
-//
-// 1. Header: (16 bytes total)
-//
-//    <num_elements> [32-bit]
-//      The number of elements encoded in the page.
-//
-//    <compressed_size> [32-bit]
-//      The post-compression size of the page, including this header.
-//
-//    <padded_num_elements> [32-bit]
-//      Padding is needed to meet the requirements of the bitshuffle
-//      library such that the input/output is a multiple of 8. Some
-//      ignored elements are appended to the end of the page if necessary
-//      to meet this requirement.
-//
-//      This header field is the post-padding element count.
-//
-//    <elem_size_bytes> [32-bit]
-//      The size of the elements, in bytes, as actually encoded. In the
-//      case that all of the data in a page can fit into a smaller
-//      integer type, then we may choose to encode that smaller type
-//      to save CPU costs.
-//
-//      This is currently only implemented in the UINT32 page type.
-//
-//   NOTE: all on-disk ints are encoded little-endian
-//
-// 2. Element data
-//
-//    The header is followed by the bitshuffle-compressed element data.
-//
-template<FieldType Type>
-class BitshufflePageBuilder : public PageBuilder {
-public:
-    BitshufflePageBuilder(const PageBuilderOptions& options) :
-            _options(options),
-            _count(0),
-            _remain_element_capacity(0),
-            _finished(false) {
-        reset();
-    }
-
-    bool is_page_full() override {
-        return _remain_element_capacity == 0;
-    }
-
-    Status add(const uint8_t* vals, size_t* count) override {
-        DCHECK(!_finished);
-        int to_add = std::min<int>(_remain_element_capacity, *count);
-        _data.append(vals, to_add * SIZE_OF_TYPE);
-        _count += to_add;
-        _remain_element_capacity -= to_add;
-        // return added number through count
-        *count = to_add;
-        return Status::OK();
-    }
-
-    Slice finish() override {
-        return _finish(SIZE_OF_TYPE);
-    }
-
-    void reset() override {
-        auto block_size = _options.data_page_size;
-        _count = 0;
-        _data.clear();
-        _data.reserve(block_size);
-        DCHECK_EQ(reinterpret_cast<uintptr_t>(_data.data()) & (alignof(CppType) - 1), 0)
-            << "buffer must be naturally-aligned";
-        _buffer.clear();
-        _buffer.resize(BITSHUFFLE_PAGE_HEADER_SIZE);
-        _finished = false;
-        _remain_element_capacity = block_size / SIZE_OF_TYPE;
-    }
-
-    size_t count() const {
-        return _count;
-    }
-
-    uint64_t size() const override {
-        return _buffer.size();
-    }
-
-    // this api will release the memory ownership of encoded data
-    // Note:
-    //     release() should be called after finish
-    //     reset() should be called after this function before reuse the builder
-    void release() override {
-        uint8_t* ret = _buffer.release();
-        (void)ret;
-    }
-
-private:
-    Slice _finish(int final_size_of_type) {
-        _data.resize(final_size_of_type * _count);
-
-        // Do padding so that the input num of element is multiple of 8.
-        int num_elems_after_padding = ALIGN_UP(_count, 8);
-        int padding_elems = num_elems_after_padding - _count;
-        int padding_bytes = padding_elems * final_size_of_type;
-        for (int i = 0; i < padding_bytes; i++) {
-            _data.push_back(0);
-        }
-
-        _buffer.resize(BITSHUFFLE_PAGE_HEADER_SIZE +
-                bitshuffle::compress_lz4_bound(num_elems_after_padding, final_size_of_type, 0));
-
-        encode_fixed32_le(&_buffer[0], _count);
-        int64_t bytes = bitshuffle::compress_lz4(_data.data(), &_buffer[BITSHUFFLE_PAGE_HEADER_SIZE],
-                num_elems_after_padding, final_size_of_type, 0);
-        if (PREDICT_FALSE(bytes < 0)) {
-            // This means the bitshuffle function fails.
-            // Ideally, this should not happen.
-            warn_with_bitshuffle_error(bytes);
-            // It does not matter what will be returned here,
-            // since we have logged fatal in warn_with_bitshuffle_error().
-            return Slice();
-        }
-        encode_fixed32_le(&_buffer[4], BITSHUFFLE_PAGE_HEADER_SIZE + bytes);
-        encode_fixed32_le(&_buffer[8], num_elems_after_padding);
-        encode_fixed32_le(&_buffer[12], final_size_of_type);
-        _finished = true;
-        return Slice(_buffer.data(), BITSHUFFLE_PAGE_HEADER_SIZE + bytes);
-    }
-
-    typedef typename TypeTraits<Type>::CppType CppType;
-
-    CppType cell(int idx) const {
-        DCHECK_GE(idx, 0);
-        CppType ret;
-        memcpy(&ret, &_data[idx * SIZE_OF_TYPE], sizeof(CppType));
-        return ret;
-    }
-
-    enum {
-        SIZE_OF_TYPE = TypeTraits<Type>::size
-    };
-    PageBuilderOptions _options;
-    uint32_t _count;
-    int _remain_element_capacity;
-    bool _finished;
-    faststring _data;
-    faststring _buffer;
-};
-
-template<FieldType Type>
-class BitShufflePageDecoder : public PageDecoder {
-public:
-    BitShufflePageDecoder(Slice data, const PageDecoderOptions& options) : _data(data),
-    _options(options),
-    _parsed(false),
-    _num_elements(0),
-    _compressed_size(0),
-    _num_element_after_padding(0),
-    _size_of_element(0),
-    _cur_index(0) { }
-
-    Status init() override {
-        CHECK(!_parsed);
-        if (_data.size < BITSHUFFLE_PAGE_HEADER_SIZE) {
-            std::stringstream ss;
-            ss << "file corrupton: invalid data size:" << _data.size << ", header size:" << BITSHUFFLE_PAGE_HEADER_SIZE;
-            return Status::InternalError(ss.str());
-        }
-        _num_elements = decode_fixed32_le((const uint8_t*)&_data[0]);
-        _compressed_size   = decode_fixed32_le((const uint8_t*)&_data[4]);
-        if (_compressed_size != _data.size) {
-            std::stringstream ss;
-            ss << "Size information unmatched, _compressed_size:" << _compressed_size
-                << ", _num_elements:" << _num_elements
-                << ", data size:" << _data.size;
-            return Status::InternalError(ss.str());
-        }
-        _num_element_after_padding = decode_fixed32_le((const uint8_t*)&_data[8]);
-        if (_num_element_after_padding != ALIGN_UP(_num_elements, 8)) {
-            std::stringstream ss;
-            ss << "num of element information corrupted,"
-                << " _num_element_after_padding:" << _num_element_after_padding
-                << ", _num_elements:" << _num_elements;
-            return Status::InternalError(ss.str());
-        }
-        _size_of_element = decode_fixed32_le((const uint8_t*)&_data[12]);
-        switch (_size_of_element) {
-            case 1:
-            case 2:
-            case 3:
-            case 4:
-            case 8:
-            case 12:
-            case 16:
-                break;
-            default:
-                std::stringstream ss;
-                ss << "invalid size_of_elem:" << _size_of_element;
-                return Status::InternalError(ss.str());
-        }
-
-        // Currently, only the UINT32 block encoder supports expanding size:
-        if (UNLIKELY(Type != OLAP_FIELD_TYPE_UNSIGNED_INT && _size_of_element != SIZE_OF_TYPE)) {
-            std::stringstream ss;
-            ss << "invalid size info. size of element:" << _size_of_element
-                << ", SIZE_OF_TYPE:" << SIZE_OF_TYPE
-                << ", type:" << Type;
-            return Status::InternalError(ss.str());
-        }
-        if (UNLIKELY(_size_of_element > SIZE_OF_TYPE)) {
-            std::stringstream ss;
-            ss << "invalid size info. size of element:" << _size_of_element
-                << ", SIZE_OF_TYPE:" << SIZE_OF_TYPE;
-            return Status::InternalError(ss.str());
-        }
-
-        RETURN_IF_ERROR(_decode());
-        _parsed = true;
-        return Status::OK();
-    }
-
-    Status seek_to_position_in_page(size_t pos) override {
-        DCHECK(_parsed) << "Must call init()";
-        if (PREDICT_FALSE(_num_elements == 0)) {
-            DCHECK_EQ(0, pos);
-            return Status::InvalidArgument("invalid pos");
-        }
-
-        DCHECK_LE(pos, _num_elements);
-        _cur_index = pos;
-        return Status::OK();
-    }
-
-    Status next_batch(size_t* n, ColumnBlockView* dst) override {
-        DCHECK(_parsed);
-        if (PREDICT_FALSE(*n == 0 || _cur_index >= _num_elements)) {
-            *n = 0;
-            return Status::OK();
-        }
-
-        size_t max_fetch = std::min(*n, static_cast<size_t>(_num_elements - _cur_index));
-        _copy_next_values(max_fetch, dst->data());
-        *n = max_fetch;
-        _cur_index += max_fetch;
-
-        return Status::OK();
-    }
-
-    size_t count() const override {
-        return _num_elements;
-    }
-
-    size_t current_index() const override {
-        return _cur_index;
-    }
-
-private:
-    void _copy_next_values(size_t n, void* data) {
-        memcpy(data, &_decoded[_cur_index * SIZE_OF_TYPE], n * SIZE_OF_TYPE);
-    }
-
-    Status _decode() {
-        if (_num_elements > 0) {
-            int64_t bytes;
-            _decoded.resize(_num_element_after_padding * _size_of_element);
-            char* in = const_cast<char*>(&_data[BITSHUFFLE_PAGE_HEADER_SIZE]);
-            bytes = bitshuffle::decompress_lz4(in, _decoded.data(), _num_element_after_padding,
-                    _size_of_element, 0);
-            if (PREDICT_FALSE(bytes < 0)) {
-                // Ideally, this should not happen.
-                warn_with_bitshuffle_error(bytes);
-                return Status::RuntimeError("Unshuffle Process failed");
-            }
-        }
-        return Status::OK();
-    }
-
-    typedef typename TypeTraits<Type>::CppType CppType;
-
-    enum {
-        SIZE_OF_TYPE = TypeTraits<Type>::size
-    };
-
-    Slice _data;
-    PageDecoderOptions _options;
-    bool _parsed;
-    size_t _num_elements;
-    size_t _compressed_size;
-    size_t _num_element_after_padding;
-
-    int _size_of_element;
-    size_t _cur_index;
-    faststring _decoded;
-};
-
-} // namespace segment_v2
-} // namespace doris
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <sys/types.h>
+#include <algorithm>
+#include <cstring>
+#include <cstdint>
+#include <ostream>
+#include <glog/logging.h>
+
+#include "util/coding.h"
+#include "util/faststring.h"
+#include "gutil/port.h"
+#include "olap/olap_common.h"
+#include "olap/types.h"
+#include "olap/rowset/segment_v2/options.h"
+#include "olap/rowset/segment_v2/page_builder.h"
+#include "olap/rowset/segment_v2/page_decoder.h"
+#include "olap/rowset/segment_v2/common.h"
+#include "olap/rowset/segment_v2/bitshuffle_wrapper.h"
+
+namespace doris {
+namespace segment_v2 {
+
+enum {
+    BITSHUFFLE_PAGE_HEADER_SIZE = 16
+};
+
+void warn_with_bitshuffle_error(int64_t val);
+
+// BitshufflePageBuilder bitshuffles and compresses the bits of fixed
+// size type blocks with lz4.
+//
+// The page format is as follows:
+//
+// 1. Header: (16 bytes total)
+//
+//    <num_elements> [32-bit]
+//      The number of elements encoded in the page.
+//
+//    <compressed_size> [32-bit]
+//      The post-compression size of the page, including this header.
+//
+//    <padded_num_elements> [32-bit]
+//      Padding is needed to meet the requirements of the bitshuffle
+//      library such that the input/output is a multiple of 8. Some
+//      ignored elements are appended to the end of the page if necessary
+//      to meet this requirement.
+//
+//      This header field is the post-padding element count.
+//
+//    <elem_size_bytes> [32-bit]
+//      The size of the elements, in bytes, as actually encoded. In the
+//      case that all of the data in a page can fit into a smaller
+//      integer type, then we may choose to encode that smaller type
+//      to save CPU costs.
+//
+//      This is currently only implemented in the UINT32 page type.
+//
+//   NOTE: all on-disk ints are encoded little-endian
+//
+// 2. Element data
+//
+//    The header is followed by the bitshuffle-compressed element data.
+//
+template<FieldType Type>
+class BitshufflePageBuilder : public PageBuilder {
+public:
+    BitshufflePageBuilder(const PageBuilderOptions& options) :
+            _options(options),
+            _count(0),
+            _remain_element_capacity(0),
+            _finished(false) {
+        reset();
+    }
+
+    bool is_page_full() override {
+        return _remain_element_capacity == 0;
+    }
+
+    Status add(const uint8_t* vals, size_t* count) override {
+        DCHECK(!_finished);
+        int to_add = std::min<int>(_remain_element_capacity, *count);
+        _data.append(vals, to_add * SIZE_OF_TYPE);
+        _count += to_add;
+        _remain_element_capacity -= to_add;
+        // return added number through count
+        *count = to_add;
+        return Status::OK();
+    }
+
+    Slice finish() override {
+        return _finish(SIZE_OF_TYPE);
+    }
+
+    void reset() override {
+        auto block_size = _options.data_page_size;
+        _count = 0;
+        _data.clear();
+        _data.reserve(block_size);
+        DCHECK_EQ(reinterpret_cast<uintptr_t>(_data.data()) & (alignof(CppType) - 1), 0)
+            << "buffer must be naturally-aligned";
+        _buffer.clear();
+        _buffer.resize(BITSHUFFLE_PAGE_HEADER_SIZE);
+        _finished = false;
+        _remain_element_capacity = block_size / SIZE_OF_TYPE;
+    }
+
+    size_t count() const {
+        return _count;
+    }
+
+    uint64_t size() const override {
+        return _buffer.size();
+    }
+
+    // this api will release the memory ownership of encoded data
+    // Note:
+    //     release() should be called after finish
+    //     reset() should be called after this function before reusing the builder
+    void release() override {
+        uint8_t* ret = _buffer.release();
+        (void)ret;
+    }
+
+private:
+    Slice _finish(int final_size_of_type) {
+        _data.resize(final_size_of_type * _count);
+
+        // Pad so that the input number of elements is a multiple of 8.
+        int num_elems_after_padding = ALIGN_UP(_count, 8);
+        int padding_elems = num_elems_after_padding - _count;
+        int padding_bytes = padding_elems * final_size_of_type;
+        for (int i = 0; i < padding_bytes; i++) {
+            _data.push_back(0);
+        }
+
+        _buffer.resize(BITSHUFFLE_PAGE_HEADER_SIZE +
+                bitshuffle::compress_lz4_bound(num_elems_after_padding, final_size_of_type, 0));
+
+        encode_fixed32_le(&_buffer[0], _count);
+        int64_t bytes = bitshuffle::compress_lz4(_data.data(), &_buffer[BITSHUFFLE_PAGE_HEADER_SIZE],
+                num_elems_after_padding, final_size_of_type, 0);
+        if (PREDICT_FALSE(bytes < 0)) {
+            // This means the bitshuffle function fails.
+            // Ideally, this should not happen.
+            warn_with_bitshuffle_error(bytes);
+            // It does not matter what will be returned here,
+            // since we have logged fatal in warn_with_bitshuffle_error().
+            return Slice();
+        }
+        encode_fixed32_le(&_buffer[4], BITSHUFFLE_PAGE_HEADER_SIZE + bytes);
+        encode_fixed32_le(&_buffer[8], num_elems_after_padding);
+        encode_fixed32_le(&_buffer[12], final_size_of_type);
+        _finished = true;
+        return Slice(_buffer.data(), BITSHUFFLE_PAGE_HEADER_SIZE + bytes);
+    }
+
+    typedef typename TypeTraits<Type>::CppType CppType;
+
+    CppType cell(int idx) const {
+        DCHECK_GE(idx, 0);
+        CppType ret;
+        memcpy(&ret, &_data[idx * SIZE_OF_TYPE], sizeof(CppType));
+        return ret;
+    }
+
+    enum {
+        SIZE_OF_TYPE = TypeTraits<Type>::size
+    };
+    PageBuilderOptions _options;
+    uint32_t _count;
+    int _remain_element_capacity;
+    bool _finished;
+    faststring _data;
+    faststring _buffer;
+};
+
+template<FieldType Type>
+class BitShufflePageDecoder : public PageDecoder {
+public:
+    BitShufflePageDecoder(Slice data, const PageDecoderOptions& options) : _data(data),
+    _options(options),
+    _parsed(false),
+    _num_elements(0),
+    _compressed_size(0),
+    _num_element_after_padding(0),
+    _size_of_element(0),
+    _cur_index(0) { }
+
+    Status init() override {
+        CHECK(!_parsed);
+        if (_data.size < BITSHUFFLE_PAGE_HEADER_SIZE) {
+            std::stringstream ss;
+            ss << "file corrupton: invalid data size:" << _data.size << ", header size:" << BITSHUFFLE_PAGE_HEADER_SIZE;
+            return Status::InternalError(ss.str());
+        }
+        _num_elements = decode_fixed32_le((const uint8_t*)&_data[0]);
+        _compressed_size   = decode_fixed32_le((const uint8_t*)&_data[4]);
+        if (_compressed_size != _data.size) {
+            std::stringstream ss;
+            ss << "Size information unmatched, _compressed_size:" << _compressed_size
+                << ", _num_elements:" << _num_elements
+                << ", data size:" << _data.size;
+            return Status::InternalError(ss.str());
+        }
+        _num_element_after_padding = decode_fixed32_le((const uint8_t*)&_data[8]);
+        if (_num_element_after_padding != ALIGN_UP(_num_elements, 8)) {
+            std::stringstream ss;
+            ss << "num of element information corrupted,"
+                << " _num_element_after_padding:" << _num_element_after_padding
+                << ", _num_elements:" << _num_elements;
+            return Status::InternalError(ss.str());
+        }
+        _size_of_element = decode_fixed32_le((const uint8_t*)&_data[12]);
+        switch (_size_of_element) {
+            case 1:
+            case 2:
+            case 3:
+            case 4:
+            case 8:
+            case 12:
+            case 16:
+                break;
+            default:
+                std::stringstream ss;
+                ss << "invalid size_of_elem:" << _size_of_element;
+                return Status::InternalError(ss.str());
+        }
+
+        // Currently, only the UINT32 block encoder supports expanding size:
+        if (UNLIKELY(Type != OLAP_FIELD_TYPE_UNSIGNED_INT && _size_of_element != SIZE_OF_TYPE)) {
+            std::stringstream ss;
+            ss << "invalid size info. size of element:" << _size_of_element
+                << ", SIZE_OF_TYPE:" << SIZE_OF_TYPE
+                << ", type:" << Type;
+            return Status::InternalError(ss.str());
+        }
+        if (UNLIKELY(_size_of_element > SIZE_OF_TYPE)) {
+            std::stringstream ss;
+            ss << "invalid size info. size of element:" << _size_of_element
+                << ", SIZE_OF_TYPE:" << SIZE_OF_TYPE;
+            return Status::InternalError(ss.str());
+        }
+
+        RETURN_IF_ERROR(_decode());
+        _parsed = true;
+        return Status::OK();
+    }
+
+    Status seek_to_position_in_page(size_t pos) override {
+        DCHECK(_parsed) << "Must call init()";
+        if (PREDICT_FALSE(_num_elements == 0)) {
+            DCHECK_EQ(0, pos);
+            return Status::InvalidArgument("invalid pos");
+        }
+
+        DCHECK_LE(pos, _num_elements);
+        _cur_index = pos;
+        return Status::OK();
+    }
+
+    Status next_batch(size_t* n, ColumnBlockView* dst) override {
+        DCHECK(_parsed);
+        if (PREDICT_FALSE(*n == 0 || _cur_index >= _num_elements)) {
+            *n = 0;
+            return Status::OK();
+        }
+
+        size_t max_fetch = std::min(*n, static_cast<size_t>(_num_elements - _cur_index));
+        _copy_next_values(max_fetch, dst->data());
+        *n = max_fetch;
+        _cur_index += max_fetch;
+
+        return Status::OK();
+    }
+
+    size_t count() const override {
+        return _num_elements;
+    }
+
+    size_t current_index() const override {
+        return _cur_index;
+    }
+
+private:
+    void _copy_next_values(size_t n, void* data) {
+        memcpy(data, &_decoded[_cur_index * SIZE_OF_TYPE], n * SIZE_OF_TYPE);
+    }
+
+    Status _decode() {
+        if (_num_elements > 0) {
+            int64_t bytes;
+            _decoded.resize(_num_element_after_padding * _size_of_element);
+            char* in = const_cast<char*>(&_data[BITSHUFFLE_PAGE_HEADER_SIZE]);
+            bytes = bitshuffle::decompress_lz4(in, _decoded.data(), _num_element_after_padding,
+                    _size_of_element, 0);
+            if (PREDICT_FALSE(bytes < 0)) {
+                // Ideally, this should not happen.
+                warn_with_bitshuffle_error(bytes);
+                return Status::RuntimeError("Unshuffle Process failed");
+            }
+        }
+        return Status::OK();
+    }
+
+    typedef typename TypeTraits<Type>::CppType CppType;
+
+    enum {
+        SIZE_OF_TYPE = TypeTraits<Type>::size
+    };
+
+    Slice _data;
+    PageDecoderOptions _options;
+    bool _parsed;
+    size_t _num_elements;
+    size_t _compressed_size;
+    size_t _num_element_after_padding;
+
+    int _size_of_element;
+    size_t _cur_index;
+    faststring _decoded;
+};
+
+} // namespace segment_v2
+} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp b/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp
index 36ceb8ce39..22c280ae1a 100644
--- a/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp
+++ b/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.cpp
@@ -1,81 +1,81 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "olap/rowset/segment_v2/bitshuffle_wrapper.h"
-
-// Include the bitshuffle header once to get the default (non-AVX2)
-// symbols.
-#include <bitshuffle/bitshuffle.h>
-
-#include "gutil/cpu.h"
-
-// Include the bitshuffle header again, but this time importing the
-// AVX2-compiled symbols by defining some macros.
-#undef BITSHUFFLE_H
-#define bshuf_compress_lz4_bound bshuf_compress_lz4_bound_avx2
-#define bshuf_compress_lz4 bshuf_compress_lz4_avx2
-#define bshuf_decompress_lz4 bshuf_decompress_lz4_avx2
-#include <bitshuffle/bitshuffle.h> // NOLINT(*)
-#undef bshuf_compress_lz4_bound
-#undef bshuf_compress_lz4
-#undef bshuf_decompress_lz4
-
-using base::CPU;
-
-namespace doris {
-namespace bitshuffle {
-
-// Function pointers which will be assigned the correct implementation
-// for the runtime architecture.
-namespace {
-decltype(&bshuf_compress_lz4_bound) g_bshuf_compress_lz4_bound;
-decltype(&bshuf_compress_lz4) g_bshuf_compress_lz4;
-decltype(&bshuf_decompress_lz4) g_bshuf_decompress_lz4;
-} // anonymous namespace
-
-// When this translation unit is initialized, figure out the current CPU and
-// assign the correct function for this architecture.
-//
-// This avoids an expensive 'cpuid' call in the hot path, and also avoids
-// the cost of a 'std::once' call.
-__attribute__((constructor))
-void SelectBitshuffleFunctions() {
-    if (CPU().has_avx2()) {
-        g_bshuf_compress_lz4_bound = bshuf_compress_lz4_bound_avx2;
-        g_bshuf_compress_lz4 = bshuf_compress_lz4_avx2;
-        g_bshuf_decompress_lz4 = bshuf_decompress_lz4_avx2;
-    } else {
-        g_bshuf_compress_lz4_bound = bshuf_compress_lz4_bound;
-        g_bshuf_compress_lz4 = bshuf_compress_lz4;
-        g_bshuf_decompress_lz4 = bshuf_decompress_lz4;
-    }
-}
-
-int64_t compress_lz4(void* in, void* out, size_t size,
-                     size_t elem_size, size_t block_size) {
-    return g_bshuf_compress_lz4(in, out, size, elem_size, block_size);
-}
-int64_t decompress_lz4(void* in, void* out, size_t size,
-                       size_t elem_size, size_t block_size) {
-    return g_bshuf_decompress_lz4(in, out, size, elem_size, block_size);
-}
-size_t compress_lz4_bound(size_t size, size_t elem_size, size_t block_size) {
-    return g_bshuf_compress_lz4_bound(size, elem_size, block_size);
-}
-
-} // namespace bitshuffle
-} // namespace doris
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "olap/rowset/segment_v2/bitshuffle_wrapper.h"
+
+// Include the bitshuffle header once to get the default (non-AVX2)
+// symbols.
+#include <bitshuffle/bitshuffle.h>
+
+#include "gutil/cpu.h"
+
+// Include the bitshuffle header again, but this time importing the
+// AVX2-compiled symbols by defining some macros.
+#undef BITSHUFFLE_H
+#define bshuf_compress_lz4_bound bshuf_compress_lz4_bound_avx2
+#define bshuf_compress_lz4 bshuf_compress_lz4_avx2
+#define bshuf_decompress_lz4 bshuf_decompress_lz4_avx2
+#include <bitshuffle/bitshuffle.h> // NOLINT(*)
+#undef bshuf_compress_lz4_bound
+#undef bshuf_compress_lz4
+#undef bshuf_decompress_lz4
+
+using base::CPU;
+
+namespace doris {
+namespace bitshuffle {
+
+// Function pointers which will be assigned the correct implementation
+// for the runtime architecture.
+namespace {
+decltype(&bshuf_compress_lz4_bound) g_bshuf_compress_lz4_bound;
+decltype(&bshuf_compress_lz4) g_bshuf_compress_lz4;
+decltype(&bshuf_decompress_lz4) g_bshuf_decompress_lz4;
+} // anonymous namespace
+
+// When this translation unit is initialized, figure out the current CPU and
+// assign the correct function for this architecture.
+//
+// This avoids an expensive 'cpuid' call in the hot path, and also avoids
+// the cost of a 'std::call_once' call.
+__attribute__((constructor))
+void SelectBitshuffleFunctions() {
+    if (CPU().has_avx2()) {
+        g_bshuf_compress_lz4_bound = bshuf_compress_lz4_bound_avx2;
+        g_bshuf_compress_lz4 = bshuf_compress_lz4_avx2;
+        g_bshuf_decompress_lz4 = bshuf_decompress_lz4_avx2;
+    } else {
+        g_bshuf_compress_lz4_bound = bshuf_compress_lz4_bound;
+        g_bshuf_compress_lz4 = bshuf_compress_lz4;
+        g_bshuf_decompress_lz4 = bshuf_decompress_lz4;
+    }
+}
+
+int64_t compress_lz4(void* in, void* out, size_t size,
+                     size_t elem_size, size_t block_size) {
+    return g_bshuf_compress_lz4(in, out, size, elem_size, block_size);
+}
+int64_t decompress_lz4(void* in, void* out, size_t size,
+                       size_t elem_size, size_t block_size) {
+    return g_bshuf_decompress_lz4(in, out, size, elem_size, block_size);
+}
+size_t compress_lz4_bound(size_t size, size_t elem_size, size_t block_size) {
+    return g_bshuf_compress_lz4_bound(size, elem_size, block_size);
+}
+
+} // namespace bitshuffle
+} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.h b/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.h
index 38c1e7231f..4846438130 100644
--- a/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.h
+++ b/be/src/olap/rowset/segment_v2/bitshuffle_wrapper.h
@@ -1,34 +1,34 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <stddef.h>
-#include <stdint.h>
-
-// This namespace has wrappers for the Bitshuffle library which do runtime dispatch to
-// either AVX2-accelerated or regular SSE2 implementations based on the available CPU.
-namespace doris {
-namespace bitshuffle {
-
-// See <bitshuffle.h> for documentation on these functions.
-size_t compress_lz4_bound(size_t size, size_t elem_size, size_t block_size);
-int64_t compress_lz4(void* in, void* out, size_t size, size_t elem_size, size_t block_size);
-int64_t decompress_lz4(void* in, void* out, size_t size, size_t elem_size, size_t block_size);
-
-} // namespace bitshuffle
-} // namespace doris
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <stddef.h>
+#include <stdint.h>
+
+// This namespace has wrappers for the Bitshuffle library which do runtime dispatch to
+// either AVX2-accelerated or regular SSE2 implementations based on the available CPU.
+namespace doris {
+namespace bitshuffle {
+
+// See <bitshuffle.h> for documentation on these functions.
+size_t compress_lz4_bound(size_t size, size_t elem_size, size_t block_size);
+int64_t compress_lz4(void* in, void* out, size_t size, size_t elem_size, size_t block_size);
+int64_t decompress_lz4(void* in, void* out, size_t size, size_t elem_size, size_t block_size);
+
+} // namespace bitshuffle
+} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/page_builder.h b/be/src/olap/rowset/segment_v2/page_builder.h
index 4ef0701588..c2cc0eb813 100644
--- a/be/src/olap/rowset/segment_v2/page_builder.h
+++ b/be/src/olap/rowset/segment_v2/page_builder.h
@@ -1,87 +1,87 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <stdint.h>
-#include <vector>
-
-#include "gutil/macros.h"
-#include "util/slice.h"
-#include "common/status.h"
-#include "olap/rowset/segment_v2/common.h"
-
-namespace doris {
-namespace segment_v2 {
-
-// PageBuilder is used to build page
-// Page is a data management unit, including:
-// 1. Data Page: store encoded and compressed data
-// 2. BloomFilter Page: store bloom filter of data
-// 3. Ordinal Index Page: store ordinal index of data
-// 4. Short Key Index Page: store short key index of data
-// 5. Bitmap Index Page: store bitmap index of data
-class PageBuilder {
-public:
-    PageBuilder() { }
-
-    virtual ~PageBuilder() { }
-
-    // Used by column writer to determine whether the current page is full.
-    // Column writer depends on the result to decide whether to flush current page.
-    virtual bool is_page_full() = 0;
-
-    // Add a sequence of values to the page.
-    // The number of values actually added will be returned through count, which may be less
-    // than requested if the page is full.
-    //
-    // vals size should be decided according to the page build type
-    virtual doris::Status add(const uint8_t* vals, size_t* count) = 0;
-
-    // Get the dictionary page for dictionary encoding mode column.
-    virtual Status get_dictionary_page(Slice* dictionary_page) {
-        return Status::NotSupported("get_dictionary_page not implemented");
-    }
-
-    // Return a Slice which represents the encoded data of current page.
-    //
-    // This Slice points to internal data of this builder.
-    virtual Slice finish() = 0;
-
-    // Reset the internal state of the page builder.
-    //
-    // Any data previously returned by finish may be invalidated by this call.
-    virtual void reset() = 0;
-
-    // Return the number of entries that have been added to the page.
-    virtual size_t count() const = 0;
-
-    // Return the total bytes of pageBuilder that have been added to the page.
-    virtual uint64_t size() const = 0;
-
-    // This api is for release the resource owned by builder
-    // It means it will transfer the ownership of some resource to other.
-    // This api is always called after finish
-    // and should be followed by reset() before reuse the builder
-    virtual void release() = 0;
-
-private:
-    DISALLOW_COPY_AND_ASSIGN(PageBuilder);
-};
-
-} // namespace segment_v2
-} // namespace doris
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <stdint.h>
+#include <vector>
+
+#include "gutil/macros.h"
+#include "util/slice.h"
+#include "common/status.h"
+#include "olap/rowset/segment_v2/common.h"
+
+namespace doris {
+namespace segment_v2 {
+
+// PageBuilder is used to build a page.
+// A page is a data management unit; the kinds of page include:
+// 1. Data Page: store encoded and compressed data
+// 2. BloomFilter Page: store bloom filter of data
+// 3. Ordinal Index Page: store ordinal index of data
+// 4. Short Key Index Page: store short key index of data
+// 5. Bitmap Index Page: store bitmap index of data
+class PageBuilder {
+public:
+    PageBuilder() { }
+
+    virtual ~PageBuilder() { }
+
+    // Used by column writer to determine whether the current page is full.
+    // Column writer depends on the result to decide whether to flush current page.
+    virtual bool is_page_full() = 0;
+
+    // Add a sequence of values to the page.
+    // The number of values actually added will be returned through count, which may be less
+    // than requested if the page is full.
+    //
+    // The expected size of vals depends on the page builder type.
+    virtual doris::Status add(const uint8_t* vals, size_t* count) = 0;
+
+    // Get the dictionary page for dictionary encoding mode column.
+    virtual Status get_dictionary_page(Slice* dictionary_page) {
+        return Status::NotSupported("get_dictionary_page not implemented");
+    }
+
+    // Return a Slice which represents the encoded data of current page.
+    //
+    // This Slice points to internal data of this builder.
+    virtual Slice finish() = 0;
+
+    // Reset the internal state of the page builder.
+    //
+    // Any data previously returned by finish may be invalidated by this call.
+    virtual void reset() = 0;
+
+    // Return the number of entries that have been added to the page.
+    virtual size_t count() const = 0;
+
+    // Return the total bytes of pageBuilder that have been added to the page.
+    virtual uint64_t size() const = 0;
+
+    // This API releases the resources owned by the builder,
+    // i.e. it transfers ownership of some resources to another party.
+    // It is always called after finish()
+    // and should be followed by reset() before the builder is reused.
+    virtual void release() = 0;
+
+private:
+    DISALLOW_COPY_AND_ASSIGN(PageBuilder);
+};
+
+} // namespace segment_v2
+} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/page_decoder.h b/be/src/olap/rowset/segment_v2/page_decoder.h
index a6e4c47ef2..490f45f498 100644
--- a/be/src/olap/rowset/segment_v2/page_decoder.h
+++ b/be/src/olap/rowset/segment_v2/page_decoder.h
@@ -1,79 +1,79 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "olap/column_block.h" // for ColumnBlockView
-#include "olap/rowset/segment_v2/common.h" // for rowid_t
-#include "common/status.h" // for Status
-
-namespace doris {
-namespace segment_v2 {
-
-// PageDecoder is used to decode page.
-class PageDecoder {
-public:
-    PageDecoder() { }
-
-    virtual ~PageDecoder() { }
-
-    // Call this to do some preparation for decoder.
-    // eg: parse data page header
-    virtual Status init() = 0;
-
-    // Seek the decoder to the given positional index of the page.
-    // For example, seek_to_position_in_page(0) seeks to the first
-    // stored entry.
-    //
-    // It is an error to call this with a value larger than Count().
-    // Doing so has undefined results.
-    virtual Status seek_to_position_in_page(size_t pos) = 0;
-
-    // Seek the decoder forward by a given number of rows, or to the end
-    // of the page. This is primarily used to skip over data.
-    //
-    // Return the step skipped.
-    virtual size_t seek_forward(size_t n) {
-        size_t step = std::min(n, count() - current_index());
-        DCHECK_GE(step, 0);
-        seek_to_position_in_page(current_index() + step);
-        return step;
-    }
-
-    // Fetch the next vector of values from the page into 'column_vector_view'.
-    // The output vector must have space for up to n cells.
-    //
-    // Return the size of read entries .
-    //
-    // In the case that the values are themselves references
-    // to other memory (eg Slices), the referred-to memory is
-    // allocated in the column_vector_view's mem_pool.
-    virtual Status next_batch(size_t* n, ColumnBlockView* dst) = 0;
-
-    // Return the number of elements in this page.
-    virtual size_t count() const = 0;
-
-    // Return the position within the page of the currently seeked
-    // entry (ie the entry that will next be returned by next_vector())
-    virtual size_t current_index() const = 0;
-
-private:
-    DISALLOW_COPY_AND_ASSIGN(PageDecoder);
-};
-
-} // namespace segment_v2
-} // namespace doris
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "olap/column_block.h" // for ColumnBlockView
+#include "olap/rowset/segment_v2/common.h" // for rowid_t
+#include "common/status.h" // for Status
+
+namespace doris {
+namespace segment_v2 {
+
+// PageDecoder is used to decode page.
+class PageDecoder {
+public:
+    PageDecoder() { }
+
+    virtual ~PageDecoder() { }
+
+    // Call this to do some preparation for decoder.
+    // eg: parse data page header
+    virtual Status init() = 0;
+
+    // Seek the decoder to the given positional index of the page.
+    // For example, seek_to_position_in_page(0) seeks to the first
+    // stored entry.
+    //
+    // It is an error to call this with a value larger than count().
+    // Doing so has undefined results.
+    virtual Status seek_to_position_in_page(size_t pos) = 0;
+
+    // Seek the decoder forward by a given number of rows, or to the end
+    // of the page. This is primarily used to skip over data.
+    //
+    // Return the number of rows actually skipped.
+    virtual size_t seek_forward(size_t n) {
+        size_t step = std::min(n, count() - current_index());
+        DCHECK_GE(step, 0);
+        seek_to_position_in_page(current_index() + step);
+        return step;
+    }
+
+    // Fetch the next vector of values from the page into 'dst'.
+    // The output vector must have space for up to n cells.
+    //
+    // The number of entries actually read is returned through n.
+    //
+    // In the case that the values are themselves references
+    // to other memory (eg Slices), the referred-to memory is
+    // allocated in dst's mem_pool.
+    virtual Status next_batch(size_t* n, ColumnBlockView* dst) = 0;
+
+    // Return the number of elements in this page.
+    virtual size_t count() const = 0;
+
+    // Return the position within the page of the currently seeked
+    // entry (i.e. the entry that will next be returned by next_batch()).
+    virtual size_t current_index() const = 0;
+
+private:
+    DISALLOW_COPY_AND_ASSIGN(PageDecoder);
+};
+
+} // namespace segment_v2
+} // namespace doris
diff --git a/be/src/olap/rowset/segment_v2/rle_page.h b/be/src/olap/rowset/segment_v2/rle_page.h
index 36817b7cfd..46fb197312 100644
--- a/be/src/olap/rowset/segment_v2/rle_page.h
+++ b/be/src/olap/rowset/segment_v2/rle_page.h
@@ -1,256 +1,256 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include "olap/rowset/segment_v2/page_builder.h" // for PageBuilder
-#include "olap/rowset/segment_v2/page_decoder.h" // for PageDecoder
-#include "olap/rowset/segment_v2/options.h" // for PageBuilderOptions/PageDecoderOptions
-#include "olap/rowset/segment_v2/common.h" // for rowid_t
-#include "util/rle_encoding.h" // for RleEncoder/RleDecoder
-#include "util/coding.h" // for encode_fixed32_le/decode_fixed32_le
-
-namespace doris {
-namespace segment_v2 {
-
-enum {
-    RLE_PAGE_HEADER_SIZE = 4
-};
-
-// RLE builder for generic integer and bool types. What is missing is some way
-// to enforce that this can only be instantiated for INT and BOOL types.
-//
-// The page format is as follows:
-//
-// 1. Header: (4 bytes total)
-//
-//    <num_elements> [32-bit]
-//      The number of elements encoded in the page.
-//
-//    NOTE: all on-disk ints are encoded little-endian
-//
-// 2. Element data
-//
-//    The header is followed by the rle-encoded element data.
-//
-// This Rle encoding algorithm is only effective for repeated INT type and bool type,
-// It is not good for sequence number or random number. BitshufflePage is recommended
-// for these case.
-//
-// TODO(hkp): optimize rle algorithm 
-template<FieldType Type>
-class RlePageBuilder : public PageBuilder {
-public:
-    RlePageBuilder(const PageBuilderOptions& options) :
-        _options(options),
-        _count(0),
-        _finished(false),
-        _bit_width(0),
-        _rle_encoder(nullptr) {
-        switch(Type) {
-            case OLAP_FIELD_TYPE_BOOL: {
-                _bit_width = 1;
-                break;
-            }
-            default: {
-                _bit_width = SIZE_OF_TYPE * 8;
-                break;
-            }
-        }
-        _rle_encoder = new RleEncoder<CppType>(&_buf, _bit_width);
-        reset();
-    }
-
-    ~RlePageBuilder() {
-        delete _rle_encoder;
-    }
-
-    bool is_page_full() override {
-        return _rle_encoder->len() >= _options.data_page_size;
-    }
-
-    Status add(const uint8_t* vals, size_t* count) override {
-        DCHECK(!_finished);
-        DCHECK_EQ(reinterpret_cast<uintptr_t>(vals) & (alignof(CppType) - 1), 0)
-                << "Pointer passed to Add() must be naturally-aligned";
-
-        const CppType* new_vals = reinterpret_cast<const CppType*>(vals);
-        for (int i = 0; i < *count; ++i) {
-            _rle_encoder->Put(new_vals[i]);
-        }
-        
-        _count += *count;
-        return Status::OK();
-    }
-
-    Slice finish() override {
-        _finished = true;
-        // here should Flush first and then encode the count header
-        // or it will lead to a bug if the header is less than 8 byte and the data is small
-        _rle_encoder->Flush();
-        encode_fixed32_le(&_buf[0], _count);
-        return Slice(_buf.data(), _buf.size());
-    }
-
-    void reset() override {
-        _count = 0;
-        _rle_encoder->Clear();
-        _rle_encoder->Reserve(RLE_PAGE_HEADER_SIZE, 0);
-    }
-
-    size_t count() const override {
-        return _count;
-    }
-
-    uint64_t size() const override {
-        return _rle_encoder->len();
-    }
-
-    // this api will release the memory ownership of encoded data
-    // Note:
-    //     release() should be called after finish
-    //     reset() should be called after this function before reuse the builder
-    void release() override {
-        uint8_t* ret = _buf.release();
-        (void)ret;
-    }
-
-private:
-    typedef typename TypeTraits<Type>::CppType CppType;
-    enum {
-        SIZE_OF_TYPE = TypeTraits<Type>::size
-    };
-
-    PageBuilderOptions _options;
-    size_t _count;
-    bool _finished;
-    int _bit_width;
-    RleEncoder<CppType>* _rle_encoder;
-    faststring _buf;
-};
-
-template<FieldType Type>
-class RlePageDecoder : public PageDecoder {
-public:
-    RlePageDecoder(Slice slice, const PageDecoderOptions& options) :
-        _data(slice),
-        _options(options),
-        _parsed(false),
-        _num_elements(0),
-        _cur_index(0),
-        _bit_width(0) { }
-
-    Status init() override {
-        CHECK(!_parsed);
-
-        if (_data.size < RLE_PAGE_HEADER_SIZE) {
-            return Status::Corruption(
-                "not enough bytes for header in RleBitMapBlockDecoder");
-        }
-        _num_elements = decode_fixed32_le((const uint8_t*)&_data[0]);
-
-        _parsed = true;
-
-        switch(Type) {
-            case OLAP_FIELD_TYPE_BOOL: {
-                _bit_width = 1;
-                break;
-            }
-            default: {
-                _bit_width = SIZE_OF_TYPE * 8;
-                break;
-            }
-        }
-
-        _rle_decoder = RleDecoder<CppType>((uint8_t*)_data.data + RLE_PAGE_HEADER_SIZE,
-                 _data.size - RLE_PAGE_HEADER_SIZE, _bit_width);
-
-        seek_to_position_in_page(0);
-        return Status::OK();
-    }
-
-    Status seek_to_position_in_page(size_t pos) override {
-        DCHECK(_parsed) << "Must call init()";
-        DCHECK_LE(pos, _num_elements) << "Tried to seek to " << pos << " which is > number of elements ("
-                << _num_elements << ") in the block!";
-        // If the block is empty (e.g. the column is filled with nulls), there is no data to seek.
-        if (PREDICT_FALSE(_num_elements == 0)) {
-            return Status::OK();
-        }
-        if (_cur_index == pos) {
-            // No need to seek.
-            return Status::OK();
-        } else if (_cur_index < pos) {
-            uint nskip = pos - _cur_index;
-            _rle_decoder.Skip(nskip);
-        } else {
-            _rle_decoder = RleDecoder<CppType>((uint8_t*)_data.data + RLE_PAGE_HEADER_SIZE,
-                    _data.size - RLE_PAGE_HEADER_SIZE, _bit_width);
-            _rle_decoder.Skip(pos);
-        }
-        _cur_index = pos;
-        return Status::OK();
-    }
-
-    Status next_batch(size_t* n, ColumnBlockView* dst) override {
-        DCHECK(_parsed);
-        if (PREDICT_FALSE(*n == 0 || _cur_index >= _num_elements)) {
-            *n = 0;
-            return Status::OK();
-        }
-
-        size_t to_fetch = std::min(*n, static_cast<size_t>(_num_elements - _cur_index));
-        size_t remaining = to_fetch;
-        uint8_t* data_ptr = dst->data();
-        bool result = false;
-        while (remaining > 0) {
-            result = _rle_decoder.Get(reinterpret_cast<CppType*>(data_ptr));
-            DCHECK(result);
-            remaining--;
-            data_ptr += SIZE_OF_TYPE;
-        }
-
-        _cur_index += to_fetch;
-        *n = to_fetch;
-        return Status::OK();
-    }
-
-    size_t count() const override {
-        return _num_elements;
-    }
-
-    size_t current_index() const override {
-        return _cur_index;
-    }
-
-private:
-    typedef typename TypeTraits<Type>::CppType CppType;
-    enum {
-        SIZE_OF_TYPE = TypeTraits<Type>::size
-    };
-
-    Slice _data;
-    PageDecoderOptions _options;
-    bool _parsed;
-    uint32_t _num_elements;
-    size_t _cur_index;
-    int _bit_width;
-    RleDecoder<CppType> _rle_decoder;
-};
-
-} // namespace segment_v2
-} // namespace doris
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include "olap/rowset/segment_v2/page_builder.h" // for PageBuilder
+#include "olap/rowset/segment_v2/page_decoder.h" // for PageDecoder
+#include "olap/rowset/segment_v2/options.h" // for PageBuilderOptions/PageDecoderOptions
+#include "olap/rowset/segment_v2/common.h" // for rowid_t
+#include "util/rle_encoding.h" // for RleEncoder/RleDecoder
+#include "util/coding.h" // for encode_fixed32_le/decode_fixed32_le
+
+namespace doris {
+namespace segment_v2 {
+
+enum {
+    RLE_PAGE_HEADER_SIZE = 4
+};
+
+// RLE builder for generic integer and bool types. What is missing is some way
+// to enforce that this can only be instantiated for INT and BOOL types.
+//
+// The page format is as follows:
+//
+// 1. Header: (4 bytes total)
+//
+//    <num_elements> [32-bit]
+//      The number of elements encoded in the page.
+//
+//    NOTE: all on-disk ints are encoded little-endian
+//
+// 2. Element data
+//
+//    The header is followed by the rle-encoded element data.
+//
+// This RLE encoding is only effective for INT and BOOL columns with many repeated values.
+// It is not a good fit for sequential or random numbers; BitshufflePage is recommended
+// for those cases.
+//
+// TODO(hkp): optimize rle algorithm 
+template<FieldType Type>
+class RlePageBuilder : public PageBuilder {
+public:
+    RlePageBuilder(const PageBuilderOptions& options) :
+        _options(options),
+        _count(0),
+        _finished(false),
+        _bit_width(0),
+        _rle_encoder(nullptr) {
+        switch(Type) {
+            case OLAP_FIELD_TYPE_BOOL: {
+                _bit_width = 1;
+                break;
+            }
+            default: {
+                _bit_width = SIZE_OF_TYPE * 8;
+                break;
+            }
+        }
+        _rle_encoder = new RleEncoder<CppType>(&_buf, _bit_width);
+        reset();
+    }
+
+    ~RlePageBuilder() {
+        delete _rle_encoder;
+    }
+
+    bool is_page_full() override {
+        return _rle_encoder->len() >= _options.data_page_size;
+    }
+
+    Status add(const uint8_t* vals, size_t* count) override {
+        DCHECK(!_finished);
+        DCHECK_EQ(reinterpret_cast<uintptr_t>(vals) & (alignof(CppType) - 1), 0)
+                << "Pointer passed to Add() must be naturally-aligned";
+
+        const CppType* new_vals = reinterpret_cast<const CppType*>(vals);
+        for (int i = 0; i < *count; ++i) {
+            _rle_encoder->Put(new_vals[i]);
+        }
+        
+        _count += *count;
+        return Status::OK();
+    }
+
+    Slice finish() override {
+        _finished = true;
+        // Flush the encoder first and only then encode the count header;
+        // otherwise it leads to a bug when the header is less than 8 bytes and the data is small.
+        _rle_encoder->Flush();
+        encode_fixed32_le(&_buf[0], _count);
+        return Slice(_buf.data(), _buf.size());
+    }
+
+    void reset() override {
+        _count = 0;
+        _rle_encoder->Clear();
+        _rle_encoder->Reserve(RLE_PAGE_HEADER_SIZE, 0);
+    }
+
+    size_t count() const override {
+        return _count;
+    }
+
+    uint64_t size() const override {
+        return _rle_encoder->len();
+    }
+
+    // this api will release the memory ownership of encoded data
+    // Note:
+    //     release() should be called after finish
+    //     reset() should be called after this function before reuse the builder
+    void release() override {
+        uint8_t* ret = _buf.release();
+        (void)ret;
+    }
+
+private:
+    typedef typename TypeTraits<Type>::CppType CppType;
+    enum {
+        SIZE_OF_TYPE = TypeTraits<Type>::size
+    };
+
+    PageBuilderOptions _options;
+    size_t _count;
+    bool _finished;
+    int _bit_width;
+    RleEncoder<CppType>* _rle_encoder;
+    faststring _buf;
+};
+
+template<FieldType Type>
+class RlePageDecoder : public PageDecoder {
+public:
+    RlePageDecoder(Slice slice, const PageDecoderOptions& options) :
+        _data(slice),
+        _options(options),
+        _parsed(false),
+        _num_elements(0),
+        _cur_index(0),
+        _bit_width(0) { }
+
+    Status init() override {
+        CHECK(!_parsed);
+
+        if (_data.size < RLE_PAGE_HEADER_SIZE) {
+            return Status::Corruption(
+                "not enough bytes for header in RleBitMapBlockDecoder");
+        }
+        _num_elements = decode_fixed32_le((const uint8_t*)&_data[0]);
+
+        _parsed = true;
+
+        switch(Type) {
+            case OLAP_FIELD_TYPE_BOOL: {
+                _bit_width = 1;
+                break;
+            }
+            default: {
+                _bit_width = SIZE_OF_TYPE * 8;
+                break;
+            }
+        }
+
+        _rle_decoder = RleDecoder<CppType>((uint8_t*)_data.data + RLE_PAGE_HEADER_SIZE,
+                 _data.size - RLE_PAGE_HEADER_SIZE, _bit_width);
+
+        seek_to_position_in_page(0);
+        return Status::OK();
+    }
+
+    Status seek_to_position_in_page(size_t pos) override {
+        DCHECK(_parsed) << "Must call init()";
+        DCHECK_LE(pos, _num_elements) << "Tried to seek to " << pos << " which is > number of elements ("
+                << _num_elements << ") in the block!";
+        // If the block is empty (e.g. the column is filled with nulls), there is no data to seek.
+        if (PREDICT_FALSE(_num_elements == 0)) {
+            return Status::OK();
+        }
+        if (_cur_index == pos) {
+            // No need to seek.
+            return Status::OK();
+        } else if (_cur_index < pos) {
+            uint nskip = pos - _cur_index;
+            _rle_decoder.Skip(nskip);
+        } else {
+            _rle_decoder = RleDecoder<CppType>((uint8_t*)_data.data + RLE_PAGE_HEADER_SIZE,
+                    _data.size - RLE_PAGE_HEADER_SIZE, _bit_width);
+            _rle_decoder.Skip(pos);
+        }
+        _cur_index = pos;
+        return Status::OK();
+    }
+
+    Status next_batch(size_t* n, ColumnBlockView* dst) override {
+        DCHECK(_parsed);
+        if (PREDICT_FALSE(*n == 0 || _cur_index >= _num_elements)) {
+            *n = 0;
+            return Status::OK();
+        }
+
+        size_t to_fetch = std::min(*n, static_cast<size_t>(_num_elements - _cur_index));
+        size_t remaining = to_fetch;
+        uint8_t* data_ptr = dst->data();
+        bool result = false;
+        while (remaining > 0) {
+            result = _rle_decoder.Get(reinterpret_cast<CppType*>(data_ptr));
+            DCHECK(result);
+            remaining--;
+            data_ptr += SIZE_OF_TYPE;
+        }
+
+        _cur_index += to_fetch;
+        *n = to_fetch;
+        return Status::OK();
+    }
+
+    size_t count() const override {
+        return _num_elements;
+    }
+
+    size_t current_index() const override {
+        return _cur_index;
+    }
+
+private:
+    typedef typename TypeTraits<Type>::CppType CppType;
+    enum {
+        SIZE_OF_TYPE = TypeTraits<Type>::size
+    };
+
+    Slice _data;
+    PageDecoderOptions _options;
+    bool _parsed;
+    uint32_t _num_elements;
+    size_t _cur_index;
+    int _bit_width;
+    RleDecoder<CppType> _rle_decoder;
+};
+
+} // namespace segment_v2
+} // namespace doris
diff --git a/be/src/udf/CMakeLists.txt b/be/src/udf/CMakeLists.txt
index 1587d0176f..c8a5b05d67 100755
--- a/be/src/udf/CMakeLists.txt
+++ b/be/src/udf/CMakeLists.txt
@@ -15,43 +15,43 @@
 # specific language governing permissions and limitations
 # under the License.
 
-# where to put generated libraries
+# where to put generated libraries
 set(CMAKE_POSITION_INDEPENDENT_CODE ON)
 
-set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/udf")
-
-# where to put generated binaries
-set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/udf")
-
-# Build this library twice. Once to be linked into the main Doris. This version
-# can have dependencies on our other libs. The second version is shipped as part
-# of the UDF sdk, which can't use other libs.
-add_library(Udf udf.cpp udf_ir.cpp)
-add_library(DorisUdf udf.cpp udf_ir.cpp)
-set_target_properties(DorisUdf PROPERTIES COMPILE_FLAGS "-DDORIS_UDF_SDK_BUILD")
-
-# We can't use the normal link list since we want to pick up libDorisUdf (the external
-# library) rather than the interal libUdf.
-set (UDF_TEST_LINK_LIBS
-  -Wl,--start-group
-  Common
-  GlobalFlags
-  DorisUdf
-  Runtime
-  Util
-  -Wl,--end-group
-# Below are all external dependencies.  They should some after the doris libs.
-  ${Boost_LIBRARIES}
-  glogstatic
-  gflagsstatic
-  -lboost_date_time
-  gtest)
-  
+set(LIBRARY_OUTPUT_PATH "${BUILD_DIR}/src/udf")
+
+# where to put generated binaries
+set(EXECUTABLE_OUTPUT_PATH "${BUILD_DIR}/src/udf")
+
+# Build this library twice. Once to be linked into the main Doris. This version
+# can have dependencies on our other libs. The second version is shipped as part
+# of the UDF sdk, which can't use other libs.
+add_library(Udf udf.cpp udf_ir.cpp)
+add_library(DorisUdf udf.cpp udf_ir.cpp)
+set_target_properties(DorisUdf PROPERTIES COMPILE_FLAGS "-DDORIS_UDF_SDK_BUILD")
+
+# We can't use the normal link list since we want to pick up libDorisUdf (the external
+# library) rather than the internal libUdf.
+set (UDF_TEST_LINK_LIBS
+  -Wl,--start-group
+  Common
+  GlobalFlags
+  DorisUdf
+  Runtime
+  Util
+  -Wl,--end-group
+# Below are all external dependencies.  They should come after the doris libs.
+  ${Boost_LIBRARIES}
+  glogstatic
+  gflagsstatic
+  -lboost_date_time
+  gtest)
+  
 set_target_properties(DorisUdf PROPERTIES PUBLIC_HEADER "udf.h;uda_test_harness.h")
 INSTALL(TARGETS DorisUdf
         ARCHIVE DESTINATION ${OUTPUT_DIR}/udf
         LIBRARY DESTINATION ${OUTPUT_DIR}/udf/lib
         PUBLIC_HEADER DESTINATION ${OUTPUT_DIR}/udf/include)
 
-#ADD_BE_TEST(udf_test)
-#ADD_BE_TEST(uda_test)
+#ADD_BE_TEST(udf_test)
+#ADD_BE_TEST(uda_test)
diff --git a/be/src/util/alignment.h b/be/src/util/alignment.h
index e1cc759d71..43802805fb 100644
--- a/be/src/util/alignment.h
+++ b/be/src/util/alignment.h
@@ -1,26 +1,26 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-//
-// Macros for dealing with memory alignment.
-#pragma once
-
-// Round down 'x' to the nearest 'align' boundary
-#define ALIGN_DOWN(x, align) ((x) & (~(align) + 1))
-
-// Round up 'x' to the nearest 'align' boundary
-#define ALIGN_UP(x, align) (((x) + ((align) - 1)) & (~(align) + 1))
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+//
+// Macros for dealing with memory alignment.
+#pragma once
+
+// Round down 'x' to the nearest 'align' boundary
+#define ALIGN_DOWN(x, align) ((x) & (~(align) + 1))
+
+// Round up 'x' to the nearest 'align' boundary
+#define ALIGN_UP(x, align) (((x) + ((align) - 1)) & (~(align) + 1))
+
diff --git a/be/src/util/bit_stream_utils.h b/be/src/util/bit_stream_utils.h
index 220c8cb4f1..cc463c346f 100644
--- a/be/src/util/bit_stream_utils.h
+++ b/be/src/util/bit_stream_utils.h
@@ -1,149 +1,149 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-#pragma once
-
-#include "gutil/port.h"
-#include "util/bit_util.h"
-#include "util/faststring.h"
-
-using doris::BitUtil;
-
-namespace doris {
-
-// Utility class to write bit/byte streams.  This class can write data to either be
-// bit packed or byte aligned (and a single stream that has a mix of both).
-class BitWriter {
- public:
-  // buffer: buffer to write bits to.
-  explicit BitWriter(faststring *buffer)
-    : buffer_(buffer) {
-    Clear();
-  }
-
-  void Clear() {
-    buffered_values_ = 0;
-    byte_offset_ = 0;
-    bit_offset_ = 0;
-    buffer_->clear();
-  }
-
-  // Returns a pointer to the underlying buffer
-  faststring *buffer() const { return buffer_; }
-
-  // The number of current bytes written, including the current byte (i.e. may include a
-  // fraction of a byte). Includes buffered values.
-  int bytes_written() const { return byte_offset_ + BitUtil::Ceil(bit_offset_, 8); }
-
-  // Writes a value to buffered_values_, flushing to buffer_ if necessary.  This is bit
-  // packed.
-  void PutValue(uint64_t v, int num_bits);
-
-  // Writes v to the next aligned byte using num_bits. If T is larger than num_bits, the
-  // extra high-order bits will be ignored.
-  template<typename T>
-  void PutAligned(T v, int num_bits);
-
-  // Write a Vlq encoded int to the buffer. The value is written byte aligned.
-  // For more details on vlq: en.wikipedia.org/wiki/Variable-length_quantity
-  void PutVlqInt(int32_t v);
-
-  // Get the index to the next aligned byte and advance the underlying buffer by num_bytes.
-  size_t GetByteIndexAndAdvance(int num_bytes) {
-    uint8_t* ptr = GetNextBytePtr(num_bytes);
-    return ptr - buffer_->data();
-  }
-
-  // Get a pointer to the next aligned byte and advance the underlying buffer by num_bytes.
-  uint8_t* GetNextBytePtr(int num_bytes);
-
-  // Flushes all buffered values to the buffer. Call this when done writing to the buffer.
-  // If 'align' is true, buffered_values_ is reset and any future writes will be written
-  // to the next byte boundary.
-  void Flush(bool align = false);
-
- private:
-  // Bit-packed values are initially written to this variable before being memcpy'd to
-  // buffer_. This is faster than writing values byte by byte directly to buffer_.
-  uint64_t buffered_values_;
-
-  faststring *buffer_;
-  int byte_offset_;       // Offset in buffer_
-  int bit_offset_;        // Offset in buffered_values_
-};
-
-// Utility class to read bit/byte stream.  This class can read bits or bytes
-// that are either byte aligned or not.  It also has utilities to read multiple
-// bytes in one read (e.g. encoded int).
-class BitReader {
- public:
-  // 'buffer' is the buffer to read from.  The buffer's length is 'buffer_len'.
-  BitReader(const uint8_t* buffer, int buffer_len);
-
-  BitReader() : buffer_(NULL), max_bytes_(0) {}
-
-  // Gets the next value from the buffer.  Returns true if 'v' could be read or false if
-  // there are not enough bytes left. num_bits must be <= 32.
-  template<typename T>
-  bool GetValue(int num_bits, T* v);
-
-  // Reads a 'num_bytes'-sized value from the buffer and stores it in 'v'. T needs to be a
-  // little-endian native type and big enough to store 'num_bytes'. The value is assumed
-  // to be byte-aligned so the stream will be advanced to the start of the next byte
-  // before 'v' is read. Returns false if there are not enough bytes left.
-  template<typename T>
-  bool GetAligned(int num_bytes, T* v);
-
-  // Reads a vlq encoded int from the stream.  The encoded int must start at the
-  // beginning of a byte. Return false if there were not enough bytes in the buffer.
-  bool GetVlqInt(int32_t* v);
-
-  // Returns the number of bytes left in the stream, not including the current byte (i.e.,
-  // there may be an additional fraction of a byte).
-  int bytes_left() { return max_bytes_ - (byte_offset_ + BitUtil::Ceil(bit_offset_, 8)); }
-
-  // Current position in the stream, by bit.
-  int position() const { return byte_offset_ * 8 + bit_offset_; }
-
-  // Rewind the stream by 'num_bits' bits
-  void Rewind(int num_bits);
-
-  // Seek to a specific bit in the buffer
-  void SeekToBit(uint stream_position);
-
-  // Maximum byte length of a vlq encoded int
-  static const int MAX_VLQ_BYTE_LEN = 5;
-
-  bool is_initialized() const { return buffer_ != NULL; }
-
- private:
-  // Used by SeekToBit() and GetValue() to fetch the
-  // the next word into buffer_.
-  void BufferValues();
-
-  const uint8_t* buffer_;
-  int max_bytes_;
-
-  // Bytes are memcpy'd from buffer_ and values are read from this variable. This is
-  // faster than reading values byte by byte directly from buffer_.
-  uint64_t buffered_values_;
-
-  int byte_offset_;       // Offset in buffer_
-  int bit_offset_;        // Offset in buffered_values_
-};
-
-} // namespace doris
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include "gutil/port.h"
+#include "util/bit_util.h"
+#include "util/faststring.h"
+
+using doris::BitUtil;
+
+namespace doris {
+
+// Utility class to write bit/byte streams.  This class can write data to either be
+// bit packed or byte aligned (and a single stream that has a mix of both).
+class BitWriter {
+ public:
+  // buffer: buffer to write bits to.
+  explicit BitWriter(faststring *buffer)
+    : buffer_(buffer) {
+    Clear();
+  }
+
+  void Clear() {
+    buffered_values_ = 0;
+    byte_offset_ = 0;
+    bit_offset_ = 0;
+    buffer_->clear();
+  }
+
+  // Returns a pointer to the underlying buffer
+  faststring *buffer() const { return buffer_; }
+
+  // The number of current bytes written, including the current byte (i.e. may include a
+  // fraction of a byte). Includes buffered values.
+  int bytes_written() const { return byte_offset_ + BitUtil::Ceil(bit_offset_, 8); }
+
+  // Writes a value to buffered_values_, flushing to buffer_ if necessary.  This is bit
+  // packed.
+  void PutValue(uint64_t v, int num_bits);
+
+  // Writes v to the next aligned byte. NOTE: despite its name, 'num_bits' is a byte count
+  // (the definition memcpy's that many bytes of v); any remaining bytes of T are ignored.
+  template<typename T>
+  void PutAligned(T v, int num_bits);
+
+  // Write a Vlq encoded int to the buffer. The value is written byte aligned.
+  // For more details on vlq: en.wikipedia.org/wiki/Variable-length_quantity
+  void PutVlqInt(int32_t v);
+
+  // Get the index to the next aligned byte and advance the underlying buffer by num_bytes.
+  size_t GetByteIndexAndAdvance(int num_bytes) {
+    uint8_t* ptr = GetNextBytePtr(num_bytes);
+    return ptr - buffer_->data();
+  }
+
+  // Get a pointer to the next aligned byte and advance the underlying buffer by num_bytes.
+  uint8_t* GetNextBytePtr(int num_bytes);
+
+  // Flushes all buffered values to the buffer. Call this when done writing to the buffer.
+  // If 'align' is true, buffered_values_ is reset and any future writes will be written
+  // to the next byte boundary.
+  void Flush(bool align = false);
+
+ private:
+  // Bit-packed values are initially written to this variable before being memcpy'd to
+  // buffer_. This is faster than writing values byte by byte directly to buffer_.
+  uint64_t buffered_values_;
+
+  faststring *buffer_;
+  int byte_offset_;       // Offset in buffer_
+  int bit_offset_;        // Offset in buffered_values_
+};
+
+// Utility class to read bit/byte stream.  This class can read bits or bytes
+// that are either byte aligned or not.  It also has utilities to read multiple
+// bytes in one read (e.g. encoded int).
+class BitReader {
+ public:
+  // 'buffer' is the buffer to read from.  The buffer's length is 'buffer_len'.
+  BitReader(const uint8_t* buffer, int buffer_len);
+
+  BitReader() : buffer_(NULL), max_bytes_(0) {}
+
+  // Gets the next value from the buffer.  Returns true if 'v' could be read or false if
+  // there are not enough bytes left. num_bits must be <= 32.
+  template<typename T>
+  bool GetValue(int num_bits, T* v);
+
+  // Reads a 'num_bytes'-sized value from the buffer and stores it in 'v'. T needs to be a
+  // little-endian native type and big enough to store 'num_bytes'. The value is assumed
+  // to be byte-aligned so the stream will be advanced to the start of the next byte
+  // before 'v' is read. Returns false if there are not enough bytes left.
+  template<typename T>
+  bool GetAligned(int num_bytes, T* v);
+
+  // Reads a vlq encoded int from the stream.  The encoded int must start at the
+  // beginning of a byte. Return false if there were not enough bytes in the buffer.
+  bool GetVlqInt(int32_t* v);
+
+  // Returns the number of bytes left in the stream, not including the current byte (i.e.,
+  // there may be an additional fraction of a byte).
+  int bytes_left() { return max_bytes_ - (byte_offset_ + BitUtil::Ceil(bit_offset_, 8)); }
+
+  // Current position in the stream, by bit.
+  int position() const { return byte_offset_ * 8 + bit_offset_; }
+
+  // Rewind the stream by 'num_bits' bits
+  void Rewind(int num_bits);
+
+  // Seek to a specific bit in the buffer
+  void SeekToBit(uint stream_position);
+
+  // Maximum byte length of a vlq encoded int
+  static const int MAX_VLQ_BYTE_LEN = 5;
+
+  bool is_initialized() const { return buffer_ != NULL; }
+
+ private:
+  // Used by SeekToBit() and GetValue() to fetch
+  // the next word into buffered_values_.
+  void BufferValues();
+
+  const uint8_t* buffer_;
+  int max_bytes_;
+
+  // Bytes are memcpy'd from buffer_ and values are read from this variable. This is
+  // faster than reading values byte by byte directly from buffer_.
+  uint64_t buffered_values_;
+
+  int byte_offset_;       // Offset in buffer_
+  int bit_offset_;        // Offset in buffered_values_
+};
+
+} // namespace doris
+
diff --git a/be/src/util/bit_stream_utils.inline.h b/be/src/util/bit_stream_utils.inline.h
index 1bbabd789b..deac875ce2 100644
--- a/be/src/util/bit_stream_utils.inline.h
+++ b/be/src/util/bit_stream_utils.inline.h
@@ -1,213 +1,213 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-#ifndef IMPALA_UTIL_BIT_STREAM_UTILS_INLINE_H
-#define IMPALA_UTIL_BIT_STREAM_UTILS_INLINE_H
-
-#include <algorithm>
-
-#include "glog/logging.h"
-#include "util/bit_stream_utils.h"
-#include "util/alignment.h"
-
-using doris::BitUtil;
-
-namespace doris {
-
-inline void BitWriter::PutValue(uint64_t v, int num_bits) {
-  DCHECK_LE(num_bits, 64);
-  // Truncate the higher-order bits. This is necessary to
-  // support signed values.
-  v &= ~0ULL >> (64 - num_bits);
-
-
-  buffered_values_ |= v << bit_offset_;
-  bit_offset_ += num_bits;
-
-  if (PREDICT_FALSE(bit_offset_ >= 64)) {
-    // Flush buffered_values_ and write out bits of v that did not fit
-    buffer_->reserve(ALIGN_UP(byte_offset_ + 8, 8));
-    buffer_->resize(byte_offset_ + 8);
-    DCHECK_LE(byte_offset_ + 8, buffer_->capacity());
-    memcpy(buffer_->data() + byte_offset_, &buffered_values_, 8);
-    buffered_values_ = 0;
-    byte_offset_ += 8;
-    bit_offset_ -= 64;
-    buffered_values_ = BitUtil::ShiftRightZeroOnOverflow(v, (num_bits - bit_offset_));
-  }
-  DCHECK_LT(bit_offset_, 64);
-}
-
-inline void BitWriter::Flush(bool align) {
-  int num_bytes = BitUtil::Ceil(bit_offset_, 8);
-  buffer_->reserve(ALIGN_UP(byte_offset_ + num_bytes, 8));
-  buffer_->resize(byte_offset_ + num_bytes);
-  DCHECK_LE(byte_offset_ + num_bytes, buffer_->capacity());
-  memcpy(buffer_->data() + byte_offset_, &buffered_values_, num_bytes);
-
-  if (align) {
-    buffered_values_ = 0;
-    byte_offset_ += num_bytes;
-    bit_offset_ = 0;
-  }
-}
-
-inline uint8_t* BitWriter::GetNextBytePtr(int num_bytes) {
-  Flush(/* align */ true);
-  buffer_->reserve(ALIGN_UP(byte_offset_ + num_bytes, 8));
-  buffer_->resize(byte_offset_ + num_bytes);
-  uint8_t* ptr = buffer_->data() + byte_offset_;
-  byte_offset_ += num_bytes;
-  DCHECK_LE(byte_offset_, buffer_->capacity());
-  return ptr;
-}
-
-template<typename T>
-inline void BitWriter::PutAligned(T val, int num_bytes) {
-  DCHECK_LE(num_bytes, sizeof(T));
-  uint8_t* ptr = GetNextBytePtr(num_bytes);
-  memcpy(ptr, &val, num_bytes);
-}
-
-inline void BitWriter::PutVlqInt(int32_t v) {
-  while ((v & 0xFFFFFF80) != 0L) {
-    PutAligned<uint8_t>((v & 0x7F) | 0x80, 1);
-    v >>= 7;
-  }
-  PutAligned<uint8_t>(v & 0x7F, 1);
-}
-
-
-inline BitReader::BitReader(const uint8_t* buffer, int buffer_len)
-  : buffer_(buffer),
-    max_bytes_(buffer_len),
-    buffered_values_(0),
-    byte_offset_(0),
-    bit_offset_(0) {
-  int num_bytes = std::min(8, max_bytes_);
-  memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes);
-}
-
-inline void BitReader::BufferValues() {
-  int bytes_remaining = max_bytes_ - byte_offset_;
-  if (PREDICT_TRUE(bytes_remaining >= 8)) {
-    memcpy(&buffered_values_, buffer_ + byte_offset_, 8);
-  } else {
-    memcpy(&buffered_values_, buffer_ + byte_offset_, bytes_remaining);
-  }
-}
-
-template<typename T>
-inline bool BitReader::GetValue(int num_bits, T* v) {
-  DCHECK_LE(num_bits, 64);
-  DCHECK_LE(num_bits, sizeof(T) * 8);
-
-  if (PREDICT_FALSE(byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8)) return false;
-
-  *v = BitUtil::TrailingBits(buffered_values_, bit_offset_ + num_bits) >> bit_offset_;
-
-  bit_offset_ += num_bits;
-  if (bit_offset_ >= 64) {
-    byte_offset_ += 8;
-    bit_offset_ -= 64;
-    BufferValues();
-    // Read bits of v that crossed into new buffered_values_
-    *v |= BitUtil::ShiftLeftZeroOnOverflow(
-        BitUtil::TrailingBits(buffered_values_, bit_offset_),
-        (num_bits - bit_offset_));
-  }
-  DCHECK_LE(bit_offset_, 64);
-  return true;
-}
-
-inline void BitReader::Rewind(int num_bits) {
-  bit_offset_ -= num_bits;
-  if (bit_offset_ >= 0) {
-    return;
-  }
-  while (bit_offset_ < 0) {
-    int seek_back = std::min(byte_offset_, 8);
-    byte_offset_ -= seek_back;
-    bit_offset_ += seek_back * 8;
-  }
-  // This should only be executed *if* rewinding by 'num_bits'
-  // make the existing buffered_values_ invalid
-  DCHECK_GE(byte_offset_, 0); // Check for underflow
-  memcpy(&buffered_values_, buffer_ + byte_offset_, 8);
-}
-
-inline void BitReader::SeekToBit(uint stream_position) {
-  DCHECK_LE(stream_position, max_bytes_ * 8);
-
-  int delta = static_cast<int>(stream_position) - position();
-  if (delta == 0) {
-    return;
-  } else if (delta < 0) {
-    Rewind(position() - stream_position);
-  } else {
-    bit_offset_ += delta;
-    while (bit_offset_ >= 64) {
-      byte_offset_ +=8;
-      bit_offset_ -= 64;
-      if (bit_offset_ < 64) {
-        // This should only be executed if seeking to
-        // 'stream_position' makes the existing buffered_values_
-        // invalid.
-        BufferValues();
-      }
-    }
-  }
-}
-
-template<typename T>
-inline bool BitReader::GetAligned(int num_bytes, T* v) {
-  DCHECK_LE(num_bytes, sizeof(T));
-  int bytes_read = BitUtil::Ceil(bit_offset_, 8);
-  if (PREDICT_FALSE(byte_offset_ + bytes_read + num_bytes > max_bytes_)) return false;
-
-  // Advance byte_offset to next unread byte and read num_bytes
-  byte_offset_ += bytes_read;
-  memcpy(v, buffer_ + byte_offset_, num_bytes);
-  byte_offset_ += num_bytes;
-
-  // Reset buffered_values_
-  bit_offset_ = 0;
-  int bytes_remaining = max_bytes_ - byte_offset_;
-  if (PREDICT_TRUE(bytes_remaining >= 8)) {
-    memcpy(&buffered_values_, buffer_ + byte_offset_, 8);
-  } else {
-    memcpy(&buffered_values_, buffer_ + byte_offset_, bytes_remaining);
-  }
-  return true;
-}
-
-inline bool BitReader::GetVlqInt(int32_t* v) {
-  *v = 0;
-  int shift = 0;
-  int num_bytes = 0;
-  uint8_t byte = 0;
-  do {
-    if (!GetAligned<uint8_t>(1, &byte)) return false;
-    *v |= (byte & 0x7F) << shift;
-    shift += 7;
-    DCHECK_LE(++num_bytes, MAX_VLQ_BYTE_LEN);
-  } while ((byte & 0x80) != 0);
-  return true;
-}
-
-} // namespace doris
-
-#endif
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#ifndef IMPALA_UTIL_BIT_STREAM_UTILS_INLINE_H
+#define IMPALA_UTIL_BIT_STREAM_UTILS_INLINE_H
+
+#include <algorithm>
+
+#include "glog/logging.h"
+#include "util/bit_stream_utils.h"
+#include "util/alignment.h"
+
+using doris::BitUtil;
+
+namespace doris {
+
+inline void BitWriter::PutValue(uint64_t v, int num_bits) {
+  DCHECK_LE(num_bits, 64);
+  // Writing zero bits is a no-op; returning here also keeps the mask below from shifting by 64.
+  if (PREDICT_FALSE(num_bits == 0)) return;
+  // Truncate the higher-order bits. This is necessary to support signed values.
+  v &= ~0ULL >> (64 - num_bits);
+
+  buffered_values_ |= v << bit_offset_;
+  bit_offset_ += num_bits;
+
+  if (PREDICT_FALSE(bit_offset_ >= 64)) {
+    // Flush buffered_values_ and write out bits of v that did not fit
+    buffer_->reserve(ALIGN_UP(byte_offset_ + 8, 8));
+    buffer_->resize(byte_offset_ + 8);
+    DCHECK_LE(byte_offset_ + 8, buffer_->capacity());
+    memcpy(buffer_->data() + byte_offset_, &buffered_values_, 8);
+    buffered_values_ = 0;
+    byte_offset_ += 8;
+    bit_offset_ -= 64;  // bit_offset_ is now the number of bits of v that spilled over
+    buffered_values_ = BitUtil::ShiftRightZeroOnOverflow(v, (num_bits - bit_offset_));
+  }
+  DCHECK_LT(bit_offset_, 64);
+}
+
+inline void BitWriter::Flush(bool align) {
+  int num_bytes = BitUtil::Ceil(bit_offset_, 8);  // bytes for the pending bits (presumably a ceiling division; BitUtil is defined elsewhere)
+  buffer_->reserve(ALIGN_UP(byte_offset_ + num_bytes, 8));
+  buffer_->resize(byte_offset_ + num_bytes);
+  DCHECK_LE(byte_offset_ + num_bytes, buffer_->capacity());
+  memcpy(buffer_->data() + byte_offset_, &buffered_values_, num_bytes);  // copy the pending bits out at the current byte position
+
+  if (align) {  // only when aligning: advance past the copied bytes and start a fresh word
+    buffered_values_ = 0;
+    byte_offset_ += num_bytes;
+    bit_offset_ = 0;
+  }
+}
+
+inline uint8_t* BitWriter::GetNextBytePtr(int num_bytes) {
+  Flush(/* align */ true);  // write out pending bits so the reserved region starts on a byte boundary
+  buffer_->reserve(ALIGN_UP(byte_offset_ + num_bytes, 8));
+  buffer_->resize(byte_offset_ + num_bytes);  // extend the buffer to cover the reserved bytes
+  uint8_t* ptr = buffer_->data() + byte_offset_;  // start of the reserved region; this is what gets returned
+  byte_offset_ += num_bytes;  // later writes continue after the reserved region
+  DCHECK_LE(byte_offset_, buffer_->capacity());
+  return ptr;
+}
+
+template<typename T>
+inline void BitWriter::PutAligned(T val, int num_bytes) {
+  DCHECK_LE(num_bytes, sizeof(T));  // cannot copy more bytes than 'val' holds
+  uint8_t* ptr = GetNextBytePtr(num_bytes);  // flushes pending bits and reserves 'num_bytes' bytes
+  memcpy(ptr, &val, num_bytes);  // copies the first 'num_bytes' bytes of val's in-memory representation
+}
+
+inline void BitWriter::PutVlqInt(int32_t v) {
+  uint32_t bits = static_cast<uint32_t>(v);  // unsigned copy: >>= 7 on a negative int keeps the sign bits set, so the old loop never ended
+  for (; (bits & 0xFFFFFF80) != 0; bits >>= 7) {
+    PutAligned<uint8_t>((bits & 0x7F) | 0x80, 1);  // low 7 bits plus the continuation flag
+  }
+  PutAligned<uint8_t>(bits & 0x7F, 1);  // final group, continuation flag clear
+}
+
+
+inline BitReader::BitReader(const uint8_t* buffer, int buffer_len)
+  : buffer_(buffer),
+    max_bytes_(buffer_len),
+    buffered_values_(0),
+    byte_offset_(0),
+    bit_offset_(0) {
+  int num_bytes = std::min(8, max_bytes_);  // never read past a buffer shorter than 8 bytes
+  memcpy(&buffered_values_, buffer_ + byte_offset_, num_bytes);  // preload up to the first 8 bytes into the cached word
+}
+
+inline void BitReader::BufferValues() {
+  int bytes_remaining = max_bytes_ - byte_offset_;  // bytes from the current position to the end of the buffer
+  if (PREDICT_TRUE(bytes_remaining >= 8)) {
+    memcpy(&buffered_values_, buffer_ + byte_offset_, 8);  // common case: refill the whole 8-byte word
+  } else {
+    memcpy(&buffered_values_, buffer_ + byte_offset_, bytes_remaining);  // tail: copy only what is left; the remaining bytes of the word keep their old contents
+  }
+}
+
+template<typename T>
+inline bool BitReader::GetValue(int num_bits, T* v) {
+  DCHECK_LE(num_bits, 64);
+  DCHECK_LE(num_bits, sizeof(T) * 8);
+
+  if (PREDICT_FALSE(byte_offset_ * 8 + bit_offset_ + num_bits > max_bytes_ * 8)) return false;  // not enough bits left; state is unchanged
+
+  *v = BitUtil::TrailingBits(buffered_values_, bit_offset_ + num_bits) >> bit_offset_;  // presumably keeps the low (bit_offset_ + num_bits) bits; BitUtil is defined elsewhere
+
+  bit_offset_ += num_bits;
+  if (bit_offset_ >= 64) {  // the read reached or crossed the end of the cached 64-bit word
+    byte_offset_ += 8;
+    bit_offset_ -= 64;
+    BufferValues();
+    // Read bits of v that crossed into new buffered_values_
+    *v |= BitUtil::ShiftLeftZeroOnOverflow(
+        BitUtil::TrailingBits(buffered_values_, bit_offset_),
+        (num_bits - bit_offset_));
+  }
+  DCHECK_LE(bit_offset_, 64);
+  return true;
+}
+
+inline void BitReader::Rewind(int num_bits) {
+  bit_offset_ -= num_bits;
+  if (bit_offset_ >= 0) return;  // target is still inside the cached word; nothing to reload
+  // Step the byte position back, at most 8 bytes at a time, until bit_offset_ is non-negative.
+  while (bit_offset_ < 0) {
+    int seek_back = std::min(byte_offset_, 8);
+    DCHECK_GT(seek_back, 0);  // rewinding past the start of the buffer is a caller error
+    if (seek_back <= 0) { bit_offset_ = 0; break; }  // release builds: clamp to the start instead of spinning forever
+    byte_offset_ -= seek_back;
+    bit_offset_ += seek_back * 8;
+  }
+  // buffered_values_ is stale now. Refill through BufferValues(), which copies only the bytes
+  // that remain; an unconditional 8-byte memcpy could read past the end of a short buffer.
+  BufferValues();
+}
+
+inline void BitReader::SeekToBit(uint stream_position) {
+  DCHECK_LE(stream_position, max_bytes_ * 8);
+
+  int delta = static_cast<int>(stream_position) - position();  // signed distance in bits; position() is defined elsewhere (presumably the current bit position)
+  if (delta == 0) {
+    return;
+  } else if (delta < 0) {
+    Rewind(position() - stream_position);  // backward seek is delegated to Rewind()
+  } else {
+    bit_offset_ += delta;
+    while (bit_offset_ >= 64) {  // forward seek: skip whole 8-byte words
+      byte_offset_ +=8;
+      bit_offset_ -= 64;
+      if (bit_offset_ < 64) {
+        // This should only be executed if seeking to
+        // 'stream_position' makes the existing buffered_values_
+        // invalid.
+        BufferValues();
+      }
+    }
+  }
+}
+
+// Reads 'num_bytes' bytes, starting at the next byte boundary, into '*v'.
+// Only 'num_bytes' bytes of '*v' are written. Returns false, without changing
+// the reader, if fewer than 'num_bytes' bytes remain after that boundary.
+template<typename T>
+inline bool BitReader::GetAligned(int num_bytes, T* v) {
+  DCHECK_LE(num_bytes, sizeof(T));
+  int bytes_read = BitUtil::Ceil(bit_offset_, 8);
+  if (PREDICT_FALSE(byte_offset_ + bytes_read + num_bytes > max_bytes_)) return false;
+
+  // Advance byte_offset to next unread byte and read num_bytes
+  byte_offset_ += bytes_read;
+  memcpy(v, buffer_ + byte_offset_, num_bytes);
+  byte_offset_ += num_bytes;
+
+  // Restart bit-level reading at the new byte position. BufferValues() does the
+  // same bounded refill (8 bytes, or fewer near the end of the buffer) that was
+  // previously written out by hand here.
+  bit_offset_ = 0;
+  BufferValues();
+  return true;
+}
+
+inline bool BitReader::GetVlqInt(int32_t* v) {
+  *v = 0;
+  uint32_t result = 0;  // unsigned accumulator, so a group shifted into bit 31 is well defined
+  int num_bytes = 0;
+  uint8_t byte = 0;
+  do {
+    if (++num_bytes > MAX_VLQ_BYTE_LEN) return false;  // enforced in every build; the old DCHECK_LE(++num_bytes, ...) vanished under NDEBUG
+    if (!GetAligned<uint8_t>(1, &byte)) return false;
+    result |= static_cast<uint32_t>(byte & 0x7F) << (7 * (num_bytes - 1));  // each byte carries 7 payload bits, lowest group first
+    *v = static_cast<int32_t>(result);
+  } while ((byte & 0x80) != 0);  // high bit set means another byte follows
+  return true;
+}
+
+} // namespace doris
+
+#endif
diff --git a/be/src/util/faststring.cc b/be/src/util/faststring.cc
index 30febe9705..49f868704c 100644
--- a/be/src/util/faststring.cc
+++ b/be/src/util/faststring.cc
@@ -1,72 +1,72 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include "util/faststring.h"
-
-#include <glog/logging.h>
-#include <memory>
-
-namespace doris {
-
-void faststring::GrowByAtLeast(size_t count) {
-  // Not enough space, need to reserve more.
-  // Don't reserve exactly enough space for the new string -- that makes it
-  // too easy to write perf bugs where you get O(n^2) append.
-  // Instead, alwayhs expand by at least 50%.
-
-  size_t to_reserve = len_ + count;
-  if (len_ + count < len_ * 3 / 2) {
-    to_reserve = len_ *  3 / 2;
-  }
-  GrowArray(to_reserve);
-}
-
-void faststring::GrowArray(size_t newcapacity) {
-  DCHECK_GE(newcapacity, capacity_);
-  std::unique_ptr<uint8_t[]> newdata(new uint8_t[newcapacity]);
-  if (len_ > 0) {
-    memcpy(&newdata[0], &data_[0], len_);
-  }
-  capacity_ = newcapacity;
-  if (data_ != initial_data_) {
-    delete[] data_;
-  } else {
-    ASAN_POISON_MEMORY_REGION(initial_data_, arraysize(initial_data_));
-  }
-
-  data_ = newdata.release();
-  ASAN_POISON_MEMORY_REGION(data_ + len_, capacity_ - len_);
-}
-
-void faststring::ShrinkToFitInternal() {
-  DCHECK_NE(data_, initial_data_);
-  if (len_ <= kInitialCapacity) {
-    ASAN_UNPOISON_MEMORY_REGION(initial_data_, len_);
-    memcpy(initial_data_, &data_[0], len_);
-    delete[] data_;
-    data_ = initial_data_;
-    capacity_ = kInitialCapacity;
-  } else {
-    std::unique_ptr<uint8_t[]> newdata(new uint8_t[len_]);
-    memcpy(&newdata[0], &data_[0], len_);
-    delete[] data_;
-    data_ = newdata.release();
-    capacity_ = len_;
-  }
-}
-
-} // namespace doris
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include "util/faststring.h"
+
+#include <glog/logging.h>
+#include <memory>
+
+namespace doris {
+
+void faststring::GrowByAtLeast(size_t count) {
+  // Not enough space, need to reserve more.
+  // Don't reserve exactly enough space for the new string -- that makes it
+  // too easy to write perf bugs where you get O(n^2) append.
+  // Instead, always expand by at least 50%.
+
+  size_t to_reserve = len_ + count;  // minimum needed to hold 'count' more bytes
+  if (len_ + count < len_ * 3 / 2) {
+    to_reserve = len_ *  3 / 2;  // small request: grow to 1.5x the current length instead
+  }
+  GrowArray(to_reserve);
+}
+
+void faststring::GrowArray(size_t newcapacity) {
+  DCHECK_GE(newcapacity, capacity_);
+  std::unique_ptr<uint8_t[]> newdata(new uint8_t[newcapacity]);  // held by unique_ptr until it is installed as data_ below
+  if (len_ > 0) {
+    memcpy(&newdata[0], &data_[0], len_);  // carry over the valid bytes only
+  }
+  capacity_ = newcapacity;
+  if (data_ != initial_data_) {
+    delete[] data_;  // old storage was heap-allocated
+  } else {
+    ASAN_POISON_MEMORY_REGION(initial_data_, arraysize(initial_data_));  // inline array is no longer in use
+  }
+
+  data_ = newdata.release();
+  ASAN_POISON_MEMORY_REGION(data_ + len_, capacity_ - len_);  // bytes past len_ are reserved but not yet valid
+}
+
+void faststring::ShrinkToFitInternal() {
+  DCHECK_NE(data_, initial_data_);  // expects heap storage; shrink_to_fit() returns early for the inline array
+  if (len_ <= kInitialCapacity) {  // contents fit in the inline array: copy them back and free the heap block
+    ASAN_UNPOISON_MEMORY_REGION(initial_data_, len_);
+    memcpy(initial_data_, &data_[0], len_);
+    delete[] data_;
+    data_ = initial_data_;
+    capacity_ = kInitialCapacity;
+  } else {  // otherwise move to a heap block of exactly len_ bytes
+    std::unique_ptr<uint8_t[]> newdata(new uint8_t[len_]);
+    memcpy(&newdata[0], &data_[0], len_);
+    delete[] data_;
+    data_ = newdata.release();
+    capacity_ = len_;
+  }
+}
+
+} // namespace doris
diff --git a/be/src/util/faststring.h b/be/src/util/faststring.h
index f3892f4170..98dfbb7828 100644
--- a/be/src/util/faststring.h
+++ b/be/src/util/faststring.h
@@ -1,257 +1,257 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#pragma once
-
-#include <cstdint>
-#include <cstring>
-#include <string>
-
-#include "gutil/dynamic_annotations.h"
-#include "gutil/macros.h"
-#include "gutil/port.h"
-#include "gutil/strings/fastmem.h"
-
-namespace doris {
-
-// A faststring is similar to a std::string, except that it is faster for many
-// common use cases (in particular, resize() will fill with uninitialized data
-// instead of memsetting to \0)
-class faststring {
- public:
-  enum {
-    kInitialCapacity = 32
-  };
-
-  faststring() :
-    data_(initial_data_),
-    len_(0),
-    capacity_(kInitialCapacity) {
-  }
-
-  // Construct a string with the given capacity, in bytes.
-  explicit faststring(size_t capacity)
-    : data_(initial_data_),
-      len_(0),
-      capacity_(kInitialCapacity) {
-    if (capacity > capacity_) {
-      data_ = new uint8_t[capacity];
-      capacity_ = capacity;
-    }
-    ASAN_POISON_MEMORY_REGION(data_, capacity_);
-  }
-
-  ~faststring() {
-    ASAN_UNPOISON_MEMORY_REGION(initial_data_, arraysize(initial_data_));
-    if (data_ != initial_data_) {
-      delete[] data_;
-    }
-  }
-
-  // Reset the valid length of the string to 0.
-  //
-  // This does not free up any memory. The capacity of the string remains unchanged.
-  void clear() {
-    resize(0);
-    ASAN_POISON_MEMORY_REGION(data_, capacity_);
-  }
-
-  // Resize the string to the given length.
-  // If the new length is larger than the old length, the capacity is expanded as necessary.
-  //
-  // NOTE: in contrast to std::string's implementation, Any newly "exposed" bytes of data are
-  // not cleared.
-  void resize(size_t newsize) {
-    if (newsize > capacity_) {
-      reserve(newsize);
-    }
-    len_ = newsize;
-    ASAN_POISON_MEMORY_REGION(data_ + len_, capacity_ - len_);
-    ASAN_UNPOISON_MEMORY_REGION(data_, len_);
-  }
-
-  // Releases the underlying array; after this, the buffer is left empty.
-  //
-  // NOTE: the data pointer returned by release() is not necessarily the pointer
-  uint8_t *release() {
-    uint8_t *ret = data_;
-    if (ret == initial_data_) {
-      ret = new uint8_t[len_];
-      memcpy(ret, data_, len_);
-    }
-    len_ = 0;
-    capacity_ = kInitialCapacity;
-    data_ = initial_data_;
-    ASAN_POISON_MEMORY_REGION(data_, capacity_);
-    return ret;
-  }
-
-  // Reserve space for the given total amount of data. If the current capacity is already
-  // larger than the newly requested capacity, this is a no-op (i.e. it does not ever free memory).
-  //
-  // NOTE: even though the new capacity is reserved, it is illegal to begin writing into that memory
-  // directly using pointers. If ASAN is enabled, this is ensured using manual memory poisoning.
-  void reserve(size_t newcapacity) {
-    if (PREDICT_TRUE(newcapacity <= capacity_)) return;
-    GrowArray(newcapacity);
-  }
-
-  // Append the given data to the string, resizing capacity as necessary.
-  void append(const void *src_v, size_t count) {
-    const uint8_t *src = reinterpret_cast<const uint8_t *>(src_v);
-    EnsureRoomForAppend(count);
-    ASAN_UNPOISON_MEMORY_REGION(data_ + len_, count);
-
-    // appending short values is common enough that this
-    // actually helps, according to benchmarks. In theory
-    // memcpy_inlined should already be just as good, but this
-    // was ~20% faster for reading a large prefix-coded string file
-    // where each string was only a few chars different
-    if (count <= 4) {
-      uint8_t *p = &data_[len_];
-      for (int i = 0; i < count; i++) {
-        *p++ = *src++;
-      }
-    } else {
-      strings::memcpy_inlined(&data_[len_], src, count);
-    }
-    len_ += count;
-  }
-
-  // Append the given string to this string.
-  void append(const std::string &str) {
-    append(str.data(), str.size());
-  }
-
-  // Append the given character to this string.
-  void push_back(const char byte) {
-    EnsureRoomForAppend(1);
-    ASAN_UNPOISON_MEMORY_REGION(data_ + len_, 1);
-    data_[len_] = byte;
-    len_++;
-  }
-
-  // Return the valid length of this string.
-  size_t length() const {
-    return len_;
-  }
-
-  // Return the valid length of this string (identical to length())
-  size_t size() const {
-    return len_;
-  }
-
-  // Return the allocated capacity of this string.
-  size_t capacity() const {
-    return capacity_;
-  }
-
-  // Return a pointer to the data in this string. Note that this pointer
-  // may be invalidated by any later non-const operation.
-  const uint8_t *data() const {
-    return &data_[0];
-  }
-
-  // Return a pointer to the data in this string. Note that this pointer
-  // may be invalidated by any later non-const operation.
-  uint8_t *data() {
-    return &data_[0];
-  }
-
-  // Return the given element of this string. Note that this does not perform
-  // any bounds checking.
-  const uint8_t &at(size_t i) const {
-    return data_[i];
-  }
-
-  // Return the given element of this string. Note that this does not perform
-  // any bounds checking.
-  const uint8_t &operator[](size_t i) const {
-    return data_[i];
-  }
-
-  // Return the given element of this string. Note that this does not perform
-  // any bounds checking.
-  uint8_t &operator[](size_t i) {
-    return data_[i];
-  }
-
-  // Reset the contents of this string by copying 'len' bytes from 'src'.
-  void assign_copy(const uint8_t *src, size_t len) {
-    // Reset length so that the first resize doesn't need to copy the current
-    // contents of the array.
-    len_ = 0;
-    resize(len);
-    memcpy(data(), src, len);
-  }
-
-  // Reset the contents of this string by copying from the given std::string.
-  void assign_copy(const std::string &str) {
-    assign_copy(reinterpret_cast<const uint8_t *>(str.c_str()),
-                str.size());
-  }
-
-  // Reallocates the internal storage to fit only the current data.
-  //
-  // This may revert to using internal storage if the current length is shorter than
-  // kInitialCapacity. Note that, in that case, after this call, capacity() will return
-  // a capacity larger than the data length.
-  //
-  // Any pointers within this instance are invalidated.
-  void shrink_to_fit() {
-    if (data_ == initial_data_ || capacity_ == len_) return;
-    ShrinkToFitInternal();
-  }
-
-  // Return a copy of this string as a std::string.
-  std::string ToString() const {
-    return std::string(reinterpret_cast<const char *>(data()),
-                       len_);
-  }
-
- private:
-  DISALLOW_COPY_AND_ASSIGN(faststring);
-
-  // If necessary, expand the buffer to fit at least 'count' more bytes.
-  // If the array has to be grown, it is grown by at least 50%.
-  void EnsureRoomForAppend(size_t count) {
-    if (PREDICT_TRUE(len_ + count <= capacity_)) {
-      return;
-    }
-
-    // Call the non-inline slow path - this reduces the number of instructions
-    // on the hot path.
-    GrowByAtLeast(count);
-  }
-
-  // The slow path of MakeRoomFor. Grows the buffer by either
-  // 'count' bytes, or 50%, whichever is more.
-  void GrowByAtLeast(size_t count);
-
-  // Grow the array to the given capacity, which must be more than
-  // the current capacity.
-  void GrowArray(size_t newcapacity);
-
-  void ShrinkToFitInternal();
-
-  uint8_t* data_;
-  uint8_t initial_data_[kInitialCapacity];
-  size_t len_;
-  size_t capacity_;
-};
-
-} // namespace doris
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#pragma once
+
+#include <cstdint>
+#include <cstring>
+#include <string>
+
+#include "gutil/dynamic_annotations.h"
+#include "gutil/macros.h"
+#include "gutil/port.h"
+#include "gutil/strings/fastmem.h"
+
+namespace doris {
+
+// A faststring is similar to a std::string, except that it is faster for many
+// common use cases (in particular, resize() will fill with uninitialized data
+// instead of memsetting to \0)
+class faststring {
+ public:
+  enum {
+    kInitialCapacity = 32
+  };
+
+  faststring() :
+    data_(initial_data_),
+    len_(0),
+    capacity_(kInitialCapacity) {
+  }
+
+  // Construct a string with the given capacity, in bytes.
+  explicit faststring(size_t capacity)
+    : data_(initial_data_),
+      len_(0),
+      capacity_(kInitialCapacity) {
+    if (capacity > capacity_) {
+      data_ = new uint8_t[capacity];
+      capacity_ = capacity;
+    }
+    ASAN_POISON_MEMORY_REGION(data_, capacity_);
+  }
+
+  ~faststring() {
+    ASAN_UNPOISON_MEMORY_REGION(initial_data_, arraysize(initial_data_));
+    if (data_ != initial_data_) {
+      delete[] data_;
+    }
+  }
+
+  // Reset the valid length of the string to 0.
+  //
+  // This does not free up any memory. The capacity of the string remains unchanged.
+  void clear() {
+    resize(0);
+    ASAN_POISON_MEMORY_REGION(data_, capacity_);
+  }
+
+  // Resize the string to the given length.
+  // If the new length is larger than the old length, the capacity is expanded as necessary.
+  //
+  // NOTE: in contrast to std::string's implementation, Any newly "exposed" bytes of data are
+  // not cleared.
+  void resize(size_t newsize) {
+    if (newsize > capacity_) {
+      reserve(newsize);
+    }
+    len_ = newsize;
+    ASAN_POISON_MEMORY_REGION(data_ + len_, capacity_ - len_);
+    ASAN_UNPOISON_MEMORY_REGION(data_, len_);
+  }
+
+  // Releases the underlying array; after this, the buffer is left empty.
+  // The returned array was allocated with new[] and is no longer freed by this object.
+  // NOTE: the returned pointer is not necessarily the one data() returned; inline contents are copied to a new array.
+  uint8_t *release() {
+    uint8_t *ret = data_;
+    if (ret == initial_data_) {
+      ret = new uint8_t[len_];
+      memcpy(ret, data_, len_);
+    }
+    len_ = 0;
+    capacity_ = kInitialCapacity;
+    data_ = initial_data_;
+    ASAN_POISON_MEMORY_REGION(data_, capacity_);
+    return ret;
+  }
+
+  // Reserve space for the given total amount of data. If the current capacity is already
+  // larger than the newly requested capacity, this is a no-op (i.e. it does not ever free memory).
+  //
+  // NOTE: even though the new capacity is reserved, it is illegal to begin writing into that memory
+  // directly using pointers. If ASAN is enabled, this is ensured using manual memory poisoning.
+  void reserve(size_t newcapacity) {
+    if (PREDICT_TRUE(newcapacity <= capacity_)) return;
+    GrowArray(newcapacity);
+  }
+
+  // Append the given data to the string, resizing capacity as necessary.
+  void append(const void *src_v, size_t count) {
+    const uint8_t *src = reinterpret_cast<const uint8_t *>(src_v);
+    EnsureRoomForAppend(count);
+    ASAN_UNPOISON_MEMORY_REGION(data_ + len_, count);
+
+    // appending short values is common enough that this
+    // actually helps, according to benchmarks. In theory
+    // memcpy_inlined should already be just as good, but this
+    // was ~20% faster for reading a large prefix-coded string file
+    // where each string was only a few chars different
+    if (count <= 4) {
+      uint8_t *p = &data_[len_];
+      for (size_t i = 0; i < count; i++) {
+        *p++ = *src++;
+      }
+    } else {
+      strings::memcpy_inlined(&data_[len_], src, count);
+    }
+    len_ += count;
+  }
+
+  // Append the given string to this string.
+  void append(const std::string &str) {
+    append(str.data(), str.size());
+  }
+
+  // Append the given character to this string.
+  void push_back(const char byte) {
+    EnsureRoomForAppend(1);
+    ASAN_UNPOISON_MEMORY_REGION(data_ + len_, 1);
+    data_[len_] = byte;
+    len_++;
+  }
+
+  // Return the valid length of this string.
+  size_t length() const {
+    return len_;
+  }
+
+  // Return the valid length of this string (identical to length())
+  size_t size() const {
+    return len_;
+  }
+
+  // Return the allocated capacity of this string.
+  size_t capacity() const {
+    return capacity_;
+  }
+
+  // Return a pointer to the data in this string. Note that this pointer
+  // may be invalidated by any later non-const operation.
+  const uint8_t *data() const {
+    return &data_[0];
+  }
+
+  // Return a pointer to the data in this string. Note that this pointer
+  // may be invalidated by any later non-const operation.
+  uint8_t *data() {
+    return &data_[0];
+  }
+
+  // Return the given element of this string. Note that this does not perform
+  // any bounds checking.
+  const uint8_t &at(size_t i) const {
+    return data_[i];
+  }
+
+  // Return the given element of this string. Note that this does not perform
+  // any bounds checking.
+  const uint8_t &operator[](size_t i) const {
+    return data_[i];
+  }
+
+  // Return the given element of this string. Note that this does not perform
+  // any bounds checking.
+  uint8_t &operator[](size_t i) {
+    return data_[i];
+  }
+
+  // Reset the contents of this string by copying 'len' bytes from 'src'.
+  void assign_copy(const uint8_t *src, size_t len) {
+    // Reset length so that the first resize doesn't need to copy the current
+    // contents of the array.
+    len_ = 0;
+    resize(len);
+    memcpy(data(), src, len);
+  }
+
+  // Reset the contents of this string by copying from the given std::string.
+  void assign_copy(const std::string &str) {
+    assign_copy(reinterpret_cast<const uint8_t *>(str.c_str()),
+                str.size());
+  }
+
+  // Reallocates the internal storage to fit only the current data.
+  //
+  // This may revert to using internal storage if the current length is shorter than
+  // kInitialCapacity. Note that, in that case, after this call, capacity() will return
+  // a capacity larger than the data length.
+  //
+  // Any pointers within this instance are invalidated.
+  void shrink_to_fit() {
+    if (data_ == initial_data_ || capacity_ == len_) return;
+    ShrinkToFitInternal();
+  }
+
+  // Return a copy of this string as a std::string.
+  std::string ToString() const {
+    return std::string(reinterpret_cast<const char *>(data()),
+                       len_);
+  }
+
+ private:
+  DISALLOW_COPY_AND_ASSIGN(faststring);
+
+  // If necessary, expand the buffer to fit at least 'count' more bytes.
+  // If the array has to be grown, it is grown by at least 50%.
+  void EnsureRoomForAppend(size_t count) {
+    if (PREDICT_TRUE(len_ + count <= capacity_)) {
+      return;
+    }
+
+    // Call the non-inline slow path - this reduces the number of instructions
+    // on the hot path.
+    GrowByAtLeast(count);
+  }
+
+  // The slow path of EnsureRoomForAppend. Grows the buffer by either
+  // 'count' bytes, or 50%, whichever is more.
+  void GrowByAtLeast(size_t count);
+
+  // Grow the array to the given capacity, which must be more than
+  // the current capacity.
+  void GrowArray(size_t newcapacity);
+
+  void ShrinkToFitInternal();
+
+  uint8_t* data_;
+  uint8_t initial_data_[kInitialCapacity];
+  size_t len_;
+  size_t capacity_;
+};
+
+} // namespace doris
diff --git a/be/src/util/rle_encoding.h b/be/src/util/rle_encoding.h
index 8538bc6460..26b03e1b1f 100644
--- a/be/src/util/rle_encoding.h
+++ b/be/src/util/rle_encoding.h
@@ -1,521 +1,521 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-#pragma once
-
-#include <glog/logging.h>
-
-#include "gutil/port.h"
-#include "util/bit_stream_utils.inline.h"
-#include "util/bit_util.h"
-
-namespace doris {
-
-// Utility classes to do run length encoding (RLE) for fixed bit width values.  If runs
-// are sufficiently long, RLE is used, otherwise, the values are just bit-packed
-// (literal encoding).
-// For both types of runs, there is a byte-aligned indicator which encodes the length
-// of the run and the type of the run.
-// This encoding has the benefit that when there aren't any long enough runs, values
-// are always decoded at fixed (can be precomputed) bit offsets OR both the value and
-// the run length are byte aligned. This allows for very efficient decoding
-// implementations.
-// The encoding is:
-//    encoded-block := run*
-//    run := literal-run | repeated-run
-//    literal-run := literal-indicator < literal bytes >
-//    repeated-run := repeated-indicator < repeated value. padded to byte boundary >
-//    literal-indicator := varint_encode( number_of_groups << 1 | 1)
-//    repeated-indicator := varint_encode( number_of_repetitions << 1 )
-//
-// Each run is preceded by a varint. The varint's least significant bit is
-// used to indicate whether the run is a literal run or a repeated run. The rest
-// of the varint is used to determine the length of the run (eg how many times the
-// value repeats).
-//
-// In the case of literal runs, the run length is always a multiple of 8 (i.e. encode
-// in groups of 8), so that no matter the bit-width of the value, the sequence will end
-// on a byte boundary without padding.
-// Given that we know it is a multiple of 8, we store the number of 8-groups rather than
-// the actual number of encoded ints. (This means that the total number of encoded values
-// can not be determined from the encoded data, since the number of values in the last
-// group may not be a multiple of 8).
-// There is a break-even point when it is more storage efficient to do run length
-// encoding.  For 1 bit-width values, that point is 8 values.  They require 2 bytes
-// for both the repeated encoding or the literal encoding.  This value can always
-// be computed based on the bit-width.
-// TODO: think about how to use this for strings.  The bit packing isn't quite the same.
-//
-// Examples with bit-width 1 (eg encoding booleans):
-// ----------------------------------------
-// 100 1s followed by 100 0s:
-// <varint(100 << 1)> <1, padded to 1 byte> <varint(100 << 1)> <0, padded to 1 byte>
-//  - (total 4 bytes)
-//
-// alternating 1s and 0s (200 total):
-// 200 ints = 25 groups of 8
-// <varint((25 << 1) | 1)> <25 bytes of values, bitpacked>
-// (total 26 bytes, 1 byte overhead)
-//
-
-// Decoder class for RLE encoded data.
-//
-// NOTE: the encoded format does not have any length prefix or any other way of
-// indicating that the encoded sequence ends at a certain point, so the Decoder
-// methods may return some extra bits at the end before the read methods start
-// to return 0/false.
-template<typename T>
-class RleDecoder {
- public:
-  // Create a decoder object. buffer/buffer_len is the decoded data.
-  // bit_width is the width of each value (before encoding).
-  RleDecoder(const uint8_t* buffer, int buffer_len, int bit_width)
-    : bit_reader_(buffer, buffer_len),
-      bit_width_(bit_width),
-      current_value_(0),
-      repeat_count_(0),
-      literal_count_(0),
-      rewind_state_(CANT_REWIND) {
-    DCHECK_GE(bit_width_, 1);
-    DCHECK_LE(bit_width_, 64);
-  }
-
-  RleDecoder() {}
-
-  // Skip n values, and returns the number of non-zero entries skipped.
-  size_t Skip(size_t to_skip);
-
-  // Gets the next value.  Returns false if there are no more.
-  bool Get(T* val);
-
-  // Seek to the previous value.
-  void RewindOne();
-
-  // Gets the next run of the same 'val'. Returns 0 if there is no
-  // more data to be decoded. Will return a run of at most 'max_run'
-  // values. If there are more values than this, the next call to
-  // GetNextRun will return more from the same run.
-  size_t GetNextRun(T* val, size_t max_run);
-
- private:
-  bool ReadHeader();
-
-  enum RewindState {
-    REWIND_LITERAL,
-    REWIND_RUN,
-    CANT_REWIND
-  };
-
-  BitReader bit_reader_;
-  int bit_width_;
-  uint64_t current_value_;
-  uint32_t repeat_count_;
-  uint32_t literal_count_;
-  RewindState rewind_state_;
-};
-
-// Class to incrementally build the rle data.
-// The encoding has two modes: encoding repeated runs and literal runs.
-// If the run is sufficiently short, it is more efficient to encode as a literal run.
-// This class does so by buffering 8 values at a time.  If they are not all the same
-// they are added to the literal run.  If they are the same, they are added to the
-// repeated run.  When we switch modes, the previous run is flushed out.
-template<typename T>
-class RleEncoder {
- public:
-  // buffer: buffer to write bits to.
-  // bit_width: max number of bits for value.
-  // TODO: consider adding a min_repeated_run_length so the caller can control
-  // when values should be encoded as repeated runs.  Currently this is derived
-  // based on the bit_width, which can determine a storage optimal choice.
-  explicit RleEncoder(faststring *buffer, int bit_width)
-    : bit_width_(bit_width),
-      bit_writer_(buffer) {
-    DCHECK_GE(bit_width_, 1);
-    DCHECK_LE(bit_width_, 64);
-    Clear();
-  }
-
-  // Reserve 'num_bytes' bytes for a plain encoded header, set each
-  // byte with 'val': this is used for the RLE-encoded data blocks in
-  // order to be able to able to store the initial ordinal position
-  // and number of elements. This is a part of RleEncoder in order to
-  // maintain the correct offset in 'buffer'.
-  void Reserve(int num_bytes, uint8_t val);
-
-  // Encode value. This value must be representable with bit_width_ bits.
-  void Put(T value, size_t run_length = 1);
-
-  // Flushes any pending values to the underlying buffer.
-  // Returns the total number of bytes written
-  int Flush();
-
-  // Resets all the state in the encoder.
-  void Clear();
-
-  int32_t len() const { return bit_writer_.bytes_written(); }
-
- private:
-  // Flushes any buffered values.  If this is part of a repeated run, this is largely
-  // a no-op.
-  // If it is part of a literal run, this will call FlushLiteralRun, which writes
-  // out the buffered literal values.
-  // If 'done' is true, the current run would be written even if it would normally
-  // have been buffered more.  This should only be called at the end, when the
-  // encoder has received all values even if it would normally continue to be
-  // buffered.
-  void FlushBufferedValues(bool done);
-
-  // Flushes literal values to the underlying buffer.  If update_indicator_byte,
-  // then the current literal run is complete and the indicator byte is updated.
-  void FlushLiteralRun(bool update_indicator_byte);
-
-  // Flushes a repeated run to the underlying buffer.
-  void FlushRepeatedRun();
-
-  // Number of bits needed to encode the value.
-  const int bit_width_;
-
-  // Underlying buffer.
-  BitWriter bit_writer_;
-
-  // We need to buffer at most 8 values for literals.  This happens when the
-  // bit_width is 1 (so 8 values fit in one byte).
-  // TODO: generalize this to other bit widths
-  uint64_t buffered_values_[8];
-
-  // Number of values in buffered_values_
-  int num_buffered_values_;
-
-  // The current (also last) value that was written and the count of how
-  // many times in a row that value has been seen.  This is maintained even
-  // if we are in a literal run.  If the repeat_count_ get high enough, we switch
-  // to encoding repeated runs.
-  uint64_t current_value_;
-  int repeat_count_;
-
-  // Number of literals in the current run.  This does not include the literals
-  // that might be in buffered_values_.  Only after we've got a group big enough
-  // can we decide if they should part of the literal_count_ or repeat_count_
-  int literal_count_;
-
-  // Index of a byte in the underlying buffer that stores the indicator byte.
-  // This is reserved as soon as we need a literal run but the value is written
-  // when the literal run is complete. We maintain an index rather than a pointer
-  // into the underlying buffer because the pointer value may become invalid if
-  // the underlying buffer is resized.
-  int literal_indicator_byte_idx_;
-};
-
-template<typename T>
-inline bool RleDecoder<T>::ReadHeader() {
-  DCHECK(bit_reader_.is_initialized());
-  if (PREDICT_FALSE(literal_count_ == 0 && repeat_count_ == 0)) {
-    // Read the next run's indicator int, it could be a literal or repeated run
-    // The int is encoded as a vlq-encoded value.
-    int32_t indicator_value = 0;
-    bool result = bit_reader_.GetVlqInt(&indicator_value);
-    if (PREDICT_FALSE(!result)) {
-      return false;
-    }
-
-    // lsb indicates if it is a literal run or repeated run
-    bool is_literal = indicator_value & 1;
-    if (is_literal) {
-      literal_count_ = (indicator_value >> 1) * 8;
-      DCHECK_GT(literal_count_, 0);
-    } else {
-      repeat_count_ = indicator_value >> 1;
-      DCHECK_GT(repeat_count_, 0);
-      bool result = bit_reader_.GetAligned<T>(
-          BitUtil::Ceil(bit_width_, 8), reinterpret_cast<T*>(&current_value_));
-      DCHECK(result);
-    }
-  }
-  return true;
-}
-
-template<typename T>
-inline bool RleDecoder<T>::Get(T* val) {
-  DCHECK(bit_reader_.is_initialized());
-  if (PREDICT_FALSE(!ReadHeader())) {
-    return false;
-  }
-
-  if (PREDICT_TRUE(repeat_count_ > 0)) {
-    *val = current_value_;
-    --repeat_count_;
-    rewind_state_ = REWIND_RUN;
-  } else {
-    DCHECK(literal_count_ > 0);
-    bool result = bit_reader_.GetValue(bit_width_, val);
-    DCHECK(result);
-    --literal_count_;
-    rewind_state_ = REWIND_LITERAL;
-  }
-
-  return true;
-}
-
-template<typename T>
-inline void RleDecoder<T>::RewindOne() {
-  DCHECK(bit_reader_.is_initialized());
-
-  switch (rewind_state_) {
-    case CANT_REWIND:
-      LOG(FATAL) << "Can't rewind more than once after each read!";
-      break;
-    case REWIND_RUN:
-      ++repeat_count_;
-      break;
-    case REWIND_LITERAL:
-      {
-        bit_reader_.Rewind(bit_width_);
-        ++literal_count_;
-        break;
-      }
-  }
-
-  rewind_state_ = CANT_REWIND;
-}
-
-template<typename T>
-inline size_t RleDecoder<T>::GetNextRun(T* val, size_t max_run) {
-  DCHECK(bit_reader_.is_initialized());
-  DCHECK_GT(max_run, 0);
-  size_t ret = 0;
-  size_t rem = max_run;
-  while (ReadHeader()) {
-    if (PREDICT_TRUE(repeat_count_ > 0)) {
-      if (PREDICT_FALSE(ret > 0 && *val != current_value_)) {
-        return ret;
-      }
-      *val = current_value_;
-      if (repeat_count_ >= rem) {
-        // The next run is longer than the amount of remaining data
-        // that the caller wants to read. Only consume it partially.
-        repeat_count_ -= rem;
-        ret += rem;
-        return ret;
-      }
-      ret += repeat_count_;
-      rem -= repeat_count_;
-      repeat_count_ = 0;
-    } else {
-      DCHECK(literal_count_ > 0);
-      if (ret == 0) {
-        bool has_more = bit_reader_.GetValue(bit_width_, val);
-        DCHECK(has_more);
-        literal_count_--;
-        ret++;
-        rem--;
-      }
-
-      while (literal_count_ > 0) {
-        bool result = bit_reader_.GetValue(bit_width_, &current_value_);
-        DCHECK(result);
-        if (current_value_ != *val || rem == 0) {
-          bit_reader_.Rewind(bit_width_);
-          return ret;
-        }
-        ret++;
-        rem--;
-        literal_count_--;
-      }
-    }
-  }
-  return ret;
- }
-
-template<typename T>
-inline size_t RleDecoder<T>::Skip(size_t to_skip) {
-  DCHECK(bit_reader_.is_initialized());
-
-  size_t set_count = 0;
-  while (to_skip > 0) {
-    bool result = ReadHeader();
-    DCHECK(result);
-
-    if (PREDICT_TRUE(repeat_count_ > 0)) {
-      size_t nskip = (repeat_count_ < to_skip) ? repeat_count_ : to_skip;
-      repeat_count_ -= nskip;
-      to_skip -= nskip;
-      if (current_value_ != 0) {
-        set_count += nskip;
-      }
-    } else {
-      DCHECK(literal_count_ > 0);
-      size_t nskip = (literal_count_ < to_skip) ? literal_count_ : to_skip;
-      literal_count_ -= nskip;
-      to_skip -= nskip;
-      for (; nskip > 0; nskip--) {
-        T value = 0;
-        bool result = bit_reader_.GetValue(bit_width_, &value);
-        DCHECK(result);
-        if (value != 0) {
-          set_count++;
-        }
-      }
-    }
-  }
-  return set_count;
-}
-
-// This function buffers input values 8 at a time.  After seeing all 8 values,
-// it decides whether they should be encoded as a literal or repeated run.
-template<typename T>
-inline void RleEncoder<T>::Put(T value, size_t run_length) {
-  DCHECK(bit_width_ == 64 || value < (1LL << bit_width_));
-
-  // TODO(perf): remove the loop and use the repeat_count_
-  for (; run_length > 0; run_length--) {
-    if (PREDICT_TRUE(current_value_ == value)) {
-      ++repeat_count_;
-      if (repeat_count_ > 8) {
-        // This is just a continuation of the current run, no need to buffer the
-        // values.
-        // Note that this is the fast path for long repeated runs.
-        continue;
-      }
-    } else {
-      if (repeat_count_ >= 8) {
-        // We had a run that was long enough but it has ended.  Flush the
-        // current repeated run.
-        DCHECK_EQ(literal_count_, 0);
-        FlushRepeatedRun();
-      }
-      repeat_count_ = 1;
-      current_value_ = value;
-    }
-
-    buffered_values_[num_buffered_values_] = value;
-    if (++num_buffered_values_ == 8) {
-      DCHECK_EQ(literal_count_ % 8, 0);
-      FlushBufferedValues(false);
-    }
-  }
-}
-
-template<typename T>
-inline void RleEncoder<T>::FlushLiteralRun(bool update_indicator_byte) {
-  if (literal_indicator_byte_idx_ < 0) {
-    // The literal indicator byte has not been reserved yet, get one now.
-    literal_indicator_byte_idx_ = bit_writer_.GetByteIndexAndAdvance(1);
-    DCHECK_GE(literal_indicator_byte_idx_, 0);
-  }
-
-  // Write all the buffered values as bit packed literals
-  for (int i = 0; i < num_buffered_values_; ++i) {
-    bit_writer_.PutValue(buffered_values_[i], bit_width_);
-  }
-  num_buffered_values_ = 0;
-
-  if (update_indicator_byte) {
-    // At this point we need to write the indicator byte for the literal run.
-    // We only reserve one byte, to allow for streaming writes of literal values.
-    // The logic makes sure we flush literal runs often enough to not overrun
-    // the 1 byte.
-    int num_groups = BitUtil::Ceil(literal_count_, 8);
-    int32_t indicator_value = (num_groups << 1) | 1;
-    DCHECK_EQ(indicator_value & 0xFFFFFF00, 0);
-    bit_writer_.buffer()->data()[literal_indicator_byte_idx_] = indicator_value;
-    literal_indicator_byte_idx_ = -1;
-    literal_count_ = 0;
-  }
-}
-
-template<typename T>
-inline void RleEncoder<T>::FlushRepeatedRun() {
-  DCHECK_GT(repeat_count_, 0);
-  // The lsb of 0 indicates this is a repeated run
-  int32_t indicator_value = repeat_count_ << 1 | 0;
-  bit_writer_.PutVlqInt(indicator_value);
-  bit_writer_.PutAligned(current_value_, BitUtil::Ceil(bit_width_, 8));
-  num_buffered_values_ = 0;
-  repeat_count_ = 0;
-}
-
-// Flush the values that have been buffered.  At this point we decide whether
-// we need to switch between the run types or continue the current one.
-template<typename T>
-inline void RleEncoder<T>::FlushBufferedValues(bool done) {
-  if (repeat_count_ >= 8) {
-    // Clear the buffered values.  They are part of the repeated run now and we
-    // don't want to flush them out as literals.
-    num_buffered_values_ = 0;
-    if (literal_count_ != 0) {
-      // There was a current literal run.  All the values in it have been flushed
-      // but we still need to update the indicator byte.
-      DCHECK_EQ(literal_count_ % 8, 0);
-      DCHECK_EQ(repeat_count_, 8);
-      FlushLiteralRun(true);
-    }
-    DCHECK_EQ(literal_count_, 0);
-    return;
-  }
-
-  literal_count_ += num_buffered_values_;
-  int num_groups = BitUtil::Ceil(literal_count_, 8);
-  if (num_groups + 1 >= (1 << 6)) {
-    // We need to start a new literal run because the indicator byte we've reserved
-    // cannot store more values.
-    DCHECK_GE(literal_indicator_byte_idx_, 0);
-    FlushLiteralRun(true);
-  } else {
-    FlushLiteralRun(done);
-  }
-  repeat_count_ = 0;
-}
-
-template<typename T>
-inline void RleEncoder<T>::Reserve(int num_bytes, uint8_t val) {
-  for (int i = 0; i < num_bytes; ++i) {
-    bit_writer_.PutValue(val, 8);
-  }
-}
-
-template<typename T>
-inline int RleEncoder<T>::Flush() {
-  if (literal_count_ > 0 || repeat_count_ > 0 || num_buffered_values_ > 0) {
-    bool all_repeat = literal_count_ == 0 &&
-        (repeat_count_ == num_buffered_values_ || num_buffered_values_ == 0);
-    // There is something pending, figure out if it's a repeated or literal run
-    if (repeat_count_ > 0 && all_repeat) {
-      FlushRepeatedRun();
-    } else  {
-      literal_count_ += num_buffered_values_;
-      FlushLiteralRun(true);
-      repeat_count_ = 0;
-    }
-  }
-  bit_writer_.Flush();
-  DCHECK_EQ(num_buffered_values_, 0);
-  DCHECK_EQ(literal_count_, 0);
-  DCHECK_EQ(repeat_count_, 0);
-  return bit_writer_.bytes_written();
-}
-
-template<typename T>
-inline void RleEncoder<T>::Clear() {
-  current_value_ = 0;
-  repeat_count_ = 0;
-  num_buffered_values_ = 0;
-  literal_count_ = 0;
-  literal_indicator_byte_idx_ = -1;
-  bit_writer_.Clear();
-}
-
-} // namespace doris
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+#pragma once
+
+#include <glog/logging.h>
+
+#include "gutil/port.h"
+#include "util/bit_stream_utils.inline.h"
+#include "util/bit_util.h"
+
+namespace doris {
+
+// Utility classes to do run length encoding (RLE) for fixed bit width values.  If runs
+// are sufficiently long, RLE is used, otherwise, the values are just bit-packed
+// (literal encoding).
+// For both types of runs, there is a byte-aligned indicator which encodes the length
+// of the run and the type of the run.
+// This encoding has the benefit that when there aren't any long enough runs, values
+// are always decoded at fixed (can be precomputed) bit offsets OR both the value and
+// the run length are byte aligned. This allows for very efficient decoding
+// implementations.
+// The encoding is:
+//    encoded-block := run*
+//    run := literal-run | repeated-run
+//    literal-run := literal-indicator < literal bytes >
+//    repeated-run := repeated-indicator < repeated value. padded to byte boundary >
+//    literal-indicator := varint_encode( number_of_groups << 1 | 1)
+//    repeated-indicator := varint_encode( number_of_repetitions << 1 )
+//
+// Each run is preceded by a varint. The varint's least significant bit is
+// used to indicate whether the run is a literal run or a repeated run. The rest
+// of the varint is used to determine the length of the run (eg how many times the
+// value repeats).
+//
+// In the case of literal runs, the run length is always a multiple of 8 (i.e. encode
+// in groups of 8), so that no matter the bit-width of the value, the sequence will end
+// on a byte boundary without padding.
+// Given that we know it is a multiple of 8, we store the number of 8-groups rather than
+// the actual number of encoded ints. (This means that the total number of encoded values
+// can not be determined from the encoded data, since the number of values in the last
+// group may not be a multiple of 8).
+// There is a break-even point when it is more storage efficient to do run length
+// encoding.  For 1 bit-width values, that point is 8 values.  They require 2 bytes
+// for both the repeated encoding and the literal encoding.  This value can always
+// be computed based on the bit-width.
+// TODO: think about how to use this for strings.  The bit packing isn't quite the same.
+//
+// Examples with bit-width 1 (eg encoding booleans):
+// ----------------------------------------
+// 100 1s followed by 100 0s:
+// <varint(100 << 1)> <1, padded to 1 byte> <varint(100 << 1)> <0, padded to 1 byte>
+//  - (total 4 bytes)
+//
+// alternating 1s and 0s (200 total):
+// 200 ints = 25 groups of 8
+// <varint((25 << 1) | 1)> <25 bytes of values, bitpacked>
+// (total 26 bytes, 1 byte overhead)
+//
+
+// Decoder class for RLE encoded data.
+//
+// NOTE: the encoded format does not have any length prefix or any other way of
+// indicating that the encoded sequence ends at a certain point, so the Decoder
+// methods may return some extra bits at the end before the read methods start
+// to return 0/false.
+template<typename T>
+class RleDecoder {
+ public:
+  // Create a decoder object. buffer/buffer_len is the encoded data.
+  // bit_width is the width of each value (before encoding).
+  RleDecoder(const uint8_t* buffer, int buffer_len, int bit_width)
+    : bit_reader_(buffer, buffer_len),
+      bit_width_(bit_width),
+      current_value_(0),
+      repeat_count_(0),
+      literal_count_(0),
+      rewind_state_(CANT_REWIND) {
+    DCHECK_GE(bit_width_, 1);
+    DCHECK_LE(bit_width_, 64);
+  }
+
+  RleDecoder() {}
+
+  // Skips 'to_skip' values and returns the number of non-zero values skipped.
+  size_t Skip(size_t to_skip);
+
+  // Gets the next value.  Returns false if there are no more.
+  bool Get(T* val);
+
+  // Seek back to the previous value. Can only be done once after each Get().
+  void RewindOne();
+
+  // Gets the next run of the same 'val'. Returns 0 if there is no
+  // more data to be decoded. Will return a run of at most 'max_run'
+  // values. If there are more values than this, the next call to
+  // GetNextRun will return more from the same run.
+  size_t GetNextRun(T* val, size_t max_run);
+
+ private:
+  bool ReadHeader();
+
+  enum RewindState {
+    REWIND_LITERAL,
+    REWIND_RUN,
+    CANT_REWIND
+  };
+
+  BitReader bit_reader_;
+  int bit_width_;
+  uint64_t current_value_;
+  uint32_t repeat_count_;
+  uint32_t literal_count_;
+  RewindState rewind_state_;
+};
+
+// Class to incrementally build the rle data.
+// The encoding has two modes: encoding repeated runs and literal runs.
+// If the run is sufficiently short, it is more efficient to encode as a literal run.
+// This class does so by buffering 8 values at a time.  If they are not all the same
+// they are added to the literal run.  If they are the same, they are added to the
+// repeated run.  When we switch modes, the previous run is flushed out.
+template<typename T>
+class RleEncoder {
+ public:
+  // buffer: buffer to write bits to.
+  // bit_width: max number of bits for value.
+  // TODO: consider adding a min_repeated_run_length so the caller can control
+  // when values should be encoded as repeated runs.  Currently this is derived
+  // based on the bit_width, which can determine a storage optimal choice.
+  explicit RleEncoder(faststring *buffer, int bit_width)
+    : bit_width_(bit_width),
+      bit_writer_(buffer) {
+    DCHECK_GE(bit_width_, 1);
+    DCHECK_LE(bit_width_, 64);
+    Clear();
+  }
+
+  // Reserve 'num_bytes' bytes for a plain encoded header, set each
+  // byte with 'val': this is used for the RLE-encoded data blocks in
+  // order to be able to store the initial ordinal position
+  // and number of elements. This is a part of RleEncoder in order to
+  // maintain the correct offset in 'buffer'.
+  void Reserve(int num_bytes, uint8_t val);
+
+  // Encode value. This value must be representable with bit_width_ bits.
+  void Put(T value, size_t run_length = 1);
+
+  // Flushes any pending values to the underlying buffer.
+  // Returns the total number of bytes written
+  int Flush();
+
+  // Resets all the state in the encoder.
+  void Clear();
+
+  int32_t len() const { return bit_writer_.bytes_written(); }
+
+ private:
+  // Flushes any buffered values.  If this is part of a repeated run, this is largely
+  // a no-op.
+  // If it is part of a literal run, this will call FlushLiteralRun, which writes
+  // out the buffered literal values.
+  // If 'done' is true, the current run would be written even if it would normally
+  // have been buffered more.  This should only be called at the end, when the
+  // encoder has received all values even if it would normally continue to be
+  // buffered.
+  void FlushBufferedValues(bool done);
+
+  // Flushes literal values to the underlying buffer.  If update_indicator_byte,
+  // then the current literal run is complete and the indicator byte is updated.
+  void FlushLiteralRun(bool update_indicator_byte);
+
+  // Flushes a repeated run to the underlying buffer.
+  void FlushRepeatedRun();
+
+  // Number of bits needed to encode the value.
+  const int bit_width_;
+
+  // Underlying buffer.
+  BitWriter bit_writer_;
+
+  // We need to buffer at most 8 values for literals.  This happens when the
+  // bit_width is 1 (so 8 values fit in one byte).
+  // TODO: generalize this to other bit widths
+  uint64_t buffered_values_[8];
+
+  // Number of values in buffered_values_
+  int num_buffered_values_;
+
+  // The current (also last) value that was written and the count of how
+  // many times in a row that value has been seen.  This is maintained even
+  // if we are in a literal run.  If the repeat_count_ gets high enough, we switch
+  // to encoding repeated runs.
+  uint64_t current_value_;
+  int repeat_count_;
+
+  // Number of literals in the current run.  This does not include the literals
+  // that might be in buffered_values_.  Only after we've got a group big enough
+  // can we decide if they should be part of the literal_count_ or repeat_count_.
+  int literal_count_;
+
+  // Index of a byte in the underlying buffer that stores the indicator byte.
+  // This is reserved as soon as we need a literal run but the value is written
+  // when the literal run is complete. We maintain an index rather than a pointer
+  // into the underlying buffer because the pointer value may become invalid if
+  // the underlying buffer is resized.
+  int literal_indicator_byte_idx_;
+};
+
+template<typename T>
+inline bool RleDecoder<T>::ReadHeader() {
+  DCHECK(bit_reader_.is_initialized());
+  if (PREDICT_FALSE(literal_count_ == 0 && repeat_count_ == 0)) {
+    // Read the next run's indicator int, it could be a literal or repeated run
+    // The int is encoded as a vlq-encoded value.
+    int32_t indicator_value = 0;
+    bool result = bit_reader_.GetVlqInt(&indicator_value);
+    if (PREDICT_FALSE(!result)) {
+      return false;
+    }
+
+    // lsb indicates if it is a literal run or repeated run
+    bool is_literal = indicator_value & 1;
+    if (is_literal) {
+      literal_count_ = (indicator_value >> 1) * 8;
+      DCHECK_GT(literal_count_, 0);
+    } else {
+      repeat_count_ = indicator_value >> 1;
+      DCHECK_GT(repeat_count_, 0);
+      bool result = bit_reader_.GetAligned<T>(
+          BitUtil::Ceil(bit_width_, 8), reinterpret_cast<T*>(&current_value_));
+      DCHECK(result);
+    }
+  }
+  return true;
+}
+
+template<typename T>
+inline bool RleDecoder<T>::Get(T* val) {
+  DCHECK(bit_reader_.is_initialized());
+  if (PREDICT_FALSE(!ReadHeader())) {
+    return false;
+  }
+
+  if (PREDICT_TRUE(repeat_count_ > 0)) {
+    *val = current_value_;
+    --repeat_count_;
+    rewind_state_ = REWIND_RUN;
+  } else {
+    DCHECK(literal_count_ > 0);
+    bool result = bit_reader_.GetValue(bit_width_, val);
+    DCHECK(result);
+    --literal_count_;
+    rewind_state_ = REWIND_LITERAL;
+  }
+
+  return true;
+}
+
+template<typename T>
+inline void RleDecoder<T>::RewindOne() {
+  DCHECK(bit_reader_.is_initialized());
+
+  switch (rewind_state_) {
+    case CANT_REWIND:
+      LOG(FATAL) << "Can't rewind more than once after each read!";
+      break;
+    case REWIND_RUN:
+      ++repeat_count_;
+      break;
+    case REWIND_LITERAL:
+      {
+        bit_reader_.Rewind(bit_width_);
+        ++literal_count_;
+        break;
+      }
+  }
+
+  rewind_state_ = CANT_REWIND;
+}
+
+template<typename T>
+inline size_t RleDecoder<T>::GetNextRun(T* val, size_t max_run) {
+  DCHECK(bit_reader_.is_initialized());
+  DCHECK_GT(max_run, 0);
+  size_t ret = 0;
+  size_t rem = max_run;
+  while (ReadHeader()) {
+    if (PREDICT_TRUE(repeat_count_ > 0)) {
+      if (PREDICT_FALSE(ret > 0 && *val != current_value_)) {
+        return ret;
+      }
+      *val = current_value_;
+      if (repeat_count_ >= rem) {
+        // The next run is longer than the amount of remaining data
+        // that the caller wants to read. Only consume it partially.
+        repeat_count_ -= rem;
+        ret += rem;
+        return ret;
+      }
+      ret += repeat_count_;
+      rem -= repeat_count_;
+      repeat_count_ = 0;
+    } else {
+      DCHECK(literal_count_ > 0);
+      if (ret == 0) {
+        bool has_more = bit_reader_.GetValue(bit_width_, val);
+        DCHECK(has_more);
+        literal_count_--;
+        ret++;
+        rem--;
+      }
+
+      while (literal_count_ > 0) {
+        bool result = bit_reader_.GetValue(bit_width_, &current_value_);
+        DCHECK(result);
+        if (current_value_ != *val || rem == 0) {
+          bit_reader_.Rewind(bit_width_);
+          return ret;
+        }
+        ret++;
+        rem--;
+        literal_count_--;
+      }
+    }
+  }
+  return ret;
+ }
+// Skips up to `to_skip` values and returns how many of the skipped values were non-zero.
+template<typename T>
+inline size_t RleDecoder<T>::Skip(size_t to_skip) {
+  DCHECK(bit_reader_.is_initialized());
+  // Stops early (returning the count so far) if ReadHeader() reports that no run is left.
+  size_t set_count = 0;
+  while (to_skip > 0) {
+    bool result = ReadHeader();
+    DCHECK(result);
+    if (PREDICT_FALSE(!result)) break;  // DCHECK is a no-op in release builds: do not spin without progress
+    if (PREDICT_TRUE(repeat_count_ > 0)) {
+      size_t nskip = (repeat_count_ < to_skip) ? repeat_count_ : to_skip;
+      repeat_count_ -= nskip;
+      to_skip -= nskip;
+      if (current_value_ != 0) {
+        set_count += nskip;
+      }
+    } else {
+      DCHECK(literal_count_ > 0);
+      size_t nskip = (literal_count_ < to_skip) ? literal_count_ : to_skip;
+      literal_count_ -= nskip;
+      to_skip -= nskip;
+      for (; nskip > 0; nskip--) {
+        T value = 0;
+        bool got_value = bit_reader_.GetValue(bit_width_, &value);
+        DCHECK(got_value);
+        if (value != 0) {
+          set_count++;
+        }
+      }
+    }
+  }
+  return set_count;
+}
+
+// Appends `run_length` copies of `value`. Values are buffered 8 at a time; once 8 are
+// buffered, FlushBufferedValues() decides whether they become a literal or a repeated run.
+template<typename T>
+inline void RleEncoder<T>::Put(T value, size_t run_length) {
+  DCHECK(bit_width_ == 64 || value < (1LL << bit_width_));  // value must fit in bit_width_ bits
+  // Every copy of the value goes through the same per-value logic below.
+  // TODO(perf): remove the loop and use the repeat_count_
+  for (; run_length > 0; run_length--) {
+    if (PREDICT_TRUE(current_value_ == value)) {
+      ++repeat_count_;
+      if (repeat_count_ > 8) {
+        // This is just a continuation of the current run, no need to buffer the
+        // values.
+        // Note that this is the fast path for long repeated runs.
+        continue;
+      }
+    } else {
+      if (repeat_count_ >= 8) {
+        // We had a run that was long enough but it has ended.  Flush the
+        // current repeated run.
+        DCHECK_EQ(literal_count_, 0);
+        FlushRepeatedRun();
+      }
+      repeat_count_ = 1;  // `value` starts a new candidate run
+      current_value_ = value;
+    }
+    // Reached while the current run has at most 8 values: they are buffered until 8 are collected.
+    buffered_values_[num_buffered_values_] = value;
+    if (++num_buffered_values_ == 8) {
+      DCHECK_EQ(literal_count_ % 8, 0);
+      FlushBufferedValues(false);
+    }
+  }
+}
+// Emits the buffered values as bit-packed literals; optionally finalizes the run's indicator byte.
+template<typename T>
+inline void RleEncoder<T>::FlushLiteralRun(bool update_indicator_byte) {
+  // Lazily reserve the single indicator byte for this literal run.
+  if (literal_indicator_byte_idx_ < 0) {
+    literal_indicator_byte_idx_ = bit_writer_.GetByteIndexAndAdvance(1);
+    DCHECK_GE(literal_indicator_byte_idx_, 0);
+  }
+
+  // Bit-pack everything buffered so far, then empty the buffer.
+  for (int idx = 0; idx < num_buffered_values_; ++idx) {
+    bit_writer_.PutValue(buffered_values_[idx], bit_width_);
+  }
+  num_buffered_values_ = 0;
+
+  if (!update_indicator_byte) {
+    return;
+  }
+  // Only one byte is reserved so that literal values can be streamed out; the
+  // encoder flushes literal runs often enough that the group count fits in it.
+  // The indicator is the number of 8-value groups shifted left by one, with the low bit set.
+  const int group_count = BitUtil::Ceil(literal_count_, 8);
+  const int32_t indicator = (group_count << 1) | 1;
+  DCHECK_EQ(indicator & 0xFFFFFF00, 0);
+  bit_writer_.buffer()->data()[literal_indicator_byte_idx_] = indicator;
+  literal_indicator_byte_idx_ = -1;
+  literal_count_ = 0;
+}
+// Writes the pending repeated run: a VLQ-encoded header followed by the byte-aligned value.
+template<typename T>
+inline void RleEncoder<T>::FlushRepeatedRun() {
+  DCHECK_GT(repeat_count_, 0);
+  // The header is the run length shifted left by one; its lsb of 0 marks a repeated run.
+  const int32_t header = repeat_count_ << 1;
+  bit_writer_.PutVlqInt(header);
+  bit_writer_.PutAligned(current_value_, BitUtil::Ceil(bit_width_, 8));
+  repeat_count_ = 0;
+  num_buffered_values_ = 0;
+}
+
+// Flush the values that have been buffered.  At this point we decide whether we need to switch
+// between the run types or continue the current one.  `done` is forwarded to FlushLiteralRun().
+template<typename T>
+inline void RleEncoder<T>::FlushBufferedValues(bool done) {
+  if (repeat_count_ >= 8) {
+    // Clear the buffered values.  They are part of the repeated run now and we
+    // don't want to flush them out as literals.
+    num_buffered_values_ = 0;
+    if (literal_count_ != 0) {
+      // There was a current literal run.  All the values in it have been flushed
+      // but we still need to update the indicator byte.
+      DCHECK_EQ(literal_count_ % 8, 0);
+      DCHECK_EQ(repeat_count_, 8);
+      FlushLiteralRun(true);
+    }
+    DCHECK_EQ(literal_count_, 0);
+    return;  // the repeated run itself stays pending; nothing is written for it here
+  }
+  // Otherwise the buffered values are added to the current literal run.
+  literal_count_ += num_buffered_values_;
+  int num_groups = BitUtil::Ceil(literal_count_, 8);  // literal run size in groups of 8 values
+  if (num_groups + 1 >= (1 << 6)) {
+    // We need to start a new literal run because the indicator byte we've reserved
+    // cannot store more values.
+    DCHECK_GE(literal_indicator_byte_idx_, 0);
+    FlushLiteralRun(true);
+  } else {
+    FlushLiteralRun(done);  // the indicator byte is only written when `done` is true
+  }
+  repeat_count_ = 0;  // the buffered values went out as literals, so no repeat is being tracked
+}
+// Writes `num_bytes` copies of the byte `val` through the bit writer.
+template<typename T>
+inline void RleEncoder<T>::Reserve(int num_bytes, uint8_t val) {
+  for (int remaining = num_bytes; remaining > 0; --remaining) {
+    bit_writer_.PutValue(val, 8);
+  }
+}
+// Writes out any pending run, flushes the bit writer and returns bit_writer_.bytes_written().
+template<typename T>
+inline int RleEncoder<T>::Flush() {
+  if (literal_count_ > 0 || repeat_count_ > 0 || num_buffered_values_ > 0) {
+    bool all_repeat = literal_count_ == 0 &&
+        (repeat_count_ == num_buffered_values_ || num_buffered_values_ == 0);  // nothing literal is pending
+    // There is something pending, figure out if it's a repeated or literal run
+    if (repeat_count_ > 0 && all_repeat) {
+      FlushRepeatedRun();
+    } else  {
+      literal_count_ += num_buffered_values_;  // the buffered values join the literal run
+      FlushLiteralRun(true);  // true: also write the indicator byte
+      repeat_count_ = 0;
+    }
+  }
+  bit_writer_.Flush();
+  DCHECK_EQ(num_buffered_values_, 0);
+  DCHECK_EQ(literal_count_, 0);
+  DCHECK_EQ(repeat_count_, 0);
+  return bit_writer_.bytes_written();
+}
+// Resets every piece of encoder run state and clears the underlying bit writer.
+template<typename T>
+inline void RleEncoder<T>::Clear() {
+  literal_indicator_byte_idx_ = -1;
+  literal_count_ = 0;
+  num_buffered_values_ = 0;
+  repeat_count_ = 0;
+  current_value_ = 0;
+  bit_writer_.Clear();
+}
+
+} // namespace doris
diff --git a/be/test/exec/es_scan_node_test.cpp b/be/test/exec/es_scan_node_test.cpp
index 77f2cb7cf6..0f6eab51ab 100644
--- a/be/test/exec/es_scan_node_test.cpp
+++ b/be/test/exec/es_scan_node_test.cpp
@@ -1,154 +1,154 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-#include <string>
-
-#include "common/object_pool.h"
-#include "exec/es_scan_node.h"
-#include "gen_cpp/PlanNodes_types.h"
-#include "runtime/mem_pool.h"
-#include "runtime/descriptors.h"
-#include "runtime/runtime_state.h"
-#include "runtime/row_batch.h"
-#include "runtime/string_value.h"
-#include "runtime/tuple_row.h"
-#include "util/runtime_profile.h"
-#include "util/debug_util.h"
-
-using std::vector;
-
-namespace doris {
-
-// mock
-class EsScanNodeTest : public testing::Test {
-public:
-    EsScanNodeTest() : _runtime_state(TQueryGlobals()) {
-        _runtime_state._instance_mem_tracker.reset(new MemTracker());
-        TDescriptorTable t_desc_table;
-
-        // table descriptors
-        TTableDescriptor t_table_desc;
-
-        t_table_desc.id = 0;
-        t_table_desc.tableType = TTableType::ES_TABLE;
-        t_table_desc.numCols = 0;
-        t_table_desc.numClusteringCols = 0;
-        t_table_desc.__isset.esTable = true;
-        t_desc_table.tableDescriptors.push_back(t_table_desc);
-        t_desc_table.__isset.tableDescriptors = true;
-        // TSlotDescriptor
-        int offset = 1;
-        int i = 0;
-        // id
-        {
-            TSlotDescriptor t_slot_desc;
-            t_slot_desc.__set_slotType(TypeDescriptor(TYPE_INT).to_thrift());
-            t_slot_desc.__set_columnPos(i);
-            t_slot_desc.__set_byteOffset(offset);
-            t_slot_desc.__set_nullIndicatorByte(0);
-            t_slot_desc.__set_nullIndicatorBit(-1);
-            t_slot_desc.__set_slotIdx(i);
-            t_slot_desc.__set_isMaterialized(true);
-            t_desc_table.slotDescriptors.push_back(t_slot_desc);
-            offset += sizeof(int);
-        }
-
-        TTupleDescriptor t_tuple_desc;
-        t_tuple_desc.id = 0;
-        t_tuple_desc.byteSize = offset;
-        t_tuple_desc.numNullBytes = 1;
-        t_tuple_desc.tableId = 0;
-        t_tuple_desc.__isset.tableId = true;
-        t_desc_table.__isset.slotDescriptors = true;
-        t_desc_table.tupleDescriptors.push_back(t_tuple_desc);
-
-        DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl);
-        _runtime_state.set_desc_tbl(_desc_tbl);
-
-        // Node Id
-        _tnode.node_id = 0;
-        _tnode.node_type = TPlanNodeType::SCHEMA_SCAN_NODE;
-        _tnode.num_children = 0;
-        _tnode.limit = -1;
-        _tnode.row_tuples.push_back(0);
-        _tnode.nullable_tuples.push_back(false);
-        _tnode.es_scan_node.tuple_id = 0;
-        std::map<std::string, std::string> properties;
-        _tnode.es_scan_node.__set_properties(properties);
-        _tnode.__isset.es_scan_node = true;
-    }
-
-protected:
-    virtual void SetUp() {
-    }
-    virtual void TearDown() {
-    }
-    TPlanNode _tnode;
-    ObjectPool _obj_pool;
-    DescriptorTbl* _desc_tbl;
-    RuntimeState _runtime_state;
-};
-
-
-TEST_F(EsScanNodeTest, normal_use) {
-    EsScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl);
-    Status status = scan_node.prepare(&_runtime_state);
-    ASSERT_TRUE(status.ok());
-	TEsScanRange es_scan_range;
-	es_scan_range.__set_index("index1");
-	es_scan_range.__set_type("docs");
-	es_scan_range.__set_shard_id(0);
-    TNetworkAddress es_host;
-    es_host.__set_hostname("host");
-    es_host.__set_port(8200);
-    std::vector<TNetworkAddress> es_hosts;
-    es_hosts.push_back(es_host);
-    es_scan_range.__set_es_hosts(es_hosts);
-    TScanRange scan_range;
-    scan_range.__set_es_scan_range(es_scan_range);
-    TScanRangeParams scan_range_params;
-    scan_range_params.__set_scan_range(scan_range);
-    std::vector<TScanRangeParams> scan_ranges;
-    scan_ranges.push_back(scan_range_params);
-	
-    status = scan_node.set_scan_ranges(scan_ranges);
-    ASSERT_TRUE(status.ok());
-    std::stringstream out;
-    scan_node.debug_string(1, &out);
-    LOG(WARNING) << out.str();
-
-    status = scan_node.open(&_runtime_state);
-    ASSERT_TRUE(status.ok());
-    RowBatch row_batch(scan_node._row_descriptor, _runtime_state.batch_size(), new MemTracker(-1));
-    bool eos = false;
-    status = scan_node.get_next(&_runtime_state, &row_batch, &eos);
-    ASSERT_TRUE(status.ok());
-    ASSERT_EQ(2, row_batch.num_rows());
-    ASSERT_TRUE(eos);
-
-    status = scan_node.close(&_runtime_state);
-    ASSERT_TRUE(status.ok());
-}
-
-}
-
-int main(int argc, char** argv) {
-    ::testing::InitGoogleTest(&argc, argv);
-    return RUN_ALL_TESTS();
-}
-
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <string>
+
+#include "common/object_pool.h"
+#include "exec/es_scan_node.h"
+#include "gen_cpp/PlanNodes_types.h"
+#include "runtime/mem_pool.h"
+#include "runtime/descriptors.h"
+#include "runtime/runtime_state.h"
+#include "runtime/row_batch.h"
+#include "runtime/string_value.h"
+#include "runtime/tuple_row.h"
+#include "util/runtime_profile.h"
+#include "util/debug_util.h"
+
+using std::vector;
+
+namespace doris {
+
+// Test fixture: builds the descriptor table and the TPlanNode used to construct an EsScanNode.
+class EsScanNodeTest : public testing::Test {
+public:
+    EsScanNodeTest() : _runtime_state(TQueryGlobals()) {
+        _runtime_state._instance_mem_tracker.reset(new MemTracker());
+        TDescriptorTable t_desc_table;
+
+        // table descriptors
+        TTableDescriptor t_table_desc;
+
+        t_table_desc.id = 0;
+        t_table_desc.tableType = TTableType::ES_TABLE;
+        t_table_desc.numCols = 0;
+        t_table_desc.numClusteringCols = 0;
+        t_table_desc.__isset.esTable = true;
+        t_desc_table.tableDescriptors.push_back(t_table_desc);
+        t_desc_table.__isset.tableDescriptors = true;
+        // TSlotDescriptor
+        int offset = 1;  // presumably leaves room for the one null byte (numNullBytes = 1 below) -- confirm
+        int i = 0;
+        // id
+        {
+            TSlotDescriptor t_slot_desc;
+            t_slot_desc.__set_slotType(TypeDescriptor(TYPE_INT).to_thrift());
+            t_slot_desc.__set_columnPos(i);
+            t_slot_desc.__set_byteOffset(offset);
+            t_slot_desc.__set_nullIndicatorByte(0);
+            t_slot_desc.__set_nullIndicatorBit(-1);
+            t_slot_desc.__set_slotIdx(i);
+            t_slot_desc.__set_isMaterialized(true);
+            t_desc_table.slotDescriptors.push_back(t_slot_desc);
+            offset += sizeof(int);  // offset now equals the tuple byte size used below
+        }
+
+        TTupleDescriptor t_tuple_desc;
+        t_tuple_desc.id = 0;
+        t_tuple_desc.byteSize = offset;
+        t_tuple_desc.numNullBytes = 1;
+        t_tuple_desc.tableId = 0;
+        t_tuple_desc.__isset.tableId = true;
+        t_desc_table.__isset.slotDescriptors = true;
+        t_desc_table.tupleDescriptors.push_back(t_tuple_desc);
+
+        DescriptorTbl::create(&_obj_pool, t_desc_table, &_desc_tbl);
+        _runtime_state.set_desc_tbl(_desc_tbl);
+
+        // Node Id
+        _tnode.node_id = 0;
+        _tnode.node_type = TPlanNodeType::SCHEMA_SCAN_NODE;  // NOTE(review): schema-scan type in an ES scan test -- confirm intended
+        _tnode.num_children = 0;
+        _tnode.limit = -1;
+        _tnode.row_tuples.push_back(0);
+        _tnode.nullable_tuples.push_back(false);
+        _tnode.es_scan_node.tuple_id = 0;
+        std::map<std::string, std::string> properties;  // left empty
+        _tnode.es_scan_node.__set_properties(properties);
+        _tnode.__isset.es_scan_node = true;
+    }
+
+protected:
+    virtual void SetUp() {
+    }
+    virtual void TearDown() {
+    }
+    TPlanNode _tnode;
+    ObjectPool _obj_pool;
+    DescriptorTbl* _desc_tbl;  // passed as the output argument of DescriptorTbl::create() in the constructor
+    RuntimeState _runtime_state;
+};
+
+// Drives an EsScanNode through prepare/set_scan_ranges/open/get_next/close; expects 2 rows and eos.
+TEST_F(EsScanNodeTest, normal_use) {
+    EsScanNode scan_node(&_obj_pool, _tnode, *_desc_tbl);
+    Status status = scan_node.prepare(&_runtime_state);
+    ASSERT_TRUE(status.ok());
+    TEsScanRange es_scan_range;
+    es_scan_range.__set_index("index1");
+    es_scan_range.__set_type("docs");
+    es_scan_range.__set_shard_id(0);
+    TNetworkAddress es_host;
+    es_host.__set_hostname("host");
+    es_host.__set_port(8200);
+    std::vector<TNetworkAddress> es_hosts;
+    es_hosts.push_back(es_host);
+    es_scan_range.__set_es_hosts(es_hosts);
+    TScanRange scan_range;
+    scan_range.__set_es_scan_range(es_scan_range);
+    TScanRangeParams scan_range_params;
+    scan_range_params.__set_scan_range(scan_range);
+    std::vector<TScanRangeParams> scan_ranges;
+    scan_ranges.push_back(scan_range_params);
+    // Hand the single scan range to the node and log its debug string.
+    status = scan_node.set_scan_ranges(scan_ranges);
+    ASSERT_TRUE(status.ok());
+    std::stringstream out;
+    scan_node.debug_string(1, &out);
+    LOG(WARNING) << out.str();
+    // The tracker lives on the stack so it is released even when an ASSERT returns early.
+    status = scan_node.open(&_runtime_state);
+    ASSERT_TRUE(status.ok());
+    MemTracker batch_tracker(-1);  // declared before row_batch, so it is destroyed after the batch
+    RowBatch row_batch(scan_node._row_descriptor, _runtime_state.batch_size(), &batch_tracker);
+    bool eos = false;
+    status = scan_node.get_next(&_runtime_state, &row_batch, &eos);
+    ASSERT_TRUE(status.ok());
+    ASSERT_EQ(2, row_batch.num_rows());
+    ASSERT_TRUE(eos);
+    status = scan_node.close(&_runtime_state);
+    ASSERT_TRUE(status.ok());
+}
+
+}
+// Test binary entry point: initializes googletest and returns the result of running all tests.
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
+
diff --git a/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp b/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp
index f2f4d9383e..c99347119f 100644
--- a/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/bitshuffle_page_test.cpp
@@ -1,229 +1,229 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-#include <memory>
-
-#include "olap/rowset/segment_v2/options.h"
-#include "olap/rowset/segment_v2/page_builder.h"
-#include "olap/rowset/segment_v2/page_decoder.h"
-#include "olap/rowset/segment_v2/bitshuffle_page.h"
-#include "util/arena.h"
-#include "util/logging.h"
-
-using doris::segment_v2::PageBuilderOptions;
-
-namespace doris {
-
-class BitShufflePageTest : public testing::Test {
-public:
-    virtual ~BitShufflePageTest() {}
-
-    template<FieldType type, class PageDecoderType>
-    void copy_one(PageDecoderType* decoder, typename TypeTraits<type>::CppType* ret) {
-        Arena arena;
-        uint8_t null_bitmap = 0;
-        ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, &arena);
-        ColumnBlockView column_block_view(&block);
-
-        size_t n = 1;
-        decoder->_copy_next_values(n, column_block_view.data());
-        ASSERT_EQ(1, n);
-    }
-
-    template <FieldType Type, class PageBuilderType, class PageDecoderType>
-    void test_encode_decode_page_template(typename TypeTraits<Type>::CppType* src,
-            size_t size) {
-        typedef typename TypeTraits<Type>::CppType CppType;
-        PageBuilderOptions options;
-        options.data_page_size = 256 * 1024;
-        PageBuilderType page_builder(options);
-
-        page_builder.add(reinterpret_cast<const uint8_t *>(src), &size);
-        Slice s = page_builder.finish();
-        LOG(INFO) << "RLE Encoded size for 10k values: " << s.size
-                << ", original size:" << size * sizeof(CppType);
-
-        segment_v2::PageDecoderOptions decoder_options;
-        PageDecoderType page_decoder(s, decoder_options);
-        Status status = page_decoder.init();
-        ASSERT_TRUE(status.ok());
-        ASSERT_EQ(0, page_decoder.current_index());
-
-        Arena arena;
-
-        CppType* values = reinterpret_cast<CppType*>(arena.Allocate(size * sizeof(CppType)));
-        uint8_t* null_bitmap = reinterpret_cast<uint8_t*>(arena.Allocate(BitmapSize(size)));
-        ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, &arena);
-        ColumnBlockView column_block_view(&block);
-
-        status = page_decoder.next_batch(&size, &column_block_view);
-        ASSERT_TRUE(status.ok());
-
-        CppType* decoded = (CppType*)values;
-        for (uint i = 0; i < size; i++) {
-            if (src[i] != decoded[i]) {
-                FAIL() << "Fail at index " << i <<
-                    " inserted=" << src[i] << " got=" << decoded[i];
-            }
-        }
-
-        // Test Seek within block by ordinal
-        for (int i = 0; i < 100; i++) {
-            int seek_off = random() % size;
-            page_decoder.seek_to_position_in_page(seek_off);
-            EXPECT_EQ((int32_t )(seek_off), page_decoder.current_index());
-            CppType ret;
-            copy_one<Type, PageDecoderType>(&page_decoder, &ret);
-            EXPECT_EQ(decoded[seek_off], ret);
-        }
-    }
-};
-
-// Test for bitshuffle block, for INT32, INT64, FLOAT, DOUBLE
-TEST_F(BitShufflePageTest, TestBitShuffleInt32BlockEncoderRandom) {
-    const uint32_t size = 10000;
-
-    std::unique_ptr<int32_t[]> ints(new int32_t[size]);
-    for (int i = 0; i < size; i++) {
-        ints.get()[i] = random();
-    }
-
-    test_encode_decode_page_template<OLAP_FIELD_TYPE_INT, segment_v2::BitshufflePageBuilder<OLAP_FIELD_TYPE_INT>,
-        segment_v2::BitShufflePageDecoder<OLAP_FIELD_TYPE_INT> >(ints.get(), size);
-}
-
-TEST_F(BitShufflePageTest, TestBitShuffleInt64BlockEncoderRandom) {
-    const uint32_t size = 10000;
-
-    std::unique_ptr<int64_t[]> ints(new int64_t[size]);
-    for (int i = 0; i < size; i++) {
-        ints.get()[i] = random();
-    }
-
-    test_encode_decode_page_template<OLAP_FIELD_TYPE_BIGINT, segment_v2::BitshufflePageBuilder<OLAP_FIELD_TYPE_BIGINT>,
-        segment_v2::BitShufflePageDecoder<OLAP_FIELD_TYPE_BIGINT> >(ints.get(), size);
-}
-
-TEST_F(BitShufflePageTest, TestBitShuffleFloatBlockEncoderRandom) {
-    const uint32_t size = 10000;
-
-    std::unique_ptr<float[]> floats(new float[size]);
-    for (int i = 0; i < size; i++) {
-        floats.get()[i] = random() + static_cast<float>(random())/INT_MAX;
-    }
-
-    test_encode_decode_page_template<OLAP_FIELD_TYPE_FLOAT, segment_v2::BitshufflePageBuilder<OLAP_FIELD_TYPE_FLOAT>,
-        segment_v2::BitShufflePageDecoder<OLAP_FIELD_TYPE_FLOAT> >(floats.get(), size);
-}
-
-TEST_F(BitShufflePageTest, TestBitShuffleDoubleBlockEncoderRandom) {
-    const uint32_t size = 10000;
-
-    std::unique_ptr<double[]> doubles(new double[size]);
-    for (int i = 0; i < size; i++) {
-        doubles.get()[i] = random() + static_cast<double>(random())/INT_MAX;
-    }
-
-    test_encode_decode_page_template<OLAP_FIELD_TYPE_DOUBLE, segment_v2::BitshufflePageBuilder<OLAP_FIELD_TYPE_DOUBLE>,
-        segment_v2::BitShufflePageDecoder<OLAP_FIELD_TYPE_DOUBLE> >(doubles.get(), size);
-}
-
-TEST_F(BitShufflePageTest, TestBitShuffleDoubleBlockEncoderEqual) {
-    const uint32_t size = 10000;
-
-    std::unique_ptr<double[]> doubles(new double[size]);
-    for (int i = 0; i < size; i++) {
-        doubles.get()[i] = 19880217.19890323;
-    }
-
-    test_encode_decode_page_template<OLAP_FIELD_TYPE_DOUBLE, segment_v2::BitshufflePageBuilder<OLAP_FIELD_TYPE_DOUBLE>,
-        segment_v2::BitShufflePageDecoder<OLAP_FIELD_TYPE_DOUBLE> >(doubles.get(), size);
-}
-
-TEST_F(BitShufflePageTest, TestBitShuffleDoubleBlockEncoderSequence) {
-    const uint32_t size = 10000;
-
-    double base = 19880217.19890323;
-    double delta = 13.14;
-    std::unique_ptr<double[]> doubles(new double[size]);
-    for (int i = 0; i < size; i++) {
-        base = base + delta;
-        doubles.get()[i] = base;
-    }
-
-    test_encode_decode_page_template<OLAP_FIELD_TYPE_DOUBLE, segment_v2::BitshufflePageBuilder<OLAP_FIELD_TYPE_DOUBLE>,
-        segment_v2::BitShufflePageDecoder<OLAP_FIELD_TYPE_DOUBLE> >(doubles.get(), size);
-}
-
-TEST_F(BitShufflePageTest, TestBitShuffleInt32BlockEncoderEqual) {
-    const uint32_t size = 10000;
-
-    std::unique_ptr<int32_t[]> ints(new int32_t[size]);
-    for (int i = 0; i < size; i++) {
-        ints.get()[i] = 12345;
-    }
-
-    test_encode_decode_page_template<OLAP_FIELD_TYPE_INT, segment_v2::BitshufflePageBuilder<OLAP_FIELD_TYPE_INT>,
-        segment_v2::BitShufflePageDecoder<OLAP_FIELD_TYPE_INT> >(ints.get(), size);
-}
-
-TEST_F(BitShufflePageTest, TestBitShuffleInt32BlockEncoderMaxNumberEqual) {
-    const uint32_t size = 10000;
-
-    std::unique_ptr<int32_t[]> ints(new int32_t[size]);
-    for (int i = 0; i < size; i++) {
-        ints.get()[i] = 1234567890;
-    }
-
-    test_encode_decode_page_template<OLAP_FIELD_TYPE_INT, segment_v2::BitshufflePageBuilder<OLAP_FIELD_TYPE_INT>,
-        segment_v2::BitShufflePageDecoder<OLAP_FIELD_TYPE_INT> >(ints.get(), size);
-}
-
-TEST_F(BitShufflePageTest, TestBitShuffleInt32BlockEncoderSequence) {
-    const uint32_t size = 10000;
-
-    std::unique_ptr<int32_t[]> ints(new int32_t[size]);
-    int32_t number = 0;
-    for (int i = 0; i < size; i++) {
-        ints.get()[i] = ++number;
-    }
-
-    test_encode_decode_page_template<OLAP_FIELD_TYPE_INT, segment_v2::BitshufflePageBuilder<OLAP_FIELD_TYPE_INT>,
-        segment_v2::BitShufflePageDecoder<OLAP_FIELD_TYPE_INT> >(ints.get(), size);
-}
-
-TEST_F(BitShufflePageTest, TestBitShuffleInt32BlockEncoderMaxNumberSequence) {
-    const uint32_t size = 10000;
-
-    std::unique_ptr<int32_t[]> ints(new int32_t[size]);
-    int32_t number = 0;
-    for (int i = 0; i < size; i++) {
-        ints.get()[i] = 1234567890 + number;
-        ++number;
-    }
-
-    test_encode_decode_page_template<OLAP_FIELD_TYPE_INT, segment_v2::BitshufflePageBuilder<OLAP_FIELD_TYPE_INT>,
-        segment_v2::BitShufflePageDecoder<OLAP_FIELD_TYPE_INT> >(ints.get(), size);
-}
-
-}
-
-int main(int argc, char** argv) {
-    testing::InitGoogleTest(&argc, argv);
-    return RUN_ALL_TESTS();
-}
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <memory>
+
+#include "olap/rowset/segment_v2/options.h"
+#include "olap/rowset/segment_v2/page_builder.h"
+#include "olap/rowset/segment_v2/page_decoder.h"
+#include "olap/rowset/segment_v2/bitshuffle_page.h"
+#include "util/arena.h"
+#include "util/logging.h"
+
+using doris::segment_v2::PageBuilderOptions;
+
+namespace doris {
+// Fixture with helpers that round-trip typed values through a page builder and its decoder.
+class BitShufflePageTest : public testing::Test {
+public:
+    virtual ~BitShufflePageTest() {}
+
+    template<FieldType type, class PageDecoderType>
+    void copy_one(PageDecoderType* decoder, typename TypeTraits<type>::CppType* ret) {
+        Arena arena;
+        uint8_t null_bitmap = 0;
+        ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, &arena);
+        ColumnBlockView column_block_view(&block);
+        // Ask the decoder for exactly one value, written to *ret through the column block view.
+        size_t n = 1;
+        decoder->_copy_next_values(n, column_block_view.data());
+        ASSERT_EQ(1, n);
+    }
+
+    template <FieldType Type, class PageBuilderType, class PageDecoderType>
+    void test_encode_decode_page_template(typename TypeTraits<Type>::CppType* src,
+            size_t size) {
+        typedef typename TypeTraits<Type>::CppType CppType;
+        PageBuilderOptions options;
+        options.data_page_size = 256 * 1024;
+        PageBuilderType page_builder(options);
+        // Encode the values; add() receives &size and may update it.
+        page_builder.add(reinterpret_cast<const uint8_t *>(src), &size);
+        Slice s = page_builder.finish();
+        LOG(INFO) << "BitShuffle encoded size for " << size << " values: " << s.size
+                << ", original size:" << size * sizeof(CppType);
+
+        segment_v2::PageDecoderOptions decoder_options;
+        PageDecoderType page_decoder(s, decoder_options);
+        Status status = page_decoder.init();
+        ASSERT_TRUE(status.ok());
+        ASSERT_EQ(0, page_decoder.current_index());
+
+        Arena arena;
+        // Destination buffers for the decoded values and their null bitmap.
+        CppType* values = reinterpret_cast<CppType*>(arena.Allocate(size * sizeof(CppType)));
+        uint8_t* null_bitmap = reinterpret_cast<uint8_t*>(arena.Allocate(BitmapSize(size)));
+        ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, &arena);
+        ColumnBlockView column_block_view(&block);
+
+        status = page_decoder.next_batch(&size, &column_block_view);
+        ASSERT_TRUE(status.ok());
+
+        CppType* decoded = (CppType*)values;
+        for (uint i = 0; i < size; i++) {
+            if (src[i] != decoded[i]) {
+                FAIL() << "Fail at index " << i <<
+                    " inserted=" << src[i] << " got=" << decoded[i];
+            }
+        }
+
+        // Test Seek within block by ordinal
+        for (int i = 0; i < 100; i++) {
+            int seek_off = random() % size;
+            page_decoder.seek_to_position_in_page(seek_off);
+            EXPECT_EQ((int32_t )(seek_off), page_decoder.current_index());
+            CppType ret;
+            copy_one<Type, PageDecoderType>(&page_decoder, &ret);
+            EXPECT_EQ(decoded[seek_off], ret);
+        }
+    }
+};
+
+// Bitshuffle page tests for INT32, INT64, FLOAT and DOUBLE. This one round-trips 10000 random int32 values.
+TEST_F(BitShufflePageTest, TestBitShuffleInt32BlockEncoderRandom) {
+    const uint32_t num_values = 10000;
+    std::unique_ptr<int32_t[]> values(new int32_t[num_values]);
+
+    for (uint32_t idx = 0; idx < num_values; ++idx) {
+        values[idx] = random();
+    }
+
+    test_encode_decode_page_template<OLAP_FIELD_TYPE_INT, segment_v2::BitshufflePageBuilder<OLAP_FIELD_TYPE_INT>,
+        segment_v2::BitShufflePageDecoder<OLAP_FIELD_TYPE_INT> >(values.get(), num_values);
+}
+// Round-trips 10000 random int64 values through the bitshuffle page builder and decoder.
+TEST_F(BitShufflePageTest, TestBitShuffleInt64BlockEncoderRandom) {
+    const uint32_t num_values = 10000;
+    std::unique_ptr<int64_t[]> values(new int64_t[num_values]);
+
+    for (uint32_t idx = 0; idx < num_values; ++idx) {
+        values[idx] = random();
+    }
+
+    test_encode_decode_page_template<OLAP_FIELD_TYPE_BIGINT, segment_v2::BitshufflePageBuilder<OLAP_FIELD_TYPE_BIGINT>,
+        segment_v2::BitShufflePageDecoder<OLAP_FIELD_TYPE_BIGINT> >(values.get(), num_values);
+}
+// Round-trips 10000 random floats (random integer part plus a random fraction).
+TEST_F(BitShufflePageTest, TestBitShuffleFloatBlockEncoderRandom) {
+    const uint32_t num_values = 10000;
+    std::unique_ptr<float[]> values(new float[num_values]);
+
+    for (uint32_t idx = 0; idx < num_values; ++idx) {
+        values[idx] = random() + static_cast<float>(random())/INT_MAX;
+    }
+
+    test_encode_decode_page_template<OLAP_FIELD_TYPE_FLOAT, segment_v2::BitshufflePageBuilder<OLAP_FIELD_TYPE_FLOAT>,
+        segment_v2::BitShufflePageDecoder<OLAP_FIELD_TYPE_FLOAT> >(values.get(), num_values);
+}
+// Round-trips 10000 random doubles (random integer part plus a random fraction).
+TEST_F(BitShufflePageTest, TestBitShuffleDoubleBlockEncoderRandom) {
+    const uint32_t num_values = 10000;
+    std::unique_ptr<double[]> values(new double[num_values]);
+
+    for (uint32_t idx = 0; idx < num_values; ++idx) {
+        values[idx] = random() + static_cast<double>(random())/INT_MAX;
+    }
+
+    test_encode_decode_page_template<OLAP_FIELD_TYPE_DOUBLE, segment_v2::BitshufflePageBuilder<OLAP_FIELD_TYPE_DOUBLE>,
+        segment_v2::BitShufflePageDecoder<OLAP_FIELD_TYPE_DOUBLE> >(values.get(), num_values);
+}
+// Round-trips 10000 copies of one and the same double value.
+TEST_F(BitShufflePageTest, TestBitShuffleDoubleBlockEncoderEqual) {
+    const uint32_t num_values = 10000;
+    std::unique_ptr<double[]> values(new double[num_values]);
+
+    for (uint32_t idx = 0; idx < num_values; ++idx) {
+        values[idx] = 19880217.19890323;
+    }
+
+    test_encode_decode_page_template<OLAP_FIELD_TYPE_DOUBLE, segment_v2::BitshufflePageBuilder<OLAP_FIELD_TYPE_DOUBLE>,
+        segment_v2::BitShufflePageDecoder<OLAP_FIELD_TYPE_DOUBLE> >(values.get(), num_values);
+}
+// Round-trips 10000 doubles that grow by a fixed step of 13.14 from one value to the next.
+TEST_F(BitShufflePageTest, TestBitShuffleDoubleBlockEncoderSequence) {
+    const uint32_t num_values = 10000;
+    const double step = 13.14;
+
+    double current = 19880217.19890323;
+    std::unique_ptr<double[]> values(new double[num_values]);
+    for (uint32_t idx = 0; idx < num_values; ++idx) {
+        current += step;
+        values[idx] = current;
+    }
+
+    test_encode_decode_page_template<OLAP_FIELD_TYPE_DOUBLE, segment_v2::BitshufflePageBuilder<OLAP_FIELD_TYPE_DOUBLE>,
+        segment_v2::BitShufflePageDecoder<OLAP_FIELD_TYPE_DOUBLE> >(values.get(), num_values);
+}
+// Round-trips 10000 copies of the int32 value 12345.
+TEST_F(BitShufflePageTest, TestBitShuffleInt32BlockEncoderEqual) {
+    const uint32_t num_values = 10000;
+    std::unique_ptr<int32_t[]> values(new int32_t[num_values]);
+
+    for (uint32_t idx = 0; idx < num_values; ++idx) {
+        values[idx] = 12345;
+    }
+
+    test_encode_decode_page_template<OLAP_FIELD_TYPE_INT, segment_v2::BitshufflePageBuilder<OLAP_FIELD_TYPE_INT>,
+        segment_v2::BitShufflePageDecoder<OLAP_FIELD_TYPE_INT> >(values.get(), num_values);
+}
+// Round-trips 10000 copies of the large int32 value 1234567890.
+TEST_F(BitShufflePageTest, TestBitShuffleInt32BlockEncoderMaxNumberEqual) {
+    const uint32_t num_values = 10000;
+    std::unique_ptr<int32_t[]> values(new int32_t[num_values]);
+
+    for (uint32_t idx = 0; idx < num_values; ++idx) {
+        values[idx] = 1234567890;
+    }
+
+    test_encode_decode_page_template<OLAP_FIELD_TYPE_INT, segment_v2::BitshufflePageBuilder<OLAP_FIELD_TYPE_INT>,
+        segment_v2::BitShufflePageDecoder<OLAP_FIELD_TYPE_INT> >(values.get(), num_values);
+}
+// Round-trips the increasing int32 sequence 1, 2, ..., 10000.
+TEST_F(BitShufflePageTest, TestBitShuffleInt32BlockEncoderSequence) {
+    const uint32_t num_values = 10000;
+    std::unique_ptr<int32_t[]> values(new int32_t[num_values]);
+
+    int32_t next = 0;
+    for (uint32_t idx = 0; idx < num_values; ++idx) {
+        values[idx] = ++next;
+    }
+
+    test_encode_decode_page_template<OLAP_FIELD_TYPE_INT, segment_v2::BitshufflePageBuilder<OLAP_FIELD_TYPE_INT>,
+        segment_v2::BitShufflePageDecoder<OLAP_FIELD_TYPE_INT> >(values.get(), num_values);
+}
+// Round-trips an increasing int32 sequence that starts at 1234567890.
+TEST_F(BitShufflePageTest, TestBitShuffleInt32BlockEncoderMaxNumberSequence) {
+    const uint32_t num_values = 10000;
+    std::unique_ptr<int32_t[]> values(new int32_t[num_values]);
+
+    int32_t offset = 0;
+    for (uint32_t idx = 0; idx < num_values; ++idx) {
+        values[idx] = 1234567890 + offset;
+        ++offset;
+    }
+
+    test_encode_decode_page_template<OLAP_FIELD_TYPE_INT, segment_v2::BitshufflePageBuilder<OLAP_FIELD_TYPE_INT>,
+        segment_v2::BitShufflePageDecoder<OLAP_FIELD_TYPE_INT> >(values.get(), num_values);
+}
+
+}
+// Test binary entry point: initializes googletest and returns the result of running all tests.
+int main(int argc, char** argv) {
+    testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
diff --git a/be/test/olap/rowset/segment_v2/rle_page_test.cpp b/be/test/olap/rowset/segment_v2/rle_page_test.cpp
index e30e45df5e..97015950ab 100644
--- a/be/test/olap/rowset/segment_v2/rle_page_test.cpp
+++ b/be/test/olap/rowset/segment_v2/rle_page_test.cpp
@@ -1,193 +1,193 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <gtest/gtest.h>
-#include <memory>
-
-#include "olap/rowset/segment_v2/options.h"
-#include "olap/rowset/segment_v2/page_builder.h"
-#include "olap/rowset/segment_v2/page_decoder.h"
-#include "olap/rowset/segment_v2/rle_page.h"
-#include "util/arena.h"
-#include "util/logging.h"
-
-using doris::segment_v2::PageBuilderOptions;
-using doris::segment_v2::PageDecoderOptions;
-
-namespace doris {
-
-class RlePageTest : public testing::Test {
-public:
-    virtual ~RlePageTest() { }
-
-    template<FieldType type, class PageDecoderType>
-    void copy_one(PageDecoderType* decoder, typename TypeTraits<type>::CppType* ret) {
-        Arena arena;
-        uint8_t null_bitmap = 0;
-        ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, &arena);
-        ColumnBlockView column_block_view(&block);
-
-        size_t n = 1;
-        decoder->next_batch(&n, &column_block_view);
-        ASSERT_EQ(1, n);
-    }
-
-    template <FieldType Type, class PageBuilderType, class PageDecoderType>
-    void test_encode_decode_page_template(typename TypeTraits<Type>::CppType* src,
-            size_t size) {
-        typedef typename TypeTraits<Type>::CppType CppType;
-        PageBuilderOptions builder_options;
-        builder_options.data_page_size = 256 * 1024;
-        PageBuilderType rle_page_builder(builder_options);
-        rle_page_builder.add(reinterpret_cast<const uint8_t *>(src), &size);
-        Slice s = rle_page_builder.finish();
-        ASSERT_EQ(size, rle_page_builder.count());
-        LOG(INFO) << "RLE Encoded size for 10k values: " << s.size
-                << ", original size:" << size * sizeof(CppType);
-
-        PageDecoderOptions decodeder_options;
-        PageDecoderType rle_page_decoder(s, decodeder_options);
-        Status status = rle_page_decoder.init();
-        ASSERT_TRUE(status.ok());
-        ASSERT_EQ(0, rle_page_decoder.current_index());
-        ASSERT_EQ(size, rle_page_decoder.count());
-
-        Arena arena;
-
-        CppType* values = reinterpret_cast<CppType*>(arena.Allocate(size * sizeof(CppType)));
-        uint8_t* null_bitmap = reinterpret_cast<uint8_t*>(arena.Allocate(BitmapSize(size)));
-        ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, &arena);
-        ColumnBlockView column_block_view(&block);
-        size_t size_to_fetch = size;
-        status = rle_page_decoder.next_batch(&size_to_fetch, &column_block_view);
-        ASSERT_TRUE(status.ok());
-        ASSERT_EQ(size, size_to_fetch);
-
-        for (uint i = 0; i < size; i++) {
-            if (src[i] != values[i]) {
-                FAIL() << "Fail at index " << i <<
-                    " inserted=" << src[i] << " got=" << values[i];
-            }
-        }
-
-        // Test Seek within block by ordinal
-        for (int i = 0; i < 100; i++) {
-            int seek_off = random() % size;
-            rle_page_decoder.seek_to_position_in_page(seek_off);
-            EXPECT_EQ((int32_t )(seek_off), rle_page_decoder.current_index());
-            CppType ret;
-            copy_one<Type, PageDecoderType>(&rle_page_decoder, &ret);
-            EXPECT_EQ(values[seek_off], ret);
-        }
-    }
-};
-
-// Test for rle block, for INT32, BOOL
-TEST_F(RlePageTest, TestRleInt32BlockEncoderRandom) {
-    const uint32_t size = 10000;
-
-    std::unique_ptr<int32_t[]> ints(new int32_t[size]);
-    for (int i = 0; i < size; i++) {
-        ints.get()[i] = random();
-    }
-
-    test_encode_decode_page_template<OLAP_FIELD_TYPE_INT, segment_v2::RlePageBuilder<OLAP_FIELD_TYPE_INT>,
-        segment_v2::RlePageDecoder<OLAP_FIELD_TYPE_INT> >(ints.get(), size);
-}
-
-TEST_F(RlePageTest, TestRleInt32BlockEncoderEqual) {
-    const uint32_t size = 10000;
-
-    std::unique_ptr<int32_t[]> ints(new int32_t[size]);
-    for (int i = 0; i < size; i++) {
-        ints.get()[i] = 12345;
-    }
-
-    test_encode_decode_page_template<OLAP_FIELD_TYPE_INT, segment_v2::RlePageBuilder<OLAP_FIELD_TYPE_INT>,
-        segment_v2::RlePageDecoder<OLAP_FIELD_TYPE_INT> >(ints.get(), size);
-}
-
-TEST_F(RlePageTest, TestRleInt32BlockEncoderSequence) {
-    const uint32_t size = 10000;
-
-    std::unique_ptr<int32_t[]> ints(new int32_t[size]);
-    for (int i = 0; i < size; i++) {
-        ints.get()[i] = 12345 + i;
-    }
-
-    test_encode_decode_page_template<OLAP_FIELD_TYPE_INT, segment_v2::RlePageBuilder<OLAP_FIELD_TYPE_INT>,
-        segment_v2::RlePageDecoder<OLAP_FIELD_TYPE_INT> >(ints.get(), size);
-}
-
-TEST_F(RlePageTest, TestRleInt32BlockEncoderSize) {
-    size_t size = 100;
-
-    std::unique_ptr<int32_t[]> ints(new int32_t[size]);
-    for (int i = 0; i < size; i++) {
-        ints.get()[i] = 0;
-    }
-    PageBuilderOptions builder_options;
-    builder_options.data_page_size = 256 * 1024;
-    segment_v2::RlePageBuilder<OLAP_FIELD_TYPE_INT> rle_page_builder(builder_options);
-    rle_page_builder.add(reinterpret_cast<const uint8_t *>(ints.get()), &size);
-    Slice s = rle_page_builder.finish();
-    // 4 bytes header
-    // 2 bytes indicate_value(): 0x64 << 1 | 1 = 201
-    // 4 bytes values
-    ASSERT_EQ(10, s.size);
-}
-
-TEST_F(RlePageTest, TestRleBoolBlockEncoderRandom) {
-    const uint32_t size = 10000;
-
-    std::unique_ptr<bool[]> bools(new bool[size]);
-    for (int i = 0; i < size; i++) {
-        if (random() % 2 == 0) {
-            bools.get()[i] = true;
-        } else {
-            bools.get()[i] = false;
-        }
-    }
-
-    test_encode_decode_page_template<OLAP_FIELD_TYPE_BOOL, segment_v2::RlePageBuilder<OLAP_FIELD_TYPE_BOOL>,
-        segment_v2::RlePageDecoder<OLAP_FIELD_TYPE_BOOL> >(bools.get(), size);
-}
-
-TEST_F(RlePageTest, TestRleBoolBlockEncoderSize) {
-    size_t size = 100;
-
-    std::unique_ptr<bool[]> bools(new bool[size]);
-    for (int i = 0; i < size; i++) {
-        bools.get()[i] = true;
-    }
-    PageBuilderOptions builder_options;
-    builder_options.data_page_size = 256 * 1024;
-    segment_v2::RlePageBuilder<OLAP_FIELD_TYPE_BOOL> rle_page_builder(builder_options);
-    rle_page_builder.add(reinterpret_cast<const uint8_t *>(bools.get()), &size);
-    Slice s = rle_page_builder.finish();
-    // 4 bytes header
-    // 2 bytes indicate_value(): 0x64 << 1 | 1 = 201
-    // 1 bytes values
-    ASSERT_EQ(7, s.size);
-}
-
-}
-
-int main(int argc, char** argv) {
-    testing::InitGoogleTest(&argc, argv);
-    return RUN_ALL_TESTS();
-}
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <gtest/gtest.h>
+#include <memory>
+
+#include "olap/rowset/segment_v2/options.h"
+#include "olap/rowset/segment_v2/page_builder.h"
+#include "olap/rowset/segment_v2/page_decoder.h"
+#include "olap/rowset/segment_v2/rle_page.h"
+#include "util/arena.h"
+#include "util/logging.h"
+
+using doris::segment_v2::PageBuilderOptions;
+using doris::segment_v2::PageDecoderOptions;
+
+namespace doris {
+
+class RlePageTest : public testing::Test {
+public:
+    virtual ~RlePageTest() { }
+
+    template<FieldType type, class PageDecoderType>
+    void copy_one(PageDecoderType* decoder, typename TypeTraits<type>::CppType* ret) {
+        Arena arena;
+        uint8_t null_bitmap = 0;
+        ColumnBlock block(get_type_info(type), (uint8_t*)ret, &null_bitmap, &arena);
+        ColumnBlockView column_block_view(&block);
+
+        size_t n = 1;
+        decoder->next_batch(&n, &column_block_view);
+        ASSERT_EQ(1, n);
+    }
+
+    template <FieldType Type, class PageBuilderType, class PageDecoderType>
+    void test_encode_decode_page_template(typename TypeTraits<Type>::CppType* src,
+            size_t size) {
+        typedef typename TypeTraits<Type>::CppType CppType;
+        PageBuilderOptions builder_options;
+        builder_options.data_page_size = 256 * 1024;
+        PageBuilderType rle_page_builder(builder_options);
+        rle_page_builder.add(reinterpret_cast<const uint8_t *>(src), &size);
+        Slice s = rle_page_builder.finish();
+        ASSERT_EQ(size, rle_page_builder.count());
+        LOG(INFO) << "RLE Encoded size for 10k values: " << s.size
+                << ", original size:" << size * sizeof(CppType);
+
+        PageDecoderOptions decodeder_options;
+        PageDecoderType rle_page_decoder(s, decodeder_options);
+        Status status = rle_page_decoder.init();
+        ASSERT_TRUE(status.ok());
+        ASSERT_EQ(0, rle_page_decoder.current_index());
+        ASSERT_EQ(size, rle_page_decoder.count());
+
+        Arena arena;
+
+        CppType* values = reinterpret_cast<CppType*>(arena.Allocate(size * sizeof(CppType)));
+        uint8_t* null_bitmap = reinterpret_cast<uint8_t*>(arena.Allocate(BitmapSize(size)));
+        ColumnBlock block(get_type_info(Type), (uint8_t*)values, null_bitmap, &arena);
+        ColumnBlockView column_block_view(&block);
+        size_t size_to_fetch = size;
+        status = rle_page_decoder.next_batch(&size_to_fetch, &column_block_view);
+        ASSERT_TRUE(status.ok());
+        ASSERT_EQ(size, size_to_fetch);
+
+        for (uint i = 0; i < size; i++) {
+            if (src[i] != values[i]) {
+                FAIL() << "Fail at index " << i <<
+                    " inserted=" << src[i] << " got=" << values[i];
+            }
+        }
+
+        // Test Seek within block by ordinal
+        for (int i = 0; i < 100; i++) {
+            int seek_off = random() % size;
+            rle_page_decoder.seek_to_position_in_page(seek_off);
+            EXPECT_EQ((int32_t )(seek_off), rle_page_decoder.current_index());
+            CppType ret;
+            copy_one<Type, PageDecoderType>(&rle_page_decoder, &ret);
+            EXPECT_EQ(values[seek_off], ret);
+        }
+    }
+};
+
+// Test for rle block, for INT32, BOOL
+TEST_F(RlePageTest, TestRleInt32BlockEncoderRandom) {
+    const uint32_t size = 10000;
+
+    std::unique_ptr<int32_t[]> ints(new int32_t[size]);
+    for (int i = 0; i < size; i++) {
+        ints.get()[i] = random();
+    }
+
+    test_encode_decode_page_template<OLAP_FIELD_TYPE_INT, segment_v2::RlePageBuilder<OLAP_FIELD_TYPE_INT>,
+        segment_v2::RlePageDecoder<OLAP_FIELD_TYPE_INT> >(ints.get(), size);
+}
+
+TEST_F(RlePageTest, TestRleInt32BlockEncoderEqual) {
+    const uint32_t size = 10000;
+
+    std::unique_ptr<int32_t[]> ints(new int32_t[size]);
+    for (int i = 0; i < size; i++) {
+        ints.get()[i] = 12345;
+    }
+
+    test_encode_decode_page_template<OLAP_FIELD_TYPE_INT, segment_v2::RlePageBuilder<OLAP_FIELD_TYPE_INT>,
+        segment_v2::RlePageDecoder<OLAP_FIELD_TYPE_INT> >(ints.get(), size);
+}
+
+TEST_F(RlePageTest, TestRleInt32BlockEncoderSequence) {
+    const uint32_t size = 10000;
+
+    std::unique_ptr<int32_t[]> ints(new int32_t[size]);
+    for (int i = 0; i < size; i++) {
+        ints.get()[i] = 12345 + i;
+    }
+
+    test_encode_decode_page_template<OLAP_FIELD_TYPE_INT, segment_v2::RlePageBuilder<OLAP_FIELD_TYPE_INT>,
+        segment_v2::RlePageDecoder<OLAP_FIELD_TYPE_INT> >(ints.get(), size);
+}
+
+TEST_F(RlePageTest, TestRleInt32BlockEncoderSize) {
+    size_t size = 100;
+
+    std::unique_ptr<int32_t[]> ints(new int32_t[size]);
+    for (int i = 0; i < size; i++) {
+        ints.get()[i] = 0;
+    }
+    PageBuilderOptions builder_options;
+    builder_options.data_page_size = 256 * 1024;
+    segment_v2::RlePageBuilder<OLAP_FIELD_TYPE_INT> rle_page_builder(builder_options);
+    rle_page_builder.add(reinterpret_cast<const uint8_t *>(ints.get()), &size);
+    Slice s = rle_page_builder.finish();
+    // 4 bytes header
+    // 2 bytes indicate_value(): 0x64 << 1 | 1 = 201
+    // 4 bytes values
+    ASSERT_EQ(10, s.size);
+}
+
+TEST_F(RlePageTest, TestRleBoolBlockEncoderRandom) {
+    const uint32_t size = 10000;
+
+    std::unique_ptr<bool[]> bools(new bool[size]);
+    for (int i = 0; i < size; i++) {
+        if (random() % 2 == 0) {
+            bools.get()[i] = true;
+        } else {
+            bools.get()[i] = false;
+        }
+    }
+
+    test_encode_decode_page_template<OLAP_FIELD_TYPE_BOOL, segment_v2::RlePageBuilder<OLAP_FIELD_TYPE_BOOL>,
+        segment_v2::RlePageDecoder<OLAP_FIELD_TYPE_BOOL> >(bools.get(), size);
+}
+
+TEST_F(RlePageTest, TestRleBoolBlockEncoderSize) {
+    size_t size = 100;
+
+    std::unique_ptr<bool[]> bools(new bool[size]);
+    for (int i = 0; i < size; i++) {
+        bools.get()[i] = true;
+    }
+    PageBuilderOptions builder_options;
+    builder_options.data_page_size = 256 * 1024;
+    segment_v2::RlePageBuilder<OLAP_FIELD_TYPE_BOOL> rle_page_builder(builder_options);
+    rle_page_builder.add(reinterpret_cast<const uint8_t *>(bools.get()), &size);
+    Slice s = rle_page_builder.finish();
+    // 4 bytes header
+    // 2 bytes indicate_value(): 0x64 << 1 | 1 = 201
+    // 1 bytes values
+    ASSERT_EQ(7, s.size);
+}
+
+}
+
+int main(int argc, char** argv) {
+    testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
diff --git a/be/test/util/faststring_test.cpp b/be/test/util/faststring_test.cpp
index 2a6120f3fd..68231c0dc9 100644
--- a/be/test/util/faststring_test.cpp
+++ b/be/test/util/faststring_test.cpp
@@ -1,83 +1,83 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <algorithm>
-#include <cstring>
-#include <memory>
-
-#include <gtest/gtest.h>
-#include <time.h>
-
-#include "util/faststring.h"
-#include "util/random.h"
-
-namespace doris {
-class FaststringTest : public ::testing::Test {};
-
-void RandomString(void* dest, size_t n, doris::Random* rng) {
-  size_t i = 0;
-  uint32_t random = rng->Next();
-  char* cdest = static_cast<char*>(dest);
-  static const size_t sz = sizeof(random);
-  if (n >= sz) {
-    for (i = 0; i <= n - sz; i += sz) {
-      memcpy(&cdest[i], &random, sizeof(random));
-      random = rng->Next();
-    }
-  }
-  memcpy(cdest + i, &random, n - i);
-}
-
-TEST_F(FaststringTest, TestShrinkToFit_Empty) {
-  faststring s;
-  s.shrink_to_fit();
-  ASSERT_EQ(faststring::kInitialCapacity, s.capacity());
-}
-
-// Test that, if the string contents is shorter than the initial capacity
-// of the faststring, shrink_to_fit() leaves the string in the built-in
-// array.
-TEST_F(FaststringTest, TestShrinkToFit_SmallerThanInitialCapacity) {
-  faststring s;
-  s.append("hello");
-  s.shrink_to_fit();
-  ASSERT_EQ(faststring::kInitialCapacity, s.capacity());
-}
-
-TEST_F(FaststringTest, TestShrinkToFit_Random) {
-  doris::Random r(time(nullptr));
-  int kMaxSize = faststring::kInitialCapacity * 2;
-  std::unique_ptr<char[]> random_bytes(new char[kMaxSize]);
-  RandomString(random_bytes.get(), kMaxSize, &r);
-
-  faststring s;
-  for (int i = 0; i < 100; i++) {
-    int new_size = r.Uniform(kMaxSize);
-    s.resize(new_size);
-    memcpy(s.data(), random_bytes.get(), new_size);
-    s.shrink_to_fit();
-    ASSERT_EQ(0, memcmp(s.data(), random_bytes.get(), new_size));
-    ASSERT_EQ(std::max<int>(faststring::kInitialCapacity, new_size), s.capacity());
-  }
-}
-
-} // namespace doris
-
-int main(int argc, char** argv) {
-    testing::InitGoogleTest(&argc, argv);
-    return RUN_ALL_TESTS();
-}
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <algorithm>
+#include <cstring>
+#include <memory>
+
+#include <gtest/gtest.h>
+#include <time.h>
+
+#include "util/faststring.h"
+#include "util/random.h"
+
+namespace doris {
+class FaststringTest : public ::testing::Test {};
+
+void RandomString(void* dest, size_t n, doris::Random* rng) {
+  size_t i = 0;
+  uint32_t random = rng->Next();
+  char* cdest = static_cast<char*>(dest);
+  static const size_t sz = sizeof(random);
+  if (n >= sz) {
+    for (i = 0; i <= n - sz; i += sz) {
+      memcpy(&cdest[i], &random, sizeof(random));
+      random = rng->Next();
+    }
+  }
+  memcpy(cdest + i, &random, n - i);
+}
+
+TEST_F(FaststringTest, TestShrinkToFit_Empty) {
+  faststring s;
+  s.shrink_to_fit();
+  ASSERT_EQ(faststring::kInitialCapacity, s.capacity());
+}
+
+// Test that, if the string contents is shorter than the initial capacity
+// of the faststring, shrink_to_fit() leaves the string in the built-in
+// array.
+TEST_F(FaststringTest, TestShrinkToFit_SmallerThanInitialCapacity) {
+  faststring s;
+  s.append("hello");
+  s.shrink_to_fit();
+  ASSERT_EQ(faststring::kInitialCapacity, s.capacity());
+}
+
+TEST_F(FaststringTest, TestShrinkToFit_Random) {
+  doris::Random r(time(nullptr));
+  int kMaxSize = faststring::kInitialCapacity * 2;
+  std::unique_ptr<char[]> random_bytes(new char[kMaxSize]);
+  RandomString(random_bytes.get(), kMaxSize, &r);
+
+  faststring s;
+  for (int i = 0; i < 100; i++) {
+    int new_size = r.Uniform(kMaxSize);
+    s.resize(new_size);
+    memcpy(s.data(), random_bytes.get(), new_size);
+    s.shrink_to_fit();
+    ASSERT_EQ(0, memcmp(s.data(), random_bytes.get(), new_size));
+    ASSERT_EQ(std::max<int>(faststring::kInitialCapacity, new_size), s.capacity());
+  }
+}
+
+} // namespace doris
+
+int main(int argc, char** argv) {
+    testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
diff --git a/be/test/util/rle_encoding_test.cpp b/be/test/util/rle_encoding_test.cpp
index 50c92c707d..8c8491ca44 100644
--- a/be/test/util/rle_encoding_test.cpp
+++ b/be/test/util/rle_encoding_test.cpp
@@ -1,426 +1,426 @@
-// Licensed to the Apache Software Foundation (ASF) under one
-// or more contributor license agreements.  See the NOTICE file
-// distributed with this work for additional information
-// regarding copyright ownership.  The ASF licenses this file
-// to you under the Apache License, Version 2.0 (the
-// "License"); you may not use this file except in compliance
-// with the License.  You may obtain a copy of the License at
-//
-//   http://www.apache.org/licenses/LICENSE-2.0
-//
-// Unless required by applicable law or agreed to in writing,
-// software distributed under the License is distributed on an
-// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-// KIND, either express or implied.  See the License for the
-// specific language governing permissions and limitations
-// under the License.
-
-#include <algorithm>
-#include <cstdint>
-#include <cstdlib>
-#include <cstring>
-#include <ostream>
-#include <string>
-#include <vector>
-#include <limits>
-
-// Must come before gtest.h.
-#include <boost/utility/binary.hpp>
-#include <glog/logging.h>
-#include <gtest/gtest.h>
-
-#include "util/bit_stream_utils.h"
-#include "util/bit_stream_utils.inline.h"
-#include "util/bit_util.h"
-#include "util/faststring.h"
-#include "util/rle_encoding.h"
-#include "util/debug_util.h"
-
-using std::string;
-using std::vector;
-
-namespace doris {
-
-const int kMaxWidth = 64;
-
-class TestRle : public testing::Test {};
-// Validates encoding of values by encoding and decoding them.  If
-// expected_encoding != NULL, also validates that the encoded buffer is
-// exactly 'expected_encoding'.
-// if expected_len is not -1, it will validate the encoded size is correct.
-template<typename T>
-void ValidateRle(const vector<T>& values, int bit_width,
-    uint8_t* expected_encoding, int expected_len) {
-  faststring buffer;
-  RleEncoder<T> encoder(&buffer, bit_width);
-
-  for (const auto& value : values) {
-    encoder.Put(value);
-  }
-  int encoded_len = encoder.Flush();
-
-  if (expected_len != -1) {
-    EXPECT_EQ(encoded_len, expected_len);
-  }
-  if (expected_encoding != nullptr) {
-    EXPECT_EQ(memcmp(buffer.data(), expected_encoding, expected_len), 0)
-      << "\n"
-      << "Expected: " << hexdump((const char*)expected_encoding, expected_len) << "\n"
-      << "Got:      " << hexdump((const char*)buffer.data(), buffer.size());
-  }
-
-  // Verify read
-  RleDecoder<T> decoder(buffer.data(), encoded_len, bit_width);
-  for (const auto& value : values) {
-    T val = 0;
-    bool result = decoder.Get(&val);
-    EXPECT_TRUE(result);
-    EXPECT_EQ(value, val);
-  }
-}
-
-TEST(Rle, SpecificSequences) {
-  const int kTestLen = 1024;
-  uint8_t expected_buffer[kTestLen];
-  vector<uint64_t> values;
-
-  // Test 50 0' followed by 50 1's
-  values.resize(100);
-  for (int i = 0; i < 50; ++i) {
-    values[i] = 0;
-  }
-  for (int i = 50; i < 100; ++i) {
-    values[i] = 1;
-  }
-
-  // expected_buffer valid for bit width <= 1 byte
-  expected_buffer[0] = (50 << 1);
-  expected_buffer[1] = 0;
-  expected_buffer[2] = (50 << 1);
-  expected_buffer[3] = 1;
-  for (int width = 1; width <= 8; ++width) {
-    ValidateRle(values, width, expected_buffer, 4);
-  }
-
-  for (int width = 9; width <= kMaxWidth; ++width) {
-    ValidateRle(values, width, nullptr, 2 * (1 + BitUtil::Ceil(width, 8)));
-  }
-
-  // Test 100 0's and 1's alternating
-  for (int i = 0; i < 100; ++i) {
-    values[i] = i % 2;
-  }
-  int num_groups = BitUtil::Ceil(100, 8);
-  expected_buffer[0] = (num_groups << 1) | 1;
-  for (int i = 0; i < 100/8; ++i) {
-    expected_buffer[i + 1] = BOOST_BINARY(1 0 1 0 1 0 1 0); // 0xaa
-  }
-  // Values for the last 4 0 and 1's
-  expected_buffer[1 + 100/8] = BOOST_BINARY(0 0 0 0 1 0 1 0); // 0x0a
-
-  // num_groups and expected_buffer only valid for bit width = 1
-  ValidateRle(values, 1, expected_buffer, 1 + num_groups);
-  for (int width = 2; width <= kMaxWidth; ++width) {
-    ValidateRle(values, width, nullptr, 1 + BitUtil::Ceil(width * 100, 8));
-  }
-}
-
-// ValidateRle on 'num_vals' values with width 'bit_width'. If 'value' != -1, that value
-// is used, otherwise alternating values are used.
-void TestRleValues(int bit_width, int num_vals, int value = -1) {
-  const uint64_t mod = bit_width == 64 ? 1ULL : 1ULL << bit_width;
-  vector<uint64_t> values;
-  for (uint64_t v = 0; v < num_vals; ++v) {
-    values.push_back((value != -1) ? value : (bit_width == 64 ? v : (v % mod)));
-  }
-  ValidateRle(values, bit_width, nullptr, -1);
-}
-
-TEST(Rle, TestValues) {
-  for (int width = 1; width <= kMaxWidth; ++width) {
-    TestRleValues(width, 1);
-    TestRleValues(width, 1024);
-    TestRleValues(width, 1024, 0);
-    TestRleValues(width, 1024, 1);
-  }
-}
-
-class BitRle : public testing::Test {
-public:
-    BitRle() {
-    }
-
-    virtual ~BitRle() {
-    }
-};
-
-// Tests all true/false values
-TEST_F(BitRle, AllSame) {
-  const int kTestLen = 1024;
-  vector<bool> values;
-
-  for (int v = 0; v < 2; ++v) {
-    values.clear();
-    for (int i = 0; i < kTestLen; ++i) {
-      values.push_back(v ? true : false);
-    }
-
-    ValidateRle(values, 1, nullptr, 3);
-  }
-}
-
-// Test that writes out a repeated group and then a literal
-// group but flush before finishing.
-TEST_F(BitRle, Flush) {
-  vector<bool> values;
-  for (int i = 0; i < 16; ++i) values.push_back(1);
-  values.push_back(false);
-  ValidateRle(values, 1, nullptr, -1);
-  values.push_back(true);
-  ValidateRle(values, 1, nullptr, -1);
-  values.push_back(true);
-  ValidateRle(values, 1, nullptr, -1);
-  values.push_back(true);
-  ValidateRle(values, 1, nullptr, -1);
-}
-
-// Test some random bool sequences.
-TEST_F(BitRle, RandomBools) {
-  int iters = 0;
-  const int n_iters = 20;
-  while (iters < n_iters) {
-    srand(iters++);
-    if (iters % 10000 == 0) LOG(ERROR) << "Seed: " << iters;
-    vector<uint64_t > values;
-    bool parity = 0;
-    for (int i = 0; i < 1000; ++i) {
-      int group_size = rand() % 20 + 1; // NOLINT(*)
-      if (group_size > 16) {
-        group_size = 1;
-      }
-      for (int i = 0; i < group_size; ++i) {
-        values.push_back(parity);
-      }
-      parity = !parity;
-    }
-    ValidateRle(values, (iters % kMaxWidth) + 1, nullptr, -1);
-  }
-}
-
-// Test some random 64-bit sequences.
-TEST_F(BitRle, Random64Bit) {
-  int iters = 0;
-  const int n_iters = 20;
-  while (iters < n_iters) {
-    srand(iters++);
-    if (iters % 10000 == 0) LOG(ERROR) << "Seed: " << iters;
-    vector<uint64_t > values;
-    for (int i = 0; i < 1000; ++i) {
-      int group_size = rand() % 20 + 1; // NOLINT(*)
-      uint64_t cur_value = (static_cast<uint64_t>(rand()) << 32) + static_cast<uint64_t>(rand());
-      if (group_size > 16) {
-        group_size = 1;
-      }
-      for (int i = 0; i < group_size; ++i) {
-        values.push_back(cur_value);
-      }
-
-    }
-    ValidateRle(values, 64, nullptr, -1);
-  }
-}
-
-// Test a sequence of 1 0's, 2 1's, 3 0's. etc
-// e.g. 011000111100000
-TEST_F(BitRle, RepeatedPattern) {
-  vector<bool> values;
-  const int min_run = 1;
-  const int max_run = 32;
-
-  for (int i = min_run; i <= max_run; ++i) {
-    int v = i % 2;
-    for (int j = 0; j < i; ++j) {
-      values.push_back(v);
-    }
-  }
-
-  // And go back down again
-  for (int i = max_run; i >= min_run; --i) {
-    int v = i % 2;
-    for (int j = 0; j < i; ++j) {
-      values.push_back(v);
-    }
-  }
-
-  ValidateRle(values, 1, nullptr, -1);
-}
-
-TEST_F(TestRle, TestBulkPut) {
-  size_t run_length;
-  bool val = false;
-
-  faststring buffer(1);
-  RleEncoder<bool> encoder(&buffer, 1);
-  encoder.Put(true, 10);
-  encoder.Put(false, 7);
-  encoder.Put(true, 5);
-  encoder.Put(true, 15);
-  encoder.Flush();
-
-  RleDecoder<bool> decoder(buffer.data(), encoder.len(), 1);
-  run_length = decoder.GetNextRun(&val, std::numeric_limits<std::size_t>::max());
-  ASSERT_TRUE(val);
-  ASSERT_EQ(10, run_length);
-
-  run_length = decoder.GetNextRun(&val, std::numeric_limits<std::size_t>::max());
-  ASSERT_FALSE(val);
-  ASSERT_EQ(7, run_length);
-
-  run_length = decoder.GetNextRun(&val, std::numeric_limits<std::size_t>::max());
-  ASSERT_TRUE(val);
-  ASSERT_EQ(20, run_length);
-
-  ASSERT_EQ(0, decoder.GetNextRun(&val, std::numeric_limits<std::size_t>::max()));
-}
-
-TEST_F(TestRle, TestGetNextRun) {
-  // Repeat the test with different number of items
-  for (int num_items = 7; num_items < 200; num_items += 13) {
-    // Test different block patterns
-    //    1: 01010101 01010101
-    //    2: 00110011 00110011
-    //    3: 00011100 01110001
-    //    ...
-    for (int block = 1; block <= 20; ++block) {
-      faststring buffer(1);
-      RleEncoder<bool> encoder(&buffer, 1);
-      for (int j = 0; j < num_items; ++j) {
-        encoder.Put(!!(j & 1), block);
-      }
-      encoder.Flush();
-
-      RleDecoder<bool> decoder(buffer.data(), encoder.len(), 1);
-      size_t count = num_items * block;
-      for (int j = 0; j < num_items; ++j) {
-        size_t run_length;
-        bool val = false;
-        DCHECK_GT(count, 0);
-        run_length = decoder.GetNextRun(&val, std::numeric_limits<std::size_t>::max());
-        run_length = std::min(run_length, count);
-
-        ASSERT_EQ(!!(j & 1), val);
-        ASSERT_EQ(block, run_length);
-        count -= run_length;
-      }
-      DCHECK_EQ(count, 0);
-    }
-  }
-}
-
-// Generate a random bit string which consists of 'num_runs' runs,
-// each with a random length between 1 and 100. Returns the number
-// of values encoded (i.e the sum run length).
-static size_t GenerateRandomBitString(int num_runs, faststring* enc_buf, string* string_rep) {
-  RleEncoder<bool> enc(enc_buf, 1);
-  int num_bits = 0;
-  for (int i = 0; i < num_runs; i++) {
-    int run_length = random() % 100;
-    bool value = static_cast<bool>(i & 1);
-    enc.Put(value, run_length);
-    string_rep->append(run_length, value ? '1' : '0');
-    num_bits += run_length;
-  }
-  enc.Flush();
-  return num_bits;
-}
-
-TEST_F(TestRle, TestRoundTripRandomSequencesWithRuns) {
-  srand(time(nullptr));
-
-  // Test the limiting function of GetNextRun.
-  const int kMaxToReadAtOnce = (random() % 20) + 1;
-
-  // Generate a bunch of random bit sequences, and "round-trip" them
-  // through the encode/decode sequence.
-  for (int rep = 0; rep < 100; rep++) {
-    faststring buf;
-    string string_rep;
-    int num_bits = GenerateRandomBitString(10, &buf, &string_rep);
-    RleDecoder<bool> decoder(buf.data(), buf.size(), 1);
-    string roundtrip_str;
-    int rem_to_read = num_bits;
-    size_t run_len;
-    bool val;
-    while (rem_to_read > 0 &&
-           (run_len = decoder.GetNextRun(&val, std::min(kMaxToReadAtOnce, rem_to_read))) != 0) {
-      ASSERT_LE(run_len, kMaxToReadAtOnce);
-      roundtrip_str.append(run_len, val ? '1' : '0');
-      rem_to_read -= run_len;
-    }
-
-    ASSERT_EQ(string_rep, roundtrip_str);
-  }
-}
-TEST_F(TestRle, TestSkip) {
-  faststring buffer(1);
-  RleEncoder<bool> encoder(&buffer, 1);
-
-  // 0101010[1] 01010101 01
-  //        "A"
-  for (int j = 0; j < 18; ++j) {
-    encoder.Put(!!(j & 1));
-  }
-
-  // 0011[00] 11001100 11001100 11001100 11001100
-  //      "B"
-  for (int j = 0; j < 19; ++j) {
-    encoder.Put(!!(j & 1), 2);
-  }
-
-  // 000000000000 11[1111111111] 000000000000 111111111111
-  //                   "C"
-  // 000000000000 111111111111 0[00000000000] 111111111111
-  //                                  "D"
-  // 000000000000 111111111111 000000000000 111111111111
-  for (int j = 0; j < 12; ++j) {
-    encoder.Put(!!(j & 1), 12);
-  }
-  encoder.Flush();
-
-  bool val = false;
-  size_t run_length;
-  RleDecoder<bool> decoder(buffer.data(), encoder.len(), 1);
-
-  // position before "A"
-  ASSERT_EQ(3, decoder.Skip(7));
-  run_length = decoder.GetNextRun(&val, std::numeric_limits<std::size_t>::max());
-  ASSERT_TRUE(val);
-  ASSERT_EQ(1, run_length);
-
-  // position before "B"
-  ASSERT_EQ(7, decoder.Skip(14));
-  run_length = decoder.GetNextRun(&val, std::numeric_limits<std::size_t>::max());
-  ASSERT_FALSE(val);
-  ASSERT_EQ(2, run_length);
-
-  // position before "C"
-  ASSERT_EQ(18, decoder.Skip(46));
-  run_length = decoder.GetNextRun(&val, std::numeric_limits<std::size_t>::max());
-  ASSERT_TRUE(val);
-  ASSERT_EQ(10, run_length);
-
-  // position before "D"
-  ASSERT_EQ(24, decoder.Skip(49));
-  run_length = decoder.GetNextRun(&val, std::numeric_limits<std::size_t>::max());
-  ASSERT_FALSE(val);
-  ASSERT_EQ(11, run_length);
-
-  encoder.Flush();
-}
-
-} // namespace doris
-
-int main(int argc, char** argv) {
-    ::testing::InitGoogleTest(&argc, argv);
-    return RUN_ALL_TESTS();
-}
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+#include <algorithm>
+#include <cstdint>
+#include <cstdlib>
+#include <cstring>
+#include <ostream>
+#include <string>
+#include <vector>
+#include <limits>
+
+// Must come before gtest.h.
+#include <boost/utility/binary.hpp>
+#include <glog/logging.h>
+#include <gtest/gtest.h>
+
+#include "util/bit_stream_utils.h"
+#include "util/bit_stream_utils.inline.h"
+#include "util/bit_util.h"
+#include "util/faststring.h"
+#include "util/rle_encoding.h"
+#include "util/debug_util.h"
+
+using std::string;
+using std::vector;
+
+namespace doris {
+
+const int kMaxWidth = 64;
+
+class TestRle : public testing::Test {};
+// Validates encoding of values by encoding and decoding them.  If
+// expected_encoding != NULL, also validates that the encoded buffer is
+// exactly 'expected_encoding'.
+// if expected_len is not -1, it will validate the encoded size is correct.
+template<typename T>
+void ValidateRle(const vector<T>& values, int bit_width,
+    uint8_t* expected_encoding, int expected_len) {
+  faststring buffer;
+  RleEncoder<T> encoder(&buffer, bit_width);
+
+  for (const auto& value : values) {
+    encoder.Put(value);
+  }
+  int encoded_len = encoder.Flush();
+
+  if (expected_len != -1) {
+    EXPECT_EQ(encoded_len, expected_len);
+  }
+  if (expected_encoding != nullptr) {
+    EXPECT_EQ(memcmp(buffer.data(), expected_encoding, expected_len), 0)
+      << "\n"
+      << "Expected: " << hexdump((const char*)expected_encoding, expected_len) << "\n"
+      << "Got:      " << hexdump((const char*)buffer.data(), buffer.size());
+  }
+
+  // Verify read
+  RleDecoder<T> decoder(buffer.data(), encoded_len, bit_width);
+  for (const auto& value : values) {
+    T val = 0;
+    bool result = decoder.Get(&val);
+    EXPECT_TRUE(result);
+    EXPECT_EQ(value, val);
+  }
+}
+
+TEST(Rle, SpecificSequences) {
+  const int kTestLen = 1024;
+  uint8_t expected_buffer[kTestLen];
+  vector<uint64_t> values;
+
+  // Test 50 0's followed by 50 1's
+  values.resize(100);
+  for (int i = 0; i < 50; ++i) {
+    values[i] = 0;
+  }
+  for (int i = 50; i < 100; ++i) {
+    values[i] = 1;
+  }
+
+  // expected_buffer valid for bit width <= 1 byte
+  expected_buffer[0] = (50 << 1);
+  expected_buffer[1] = 0;
+  expected_buffer[2] = (50 << 1);
+  expected_buffer[3] = 1;
+  for (int width = 1; width <= 8; ++width) {
+    ValidateRle(values, width, expected_buffer, 4);
+  }
+
+  for (int width = 9; width <= kMaxWidth; ++width) {
+    ValidateRle(values, width, nullptr, 2 * (1 + BitUtil::Ceil(width, 8)));
+  }
+
+  // Test 100 0's and 1's alternating
+  for (int i = 0; i < 100; ++i) {
+    values[i] = i % 2;
+  }
+  int num_groups = BitUtil::Ceil(100, 8);
+  expected_buffer[0] = (num_groups << 1) | 1;
+  for (int i = 0; i < 100/8; ++i) {
+    expected_buffer[i + 1] = BOOST_BINARY(1 0 1 0 1 0 1 0); // 0xaa
+  }
+  // Values for the last 4 0 and 1's
+  expected_buffer[1 + 100/8] = BOOST_BINARY(0 0 0 0 1 0 1 0); // 0x0a
+
+  // num_groups and expected_buffer only valid for bit width = 1
+  ValidateRle(values, 1, expected_buffer, 1 + num_groups);
+  for (int width = 2; width <= kMaxWidth; ++width) {
+    ValidateRle(values, width, nullptr, 1 + BitUtil::Ceil(width * 100, 8));
+  }
+}
+
+// ValidateRle on 'num_vals' values with width 'bit_width'. If 'value' != -1, that value
+// is used, otherwise the sequence 0, 1, 2, ... wrapped modulo 2^bit_width is used.
+void TestRleValues(int bit_width, int num_vals, int value = -1) {
+  const uint64_t mod = bit_width == 64 ? 1ULL : 1ULL << bit_width;
+  vector<uint64_t> values;
+  for (uint64_t v = 0; v < num_vals; ++v) {
+    values.push_back((value != -1) ? value : (bit_width == 64 ? v : (v % mod)));
+  }
+  ValidateRle(values, bit_width, nullptr, -1);
+}
+
+TEST(Rle, TestValues) {
+  for (int width = 1; width <= kMaxWidth; ++width) {
+    TestRleValues(width, 1);
+    TestRleValues(width, 1024);
+    TestRleValues(width, 1024, 0);
+    TestRleValues(width, 1024, 1);
+  }
+}
+
+class BitRle : public testing::Test {
+public:
+    BitRle() {
+    }
+
+    virtual ~BitRle() {
+    }
+};
+
+// Tests all true/false values
+TEST_F(BitRle, AllSame) {
+  const int kTestLen = 1024;
+  vector<bool> values;
+
+  for (int v = 0; v < 2; ++v) {
+    values.clear();
+    for (int i = 0; i < kTestLen; ++i) {
+      values.push_back(v ? true : false);
+    }
+
+    ValidateRle(values, 1, nullptr, 3);
+  }
+}
+
+// Test that writes out a repeated group and then a literal
+// group but flush before finishing.
+TEST_F(BitRle, Flush) {
+  vector<bool> values;
+  for (int i = 0; i < 16; ++i) values.push_back(1);
+  values.push_back(false);
+  ValidateRle(values, 1, nullptr, -1);
+  values.push_back(true);
+  ValidateRle(values, 1, nullptr, -1);
+  values.push_back(true);
+  ValidateRle(values, 1, nullptr, -1);
+  values.push_back(true);
+  ValidateRle(values, 1, nullptr, -1);
+}
+
+// Test some random bool sequences.
+TEST_F(BitRle, RandomBools) {
+  int iters = 0;
+  const int n_iters = 20;
+  while (iters < n_iters) {
+    srand(iters++);
+    if (iters % 10000 == 0) LOG(ERROR) << "Seed: " << iters;
+    vector<uint64_t > values;
+    bool parity = 0;
+    for (int i = 0; i < 1000; ++i) {
+      int group_size = rand() % 20 + 1; // NOLINT(*)
+      if (group_size > 16) {
+        group_size = 1;
+      }
+      for (int i = 0; i < group_size; ++i) {
+        values.push_back(parity);
+      }
+      parity = !parity;
+    }
+    ValidateRle(values, (iters % kMaxWidth) + 1, nullptr, -1);
+  }
+}
+
+// Test some random 64-bit sequences.
+TEST_F(BitRle, Random64Bit) {
+  int iters = 0;
+  const int n_iters = 20;
+  while (iters < n_iters) {
+    srand(iters++);
+    if (iters % 10000 == 0) LOG(ERROR) << "Seed: " << iters;
+    vector<uint64_t > values;
+    for (int i = 0; i < 1000; ++i) {
+      int group_size = rand() % 20 + 1; // NOLINT(*)
+      uint64_t cur_value = (static_cast<uint64_t>(rand()) << 32) + static_cast<uint64_t>(rand());
+      if (group_size > 16) {
+        group_size = 1;
+      }
+      for (int i = 0; i < group_size; ++i) {
+        values.push_back(cur_value);
+      }
+
+    }
+    ValidateRle(values, 64, nullptr, -1);
+  }
+}
+
+// Test a sequence of 1 1's, 2 0's, 3 1's, etc.
+// e.g. 100111000011111
+TEST_F(BitRle, RepeatedPattern) {
+  vector<bool> values;
+  const int min_run = 1;
+  const int max_run = 32;
+
+  for (int i = min_run; i <= max_run; ++i) {
+    int v = i % 2;
+    for (int j = 0; j < i; ++j) {
+      values.push_back(v);
+    }
+  }
+
+  // And go back down again
+  for (int i = max_run; i >= min_run; --i) {
+    int v = i % 2;
+    for (int j = 0; j < i; ++j) {
+      values.push_back(v);
+    }
+  }
+
+  ValidateRle(values, 1, nullptr, -1);
+}
+
+TEST_F(TestRle, TestBulkPut) {
+  size_t run_length;
+  bool val = false;
+
+  faststring buffer(1);
+  RleEncoder<bool> encoder(&buffer, 1);
+  encoder.Put(true, 10);
+  encoder.Put(false, 7);
+  encoder.Put(true, 5);
+  encoder.Put(true, 15);
+  encoder.Flush();
+
+  RleDecoder<bool> decoder(buffer.data(), encoder.len(), 1);
+  run_length = decoder.GetNextRun(&val, std::numeric_limits<std::size_t>::max());
+  ASSERT_TRUE(val);
+  ASSERT_EQ(10, run_length);
+
+  run_length = decoder.GetNextRun(&val, std::numeric_limits<std::size_t>::max());
+  ASSERT_FALSE(val);
+  ASSERT_EQ(7, run_length);
+
+  run_length = decoder.GetNextRun(&val, std::numeric_limits<std::size_t>::max());
+  ASSERT_TRUE(val);
+  ASSERT_EQ(20, run_length);
+
+  ASSERT_EQ(0, decoder.GetNextRun(&val, std::numeric_limits<std::size_t>::max()));
+}
+
+TEST_F(TestRle, TestGetNextRun) {
+  // Repeat the test with different number of items
+  for (int num_items = 7; num_items < 200; num_items += 13) {
+    // Test different block patterns
+    //    1: 01010101 01010101
+    //    2: 00110011 00110011
+    //    3: 00011100 01110001
+    //    ...
+    for (int block = 1; block <= 20; ++block) {
+      faststring buffer(1);
+      RleEncoder<bool> encoder(&buffer, 1);
+      for (int j = 0; j < num_items; ++j) {
+        encoder.Put(!!(j & 1), block);
+      }
+      encoder.Flush();
+
+      RleDecoder<bool> decoder(buffer.data(), encoder.len(), 1);
+      size_t count = num_items * block;
+      for (int j = 0; j < num_items; ++j) {
+        size_t run_length;
+        bool val = false;
+        DCHECK_GT(count, 0);
+        run_length = decoder.GetNextRun(&val, std::numeric_limits<std::size_t>::max());
+        run_length = std::min(run_length, count);
+
+        ASSERT_EQ(!!(j & 1), val);
+        ASSERT_EQ(block, run_length);
+        count -= run_length;
+      }
+      DCHECK_EQ(count, 0);
+    }
+  }
+}
+
+// Generate a random bit string which consists of 'num_runs' runs,
+// each with a random length between 0 and 99. Returns the number
+// of values encoded (i.e the sum run length).
+static size_t GenerateRandomBitString(int num_runs, faststring* enc_buf, string* string_rep) {
+  RleEncoder<bool> enc(enc_buf, 1);
+  int num_bits = 0;
+  for (int i = 0; i < num_runs; i++) {
+    int run_length = random() % 100;
+    bool value = static_cast<bool>(i & 1);
+    enc.Put(value, run_length);
+    string_rep->append(run_length, value ? '1' : '0');
+    num_bits += run_length;
+  }
+  enc.Flush();
+  return num_bits;
+}
+
+TEST_F(TestRle, TestRoundTripRandomSequencesWithRuns) {
+  srand(time(nullptr));
+
+  // Test the limiting function of GetNextRun.
+  const int kMaxToReadAtOnce = (random() % 20) + 1;
+
+  // Generate a bunch of random bit sequences, and "round-trip" them
+  // through the encode/decode sequence.
+  for (int rep = 0; rep < 100; rep++) {
+    faststring buf;
+    string string_rep;
+    int num_bits = GenerateRandomBitString(10, &buf, &string_rep);
+    RleDecoder<bool> decoder(buf.data(), buf.size(), 1);
+    string roundtrip_str;
+    int rem_to_read = num_bits;
+    size_t run_len;
+    bool val;
+    while (rem_to_read > 0 &&
+           (run_len = decoder.GetNextRun(&val, std::min(kMaxToReadAtOnce, rem_to_read))) != 0) {
+      ASSERT_LE(run_len, kMaxToReadAtOnce);
+      roundtrip_str.append(run_len, val ? '1' : '0');
+      rem_to_read -= run_len;
+    }
+
+    ASSERT_EQ(string_rep, roundtrip_str);
+  }
+}
+TEST_F(TestRle, TestSkip) {
+  faststring buffer(1);
+  RleEncoder<bool> encoder(&buffer, 1);
+
+  // 0101010[1] 01010101 01
+  //        "A"
+  for (int j = 0; j < 18; ++j) {
+    encoder.Put(!!(j & 1));
+  }
+
+  // 0011[00] 11001100 11001100 11001100 11001100
+  //      "B"
+  for (int j = 0; j < 19; ++j) {
+    encoder.Put(!!(j & 1), 2);
+  }
+
+  // 000000000000 11[1111111111] 000000000000 111111111111
+  //                   "C"
+  // 000000000000 111111111111 0[00000000000] 111111111111
+  //                                  "D"
+  // 000000000000 111111111111 000000000000 111111111111
+  for (int j = 0; j < 12; ++j) {
+    encoder.Put(!!(j & 1), 12);
+  }
+  encoder.Flush();
+
+  bool val = false;
+  size_t run_length;
+  RleDecoder<bool> decoder(buffer.data(), encoder.len(), 1);
+
+  // position before "A"
+  ASSERT_EQ(3, decoder.Skip(7));
+  run_length = decoder.GetNextRun(&val, std::numeric_limits<std::size_t>::max());
+  ASSERT_TRUE(val);
+  ASSERT_EQ(1, run_length);
+
+  // position before "B"
+  ASSERT_EQ(7, decoder.Skip(14));
+  run_length = decoder.GetNextRun(&val, std::numeric_limits<std::size_t>::max());
+  ASSERT_FALSE(val);
+  ASSERT_EQ(2, run_length);
+
+  // position before "C"
+  ASSERT_EQ(18, decoder.Skip(46));
+  run_length = decoder.GetNextRun(&val, std::numeric_limits<std::size_t>::max());
+  ASSERT_TRUE(val);
+  ASSERT_EQ(10, run_length);
+
+  // position before "D"
+  ASSERT_EQ(24, decoder.Skip(49));
+  run_length = decoder.GetNextRun(&val, std::numeric_limits<std::size_t>::max());
+  ASSERT_FALSE(val);
+  ASSERT_EQ(11, run_length);
+
+  encoder.Flush();
+}
+
+} // namespace doris
+
+int main(int argc, char** argv) {
+    ::testing::InitGoogleTest(&argc, argv);
+    return RUN_ALL_TESTS();
+}
diff --git a/docs/documentation/cn/community/subscribe-mail-list.md b/docs/documentation/cn/community/subscribe-mail-list.md
index dbd23c074f..9f56cd0611 100644
--- a/docs/documentation/cn/community/subscribe-mail-list.md
+++ b/docs/documentation/cn/community/subscribe-mail-list.md
@@ -1,43 +1,43 @@
-# 订阅邮件列表
-
+# 订阅邮件列表
+
 邮件列表（Mail List）是 Apache 社区最被认可的交流方式。一般来说，开源社区的提问与解答、技术讨论、事务决策等都通过邮件列表来承载。邮件列表异步、广播的特性，也非常适合开源社区的沟通交流。那么，如何订阅 Apache Doris (incubating) 的邮件列表呢？主要包括以下五个步骤。
-
-## 1. 发送订阅邮件
-
-打开自己的邮箱，新建邮件，向`dev-subscribe@doris.apache.org`发送一封邮件（邮件主题和内容任意）
-
-![step1](../../../resources/images/subscribe-mail-list-step1.png)
-
-## 2. 接收来自 dev-help@doris.apache.org 的确认邮件
-
-执行完第一步之后，您将收到一封来自`dev-help@doris.apache.org`的确认邮件，邮件内容如下图所示。（**如果长时间未能收到，请确认该邮件是否已被拦截，或已经被自动归入“订阅邮件”、“垃圾邮件”、“推广邮件”等文件夹**）
+
+## 1. 发送订阅邮件
+
+打开自己的邮箱，新建邮件，向`dev-subscribe@doris.apache.org`发送一封邮件（邮件主题和内容任意）
+
+![step1](../../../resources/images/subscribe-mail-list-step1.png)
+
+## 2. 接收来自 dev-help@doris.apache.org 的确认邮件
+
+执行完第一步之后，您将收到一封来自`dev-help@doris.apache.org`的确认邮件，邮件内容如下图所示。（**如果长时间未能收到，请确认该邮件是否已被拦截，或已经被自动归入“订阅邮件”、“垃圾邮件”、“推广邮件”等文件夹**）
 
 ![step2](../../../resources/images/subscribe-mail-list-step2.png)
-
-## 3. 回复确认邮件
-
-​针对上一步接收到的邮件，
-
-​**a.直接回复该邮件**
-
-​***或***
-
-**b. 新建一封`收件人`为上一步中的`回复地址`的邮件**
-
-​均可，内容主题不限
-
-![step3](../../../resources/images/subscribe-mail-list-step3.png)
-
-
-## 4. 接收欢迎邮件
-
-​完成第三步之后，将会受到一封标题为**WELCOME to dev@doris.apache.org**的欢迎邮件。至此，订阅邮件列表的工作已经完成了，社区的动态都会通过邮件的方式通知您。
-
-![step4](../../../resources/images/subscribe-mail-list-step4.png)
-
-
-## 5. 发起邮件讨论（可选）
-
+
+## 3. 回复确认邮件
+
+​针对上一步接收到的邮件，
+
+​**a.直接回复该邮件**
+
+​***或***
+
+**b. 新建一封`收件人`为上一步中的`回复地址`的邮件**
+
+​均可，内容主题不限
+
+![step3](../../../resources/images/subscribe-mail-list-step3.png)
+
+
+## 4. 接收欢迎邮件
+
+​完成第三步之后，将会收到一封标题为**WELCOME to dev@doris.apache.org**的欢迎邮件。至此，订阅邮件列表的工作已经完成了，社区的动态都会通过邮件的方式通知您。
+
+![step4](../../../resources/images/subscribe-mail-list-step4.png)
+
+
+## 5. 发起邮件讨论（可选）
+
 ​成功订阅邮件列表后，若想发起讨论，直接往`dev@doris.apache.org`发送邮件即可。所有订阅了邮件列表的人都会收到邮件。
 ​
 ​
\ No newline at end of file
diff --git a/docs/documentation/cn/internal/doris_storage_optimization.md b/docs/documentation/cn/internal/doris_storage_optimization.md
index 51661cefd4..ab34b206aa 100644
--- a/docs/documentation/cn/internal/doris_storage_optimization.md
+++ b/docs/documentation/cn/internal/doris_storage_optimization.md
@@ -1,206 +1,206 @@
-# Doris存储文件格式优化 #
-
-## 文件格式 ##
-
-![](../../../resources/images/segment_v2.png)
-<center>图1. doris segment文件格式</center>
-
-文件包括：
-- 文件开始是8个字节的magic code，用于识别文件格式和版本
-- Data Region：用于存储各个列的数据信息，这里的数据是按需分page加载的
-- Index Region: doris中将各个列的index数据统一存储在Index Region，这里的数据会按照列粒度进行加载，所以跟列的数据信息分开存储
-- Footer信息
-	- FileFooterPB:定义文件的元数据信息
-	- 4个字节的footer pb内容的checksum
-	- 4个字节的FileFooterPB消息长度，用于读取FileFooterPB
-	- 8个字节的MAGIC CODE，之所以在末位存储，是方便不同的场景进行文件类型的识别
-
-文件中的数据按照page的方式进行组织，page是编码和压缩的基本单位。现在的page类型包括以下几种:
-
-### DataPage ###
-
-DataPage分为两种：nullable和non-nullable的data page。
-
-nullable的data page内容包括：
-```
-
-                 +----------------+
-                 |  value count   |
-                 |----------------|
-                 |  first row id  |
-                 |----------------|
-                 | bitmap length  |
-                 |----------------|
-                 |  null bitmap   |
-                 |----------------|
-                 |     data       |
-                 |----------------|
-                 |    checksum    |
-                 +----------------+
-```
-
-non-nullable data page结构如下：
-
-```
-                 |----------------|
-                 |   value count  |
-                 |----------------|
-                 |  first row id  |
-                 |----------------|
-                 |     data       |
-                 |----------------|
-                 |    checksum    |
-                 +----------------+
-```
-
-其中各个字段含义如下：
-
-- value count
-	- 表示page中的行数
-- first row id
-	- page中第一行的行号
-- bitmap length
-	- 表示接下来bitmap的字节数
-- null bitmap
-	- 表示null信息的bitmap
-- data
-	- 存储经过encoding和compress之后的数据
-	- 需要在数据的头部信息中写入：is_compressed
-	- 各种不同编码的data需要在头部信息写入一些字段信息，以实现数据的解析
-		- TODO：添加各种encoding的header信息
-- checksum
-	- 存储page粒度的校验和，包括page的header和之后的实际数据
-
-
-### Bloom Filter Pages ###
-
-针对每个bloom filter列,会在page的粒度相应的生成一个bloom filter的page，保存在bloom filter pages区域
-
-### Ordinal Index Page ###
-
-针对每个列，都会按照page粒度，建立行号的稀疏索引。内容为这个page的起始行的行号到这个block的指针（包括offset和length）
-
-### Short Key Index page ###
-
-我们会每隔N行（可配置）生成一个short key的稀疏索引，索引的内容为：short key->行号(ordinal)
-
-### Column的其他索引 ###
-
-该格式设计支持后续扩展其他的索引信息，比如bitmap索引，spatial索引等等，只需要将需要的数据写到现有的列数据后面，并且添加对应的元数据字段到FileFooterPB中
-
-### 元数据定义 ###
-FileFooterPB的定义为：
-
-```
-message ColumnPB {
-    optional uint32 column_id = 1; // 这里使用column id，不使用column name是因为计划支持修改列名
-    optional string type = 2; // 列类型
-    optional string aggregation = 3; // 是否聚合
-    optional uint32 length = 4; // 长度
-    optional bool is_key = 5; // 是否是主键列
-    optional string default_value = 6; // 默认值
-    optional uint32 precision = 9 [default = 27]; // 精度
-    optional uint32 frac = 10 [default = 9];
-    optional bool is_nullable = 11 [default=false]; // 是否有null
-    optional bool is_bf_column = 15 [default=false]; // 是否有bf词典
-	optional bool is_bitmap_column = 16 [default=false]; // 是否有bitmap索引
-}
-
-// page偏移
-message PagePointerPB {
-	required uint64 offset; // page在文件中的偏移
-	required uint32 length; // page的大小
-}
-
-message MetadataPairPB {
-  optional string key = 1;
-  optional bytes value = 2;
-}
-
-message ColumnMetaPB {
-	optional ColumnMessage encoding; // 编码方式
-
-	optional PagePointerPB dict_page // 词典page
-	repeated PagePointerPB bloom_filter_pages; // bloom filter词典信息
-	optional PagePointerPB ordinal_index_page; // 行号索引数据
-	optional PagePointerPB page_zone_map_page; // page级别统计信息索引数据
-
-	optional PagePointerPB bitmap_index_page; // bitmap索引数据
-
-	optional uint64 data_footprint; // 列中索引的大小
-	optional uint64 index_footprint; // 列中数据的大小
-	optional uint64 raw_data_footprint; // 原始列数据大小
-
-	optional CompressKind compress_kind; // 列的压缩方式
-
-	optional ZoneMapPB column_zone_map; //文件级别的过滤条件
-	repeated MetadataPairPB column_meta_datas;
-}
-
-message FileFooterPB {
-	optional uint32 version = 2 [default = 1]; // 用于版本兼容和升级使用
-	repeated ColumnPB schema = 5; // 列Schema
-    optional uint64 num_values = 4; // 文件中保存的行数
-    optional uint64 index_footprint = 7; // 索引大小
-    optional uint64 data_footprint = 8; // 数据大小
-	optional uint64 raw_data_footprint = 8; // 原始数据大小
-
-    optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // 压缩方式
-    repeated ColumnMetaPB column_metas = 10; // 列元数据
-	optional PagePointerPB key_index_page; // short key索引page
-}
-
-```
-
-## 读写逻辑 ##
-
-### 写入 ###
-
-大体的写入流程如下：
-1. 写入magic
-2. 根据schema信息，生成对应的ColumnWriter，每个ColumnWriter按照不同的类型，获取对应的encoding信息（可配置），根据encoding，生成对应的encoder
-3. 调用encoder->add(value)进行数据写入，每个K行，生成一个short key index entry，并且，如果当前的page满足一定条件（大小超过1M或者行数为K），就生成一个新的page，缓存在内存中。
-4. 不断的循环步骤3，直到数据写入完成。将各个列的数据依序刷入文件中
-5. 生成FileFooterPB信息，写入文件中。
-
-相关的问题：
-
-- short key的索引如何生成？
-	- 现在还是按照每隔多少行生成一个short key的稀疏索引，保持每隔1024行生成一个short的稀疏索引,具体的内容是：short key -> ordinal
-
-- ordinal索引里面应该存什么？
-	- 存储page的第一个ordinal到page pointer的映射信息
-- 不同encoding类型的page里存什么？
-	- 词典压缩
-	- plain
-	- rle
-	- bshuf
-
-### 读取 ###
-
-1. 读取文件的magic，判断文件类型和版本
-2. 读取FileFooterPB，进行checksum校验
-3. 按照需要的列，读取short key索引和对应列的数据ordinal索引信息
-4. 使用start key和end key，通过short key索引定位到要读取的行号，然后通过ordinal索引确定需要读取的row ranges, 同时需要通过统计信息、bitmap索引等过滤需要读取的row ranges
-5. 然后按照row ranges通过ordinal索引读取行的数据
-
-相关的问题：
-1. 如何实现在page内部快速的定位到某一行？
-
-	page内部是的数据是经过encoding的，无法快速进行行级数据的定位。不同的encoding方式，在内部进行快速的行号定位的方案不一样，需要具体分析：
-	- 如果是rle编码的，需要通过解析rle的header进行skip，直到到达包含该行的那个rle块之后，再进行反解。
-	- binary plain encoding：会在page的中存储offset信息，并且会在page header中指定offset信息的offset，读取的时候会先解析offset信息到数组中，这样子就可以通过各个行的offset数据信息快速的定位block某一行的数据
-2. 如何实现块的高效读取？可以考虑将相邻的块在读取的时候进行merge，一次性读取？
-	这个需要在读取的时候，判断block是否连续，如果连续，就一次性的读取
-
-## 编码 ##
-
-现有的doris存储中，针对string类型的编码，采用plain encoding的方式，效率比较低。经过对比，发现在百度统计的场景下，数据会因为string类型的编码膨胀超过一倍。所以，计划引入基于词典的编码压缩。
-
-## 压缩 ##
-
-实现可扩展的压缩框架，支持多种压缩算法，方便后续添加新的压缩算法，计划引入zstd压缩。
-
-## TODO ##
-1. 如何实现嵌套类型？如何在嵌套类型中进行行号定位？
-2. 如何优化现在的ScanRange拆分导致的下游bitmap、column statistic统计等进行多次？
+# Doris存储文件格式优化 #
+
+## 文件格式 ##
+
+![](../../../resources/images/segment_v2.png)
+<center>图1. doris segment文件格式</center>
+
+文件包括：
+- 文件开始是8个字节的magic code，用于识别文件格式和版本
+- Data Region：用于存储各个列的数据信息，这里的数据是按需分page加载的
+- Index Region: doris中将各个列的index数据统一存储在Index Region，这里的数据会按照列粒度进行加载，所以跟列的数据信息分开存储
+- Footer信息
+	- FileFooterPB:定义文件的元数据信息
+	- 4个字节的footer pb内容的checksum
+	- 4个字节的FileFooterPB消息长度，用于读取FileFooterPB
+	- 8个字节的MAGIC CODE，之所以在末位存储，是方便不同的场景进行文件类型的识别
+
+文件中的数据按照page的方式进行组织，page是编码和压缩的基本单位。现在的page类型包括以下几种:
+
+### DataPage ###
+
+DataPage分为两种：nullable和non-nullable的data page。
+
+nullable的data page内容包括：
+```
+
+                 +----------------+
+                 |  value count   |
+                 |----------------|
+                 |  first row id  |
+                 |----------------|
+                 | bitmap length  |
+                 |----------------|
+                 |  null bitmap   |
+                 |----------------|
+                 |     data       |
+                 |----------------|
+                 |    checksum    |
+                 +----------------+
+```
+
+non-nullable data page结构如下：
+
+```
+                 |----------------|
+                 |   value count  |
+                 |----------------|
+                 |  first row id  |
+                 |----------------|
+                 |     data       |
+                 |----------------|
+                 |    checksum    |
+                 +----------------+
+```
+
+其中各个字段含义如下：
+
+- value count
+	- 表示page中的行数
+- first row id
+	- page中第一行的行号
+- bitmap length
+	- 表示接下来bitmap的字节数
+- null bitmap
+	- 表示null信息的bitmap
+- data
+	- 存储经过encoding和compress之后的数据
+	- 需要在数据的头部信息中写入：is_compressed
+	- 各种不同编码的data需要在头部信息写入一些字段信息，以实现数据的解析
+		- TODO：添加各种encoding的header信息
+- checksum
+	- 存储page粒度的校验和，包括page的header和之后的实际数据
+
+
+### Bloom Filter Pages ###
+
+针对每个bloom filter列,会在page的粒度相应的生成一个bloom filter的page，保存在bloom filter pages区域
+
+### Ordinal Index Page ###
+
+针对每个列，都会按照page粒度，建立行号的稀疏索引。内容为这个page的起始行的行号到这个block的指针（包括offset和length）
+
+### Short Key Index page ###
+
+我们会每隔N行（可配置）生成一个short key的稀疏索引，索引的内容为：short key->行号(ordinal)
+
+### Column的其他索引 ###
+
+该格式设计支持后续扩展其他的索引信息，比如bitmap索引，spatial索引等等，只需要将需要的数据写到现有的列数据后面，并且添加对应的元数据字段到FileFooterPB中
+
+### 元数据定义 ###
+FileFooterPB的定义为：
+
+```
+message ColumnPB {
+    optional uint32 column_id = 1; // 这里使用column id，不使用column name是因为计划支持修改列名
+    optional string type = 2; // 列类型
+    optional string aggregation = 3; // 是否聚合
+    optional uint32 length = 4; // 长度
+    optional bool is_key = 5; // 是否是主键列
+    optional string default_value = 6; // 默认值
+    optional uint32 precision = 9 [default = 27]; // 精度
+    optional uint32 frac = 10 [default = 9];
+    optional bool is_nullable = 11 [default=false]; // 是否有null
+    optional bool is_bf_column = 15 [default=false]; // 是否有bf词典
+	optional bool is_bitmap_column = 16 [default=false]; // 是否有bitmap索引
+}
+
+// page偏移
+message PagePointerPB {
+	required uint64 offset; // page在文件中的偏移
+	required uint32 length; // page的大小
+}
+
+message MetadataPairPB {
+  optional string key = 1;
+  optional bytes value = 2;
+}
+
+message ColumnMetaPB {
+	optional ColumnMessage encoding; // 编码方式
+
+	optional PagePointerPB dict_page; // 词典page
+	repeated PagePointerPB bloom_filter_pages; // bloom filter词典信息
+	optional PagePointerPB ordinal_index_page; // 行号索引数据
+	optional PagePointerPB page_zone_map_page; // page级别统计信息索引数据
+
+	optional PagePointerPB bitmap_index_page; // bitmap索引数据
+
+	optional uint64 data_footprint; // 列中数据的大小
+	optional uint64 index_footprint; // 列中索引的大小
+	optional uint64 raw_data_footprint; // 原始列数据大小
+
+	optional CompressKind compress_kind; // 列的压缩方式
+
+	optional ZoneMapPB column_zone_map; //文件级别的过滤条件
+	repeated MetadataPairPB column_meta_datas;
+}
+
+message FileFooterPB {
+	optional uint32 version = 2 [default = 1]; // 用于版本兼容和升级使用
+	repeated ColumnPB schema = 5; // 列Schema
+    optional uint64 num_values = 4; // 文件中保存的行数
+    optional uint64 index_footprint = 7; // 索引大小
+    optional uint64 data_footprint = 8; // 数据大小
+	optional uint64 raw_data_footprint = 8; // 原始数据大小
+
+    optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // 压缩方式
+    repeated ColumnMetaPB column_metas = 10; // 列元数据
+	optional PagePointerPB key_index_page; // short key索引page
+}
+
+```
+
+## 读写逻辑 ##
+
+### 写入 ###
+
+大体的写入流程如下：
+1. 写入magic
+2. 根据schema信息，生成对应的ColumnWriter，每个ColumnWriter按照不同的类型，获取对应的encoding信息（可配置），根据encoding，生成对应的encoder
+3. 调用encoder->add(value)进行数据写入，每隔K行，生成一个short key index entry，并且，如果当前的page满足一定条件（大小超过1M或者行数为K），就生成一个新的page，缓存在内存中。
+4. 不断的循环步骤3，直到数据写入完成。将各个列的数据依序刷入文件中
+5. 生成FileFooterPB信息，写入文件中。
+
+相关的问题：
+
+- short key的索引如何生成？
+	- 现在还是按照每隔多少行生成一个short key的稀疏索引，保持每隔1024行生成一个short key的稀疏索引,具体的内容是：short key -> ordinal
+
+- ordinal索引里面应该存什么？
+	- 存储page的第一个ordinal到page pointer的映射信息
+- 不同encoding类型的page里存什么？
+	- 词典压缩
+	- plain
+	- rle
+	- bshuf
+
+### 读取 ###
+
+1. 读取文件的magic，判断文件类型和版本
+2. 读取FileFooterPB，进行checksum校验
+3. 按照需要的列，读取short key索引和对应列的数据ordinal索引信息
+4. 使用start key和end key，通过short key索引定位到要读取的行号，然后通过ordinal索引确定需要读取的row ranges, 同时需要通过统计信息、bitmap索引等过滤需要读取的row ranges
+5. 然后按照row ranges通过ordinal索引读取行的数据
+
+相关的问题：
+1. 如何实现在page内部快速的定位到某一行？
+
+	page内部的数据是经过encoding的，无法快速进行行级数据的定位。不同的encoding方式，在内部进行快速的行号定位的方案不一样，需要具体分析：
+	- 如果是rle编码的，需要通过解析rle的header进行skip，直到到达包含该行的那个rle块之后，再进行反解。
+	- binary plain encoding：会在page中存储offset信息，并且会在page header中指定offset信息的offset，读取的时候会先解析offset信息到数组中，这样子就可以通过各个行的offset数据信息快速的定位block某一行的数据
+2. 如何实现块的高效读取？可以考虑将相邻的块在读取的时候进行merge，一次性读取？
+	这个需要在读取的时候，判断block是否连续，如果连续，就一次性的读取
+
+## 编码 ##
+
+现有的doris存储中，针对string类型的编码，采用plain encoding的方式，效率比较低。经过对比，发现在百度统计的场景下，数据会因为string类型的编码膨胀超过一倍。所以，计划引入基于词典的编码压缩。
+
+## 压缩 ##
+
+实现可扩展的压缩框架，支持多种压缩算法，方便后续添加新的压缩算法，计划引入zstd压缩。
+
+## TODO ##
+1. 如何实现嵌套类型？如何在嵌套类型中进行行号定位？
+2. 如何优化现在的ScanRange拆分导致的下游bitmap、column statistic统计等进行多次？
diff --git a/docs/documentation/en/internal/doris_storage_optimization_EN.md b/docs/documentation/en/internal/doris_storage_optimization_EN.md
index ef7721e8dc..0376aa0631 100644
--- a/docs/documentation/en/internal/doris_storage_optimization_EN.md
+++ b/docs/documentation/en/internal/doris_storage_optimization_EN.md
@@ -22,35 +22,35 @@ The data in the file is organized in the form of page, which is the basic unit o
 Data Page is divided into two types: nullable and non-nullable data pages.
 
 Nullable's data page includes:
-```
-
-                 +----------------+
-                 |  value count   |
-                 |----------------|
-                 |  first row id  |
-                 |----------------|
-                 | bitmap length  |
-                 |----------------|
-                 |  null bitmap   |
-                 |----------------|
-                 |     data       |
-                 |----------------|
-                 |    checksum    |
-                 +----------------+
+```
+
+                 +----------------+
+                 |  value count   |
+                 |----------------|
+                 |  first row id  |
+                 |----------------|
+                 | bitmap length  |
+                 |----------------|
+                 |  null bitmap   |
+                 |----------------|
+                 |     data       |
+                 |----------------|
+                 |    checksum    |
+                 +----------------+
 ```
 
 non -zero data page32467;- 26500;- 229140;-
 
-```
-                 |----------------|
-                 |   value count  |
-                 |----------------|
-                 |  first row id  |
-                 |----------------|
-                 |     data       |
-                 |----------------|
-                 |    checksum    |
-                 +----------------+
+```
+                 |----------------|
+                 |   value count  |
+                 |----------------|
+                 |  first row id  |
+                 |----------------|
+                 |     data       |
+                 |----------------|
+                 |    checksum    |
+                 +----------------+
 ```
 
 The meanings of each field are as follows:
@@ -91,65 +91,65 @@ The format design supports the subsequent expansion of other index information,
 ### Metadata Definition###
 FileFooterPB is defined as:
 
-```
-message ColumnPB {
-    optional uint32 column_id = 1; // 这里使用column id，不使用column name是因为计划支持修改列名
-    optional string type = 2; // 列类型
-    optional string aggregation = 3; // 是否聚合
-    optional uint32 length = 4; // 长度
-    optional bool is_key = 5; // 是否是主键列
-    optional string default_value = 6; // 默认值
-    optional uint32 precision = 9 [default = 27]; // 精度
-    optional uint32 frac = 10 [default = 9];
-    optional bool is_nullable = 11 [default=false]; // 是否有null
-    optional bool is_bf_column = 15 [default=false]; // 是否有bf词典
-	optional bool is_bitmap_column = 16 [default=false]; // 是否有bitmap索引
-}
-
-// page偏移
-message PagePointerPB {
-	required uint64 offset; // page在文件中的偏移
-	required uint32 length; // page的大小
-}
-
-message MetadataPairPB {
-  optional string key = 1;
-  optional bytes value = 2;
-}
-
-message ColumnMetaPB {
-	optional ColumnMessage encoding; // 编码方式
-
-	optional PagePointerPB dict_page // 词典page
-	repeated PagePointerPB bloom_filter_pages; // bloom filter词典信息
-	optional PagePointerPB ordinal_index_page; // 行号索引数据
-	optional PagePointerPB page_zone_map_page; // page级别统计信息索引数据
-
-	optional PagePointerPB bitmap_index_page; // bitmap索引数据
-
-	optional uint64 data_footprint; // 列中索引的大小
-	optional uint64 index_footprint; // 列中数据的大小
-	optional uint64 raw_data_footprint; // 原始列数据大小
-
-	optional CompressKind compress_kind; // 列的压缩方式
-
-	optional ZoneMapPB column_zone_map; //文件级别的过滤条件
-	repeated MetadataPairPB column_meta_datas;
-}
-
-message FileFooterPB {
-	optional uint32 version = 2 [default = 1]; // 用于版本兼容和升级使用
-	repeated ColumnPB schema = 5; // 列Schema
-    optional uint64 num_values = 4; // 文件中保存的行数
-    optional uint64 index_footprint = 7; // 索引大小
-    optional uint64 data_footprint = 8; // 数据大小
-	optional uint64 raw_data_footprint = 8; // 原始数据大小
-
-    optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // 压缩方式
-    repeated ColumnMetaPB column_metas = 10; // 列元数据
-	optional PagePointerPB key_index_page; // short key索引page
-}
-
+```
+message ColumnPB {
+    optional uint32 column_id = 1; // 这里使用column id，不使用column name是因为计划支持修改列名
+    optional string type = 2; // 列类型
+    optional string aggregation = 3; // 是否聚合
+    optional uint32 length = 4; // 长度
+    optional bool is_key = 5; // 是否是主键列
+    optional string default_value = 6; // 默认值
+    optional uint32 precision = 9 [default = 27]; // 精度
+    optional uint32 frac = 10 [default = 9];
+    optional bool is_nullable = 11 [default=false]; // 是否有null
+    optional bool is_bf_column = 15 [default=false]; // 是否有bf词典
+	optional bool is_bitmap_column = 16 [default=false]; // 是否有bitmap索引
+}
+
+// page偏移
+message PagePointerPB {
+	required uint64 offset; // page在文件中的偏移
+	required uint32 length; // page的大小
+}
+
+message MetadataPairPB {
+  optional string key = 1;
+  optional bytes value = 2;
+}
+
+message ColumnMetaPB {
+	optional ColumnMessage encoding; // 编码方式
+
+	optional PagePointerPB dict_page // 词典page
+	repeated PagePointerPB bloom_filter_pages; // bloom filter词典信息
+	optional PagePointerPB ordinal_index_page; // 行号索引数据
+	optional PagePointerPB page_zone_map_page; // page级别统计信息索引数据
+
+	optional PagePointerPB bitmap_index_page; // bitmap索引数据
+
+	optional uint64 data_footprint; // size of the data in the column
+	optional uint64 index_footprint; // size of the indexes in the column
+	optional uint64 raw_data_footprint; // 原始列数据大小
+
+	optional CompressKind compress_kind; // 列的压缩方式
+
+	optional ZoneMapPB column_zone_map; //文件级别的过滤条件
+	repeated MetadataPairPB column_meta_datas;
+}
+
+message FileFooterPB {
+	optional uint32 version = 2 [default = 1]; // 用于版本兼容和升级使用
+	repeated ColumnPB schema = 5; // 列Schema
+    optional uint64 num_values = 4; // 文件中保存的行数
+    optional uint64 index_footprint = 7; // 索引大小
+    optional uint64 data_footprint = 8; // 数据大小
+	optional uint64 raw_data_footprint = 8; // 原始数据大小
+
+    optional CompressKind compress_kind = 9 [default = COMPRESS_LZO]; // 压缩方式
+    repeated ColumnMetaPB column_metas = 10; // 列元数据
+	optional PagePointerPB key_index_page; // short key索引page
+}
+
 ```
 
 ## Read-write logic##