[feature-wip](new-scan) Support stream load with csv in new scan framework (#13354)

1. Refactor the file reader creation in FileFactory for simplicity.
    Previously, FileFactory had too many `create_file_reader` interfaces.
    They are now unified into two categories: the interface used by the old BrokerScanNode
    and the interface used by the new FileScanNode.
    In addition, readers that read from a `StreamLoadPipe` are now created by a separate method from readers that read files (see the sketch after this list).

2. Modify the StreamLoadPlanner on the FE side to support using ExternalFileScanNode.

3. For generic readers, the file reader is now created inside the reader rather than passed in from outside.

4. Add test cases for CSV stream load; the behavior is the same as with the old broker scanner.
Mingyu Chen
2022-10-17 23:33:41 +08:00
committed by GitHub
parent c114d87d13
commit dbf71ed3be
58 changed files with 3671 additions and 566 deletions

.gitignore vendored
View File

@ -5,7 +5,6 @@
*.iml
*.swp
*.jar
*.gz
*.log
*.so.tmp
*.flattened-pom.xml

View File

@ -136,10 +136,16 @@ Status BrokerScanner::open_file_reader() {
}
}
RETURN_IF_ERROR(FileFactory::create_file_reader(range.file_type, _state->exec_env(), _profile,
_broker_addresses, _params.properties, range,
start_offset, _cur_file_reader));
return _cur_file_reader->open();
if (range.file_type == TFileType::FILE_STREAM) {
RETURN_IF_ERROR(FileFactory::create_pipe_reader(range.load_id, _cur_file_reader_s));
_real_reader = _cur_file_reader_s.get();
} else {
RETURN_IF_ERROR(FileFactory::create_file_reader(
range.file_type, _state->exec_env(), _profile, _broker_addresses,
_params.properties, range, start_offset, _cur_file_reader));
_real_reader = _cur_file_reader.get();
}
return _real_reader->open();
}
Status BrokerScanner::create_decompressor(TFileFormatType::type type) {
@ -215,12 +221,11 @@ Status BrokerScanner::open_line_reader() {
case TFileFormatType::FORMAT_CSV_LZ4FRAME:
case TFileFormatType::FORMAT_CSV_LZOP:
case TFileFormatType::FORMAT_CSV_DEFLATE:
_cur_line_reader =
new PlainTextLineReader(_profile, _cur_file_reader.get(), _cur_decompressor, size,
_line_delimiter, _line_delimiter_length);
_cur_line_reader = new PlainTextLineReader(_profile, _real_reader, _cur_decompressor, size,
_line_delimiter, _line_delimiter_length);
break;
case TFileFormatType::FORMAT_PROTO:
_cur_line_reader = new PlainBinaryLineReader(_cur_file_reader.get());
_cur_line_reader = new PlainBinaryLineReader(_real_reader);
break;
default: {
return Status::InternalError("Unknown format type, cannot init line reader, type={}",

View File

@ -106,7 +106,12 @@ protected:
int _line_delimiter_length;
// Reader
std::shared_ptr<FileReader> _cur_file_reader;
// _cur_file_reader_s is for stream load pipe reader,
// and _cur_file_reader is for other file reader.
// TODO: refactor this to use only shared_ptr or unique_ptr
std::unique_ptr<FileReader> _cur_file_reader;
std::shared_ptr<FileReader> _cur_file_reader_s;
FileReader* _real_reader;
LineReader* _cur_line_reader;
Decompressor* _cur_decompressor;
bool _cur_line_reader_eof;

View File

@ -36,6 +36,8 @@ JsonScanner::JsonScanner(RuntimeState* state, RuntimeProfile* profile,
const std::vector<TExpr>& pre_filter_texprs, ScannerCounter* counter)
: BaseScanner(state, profile, params, ranges, broker_addresses, pre_filter_texprs, counter),
_cur_file_reader(nullptr),
_cur_file_reader_s(nullptr),
_real_reader(nullptr),
_cur_line_reader(nullptr),
_cur_json_reader(nullptr),
_cur_reader_eof(false),
@ -61,7 +63,7 @@ Status JsonScanner::get_next(Tuple* tuple, MemPool* tuple_pool, bool* eof, bool*
SCOPED_TIMER(_read_timer);
// Get one line
while (!_scanner_eof) {
if (!_cur_file_reader || _cur_reader_eof) {
if (!_real_reader || _cur_reader_eof) {
RETURN_IF_ERROR(open_next_reader());
// If there isn't any more reader, break this
if (_scanner_eof) {
@ -127,11 +129,17 @@ Status JsonScanner::open_file_reader() {
_read_json_by_line = range.read_json_by_line;
}
RETURN_IF_ERROR(FileFactory::create_file_reader(range.file_type, _state->exec_env(), _profile,
_broker_addresses, _params.properties, range,
start_offset, _cur_file_reader));
if (range.file_type == TFileType::FILE_STREAM) {
RETURN_IF_ERROR(FileFactory::create_pipe_reader(range.load_id, _cur_file_reader_s));
_real_reader = _cur_file_reader_s.get();
} else {
RETURN_IF_ERROR(FileFactory::create_file_reader(
range.file_type, _state->exec_env(), _profile, _broker_addresses,
_params.properties, range, start_offset, _cur_file_reader));
_real_reader = _cur_file_reader.get();
}
_cur_reader_eof = false;
return _cur_file_reader->open();
return _real_reader->open();
}
Status JsonScanner::open_line_reader() {
@ -148,7 +156,7 @@ Status JsonScanner::open_line_reader() {
} else {
_skip_next_line = false;
}
_cur_line_reader = new PlainTextLineReader(_profile, _cur_file_reader.get(), nullptr, size,
_cur_line_reader = new PlainTextLineReader(_profile, _real_reader, nullptr, size,
_line_delimiter, _line_delimiter_length);
_cur_reader_eof = false;
return Status::OK();
@ -173,9 +181,8 @@ Status JsonScanner::open_json_reader() {
new JsonReader(_state, _counter, _profile, strip_outer_array, num_as_string,
fuzzy_parse, &_scanner_eof, nullptr, _cur_line_reader);
} else {
_cur_json_reader =
new JsonReader(_state, _counter, _profile, strip_outer_array, num_as_string,
fuzzy_parse, &_scanner_eof, _cur_file_reader.get());
_cur_json_reader = new JsonReader(_state, _counter, _profile, strip_outer_array,
num_as_string, fuzzy_parse, &_scanner_eof, _real_reader);
}
RETURN_IF_ERROR(_cur_json_reader->init(jsonpath, json_root));

View File

@ -87,7 +87,12 @@ protected:
int _line_delimiter_length;
// Reader
std::shared_ptr<FileReader> _cur_file_reader;
// _cur_file_reader_s is for stream load pipe reader,
// and _cur_file_reader is for other file reader.
// TODO: refactor this to use only shared_ptr or unique_ptr
std::unique_ptr<FileReader> _cur_file_reader;
std::shared_ptr<FileReader> _cur_file_reader_s;
FileReader* _real_reader;
LineReader* _cur_line_reader;
JsonReader* _cur_json_reader;
bool _cur_reader_eof;

View File

@ -60,6 +60,9 @@ private:
RuntimeProfile* _profile;
FileReader* _file_reader;
Decompressor* _decompressor;
// the min length that should be read.
// -1 means endless (for stream load),
// and is only valid if the content is uncompressed
size_t _min_length;
size_t _total_read_bytes;
std::string _line_delimiter;

View File

@ -71,39 +71,47 @@ DEFINE_GAUGE_METRIC_PROTOTYPE_2ARG(streaming_load_current_processing, MetricUnit
TStreamLoadPutResult k_stream_load_put_result;
#endif
static TFileFormatType::type parse_format(const std::string& format_str,
const std::string& compress_type) {
static void parse_format(const std::string& format_str, const std::string& compress_type_str,
TFileFormatType::type* format_type,
TFileCompressType::type* compress_type) {
if (format_str.empty()) {
return parse_format("CSV", compress_type);
parse_format("CSV", compress_type_str, format_type, compress_type);
return;
}
TFileFormatType::type format_type = TFileFormatType::FORMAT_UNKNOWN;
*compress_type = TFileCompressType::PLAIN;
*format_type = TFileFormatType::FORMAT_UNKNOWN;
if (iequal(format_str, "CSV")) {
if (compress_type.empty()) {
format_type = TFileFormatType::FORMAT_CSV_PLAIN;
}
if (iequal(compress_type, "GZ")) {
format_type = TFileFormatType::FORMAT_CSV_GZ;
} else if (iequal(compress_type, "LZO")) {
format_type = TFileFormatType::FORMAT_CSV_LZO;
} else if (iequal(compress_type, "BZ2")) {
format_type = TFileFormatType::FORMAT_CSV_BZ2;
} else if (iequal(compress_type, "LZ4FRAME")) {
format_type = TFileFormatType::FORMAT_CSV_LZ4FRAME;
} else if (iequal(compress_type, "LZOP")) {
format_type = TFileFormatType::FORMAT_CSV_LZOP;
} else if (iequal(compress_type, "DEFLATE")) {
format_type = TFileFormatType::FORMAT_CSV_DEFLATE;
if (compress_type_str.empty()) {
*format_type = TFileFormatType::FORMAT_CSV_PLAIN;
} else if (iequal(compress_type_str, "GZ")) {
*format_type = TFileFormatType::FORMAT_CSV_GZ;
*compress_type = TFileCompressType::GZ;
} else if (iequal(compress_type_str, "LZO")) {
*format_type = TFileFormatType::FORMAT_CSV_LZO;
*compress_type = TFileCompressType::LZO;
} else if (iequal(compress_type_str, "BZ2")) {
*format_type = TFileFormatType::FORMAT_CSV_BZ2;
*compress_type = TFileCompressType::BZ2;
} else if (iequal(compress_type_str, "LZ4")) {
*format_type = TFileFormatType::FORMAT_CSV_LZ4FRAME;
*compress_type = TFileCompressType::LZ4FRAME;
} else if (iequal(compress_type_str, "LZOP")) {
*format_type = TFileFormatType::FORMAT_CSV_LZOP;
*compress_type = TFileCompressType::LZO;
} else if (iequal(compress_type_str, "DEFLATE")) {
*format_type = TFileFormatType::FORMAT_CSV_DEFLATE;
*compress_type = TFileCompressType::DEFLATE;
}
} else if (iequal(format_str, "JSON")) {
if (compress_type.empty()) {
format_type = TFileFormatType::FORMAT_JSON;
if (compress_type_str.empty()) {
*format_type = TFileFormatType::FORMAT_JSON;
}
} else if (iequal(format_str, "PARQUET")) {
format_type = TFileFormatType::FORMAT_PARQUET;
*format_type = TFileFormatType::FORMAT_PARQUET;
} else if (iequal(format_str, "ORC")) {
format_type = TFileFormatType::FORMAT_ORC;
*format_type = TFileFormatType::FORMAT_ORC;
}
return format_type;
return;
}
static bool is_format_support_streaming(TFileFormatType::type format) {
@ -275,7 +283,8 @@ Status StreamLoadAction::_on_header(HttpRequest* http_req, StreamLoadContext* ct
//treat as CSV
format_str = BeConsts::CSV;
}
ctx->format = parse_format(format_str, http_req->header(HTTP_COMPRESS_TYPE));
parse_format(format_str, http_req->header(HTTP_COMPRESS_TYPE), &ctx->format,
&ctx->compress_type);
if (ctx->format == TFileFormatType::FORMAT_UNKNOWN) {
return Status::InternalError("unknown data format, format={}",
http_req->header(HTTP_FORMAT_KEY));
@ -387,6 +396,7 @@ Status StreamLoadAction::_process_put(HttpRequest* http_req, StreamLoadContext*
request.tbl = ctx->table;
request.txnId = ctx->txn_id;
request.formatType = ctx->format;
request.__set_compress_type(ctx->compress_type);
request.__set_header_type(ctx->header_type);
request.__set_loadId(ctx->id.to_thrift());
if (ctx->use_streaming) {

View File

@ -52,31 +52,34 @@ doris::Status doris::FileFactory::create_file_writer(
break;
}
default:
return Status::InternalError("UnSupport File Writer Type: " + std::to_string(type));
return Status::InternalError("unsupported file writer type: {}", std::to_string(type));
}
return Status::OK();
}
doris::Status doris::FileFactory::_new_file_reader(
// ============================
// broker scan node/unique ptr
doris::Status doris::FileFactory::create_file_reader(
doris::TFileType::type type, doris::ExecEnv* env, RuntimeProfile* profile,
const std::vector<TNetworkAddress>& broker_addresses,
const std::map<std::string, std::string>& properties, const TBrokerRangeDesc& range,
int64_t start_offset, FileReader*& file_reader) {
const std::map<std::string, std::string>& properties, const doris::TBrokerRangeDesc& range,
int64_t start_offset, std::unique_ptr<FileReader>& file_reader) {
FileReader* file_reader_ptr;
switch (type) {
case TFileType::FILE_LOCAL: {
file_reader = new LocalFileReader(range.path, start_offset);
file_reader_ptr = new LocalFileReader(range.path, start_offset);
break;
}
case TFileType::FILE_BROKER: {
file_reader = new BufferedReader(
file_reader_ptr = new BufferedReader(
profile,
new BrokerReader(env, broker_addresses, properties, range.path, start_offset,
range.__isset.file_size ? range.file_size : 0));
break;
}
case TFileType::FILE_S3: {
file_reader =
file_reader_ptr =
new BufferedReader(profile, new S3Reader(properties, range.path, start_offset));
break;
}
@ -84,149 +87,49 @@ doris::Status doris::FileFactory::_new_file_reader(
FileReader* hdfs_reader = nullptr;
RETURN_IF_ERROR(HdfsReaderWriter::create_reader(range.hdfs_params, range.path, start_offset,
&hdfs_reader));
file_reader = new BufferedReader(profile, hdfs_reader);
break;
}
default:
return Status::InternalError("UnSupport File Reader Type: " + std::to_string(type));
}
return Status::OK();
}
doris::Status doris::FileFactory::create_file_reader(
doris::TFileType::type type, doris::ExecEnv* env, RuntimeProfile* profile,
const std::vector<TNetworkAddress>& broker_addresses,
const std::map<std::string, std::string>& properties, const doris::TBrokerRangeDesc& range,
int64_t start_offset, std::unique_ptr<FileReader>& file_reader) {
if (type == TFileType::FILE_STREAM) {
return Status::InternalError("UnSupport UniquePtr For FileStream type");
}
FileReader* file_reader_ptr;
RETURN_IF_ERROR(_new_file_reader(type, env, profile, broker_addresses, properties, range,
start_offset, file_reader_ptr));
file_reader.reset(file_reader_ptr);
return Status::OK();
}
doris::Status doris::FileFactory::create_file_reader(
doris::TFileType::type type, doris::ExecEnv* env, RuntimeProfile* profile,
const std::vector<TNetworkAddress>& broker_addresses,
const std::map<std::string, std::string>& properties, const doris::TBrokerRangeDesc& range,
int64_t start_offset, std::shared_ptr<FileReader>& file_reader) {
if (type == TFileType::FILE_STREAM) {
file_reader = env->load_stream_mgr()->get(range.load_id);
if (!file_reader) {
VLOG_NOTICE << "unknown stream load id: " << UniqueId(range.load_id);
return Status::InternalError("unknown stream load id");
}
} else {
FileReader* file_reader_ptr;
RETURN_IF_ERROR(_new_file_reader(type, env, profile, broker_addresses, properties, range,
start_offset, file_reader_ptr));
file_reader.reset(file_reader_ptr);
}
return Status::OK();
}
doris::Status doris::FileFactory::_new_file_reader(doris::ExecEnv* env, RuntimeProfile* profile,
const TFileScanRangeParams& params,
const doris::TFileRangeDesc& range,
FileReader*& file_reader_ptr) {
doris::TFileType::type type = params.file_type;
if (type == TFileType::FILE_STREAM) {
return Status::InternalError("UnSupport UniquePtr For FileStream type");
}
int64_t start_offset = range.start_offset;
switch (params.format_type) {
case TFileFormatType::FORMAT_CSV_PLAIN:
case TFileFormatType::FORMAT_CSV_GZ:
case TFileFormatType::FORMAT_CSV_BZ2:
case TFileFormatType::FORMAT_CSV_LZ4FRAME:
case TFileFormatType::FORMAT_CSV_LZOP:
case TFileFormatType::FORMAT_CSV_DEFLATE:
if (start_offset != 0) {
start_offset -= 1;
}
break;
default:
break;
}
switch (type) {
case TFileType::FILE_LOCAL: {
file_reader_ptr = new LocalFileReader(range.path, start_offset);
break;
}
case TFileType::FILE_S3: {
file_reader_ptr = new BufferedReader(
profile, new S3Reader(params.properties, range.path, start_offset));
break;
}
case TFileType::FILE_HDFS: {
FileReader* hdfs_reader = nullptr;
RETURN_IF_ERROR(HdfsReaderWriter::create_reader(params.hdfs_params, range.path,
start_offset, &hdfs_reader));
file_reader_ptr = new BufferedReader(profile, hdfs_reader);
break;
}
default:
return Status::InternalError("Unsupported File Reader Type: " + std::to_string(type));
return Status::InternalError("unsupported file reader type: " + std::to_string(type));
}
return Status::OK();
}
doris::Status doris::FileFactory::create_file_reader(doris::ExecEnv* env, RuntimeProfile* profile,
const TFileScanRangeParams& params,
const doris::TFileRangeDesc& range,
std::shared_ptr<FileReader>& file_reader) {
FileReader* file_reader_ptr;
RETURN_IF_ERROR(_new_file_reader(env, profile, params, range, file_reader_ptr));
file_reader.reset(file_reader_ptr);
return Status::OK();
}
doris::Status doris::FileFactory::create_file_reader(doris::ExecEnv* env, RuntimeProfile* profile,
const TFileScanRangeParams& params,
const doris::TFileRangeDesc& range,
std::unique_ptr<FileReader>& file_reader) {
FileReader* file_reader_ptr;
RETURN_IF_ERROR(_new_file_reader(env, profile, params, range, file_reader_ptr));
file_reader.reset(file_reader_ptr);
return Status::OK();
}
// ============================
// file scan node/unique ptr
doris::Status doris::FileFactory::create_file_reader(RuntimeProfile* profile,
const TFileScanRangeParams& params,
const TFileRangeDesc& range,
std::unique_ptr<FileReader>& file_reader,
int64_t buffer_size) {
doris::TFileType::type type = params.file_type;
const std::string& path, int64_t start_offset,
int64_t file_size, int64_t buffer_size,
std::unique_ptr<FileReader>& file_reader) {
FileReader* file_reader_ptr;
doris::TFileType::type type = params.file_type;
switch (type) {
case TFileType::FILE_LOCAL: {
file_reader_ptr = new LocalFileReader(range.path, range.start_offset);
file_reader_ptr = new LocalFileReader(path, start_offset);
break;
}
case TFileType::FILE_S3: {
file_reader_ptr = new S3Reader(params.properties, range.path, range.start_offset);
file_reader_ptr = new S3Reader(params.properties, path, start_offset);
break;
}
case TFileType::FILE_HDFS: {
RETURN_IF_ERROR(HdfsReaderWriter::create_reader(params.hdfs_params, range.path,
range.start_offset, &file_reader_ptr));
RETURN_IF_ERROR(HdfsReaderWriter::create_reader(params.hdfs_params, path, start_offset,
&file_reader_ptr));
break;
}
case TFileType::FILE_BROKER: {
file_reader_ptr = new BrokerReader(ExecEnv::GetInstance(), params.broker_addresses,
params.properties, path, start_offset, file_size);
break;
}
default:
return Status::InternalError("Unsupported File Reader Type: " + std::to_string(type));
return Status::InternalError("unsupported file reader type: {}", std::to_string(type));
}
if (buffer_size > 0) {
file_reader.reset(new BufferedReader(profile, file_reader_ptr, buffer_size));
} else {
@ -234,3 +137,13 @@ doris::Status doris::FileFactory::create_file_reader(RuntimeProfile* profile,
}
return Status::OK();
}
// file scan node/stream load pipe
doris::Status doris::FileFactory::create_pipe_reader(const TUniqueId& load_id,
std::shared_ptr<FileReader>& file_reader) {
file_reader = ExecEnv::GetInstance()->load_stream_mgr()->get(load_id);
if (!file_reader) {
return Status::InternalError("unknown stream load id: {}", UniqueId(load_id).to_string());
}
return Status::OK();
}

View File

@ -28,43 +28,34 @@ class RuntimeProfile;
class FileFactory {
public:
// Create FileWriter
static Status create_file_writer(TFileType::type type, ExecEnv* env,
const std::vector<TNetworkAddress>& broker_addresses,
const std::map<std::string, std::string>& properties,
const std::string& path, int64_t start_offset,
std::unique_ptr<FileWriter>& file_writer);
// Because StreamLoadPipe use std::shared_ptr, here we have to support both unique_ptr
// and shared_ptr create_file_reader
static Status create_file_reader(TFileType::type type, ExecEnv* env, RuntimeProfile* profile,
const std::vector<TNetworkAddress>& broker_addresses,
const std::map<std::string, std::string>& properties,
const TBrokerRangeDesc& range, int64_t start_offset,
std::unique_ptr<FileReader>& file_reader);
static Status create_file_reader(TFileType::type type, ExecEnv* env, RuntimeProfile* profile,
const std::vector<TNetworkAddress>& broker_addresses,
const std::map<std::string, std::string>& properties,
const TBrokerRangeDesc& range, int64_t start_offset,
std::shared_ptr<FileReader>& file_reader);
static Status create_file_reader(ExecEnv* env, RuntimeProfile* profile,
const TFileScanRangeParams& params,
const TFileRangeDesc& range,
std::unique_ptr<FileReader>& file_reader);
static Status create_file_reader(ExecEnv* env, RuntimeProfile* profile,
const TFileScanRangeParams& params,
const TFileRangeDesc& range,
std::shared_ptr<FileReader>& file_reader);
/**
* Create FileReader. If buffer_size > 0, use BufferedReader to wrap the underlying FileReader;
* Create FileReader for broker scan node related scanners and readers
*/
static Status create_file_reader(TFileType::type type, ExecEnv* env, RuntimeProfile* profile,
const std::vector<TNetworkAddress>& broker_addresses,
const std::map<std::string, std::string>& properties,
const TBrokerRangeDesc& range, int64_t start_offset,
std::unique_ptr<FileReader>& file_reader);
/**
* Create FileReader for file scan node related scanners and readers
* If buffer_size > 0, use BufferedReader to wrap the underlying FileReader;
* Otherwise, return the underlying FileReader directly.
*/
static Status create_file_reader(RuntimeProfile* profile, const TFileScanRangeParams& params,
const TFileRangeDesc& range,
std::unique_ptr<FileReader>& file_reader, int64_t buffer_size);
const std::string& path, int64_t start_offset,
int64_t file_size, int64_t buffer_size,
std::unique_ptr<FileReader>& file_reader);
// Create FileReader for stream load pipe
static Status create_pipe_reader(const TUniqueId& load_id,
std::shared_ptr<FileReader>& file_reader);
static TFileType::type convert_storage_type(TStorageBackendType::type type) {
switch (type) {
@ -81,19 +72,6 @@ public:
}
__builtin_unreachable();
}
private:
// Note: if the function return Status::OK() means new the file_reader. the caller
// should delete the memory of file_reader or use the smart_ptr to hold the own of file_reader
static Status _new_file_reader(TFileType::type type, ExecEnv* env, RuntimeProfile* profile,
const std::vector<TNetworkAddress>& broker_addresses,
const std::map<std::string, std::string>& properties,
const TBrokerRangeDesc& range, int64_t start_offset,
FileReader*& file_reader);
static Status _new_file_reader(ExecEnv* env, RuntimeProfile* profile,
const TFileScanRangeParams& params, const TFileRangeDesc& range,
FileReader*& file_reader);
};
} // namespace doris

View File

@ -161,6 +161,7 @@ public:
// otherwise we save source data to file first, then process it.
bool use_streaming = false;
TFileFormatType::type format = TFileFormatType::FORMAT_CSV_PLAIN;
TFileCompressType::type compress_type = TFileCompressType::UNKNOWN;
std::shared_ptr<MessageBodySink> body_sink;

View File

@ -261,7 +261,7 @@ set(VEC_FILES
exec/scan/new_jdbc_scan_node.cpp
exec/scan/new_es_scanner.cpp
exec/scan/new_es_scan_node.cpp
exec/format/csv/vcsv_reader.cpp
exec/format/csv/csv_reader.cpp
)
add_library(Vec STATIC

View File

@ -59,9 +59,9 @@ Status FileArrowScanner::_open_next_reader() {
}
const TFileRangeDesc& range = _ranges[_next_range++];
std::unique_ptr<FileReader> file_reader;
RETURN_IF_ERROR(FileFactory::create_file_reader(_state->exec_env(), _profile, _params,
range, file_reader));
RETURN_IF_ERROR(FileFactory::create_file_reader(_profile, _params, range.path,
range.start_offset, range.file_size, 0,
file_reader));
RETURN_IF_ERROR(file_reader->open());
if (file_reader->size() == 0) {
file_reader->close();

View File

@ -158,7 +158,8 @@ Status FileTextScanner::_open_next_reader() {
Status FileTextScanner::_open_file_reader() {
const TFileRangeDesc& range = _ranges[_next_range];
RETURN_IF_ERROR(FileFactory::create_file_reader(_state->exec_env(), _profile, _params, range,
RETURN_IF_ERROR(FileFactory::create_file_reader(_profile, _params, range.path,
range.start_offset, range.file_size, 0,
_cur_file_reader));
return _cur_file_reader->open();
}

View File

@ -53,7 +53,7 @@ private:
Status _line_split_to_values(const Slice& line);
Status _split_line(const Slice& line);
// Reader
std::shared_ptr<FileReader> _cur_file_reader;
std::unique_ptr<FileReader> _cur_file_reader;
LineReader* _cur_line_reader;
bool _cur_line_reader_eof;

View File

@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
#include "vcsv_reader.h"
#include "csv_reader.h"
#include <gen_cpp/PlanNodes_types.h>
#include <gen_cpp/internal_service.pb.h>
@ -35,24 +35,32 @@
namespace doris::vectorized {
CsvReader::CsvReader(RuntimeState* state, RuntimeProfile* profile, ScannerCounter* counter,
const TFileScanRangeParams& params, const TFileRangeDesc& range,
const std::vector<SlotDescriptor*>& file_slot_descs, FileReader* file_reader)
const std::vector<SlotDescriptor*>& file_slot_descs)
: _state(state),
_profile(profile),
_counter(counter),
_params(params),
_range(range),
_file_slot_descs(file_slot_descs),
_file_reader(file_reader),
_line_reader(nullptr),
_line_reader_eof(false),
_text_converter(nullptr),
_decompressor(nullptr),
_skip_lines(0) {
_file_format_type = _params.format_type;
_file_compress_type = _params.compress_type;
_size = _range.size;
//means first range
if (_range.start_offset == 0 && _params.__isset.file_attributes &&
_text_converter.reset(new (std::nothrow) TextConverter('\\'));
_split_values.reserve(sizeof(Slice) * _file_slot_descs.size());
}
CsvReader::~CsvReader() {}
Status CsvReader::init_reader() {
// set the skip lines and start offset
int64_t start_offset = _range.start_offset;
if (start_offset == 0 && _params.__isset.file_attributes &&
_params.file_attributes.__isset.header_type &&
_params.file_attributes.header_type.size() > 0) {
std::string header_type = to_lower(_params.file_attributes.header_type);
@ -61,52 +69,43 @@ CsvReader::CsvReader(RuntimeState* state, RuntimeProfile* profile, ScannerCounte
} else if (header_type == BeConsts::CSV_WITH_NAMES_AND_TYPES) {
_skip_lines = 2;
}
}
_text_converter.reset(new (std::nothrow) TextConverter('\\'));
_split_values.reserve(sizeof(Slice) * _file_slot_descs.size());
}
CsvReader::~CsvReader() {
if (_decompressor != nullptr) {
delete _decompressor;
_decompressor = nullptr;
}
if (_file_reader != nullptr) {
delete _file_reader;
_file_reader = nullptr;
}
}
Status CsvReader::init_reader() {
// get column_separator and line_delimiter
if (_params.__isset.file_attributes && _params.file_attributes.__isset.text_params &&
_params.file_attributes.text_params.__isset.column_separator) {
_value_separator = _params.file_attributes.text_params.column_separator;
_value_separator_length = _value_separator.size();
} else {
return Status::InternalError("Can not find column_separator");
}
if (_params.__isset.file_attributes && _params.file_attributes.__isset.text_params &&
_params.file_attributes.text_params.__isset.line_delimiter) {
_line_delimiter = _params.file_attributes.text_params.line_delimiter;
_line_delimiter_length = _line_delimiter.size();
} else {
return Status::InternalError("Can not find line_delimiter");
}
if (_range.start_offset != 0) {
if (_file_format_type != TFileFormatType::FORMAT_CSV_PLAIN) {
} else if (start_offset != 0) {
if (_file_format_type != TFileFormatType::FORMAT_CSV_PLAIN ||
(_file_compress_type != TFileCompressType::UNKNOWN &&
_file_compress_type != TFileCompressType::PLAIN)) {
return Status::InternalError("For now we do not support split compressed file");
}
start_offset -= 1;
_size += 1;
// not first range will always skip one line
_skip_lines = 1;
}
// create and open file reader
FileReader* real_reader = nullptr;
if (_params.file_type == TFileType::FILE_STREAM) {
RETURN_IF_ERROR(FileFactory::create_pipe_reader(_range.load_id, _file_reader_s));
real_reader = _file_reader_s.get();
} else {
RETURN_IF_ERROR(FileFactory::create_file_reader(
_profile, _params, _range.path, start_offset, _range.file_size, 0, _file_reader));
real_reader = _file_reader.get();
}
RETURN_IF_ERROR(real_reader->open());
if (real_reader->size() == 0 && _params.file_type != TFileType::FILE_STREAM &&
_params.file_type != TFileType::FILE_BROKER) {
return Status::EndOfFile("Empty File");
}
// get column_separator and line_delimiter
_value_separator = _params.file_attributes.text_params.column_separator;
_value_separator_length = _value_separator.size();
_line_delimiter = _params.file_attributes.text_params.line_delimiter;
_line_delimiter_length = _line_delimiter.size();
// create decompressor.
// _decompressor may be nullptr if this is not a compressed file
RETURN_IF_ERROR(_create_decompressor(_file_format_type));
RETURN_IF_ERROR(_create_decompressor());
switch (_file_format_type) {
case TFileFormatType::FORMAT_CSV_PLAIN:
@ -115,8 +114,8 @@ Status CsvReader::init_reader() {
case TFileFormatType::FORMAT_CSV_LZ4FRAME:
case TFileFormatType::FORMAT_CSV_LZOP:
case TFileFormatType::FORMAT_CSV_DEFLATE:
_line_reader.reset(new PlainTextLineReader(_profile, _file_reader, _decompressor, _size,
_line_delimiter, _line_delimiter_length));
_line_reader.reset(new PlainTextLineReader(_profile, real_reader, _decompressor.get(),
_size, _line_delimiter, _line_delimiter_length));
break;
default:
@ -173,33 +172,58 @@ Status CsvReader::get_columns(std::unordered_map<std::string, TypeDescriptor>* n
return Status::OK();
}
Status CsvReader::_create_decompressor(TFileFormatType::type type) {
Status CsvReader::_create_decompressor() {
CompressType compress_type;
switch (type) {
case TFileFormatType::FORMAT_CSV_PLAIN:
compress_type = CompressType::UNCOMPRESSED;
break;
case TFileFormatType::FORMAT_CSV_GZ:
compress_type = CompressType::GZIP;
break;
case TFileFormatType::FORMAT_CSV_BZ2:
compress_type = CompressType::BZIP2;
break;
case TFileFormatType::FORMAT_CSV_LZ4FRAME:
compress_type = CompressType::LZ4FRAME;
break;
case TFileFormatType::FORMAT_CSV_LZOP:
compress_type = CompressType::LZOP;
break;
case TFileFormatType::FORMAT_CSV_DEFLATE:
compress_type = CompressType::DEFLATE;
break;
default: {
return Status::InternalError(
"Unknown format type, cannot inference compress type in csv reader, type={}", type);
if (_file_compress_type != TFileCompressType::UNKNOWN) {
switch (_file_compress_type) {
case TFileCompressType::PLAIN:
compress_type = CompressType::UNCOMPRESSED;
break;
case TFileCompressType::GZ:
compress_type = CompressType::GZIP;
break;
case TFileCompressType::LZO:
compress_type = CompressType::LZOP;
break;
case TFileCompressType::BZ2:
compress_type = CompressType::BZIP2;
break;
case TFileCompressType::LZ4FRAME:
compress_type = CompressType::LZ4FRAME;
break;
case TFileCompressType::DEFLATE:
compress_type = CompressType::DEFLATE;
break;
default:
return Status::InternalError("unknown compress type: {}", _file_compress_type);
}
} else {
switch (_file_format_type) {
case TFileFormatType::FORMAT_CSV_PLAIN:
compress_type = CompressType::UNCOMPRESSED;
break;
case TFileFormatType::FORMAT_CSV_GZ:
compress_type = CompressType::GZIP;
break;
case TFileFormatType::FORMAT_CSV_BZ2:
compress_type = CompressType::BZIP2;
break;
case TFileFormatType::FORMAT_CSV_LZ4FRAME:
compress_type = CompressType::LZ4FRAME;
break;
case TFileFormatType::FORMAT_CSV_LZOP:
compress_type = CompressType::LZOP;
break;
case TFileFormatType::FORMAT_CSV_DEFLATE:
compress_type = CompressType::DEFLATE;
break;
default:
return Status::InternalError("unknown format type: {}", _file_format_type);
}
}
}
RETURN_IF_ERROR(Decompressor::create_decompressor(compress_type, &_decompressor));
Decompressor* decompressor;
RETURN_IF_ERROR(Decompressor::create_decompressor(compress_type, &decompressor));
_decompressor.reset(decompressor);
return Status::OK();
}
@ -248,16 +272,17 @@ Status CsvReader::_line_split_to_values(const Slice& line, bool* success) {
_split_line(line);
// if actual column number in csv file is less than _file_slot_descs.size()
// if actual column number in csv file is not equal to _file_slot_descs.size()
// then filter this line.
if (_split_values.size() < _file_slot_descs.size()) {
if (_split_values.size() != _file_slot_descs.size()) {
std::string cmp_str =
_split_values.size() > _file_slot_descs.size() ? "more than" : "less than";
RETURN_IF_ERROR(_state->append_error_msg_to_file(
[&]() -> std::string { return std::string(line.data, line.size); },
[&]() -> std::string {
fmt::memory_buffer error_msg;
fmt::format_to(
error_msg, "{}",
"actual column number in csv file is less than schema column number.");
fmt::format_to(error_msg, "{} {} {}", "actual column number in csv file is ",
cmp_str, " schema column number.");
fmt::format_to(error_msg, "actual number: {}, column separator: [{}], ",
_split_values.size(), _value_separator);
fmt::format_to(error_msg, "line delimiter: [{}], schema column number: {}; ",

View File

@ -33,7 +33,7 @@ class CsvReader : public GenericReader {
public:
CsvReader(RuntimeState* state, RuntimeProfile* profile, ScannerCounter* counter,
const TFileScanRangeParams& params, const TFileRangeDesc& range,
const std::vector<SlotDescriptor*>& file_slot_descs, FileReader* file_reader);
const std::vector<SlotDescriptor*>& file_slot_descs);
~CsvReader() override;
Status init_reader();
@ -42,7 +42,7 @@ public:
std::unordered_set<std::string>* missing_cols) override;
private:
Status _create_decompressor(TFileFormatType::type type);
Status _create_decompressor();
Status _fill_dest_columns(const Slice& line, std::vector<MutableColumnPtr>& columns);
Status _line_split_to_values(const Slice& line, bool* success);
void _split_line(const Slice& line);
@ -58,13 +58,18 @@ private:
const TFileRangeDesc& _range;
const std::vector<SlotDescriptor*>& _file_slot_descs;
FileReader* _file_reader;
// _file_reader_s is for stream load pipe reader,
// and _file_reader is for other file reader.
// TODO: refactor this to use only shared_ptr or unique_ptr
std::unique_ptr<FileReader> _file_reader;
std::shared_ptr<FileReader> _file_reader_s;
std::unique_ptr<LineReader> _line_reader;
bool _line_reader_eof;
std::unique_ptr<TextConverter> _text_converter;
Decompressor* _decompressor;
std::unique_ptr<Decompressor> _decompressor;
TFileFormatType::type _file_format_type;
TFileCompressType::type _file_compress_type;
int64_t _size;
// When we fetch range start from 0, header_type="csv_with_names" skip first line
// When we fetch range start from 0, header_type="csv_with_names_and_types" skip first two line

View File

@ -121,8 +121,9 @@ Status ParquetReader::init_reader(
std::unordered_map<std::string, ColumnValueRangeType>* colname_to_value_range) {
SCOPED_RAW_TIMER(&_statistics.parse_meta_time);
if (_file_reader == nullptr) {
RETURN_IF_ERROR(FileFactory::create_file_reader(_profile, _scan_params, _scan_range,
_file_reader, 0));
RETURN_IF_ERROR(FileFactory::create_file_reader(_profile, _scan_params, _scan_range.path,
_scan_range.start_offset,
_scan_range.file_size, 0, _file_reader));
}
RETURN_IF_ERROR(_file_reader->open());
if (_file_reader->size() == 0) {

View File

@ -202,9 +202,9 @@ Status NewFileArrowScanner::_open_next_reader() {
}
const TFileRangeDesc& range = _ranges[_next_range++];
std::unique_ptr<FileReader> file_reader;
RETURN_IF_ERROR(FileFactory::create_file_reader(_state->exec_env(), _profile, _params,
range, file_reader));
RETURN_IF_ERROR(FileFactory::create_file_reader(_profile, _params, range.path,
range.start_offset, range.file_size, 0,
file_reader));
RETURN_IF_ERROR(file_reader->open());
if (file_reader->size() == 0) {
file_reader->close();

View File

@ -139,7 +139,8 @@ Status NewFileTextScanner::_open_next_reader() {
Status NewFileTextScanner::_open_file_reader() {
const TFileRangeDesc& range = _ranges[_next_range];
RETURN_IF_ERROR(FileFactory::create_file_reader(_state->exec_env(), _profile, _params, range,
RETURN_IF_ERROR(FileFactory::create_file_reader(_profile, _params, range.path,
range.start_offset, range.file_size, 0,
_cur_file_reader));
return _cur_file_reader->open();
}

View File

@ -47,7 +47,7 @@ private:
Status _line_split_to_values(const Slice& line);
Status _split_line(const Slice& line);
// Reader
std::shared_ptr<FileReader> _cur_file_reader;
std::unique_ptr<FileReader> _cur_file_reader;
LineReader* _cur_line_reader;
bool _cur_line_reader_eof;

View File

@ -30,7 +30,7 @@
#include "runtime/descriptors.h"
#include "runtime/raw_value.h"
#include "runtime/runtime_state.h"
#include "vec/exec/format/csv/vcsv_reader.h"
#include "vec/exec/format/csv/csv_reader.h"
#include "vec/exec/format/parquet/vparquet_reader.h"
#include "vec/exec/scan/new_file_scan_node.h"
#include "vec/functions/simple_function_factory.h"
@ -47,7 +47,11 @@ VFileScanner::VFileScanner(RuntimeState* state, NewFileScanNode* parent, int64_t
_cur_reader_eof(false),
_mem_pool(std::make_unique<MemPool>()),
_profile(profile),
_strict_mode(false) {}
_strict_mode(false) {
if (scan_range.params.__isset.strict_mode) {
_strict_mode = scan_range.params.strict_mode;
}
}
Status VFileScanner::prepare(
VExprContext** vconjunct_ctx_ptr,
@ -158,11 +162,11 @@ Status VFileScanner::_get_block_impl(RuntimeState* state, Block* block, bool* eo
} while (true);
// Update filtered rows and unselected rows for load, reset counter.
{
state->update_num_rows_load_filtered(_counter.num_rows_filtered);
state->update_num_rows_load_unselected(_counter.num_rows_unselected);
_reset_counter();
}
// {
// state->update_num_rows_load_filtered(_counter.num_rows_filtered);
// state->update_num_rows_load_unselected(_counter.num_rows_unselected);
// _reset_counter();
// }
return Status::OK();
}
@ -447,7 +451,6 @@ Status VFileScanner::_convert_to_output_block(Block* block) {
"filter column"));
RETURN_IF_ERROR(vectorized::Block::filter_block(block, dest_size, dest_size));
_counter.num_rows_filtered += rows - block->rows();
return Status::OK();
}
@ -461,22 +464,8 @@ Status VFileScanner::_get_next_reader() {
}
const TFileRangeDesc& range = _ranges[_next_range++];
// 1. create file reader
// TODO: Each format requires its own FileReader to achieve a special access mode,
// so create the FileReader inner the format.
std::unique_ptr<FileReader> file_reader;
if (_params.format_type != TFileFormatType::FORMAT_PARQUET) {
RETURN_IF_ERROR(FileFactory::create_file_reader(_state->exec_env(), _profile, _params,
range, file_reader));
RETURN_IF_ERROR(file_reader->open());
if (file_reader->size() == 0) {
file_reader->close();
continue;
}
}
// 2. create reader for specific format
// TODO: add csv, json, avro
// TODO: add json, avro
Status init_status;
switch (_params.format_type) {
case TFileFormatType::FORMAT_PARQUET: {
@ -488,6 +477,18 @@ Status VFileScanner::_get_next_reader() {
break;
}
case TFileFormatType::FORMAT_ORC: {
// create file reader of orc reader.
std::unique_ptr<FileReader> file_reader;
RETURN_IF_ERROR(FileFactory::create_file_reader(_profile, _params, range.path,
range.start_offset, range.file_size, 0,
file_reader));
RETURN_IF_ERROR(file_reader->open());
if (file_reader->size() == 0) {
file_reader->close();
init_status = Status::EndOfFile("Empty orc file");
break;
}
_cur_reader.reset(new ORCReaderWrap(_state, _file_slot_descs, file_reader.release(),
_num_of_columns_from_file, range.start_offset,
range.size, false));
@ -502,8 +503,8 @@ Status VFileScanner::_get_next_reader() {
case TFileFormatType::FORMAT_CSV_LZ4FRAME:
case TFileFormatType::FORMAT_CSV_LZOP:
case TFileFormatType::FORMAT_CSV_DEFLATE: {
_cur_reader.reset(new CsvReader(_state, _profile, &_counter, _params, range,
_file_slot_descs, file_reader.release()));
_cur_reader.reset(
new CsvReader(_state, _profile, &_counter, _params, range, _file_slot_descs));
init_status = ((CsvReader*)(_cur_reader.get()))->init_reader();
break;
}

View File

@ -30,16 +30,6 @@ namespace doris::vectorized {
class NewFileScanNode;
// The counter will be passed to each scanner.
// Note that this struct is not thread safe.
// So if we support concurrent scan in the future, we need to modify this struct.
struct ScannerCounter {
ScannerCounter() : num_rows_filtered(0), num_rows_unselected(0) {}
int64_t num_rows_filtered; // unqualified rows (unmatched the dest schema, or no partition)
int64_t num_rows_unselected; // rows filtered by predicates
};
class VFileScanner : public VScanner {
public:
VFileScanner(RuntimeState* state, NewFileScanNode* parent, int64_t limit,
@ -115,7 +105,6 @@ protected:
// Profile
RuntimeProfile* _profile;
ScannerCounter _counter;
bool _scanner_eof = false;
int _rows = 0;

View File

@ -70,7 +70,11 @@ Status VScanner::get_block(RuntimeState* state, Block* block, bool* eof) {
}
Status VScanner::_filter_output_block(Block* block) {
return VExprContext::filter_block(_vconjunct_ctx, block, _output_tuple_desc->slots().size());
auto old_rows = block->rows();
Status st =
VExprContext::filter_block(_vconjunct_ctx, block, _output_tuple_desc->slots().size());
_counter.num_rows_unselected += old_rows - block->rows();
return st;
}
Status VScanner::try_append_late_arrival_runtime_filter() {
@ -116,8 +120,14 @@ Status VScanner::close(RuntimeState* state) {
}
void VScanner::_update_counters_before_close() {
if (!_state->enable_profile()) return;
LOG(INFO) << "cmy _update_counters_before_close: _counter.num_rows_filtered: "
<< _counter.num_rows_filtered
<< ", _counter.num_rows_unselected: " << _counter.num_rows_unselected;
if (!_state->enable_profile() && !_is_load) return;
COUNTER_UPDATE(_parent->_rows_read_counter, _num_rows_read);
// Update stats for load
_state->update_num_rows_load_filtered(_counter.num_rows_filtered);
_state->update_num_rows_load_unselected(_counter.num_rows_unselected);
}
} // namespace doris::vectorized

View File

@ -28,6 +28,14 @@ namespace doris::vectorized {
class Block;
class VScanNode;
// Counter for load
struct ScannerCounter {
ScannerCounter() : num_rows_filtered(0), num_rows_unselected(0) {}
int64_t num_rows_filtered; // unqualified rows (unmatched the dest schema, or no partition)
int64_t num_rows_unselected; // rows filtered by predicates
};
class VScanner {
public:
VScanner(RuntimeState* state, VScanNode* parent, int64_t limit);
@ -162,6 +170,8 @@ protected:
bool _is_load = false;
// set to true after decrease the "_num_unfinished_scanners" in scanner context
bool _is_counted_down = false;
ScannerCounter _counter;
};
} // namespace doris::vectorized

View File

@ -47,7 +47,7 @@ Status VJsonScanner<JsonReader>::get_next(vectorized::Block* output_block, bool*
auto columns = _src_block.mutate_columns();
// Get one line
while (columns[0]->size() < batch_size && !_scanner_eof) {
if (_cur_file_reader == nullptr || _cur_reader_eof) {
if (_real_reader == nullptr || _cur_reader_eof) {
RETURN_IF_ERROR(open_next_reader());
// If there isn't any more reader, break this
if (_scanner_eof) {
@ -110,7 +110,7 @@ Status VJsonScanner<JsonReader>::open_vjson_reader() {
num_as_string, fuzzy_parse));
_cur_vjson_reader.reset(new JsonReader(_state, _counter, _profile, strip_outer_array,
num_as_string, fuzzy_parse, &_scanner_eof,
_read_json_by_line ? nullptr : _cur_file_reader.get(),
_read_json_by_line ? nullptr : _real_reader,
_read_json_by_line ? _cur_line_reader : nullptr));
RETURN_IF_ERROR(_cur_vjson_reader->init(jsonpath, json_root));

View File

@ -174,8 +174,11 @@ ERRORS:
SHOW LOAD WARNINGS ON 'url'
````
where url is the url given by ErrorURL.
23. compress_type
Specify the compression type of the file. Only compressed CSV files are supported for now. Supported types: gz, lzo, bz2, lz4, lzop, deflate.
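For example, a gzip-compressed CSV stream load could be submitted like this (hypothetical database, table and file names; the `compress_type` header carries the value described above):
```
curl --location-trusted -u user:passwd \
    -H "format: csv" \
    -H "compress_type: gz" \
    -T data.csv.gz \
    -XPUT http://fe_host:http_port/api/example_db/example_tbl/_stream_load
```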
### Example

View File

@ -135,7 +135,7 @@ curl --location-trusted -u user:passwd [-H ""...] -T data.file -XPUT http://fe_h
21. send_batch_parallelism: Integer. Used to set the parallelism of sending batch data. If the parallelism exceeds `max_send_batch_parallelism_per_job` in the BE configuration, the coordinating BE will use the value of `max_send_batch_parallelism_per_job` instead.
22. hidden_columns: Used to specify the hidden columns contained in the imported data. It takes effect when the header does not contain `columns`; multiple hidden columns are separated by commas.
```
hidden_columns: __DORIS_DELETE_SIGN__,__DORIS_SEQUENCE_COL__
The system will import the data using the columns specified by the user. In the above example, the last column of the imported data is __DORIS_SEQUENCE_COL__.
```
@ -166,11 +166,14 @@ ERRORS:
You can view the detailed load error information with the following statement:
```sql
SHOW LOAD WARNINGS ON 'url
SHOW LOAD WARNINGS ON 'url'
```
where url is the url given by ErrorURL.
23. compress_type
Specifies the compression format of the file. Currently only CSV file compression is supported. Supported formats: gz, lzo, bz2, lz4, lzop, deflate.
### Example

View File

@ -17,6 +17,7 @@
package org.apache.doris.analysis;
import org.apache.doris.analysis.StorageBackend.StorageType;
import org.apache.doris.backup.BlobStorage;
import org.apache.doris.common.io.Text;
import org.apache.doris.common.io.Writable;
@ -53,6 +54,13 @@ public class BrokerDesc extends StorageDesc implements Writable {
this.storageType = StorageBackend.StorageType.BROKER;
}
// for empty broker desc
public BrokerDesc(String name) {
this.name = name;
this.properties = Maps.newHashMap();
this.storageType = StorageType.LOCAL;
}
public BrokerDesc(String name, Map<String, String> properties) {
this.name = name;
this.properties = properties;
@ -77,6 +85,11 @@ public class BrokerDesc extends StorageDesc implements Writable {
tryConvertToS3();
}
public static BrokerDesc createForStreamLoad() {
BrokerDesc brokerDesc = new BrokerDesc("", StorageType.STREAM, null);
return brokerDesc;
}
public String getName() {
return name;
}
@ -94,19 +107,19 @@ public class BrokerDesc extends StorageDesc implements Writable {
}
public TFileType getFileType() {
if (storageType == StorageBackend.StorageType.LOCAL) {
return TFileType.FILE_LOCAL;
switch (storageType) {
case LOCAL:
return TFileType.FILE_LOCAL;
case S3:
return TFileType.FILE_S3;
case HDFS:
return TFileType.FILE_HDFS;
case STREAM:
return TFileType.FILE_STREAM;
case BROKER:
default:
return TFileType.FILE_BROKER;
}
if (storageType == StorageBackend.StorageType.BROKER) {
return TFileType.FILE_BROKER;
}
if (storageType == StorageBackend.StorageType.S3) {
return TFileType.FILE_S3;
}
if (storageType == StorageBackend.StorageType.HDFS) {
return TFileType.FILE_HDFS;
}
return TFileType.FILE_BROKER;
}
public StorageBackend.StorageType storageType() {

View File

@ -28,9 +28,13 @@ import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.Pair;
import org.apache.doris.common.util.SqlParserUtils;
import org.apache.doris.common.util.Util;
import org.apache.doris.load.loadv2.LoadTask;
import org.apache.doris.mysql.privilege.PrivPredicate;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.task.LoadTaskInfo;
import org.apache.doris.thrift.TFileCompressType;
import org.apache.doris.thrift.TFileFormatType;
import org.apache.doris.thrift.TNetworkAddress;
import com.google.common.base.Function;
@ -94,7 +98,8 @@ public class DataDescription {
private final PartitionNames partitionNames;
private final List<String> filePaths;
private final Separator columnSeparator;
private final String fileFormat;
private String fileFormat;
private TFileCompressType compressType = TFileCompressType.UNKNOWN;
private final boolean isNegative;
// column names in the path
private final List<String> columnsFromPath;
@ -210,9 +215,57 @@ public class DataDescription {
this.properties = properties;
}
// For stream load using external file scan node.
public DataDescription(String tableName, LoadTaskInfo taskInfo) {
this.tableName = tableName;
this.partitionNames = taskInfo.getPartitions();
// Add a dummy path to just make analyze() happy.
// Stream load does not need this field.
this.filePaths = Lists.newArrayList("dummy");
this.fileFieldNames = taskInfo.getColumnExprDescs().getFileColNames();
this.columnSeparator = taskInfo.getColumnSeparator();
this.lineDelimiter = taskInfo.getLineDelimiter();
getFileFormatAndCompressType(taskInfo);
this.columnsFromPath = null;
this.isNegative = taskInfo.getNegative();
this.columnMappingList = taskInfo.getColumnExprDescs().getColumnMappingList();
this.precedingFilterExpr = taskInfo.getPrecedingFilter();
this.whereExpr = taskInfo.getWhereExpr();
this.srcTableName = null;
this.mergeType = taskInfo.getMergeType();
this.deleteCondition = taskInfo.getDeleteCondition();
this.sequenceCol = taskInfo.getSequenceCol();
this.stripOuterArray = taskInfo.isStripOuterArray();
this.jsonPaths = taskInfo.getJsonPaths();
this.jsonRoot = taskInfo.getJsonRoot();
this.fuzzyParse = taskInfo.isFuzzyParse();
this.readJsonByLine = taskInfo.isReadJsonByLine();
this.numAsString = taskInfo.isNumAsString();
this.properties = Maps.newHashMap();
}
private void getFileFormatAndCompressType(LoadTaskInfo taskInfo) {
// get file format
if (!Strings.isNullOrEmpty(taskInfo.getHeaderType())) {
// for "csv_with_name" and "csv_with_name_and_type"
this.fileFormat = taskInfo.getHeaderType();
} else {
TFileFormatType type = taskInfo.getFormatType();
if (Util.isCsvFormat(type)) {
// ignore the "compress type" in format, such as FORMAT_CSV_GZ
// the compress type is saved in "compressType"
this.fileFormat = "csv";
} else {
this.fileFormat = "json";
}
}
// get compress type
this.compressType = taskInfo.getCompressType();
}
public static void validateMappingFunction(String functionName, List<String> args,
Map<String, String> columnNameMap,
Column mappingColumn, boolean isHadoopLoad) throws AnalysisException {
Map<String, String> columnNameMap,
Column mappingColumn, boolean isHadoopLoad) throws AnalysisException {
if (functionName.equalsIgnoreCase("alignment_timestamp")) {
validateAlignmentTimestamp(args, columnNameMap);
} else if (functionName.equalsIgnoreCase("strftime")) {
@ -425,6 +478,10 @@ public class DataDescription {
return fileFormat;
}
public TFileCompressType getCompressType() {
return compressType;
}
public List<String> getColumnsFromPath() {
return columnsFromPath;
}

View File

@ -17,8 +17,11 @@
package org.apache.doris.analysis;
import org.apache.doris.analysis.BinaryPredicate.Operator;
import org.apache.doris.catalog.Column;
import com.google.common.base.Preconditions;
public class ImportColumnDesc {
private String columnName;
private Expr expr;
@ -59,6 +62,12 @@ public class ImportColumnDesc {
return expr == null;
}
public Expr toBinaryPredicate() {
Preconditions.checkState(!isColumn());
BinaryPredicate pred = new BinaryPredicate(Operator.EQ, new SlotRef(null, columnName), expr);
return pred;
}
@Override
public String toString() {
StringBuilder sb = new StringBuilder();
@ -68,4 +77,5 @@ public class ImportColumnDesc {
}
return sb.toString();
}
}

View File

@ -87,6 +87,25 @@ public class LargeIntLiteral extends LiteralExpr {
analysisDone();
}
public LargeIntLiteral(BigDecimal value) throws AnalysisException {
super();
BigInteger bigInt;
try {
bigInt = new BigInteger(value.toPlainString());
// ATTN: the value from 'sql_parser.y' is always positive. For example, '-256' becomes
// 256, and for int8_t, 256 is invalid while -256 is valid. So we check the right border
// against LARGE_INT_MAX_ABS
if (bigInt.compareTo(LARGE_INT_MIN) < 0 || bigInt.compareTo(LARGE_INT_MAX_ABS) > 0) {
throw new AnalysisException("Large int literal is out of range: " + value);
}
} catch (NumberFormatException e) {
throw new AnalysisException("Invalid integer literal: " + value, e);
}
this.value = bigInt;
type = Type.LARGEINT;
analysisDone();
}
protected LargeIntLiteral(LargeIntLiteral other) {
super(other);
value = other.value;

View File

@ -31,6 +31,7 @@ import org.apache.doris.common.UserException;
import org.apache.doris.common.util.BrokerUtil;
import org.apache.doris.common.util.ParseUtil;
import org.apache.doris.common.util.PrintableMap;
import org.apache.doris.common.util.Util;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.thrift.TFileFormatType;
import org.apache.doris.thrift.TParquetCompressionType;
@ -563,7 +564,7 @@ public class OutFileClause {
analyzeBrokerDesc(processedPropKeys);
if (properties.containsKey(PROP_COLUMN_SEPARATOR)) {
if (!isCsvFormat()) {
if (!Util.isCsvFormat(fileFormatType)) {
throw new AnalysisException(PROP_COLUMN_SEPARATOR + " is only for CSV format");
}
columnSeparator = Separator.convertSeparator(properties.get(PROP_COLUMN_SEPARATOR));
@ -571,7 +572,7 @@ public class OutFileClause {
}
if (properties.containsKey(PROP_LINE_DELIMITER)) {
if (!isCsvFormat()) {
if (!Util.isCsvFormat(fileFormatType)) {
throw new AnalysisException(PROP_LINE_DELIMITER + " is only for CSV format");
}
lineDelimiter = Separator.convertSeparator(properties.get(PROP_LINE_DELIMITER));
@ -772,16 +773,6 @@ public class OutFileClause {
processedPropKeys.add(SCHEMA);
}
private boolean isCsvFormat() {
return fileFormatType == TFileFormatType.FORMAT_CSV_BZ2
|| fileFormatType == TFileFormatType.FORMAT_CSV_DEFLATE
|| fileFormatType == TFileFormatType.FORMAT_CSV_GZ
|| fileFormatType == TFileFormatType.FORMAT_CSV_LZ4FRAME
|| fileFormatType == TFileFormatType.FORMAT_CSV_LZO
|| fileFormatType == TFileFormatType.FORMAT_CSV_LZOP
|| fileFormatType == TFileFormatType.FORMAT_CSV_PLAIN;
}
private boolean isParquetFormat() {
return fileFormatType == TFileFormatType.FORMAT_PARQUET;
}
@ -817,7 +808,7 @@ public class OutFileClause {
public TResultFileSinkOptions toSinkOptions() {
TResultFileSinkOptions sinkOptions = new TResultFileSinkOptions(filePath, fileFormatType);
if (isCsvFormat()) {
if (Util.isCsvFormat(fileFormatType)) {
sinkOptions.setColumnSeparator(columnSeparator);
sinkOptions.setLineDelimiter(lineDelimiter);
}

View File

@ -114,7 +114,8 @@ public class StorageBackend extends StorageDesc implements ParseNode {
S3("Amazon S3 Simple Storage Service"),
HDFS("Hadoop Distributed File System"),
LOCAL("Local file system"),
OFS("Tencent CHDFS");
OFS("Tencent CHDFS"),
STREAM("Stream load pipe");
private final String description;

View File

@ -24,12 +24,14 @@ import org.apache.doris.common.Config;
import org.apache.doris.common.FeNameFormat;
import org.apache.doris.datasource.InternalCatalog;
import org.apache.doris.qe.ConnectContext;
import org.apache.doris.thrift.TFileFormatType;
import com.google.common.base.Preconditions;
import com.google.common.base.Strings;
import com.google.common.collect.Lists;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
import org.jetbrains.annotations.NotNull;
import java.io.BufferedReader;
import java.io.DataInput;
@ -520,4 +522,35 @@ public class Util {
}
return new String(hexChars);
}
@NotNull
public static TFileFormatType getFileFormatType(String path) {
String lowerCasePath = path.toLowerCase();
if (lowerCasePath.endsWith(".parquet") || lowerCasePath.endsWith(".parq")) {
return TFileFormatType.FORMAT_PARQUET;
} else if (lowerCasePath.endsWith(".gz")) {
return TFileFormatType.FORMAT_CSV_GZ;
} else if (lowerCasePath.endsWith(".bz2")) {
return TFileFormatType.FORMAT_CSV_BZ2;
} else if (lowerCasePath.endsWith(".lz4")) {
return TFileFormatType.FORMAT_CSV_LZ4FRAME;
} else if (lowerCasePath.endsWith(".lzo")) {
return TFileFormatType.FORMAT_CSV_LZOP;
} else if (lowerCasePath.endsWith(".deflate")) {
return TFileFormatType.FORMAT_CSV_DEFLATE;
} else {
return TFileFormatType.FORMAT_CSV_PLAIN;
}
}
public static boolean isCsvFormat(TFileFormatType fileFormatType) {
return fileFormatType == TFileFormatType.FORMAT_CSV_BZ2
|| fileFormatType == TFileFormatType.FORMAT_CSV_DEFLATE
|| fileFormatType == TFileFormatType.FORMAT_CSV_GZ
|| fileFormatType == TFileFormatType.FORMAT_CSV_LZ4FRAME
|| fileFormatType == TFileFormatType.FORMAT_CSV_LZO
|| fileFormatType == TFileFormatType.FORMAT_CSV_LZOP
|| fileFormatType == TFileFormatType.FORMAT_CSV_PLAIN;
}
}

View File

@ -39,6 +39,7 @@ import org.apache.doris.common.Pair;
import org.apache.doris.common.io.Text;
import org.apache.doris.common.io.Writable;
import org.apache.doris.load.loadv2.LoadTask;
import org.apache.doris.thrift.TFileCompressType;
import org.apache.doris.thrift.TNetworkAddress;
import com.google.common.base.Strings;
@ -66,6 +67,7 @@ public class BrokerFileGroup implements Writable {
private String lineDelimiter;
// fileFormat may be null, which means format will be decided by file's suffix
private String fileFormat;
private TFileCompressType compressType = TFileCompressType.UNKNOWN;
private boolean isNegative;
private List<Long> partitionIds; // can be null, means no partition specified
private List<String> filePaths;
@ -217,6 +219,7 @@ public class BrokerFileGroup implements Writable {
throw new DdlException("File Format Type " + fileFormat + " is invalid.");
}
}
compressType = dataDescription.getCompressType();
isNegative = dataDescription.isNegative();
// FilePath
@ -276,6 +279,10 @@ public class BrokerFileGroup implements Writable {
return fileFormat;
}
public TFileCompressType getCompressType() {
return compressType;
}
public boolean isNegative() {
return isNegative;
}
@ -405,7 +412,7 @@ public class BrokerFileGroup implements Writable {
// null means default: csv
return false;
}
return fileFormat.toLowerCase().equals("parquet") || fileFormat.toLowerCase().equals("orc");
return fileFormat.equalsIgnoreCase("parquet") || fileFormat.equalsIgnoreCase("orc");
}
@Override

View File

@ -146,7 +146,7 @@ public class LoadingTaskPlanner {
// 1. Broker scan node
ScanNode scanNode;
if (Config.enable_new_load_scan_node) {
scanNode = new ExternalFileScanNode(new PlanNodeId(nextNodeId++), scanTupleDesc, "FileScanNode");
scanNode = new ExternalFileScanNode(new PlanNodeId(nextNodeId++), scanTupleDesc);
((ExternalFileScanNode) scanNode).setLoadInfo(loadJobId, txnId, table, brokerDesc, fileGroups,
fileStatusesList, filesAdded, strictMode, loadParallelism, userInfo);
} else {

View File

@ -39,6 +39,7 @@ import org.apache.doris.common.util.SmallFileMgr;
import org.apache.doris.common.util.SmallFileMgr.SmallFile;
import org.apache.doris.common.util.TimeUtils;
import org.apache.doris.persist.AlterRoutineLoadJobOperationLog;
import org.apache.doris.thrift.TFileCompressType;
import org.apache.doris.transaction.TransactionState;
import org.apache.doris.transaction.TransactionStatus;
@ -724,6 +725,11 @@ public class KafkaRoutineLoadJob extends RoutineLoadJob {
return gson.toJson(partitionIdToOffsetLag);
}
@Override
public TFileCompressType getCompressType() {
return TFileCompressType.PLAIN;
}
@Override
public double getMaxFilterRatio() {
// for kafka routine load, the max filter ratio is always 1, because it use max error num instead of this.

View File

@ -38,6 +38,7 @@ import org.apache.doris.common.DdlException;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.BrokerUtil;
import org.apache.doris.common.util.Util;
import org.apache.doris.common.util.VectorizedUtil;
import org.apache.doris.load.BrokerFileGroup;
import org.apache.doris.load.Load;
@ -460,22 +461,7 @@ public class BrokerScanNode extends LoadScanNode {
}
}
String lowerCasePath = path.toLowerCase();
if (lowerCasePath.endsWith(".parquet") || lowerCasePath.endsWith(".parq")) {
return TFileFormatType.FORMAT_PARQUET;
} else if (lowerCasePath.endsWith(".gz")) {
return TFileFormatType.FORMAT_CSV_GZ;
} else if (lowerCasePath.endsWith(".bz2")) {
return TFileFormatType.FORMAT_CSV_BZ2;
} else if (lowerCasePath.endsWith(".lz4")) {
return TFileFormatType.FORMAT_CSV_LZ4FRAME;
} else if (lowerCasePath.endsWith(".lzo")) {
return TFileFormatType.FORMAT_CSV_LZOP;
} else if (lowerCasePath.endsWith(".deflate")) {
return TFileFormatType.FORMAT_CSV_DEFLATE;
} else {
return TFileFormatType.FORMAT_CSV_PLAIN;
}
return Util.getFileFormatType(path);
}
public String getHostUri() throws UserException {

View File

@ -1940,7 +1940,7 @@ public class SingleNodePlanner {
"TableValuedFunctionScanNode", ((TableValuedFunctionRef) tblRef).getTableFunction());
break;
case HMS_EXTERNAL_TABLE:
scanNode = new ExternalFileScanNode(ctx.getNextNodeId(), tblRef.getDesc(), "HMS_FILE_SCAN_NODE");
scanNode = new ExternalFileScanNode(ctx.getNextNodeId(), tblRef.getDesc());
break;
case ES_EXTERNAL_TABLE:
scanNode = new EsScanNode(ctx.getNextNodeId(), tblRef.getDesc(), "EsScanNode", true);

View File

@ -18,6 +18,8 @@
package org.apache.doris.planner;
import org.apache.doris.analysis.Analyzer;
import org.apache.doris.analysis.BrokerDesc;
import org.apache.doris.analysis.DataDescription;
import org.apache.doris.analysis.DescriptorTable;
import org.apache.doris.analysis.Expr;
import org.apache.doris.analysis.ImportColumnDesc;
@ -40,12 +42,16 @@ import org.apache.doris.common.DdlException;
import org.apache.doris.common.ErrorCode;
import org.apache.doris.common.ErrorReport;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.Util;
import org.apache.doris.load.BrokerFileGroup;
import org.apache.doris.load.LoadErrorHub;
import org.apache.doris.load.loadv2.LoadTask;
import org.apache.doris.load.routineload.RoutineLoadJob;
import org.apache.doris.planner.external.ExternalFileScanNode;
import org.apache.doris.service.FrontendOptions;
import org.apache.doris.task.LoadTaskInfo;
import org.apache.doris.thrift.PaloInternalServiceVersion;
import org.apache.doris.thrift.TBrokerFileStatus;
import org.apache.doris.thrift.TExecPlanFragmentParams;
import org.apache.doris.thrift.TLoadErrorHubInfo;
import org.apache.doris.thrift.TNetworkAddress;
@ -85,9 +91,8 @@ public class StreamLoadPlanner {
private Analyzer analyzer;
private DescriptorTable descTable;
private StreamLoadScanNode scanNode;
private ScanNode scanNode;
private TupleDescriptor tupleDesc;
private TupleDescriptor scanTupleDesc;
public StreamLoadPlanner(Database db, OlapTable destTable, LoadTaskInfo taskInfo) {
this.db = db;
@ -167,13 +172,35 @@ public class StreamLoadPlanner {
}
// create scan node
scanNode = new StreamLoadScanNode(loadId, new PlanNodeId(0), scanTupleDesc, destTable, taskInfo);
if (Config.enable_new_load_scan_node) {
ExternalFileScanNode fileScanNode = new ExternalFileScanNode(new PlanNodeId(0), scanTupleDesc);
if (!Util.isCsvFormat(taskInfo.getFormatType())) {
throw new AnalysisException(
"New stream load scan load not support non-csv type now: " + taskInfo.getFormatType());
}
// 1. create file group
DataDescription dataDescription = new DataDescription(destTable.getName(), taskInfo);
dataDescription.analyzeWithoutCheckPriv(db.getFullName());
BrokerFileGroup fileGroup = new BrokerFileGroup(dataDescription);
fileGroup.parse(db, dataDescription);
// 2. create dummy file status
TBrokerFileStatus fileStatus = new TBrokerFileStatus();
fileStatus.setPath("");
fileStatus.setIsDir(false);
fileStatus.setSize(-1); // must be -1 for stream load, meaning read to the end of the stream.
fileScanNode.setLoadInfo(loadId, taskInfo.getTxnId(), destTable, BrokerDesc.createForStreamLoad(),
fileGroup, fileStatus, taskInfo.isStrictMode(), taskInfo.getFileType());
scanNode = fileScanNode;
} else {
scanNode = new StreamLoadScanNode(loadId, new PlanNodeId(0), scanTupleDesc, destTable, taskInfo);
}
scanNode.init(analyzer);
descTable.computeStatAndMemLayout();
scanNode.finalize(analyzer);
if (Config.enable_vectorized_load) {
scanNode.convertToVectoriezd();
}
descTable.computeStatAndMemLayout();
int timeout = taskInfo.getTimeout();
if (taskInfo instanceof RoutineLoadJob) {

View File

@ -143,7 +143,7 @@ public class StreamLoadScanNode extends LoadScanNode {
}
if (params.getSrcSlotIds() == null) {
params.setSrcSlotIds(new java.util.ArrayList<java.lang.Integer>());
params.setSrcSlotIds(Lists.newArrayList());
}
Load.initColumns(dstTable, columnExprDescs, null /* no hadoop function */, exprsByName, analyzer, srcTupleDesc,
slotDescByName, params.getSrcSlotIds(), taskInfo.getFormatType(), taskInfo.getHiddenColumns(),

View File

@ -49,9 +49,11 @@ import org.apache.doris.thrift.TExplainLevel;
import org.apache.doris.thrift.TExpr;
import org.apache.doris.thrift.TFileScanNode;
import org.apache.doris.thrift.TFileScanRangeParams;
import org.apache.doris.thrift.TFileType;
import org.apache.doris.thrift.TPlanNode;
import org.apache.doris.thrift.TPlanNodeType;
import org.apache.doris.thrift.TScanRangeLocations;
import org.apache.doris.thrift.TUniqueId;
import com.google.common.base.Preconditions;
import com.google.common.collect.Lists;
@ -123,23 +125,32 @@ public class ExternalFileScanNode extends ExternalScanNode {
* 1. Query hms table
* 2. Load from file
*/
public ExternalFileScanNode(PlanNodeId id, TupleDescriptor desc, String planNodeName) {
super(id, desc, planNodeName, StatisticalType.FILE_SCAN_NODE);
public ExternalFileScanNode(PlanNodeId id, TupleDescriptor desc) {
super(id, desc, "EXTERNAL_FILE_SCAN_NODE", StatisticalType.FILE_SCAN_NODE);
}
// Only for load job.
// Only for broker load job.
public void setLoadInfo(long loadJobId, long txnId, Table targetTable, BrokerDesc brokerDesc,
List<BrokerFileGroup> fileGroups, List<List<TBrokerFileStatus>> fileStatusesList, int filesAdded,
boolean strictMode, int loadParallelism, UserIdentity userIdentity) {
Preconditions.checkState(fileGroups.size() == fileStatusesList.size());
for (int i = 0; i < fileGroups.size(); ++i) {
FileGroupInfo fileGroupInfo = new FileGroupInfo(loadJobId, txnId, targetTable, brokerDesc,
fileGroups.get(i), fileStatusesList.get(i), filesAdded, strictMode, loadParallelism, userIdentity);
fileGroups.get(i), fileStatusesList.get(i), filesAdded, strictMode, loadParallelism);
fileGroupInfos.add(fileGroupInfo);
}
this.type = Type.LOAD;
}
// Only for stream load/routine load job.
public void setLoadInfo(TUniqueId loadId, long txnId, Table targetTable, BrokerDesc brokerDesc,
BrokerFileGroup fileGroup, TBrokerFileStatus fileStatus, boolean strictMode, TFileType fileType) {
FileGroupInfo fileGroupInfo = new FileGroupInfo(loadId, txnId, targetTable, brokerDesc,
fileGroup, fileStatus, strictMode, fileType);
fileGroupInfos.add(fileGroupInfo);
this.type = Type.LOAD;
}
@Override
public void init(Analyzer analyzer) throws UserException {
super.init(analyzer);

View File

@ -19,7 +19,6 @@ package org.apache.doris.planner.external;
import org.apache.doris.analysis.BrokerDesc;
import org.apache.doris.analysis.StorageBackend;
import org.apache.doris.analysis.UserIdentity;
import org.apache.doris.catalog.Env;
import org.apache.doris.catalog.FsBroker;
import org.apache.doris.catalog.Table;
@ -28,6 +27,7 @@ import org.apache.doris.common.Config;
import org.apache.doris.common.FeConstants;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.BrokerUtil;
import org.apache.doris.common.util.Util;
import org.apache.doris.load.BrokerFileGroup;
import org.apache.doris.planner.external.ExternalFileScanNode.ParamCreateContext;
import org.apache.doris.system.Backend;
@ -37,11 +37,14 @@ import org.apache.doris.thrift.TFileFormatType;
import org.apache.doris.thrift.TFileRangeDesc;
import org.apache.doris.thrift.TFileScanRange;
import org.apache.doris.thrift.TFileScanRangeParams;
import org.apache.doris.thrift.TFileType;
import org.apache.doris.thrift.TNetworkAddress;
import org.apache.doris.thrift.TScanRange;
import org.apache.doris.thrift.TScanRangeLocation;
import org.apache.doris.thrift.TScanRangeLocations;
import org.apache.doris.thrift.TUniqueId;
import com.google.common.collect.Lists;
import org.apache.logging.log4j.LogManager;
import org.apache.logging.log4j.Logger;
@ -59,6 +62,14 @@ public class FileGroupInfo {
private static final String HIVE_DEFAULT_COLUMN_SEPARATOR = "\001";
private static final String HIVE_DEFAULT_LINE_DELIMITER = "\n";
public enum JobType {
BULK_LOAD,
STREAM_LOAD
}
private JobType jobType;
private TUniqueId loadId;
private long loadJobId;
private long txnId;
private Table targetTable;
@ -68,13 +79,16 @@ public class FileGroupInfo {
private int filesAdded;
private boolean strictMode;
private int loadParallelism;
private UserIdentity userIdentity;
// set by getFileStatusAndCalcInstance
long bytesPerInstance = 0;
private long bytesPerInstance = 0;
// used for stream load, FILE_LOCAL or FILE_STREAM
private TFileType fileType;
// for broker load
public FileGroupInfo(long loadJobId, long txnId, Table targetTable, BrokerDesc brokerDesc,
BrokerFileGroup fileGroup, List<TBrokerFileStatus> fileStatuses, int filesAdded, boolean strictMode,
int loadParallelism, UserIdentity userIdentity) {
int loadParallelism) {
this.jobType = JobType.BULK_LOAD;
this.loadJobId = loadJobId;
this.txnId = txnId;
this.targetTable = targetTable;
@ -84,7 +98,22 @@ public class FileGroupInfo {
this.filesAdded = filesAdded;
this.strictMode = strictMode;
this.loadParallelism = loadParallelism;
this.userIdentity = userIdentity;
}
// for stream load
public FileGroupInfo(TUniqueId loadId, long txnId, Table targetTable, BrokerDesc brokerDesc,
BrokerFileGroup fileGroup, TBrokerFileStatus fileStatus, boolean strictMode, TFileType fileType) {
this.jobType = JobType.STREAM_LOAD;
this.loadId = loadId;
this.txnId = txnId;
this.targetTable = targetTable;
this.brokerDesc = brokerDesc;
this.fileGroup = fileGroup;
this.fileStatuses = Lists.newArrayList();
this.fileStatuses.add(fileStatus);
this.filesAdded = 1;
this.strictMode = strictMode;
this.fileType = fileType;
}
public Table getTargetTable() {
@ -111,10 +140,6 @@ public class FileGroupInfo {
return loadParallelism;
}
public UserIdentity getUserIdentity() {
return userIdentity;
}
public String getExplainString(String prefix) {
StringBuilder sb = new StringBuilder();
sb.append("file scan\n");
@ -126,19 +151,26 @@ public class FileGroupInfo {
throw new UserException("No source file in this table(" + targetTable.getName() + ").");
}
long totalBytes = 0;
for (TBrokerFileStatus fileStatus : fileStatuses) {
totalBytes += fileStatus.size;
}
int numInstances = (int) (totalBytes / Config.min_bytes_per_broker_scanner);
int totalLoadParallelism = loadParallelism * backendPolicy.numBackends();
numInstances = Math.min(totalLoadParallelism, numInstances);
numInstances = Math.min(numInstances, Config.max_broker_concurrency);
numInstances = Math.max(1, numInstances);
int numInstances = 1;
if (jobType == JobType.BULK_LOAD) {
long totalBytes = 0;
for (TBrokerFileStatus fileStatus : fileStatuses) {
totalBytes += fileStatus.size;
}
numInstances = (int) (totalBytes / Config.min_bytes_per_broker_scanner);
int totalLoadParallelism = loadParallelism * backendPolicy.numBackends();
numInstances = Math.min(totalLoadParallelism, numInstances);
numInstances = Math.min(numInstances, Config.max_broker_concurrency);
numInstances = Math.max(1, numInstances);
bytesPerInstance = totalBytes / numInstances + 1;
if (bytesPerInstance > Config.max_bytes_per_broker_scanner) {
throw new UserException("Scan bytes per file scanner exceed limit: " + Config.max_bytes_per_broker_scanner);
bytesPerInstance = totalBytes / numInstances + 1;
if (bytesPerInstance > Config.max_bytes_per_broker_scanner) {
throw new UserException(
"Scan bytes per file scanner exceed limit: " + Config.max_bytes_per_broker_scanner);
}
} else {
// stream load, no need to split
bytesPerInstance = Long.MAX_VALUE;
}
LOG.info("number instance of file scan node is: {}, bytes per instance: {}", numInstances, bytesPerInstance);
}
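For concreteness, a worked example of the split above (all numbers hypothetical, not Doris defaults): with totalBytes = 8 GB, min_bytes_per_broker_scanner = 64 MB, loadParallelism = 2, 4 backends and max_broker_concurrency = 10, the bulk-load branch computes 8 GB / 64 MB = 128 instances, capped first by 2 * 4 = 8 and then by 10, so numInstances = 8 and bytesPerInstance = 8 GB / 8 + 1, roughly 1 GB per instance. The stream-load branch skips all of this: a single instance with bytesPerInstance = Long.MAX_VALUE, so the one pipe is never split.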
@ -156,7 +188,9 @@ public class FileGroupInfo {
TFileFormatType formatType = formatType(context.fileGroup.getFileFormat(), fileStatus.path);
List<String> columnsFromPath = BrokerUtil.parseColumnsFromPath(fileStatus.path,
context.fileGroup.getColumnNamesFromPath());
if (tmpBytes > bytesPerInstance) {
// Assign scan range locations only for broker load.
// Stream load has only one file and does not need multiple scan ranges.
if (tmpBytes > bytesPerInstance && jobType != JobType.STREAM_LOAD) {
// Currently only plain text files can be split
if ((formatType == TFileFormatType.FORMAT_CSV_PLAIN && fileStatus.isSplitable)
|| formatType == TFileFormatType.FORMAT_JSON) {
@ -224,69 +258,55 @@ public class FileGroupInfo {
TScanRangeLocations locations = new TScanRangeLocations();
locations.setScanRange(scanRange);
TScanRangeLocation location = new TScanRangeLocation();
location.setBackendId(selectedBackend.getId());
location.setServer(new TNetworkAddress(selectedBackend.getHost(), selectedBackend.getBePort()));
locations.addToLocations(location);
if (jobType == JobType.BULK_LOAD) {
TScanRangeLocation location = new TScanRangeLocation();
location.setBackendId(selectedBackend.getId());
location.setServer(new TNetworkAddress(selectedBackend.getHost(), selectedBackend.getBePort()));
locations.addToLocations(location);
} else {
// stream load does not need locations
locations.setLocations(Lists.newArrayList());
}
return locations;
}
private String getHeaderType(String formatType) {
if (formatType != null) {
if (formatType.toLowerCase().equals(FeConstants.csv_with_names) || formatType.toLowerCase()
.equals(FeConstants.csv_with_names_and_types)) {
return formatType;
}
}
return "";
}
private TFileFormatType formatType(String fileFormat, String path) throws UserException {
if (fileFormat != null) {
if (fileFormat.toLowerCase().equals("parquet")) {
if (fileFormat.equalsIgnoreCase("parquet")) {
return TFileFormatType.FORMAT_PARQUET;
} else if (fileFormat.toLowerCase().equals("orc")) {
} else if (fileFormat.equalsIgnoreCase("orc")) {
return TFileFormatType.FORMAT_ORC;
} else if (fileFormat.toLowerCase().equals("json")) {
} else if (fileFormat.equalsIgnoreCase("json")) {
return TFileFormatType.FORMAT_JSON;
// csv/csv_with_names/csv_with_names_and_types are treated as csv format
} else if (fileFormat.toLowerCase().equals(FeConstants.csv) || fileFormat.toLowerCase()
} else if (fileFormat.equalsIgnoreCase(FeConstants.csv) || fileFormat.toLowerCase()
.equals(FeConstants.csv_with_names) || fileFormat.toLowerCase()
.equals(FeConstants.csv_with_names_and_types)
// TODO: Add TEXTFILE to TFileFormatType to support hive text file format.
|| fileFormat.toLowerCase().equals(FeConstants.text)) {
|| fileFormat.equalsIgnoreCase(FeConstants.text)) {
return TFileFormatType.FORMAT_CSV_PLAIN;
} else {
throw new UserException("Not supported file format: " + fileFormat);
}
}
String lowerCasePath = path.toLowerCase();
if (lowerCasePath.endsWith(".parquet") || lowerCasePath.endsWith(".parq")) {
return TFileFormatType.FORMAT_PARQUET;
} else if (lowerCasePath.endsWith(".gz")) {
return TFileFormatType.FORMAT_CSV_GZ;
} else if (lowerCasePath.endsWith(".bz2")) {
return TFileFormatType.FORMAT_CSV_BZ2;
} else if (lowerCasePath.endsWith(".lz4")) {
return TFileFormatType.FORMAT_CSV_LZ4FRAME;
} else if (lowerCasePath.endsWith(".lzo")) {
return TFileFormatType.FORMAT_CSV_LZOP;
} else if (lowerCasePath.endsWith(".deflate")) {
return TFileFormatType.FORMAT_CSV_DEFLATE;
} else {
return TFileFormatType.FORMAT_CSV_PLAIN;
}
return Util.getFileFormatType(path);
}
private TFileRangeDesc createFileRangeDesc(long curFileOffset, TBrokerFileStatus fileStatus, long rangeBytes,
List<String> columnsFromPath) {
TFileRangeDesc rangeDesc = new TFileRangeDesc();
rangeDesc.setPath(fileStatus.path);
rangeDesc.setStartOffset(curFileOffset);
rangeDesc.setSize(rangeBytes);
rangeDesc.setColumnsFromPath(columnsFromPath);
if (jobType == JobType.BULK_LOAD) {
rangeDesc.setPath(fileStatus.path);
rangeDesc.setStartOffset(curFileOffset);
rangeDesc.setSize(rangeBytes);
rangeDesc.setColumnsFromPath(columnsFromPath);
} else {
rangeDesc.setLoadId(loadId);
rangeDesc.setSize(fileStatus.size);
}
return rangeDesc;
}
}

View File

@ -30,6 +30,7 @@ import org.apache.doris.common.FeConstants;
import org.apache.doris.common.MetaNotFoundException;
import org.apache.doris.common.UserException;
import org.apache.doris.common.util.BrokerUtil;
import org.apache.doris.common.util.Util;
import org.apache.doris.common.util.VectorizedUtil;
import org.apache.doris.load.BrokerFileGroup;
import org.apache.doris.load.Load;
@ -98,7 +99,8 @@ public class LoadScanProvider implements FileScanProviderIf {
ctx.timezone = analyzer.getTimezone();
TFileScanRangeParams params = new TFileScanRangeParams();
params.format_type = formatType(fileGroupInfo.getFileGroup().getFileFormat(), "");
params.setFormatType(formatType(fileGroupInfo.getFileGroup().getFileFormat(), ""));
params.setCompressType(fileGroupInfo.getFileGroup().getCompressType());
params.setStrictMode(fileGroupInfo.isStrictMode());
params.setProperties(fileGroupInfo.getBrokerDesc().getProperties());
if (fileGroupInfo.getBrokerDesc().getFileType() == TFileType.FILE_HDFS) {
@ -233,23 +235,9 @@ public class LoadScanProvider implements FileScanProviderIf {
} else {
throw new UserException("Not supported file format: " + fileFormat);
}
}
String lowerCasePath = path.toLowerCase();
if (lowerCasePath.endsWith(".parquet") || lowerCasePath.endsWith(".parq")) {
return TFileFormatType.FORMAT_PARQUET;
} else if (lowerCasePath.endsWith(".gz")) {
return TFileFormatType.FORMAT_CSV_GZ;
} else if (lowerCasePath.endsWith(".bz2")) {
return TFileFormatType.FORMAT_CSV_BZ2;
} else if (lowerCasePath.endsWith(".lz4")) {
return TFileFormatType.FORMAT_CSV_LZ4FRAME;
} else if (lowerCasePath.endsWith(".lzo")) {
return TFileFormatType.FORMAT_CSV_LZOP;
} else if (lowerCasePath.endsWith(".deflate")) {
return TFileFormatType.FORMAT_CSV_DEFLATE;
} else {
return TFileFormatType.FORMAT_CSV_PLAIN;
// get the file format from the file suffix
return Util.getFileFormatType(path);
}
}

View File

@ -22,6 +22,7 @@ import org.apache.doris.analysis.ImportColumnDesc;
import org.apache.doris.analysis.PartitionNames;
import org.apache.doris.analysis.Separator;
import org.apache.doris.load.loadv2.LoadTask;
import org.apache.doris.thrift.TFileCompressType;
import org.apache.doris.thrift.TFileFormatType;
import org.apache.doris.thrift.TFileType;
@ -54,6 +55,8 @@ public interface LoadTaskInfo {
TFileFormatType getFormatType();
TFileCompressType getCompressType();
String getJsonPaths();
String getJsonRoot();
@ -93,5 +96,25 @@ public interface LoadTaskInfo {
class ImportColumnDescs {
public List<ImportColumnDesc> descs = Lists.newArrayList();
public boolean isColumnDescsRewrited = false;
public List<String> getFileColNames() {
List<String> colNames = Lists.newArrayList();
for (ImportColumnDesc desc : descs) {
if (desc.isColumn()) {
colNames.add(desc.getColumnName());
}
}
return colNames;
}
public List<Expr> getColumnMappingList() {
List<Expr> exprs = Lists.newArrayList();
for (ImportColumnDesc desc : descs) {
if (!desc.isColumn()) {
exprs.add(desc.toBinaryPredicate());
}
}
return exprs;
}
}
}
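A small hypothetical illustration of the two new helpers (the ImportColumnDesc constructor shapes are assumptions, not from this patch), for a column list equivalent to "columns: k1, k2, k3=k1+1":
ImportColumnDescs descs = new ImportColumnDescs();
descs.descs.add(new ImportColumnDesc("k1"));               // plain file column
descs.descs.add(new ImportColumnDesc("k2"));               // plain file column
descs.descs.add(new ImportColumnDesc("k3", mappingExpr));  // mappingExpr: Expr for k1 + 1 (placeholder)
descs.getFileColNames();      // -> ["k1", "k2"]
descs.getColumnMappingList(); // -> [k3 = k1 + 1] as binary predicate exprs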

View File

@ -30,6 +30,7 @@ import org.apache.doris.common.UserException;
import org.apache.doris.common.util.SqlParserUtils;
import org.apache.doris.common.util.TimeUtils;
import org.apache.doris.load.loadv2.LoadTask;
import org.apache.doris.thrift.TFileCompressType;
import org.apache.doris.thrift.TFileFormatType;
import org.apache.doris.thrift.TFileType;
import org.apache.doris.thrift.TStreamLoadPutRequest;
@ -52,6 +53,7 @@ public class StreamLoadTask implements LoadTaskInfo {
private long txnId;
private TFileType fileType;
private TFileFormatType formatType;
private TFileCompressType compressType = TFileCompressType.UNKNOWN;
private boolean stripOuterArray;
private boolean numAsString;
private String jsonPaths;
@ -80,11 +82,13 @@ public class StreamLoadTask implements LoadTaskInfo {
private String headerType = "";
private List<String> hiddenColumns;
public StreamLoadTask(TUniqueId id, long txnId, TFileType fileType, TFileFormatType formatType) {
public StreamLoadTask(TUniqueId id, long txnId, TFileType fileType, TFileFormatType formatType,
TFileCompressType compressType) {
this.id = id;
this.txnId = txnId;
this.fileType = fileType;
this.formatType = formatType;
this.compressType = compressType;
this.jsonPaths = "";
this.jsonRoot = "";
this.stripOuterArray = false;
@ -109,6 +113,10 @@ public class StreamLoadTask implements LoadTaskInfo {
return formatType;
}
public TFileCompressType getCompressType() {
return compressType;
}
public ImportColumnDescs getColumnExprDescs() {
return columnExprDescs;
}
@ -238,7 +246,8 @@ public class StreamLoadTask implements LoadTaskInfo {
public static StreamLoadTask fromTStreamLoadPutRequest(TStreamLoadPutRequest request) throws UserException {
StreamLoadTask streamLoadTask = new StreamLoadTask(request.getLoadId(), request.getTxnId(),
request.getFileType(), request.getFormatType());
request.getFileType(), request.getFormatType(),
request.getCompressType());
streamLoadTask.setOptionalFromTSLPutRequest(request);
return streamLoadTask;
}
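A minimal sketch (thrift-generated setters assumed; required request fields such as load id and txn id omitted) of how the new compress type flows from the put request into the task:
TStreamLoadPutRequest request = new TStreamLoadPutRequest();
request.setFormatType(TFileFormatType.FORMAT_CSV_PLAIN);
request.setCompressType(TFileCompressType.GZ); // new optional field 40
StreamLoadTask task = StreamLoadTask.fromTStreamLoadPutRequest(request);
// task.getCompressType() now returns GZ instead of the UNKNOWN default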

View File

@ -543,6 +543,7 @@ struct TStreamLoadPutRequest {
37: optional bool load_to_single_tablet
38: optional string header_type
39: optional string hidden_columns
40: optional PlanNodes.TFileCompressType compress_type
}
struct TStreamLoadPutResult {

View File

@ -114,6 +114,21 @@ enum TFileFormatType {
FORMAT_PROTO,
}
// In previous versions, the data compression format and the file format were stored together in TFileFormatType,
// which made it inconvenient to combine file formats and compression formats flexibly.
// Therefore, the compression format is added separately here.
// To ensure forward compatibility: if this type is set, it prevails;
// otherwise, the TFileFormatType prevails.
enum TFileCompressType {
UNKNOWN,
PLAIN,
GZ,
LZO,
BZ2,
LZ4FRAME,
DEFLATE
}
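A minimal sketch (thrift-generated Java assumed, not part of this patch) of how the two enums now combine independently, e.g. a gzip-compressed plain csv:
TFileScanRangeParams params = new TFileScanRangeParams();
params.setFormatType(TFileFormatType.FORMAT_CSV_PLAIN); // what the rows look like
params.setCompressType(TFileCompressType.GZ);           // how the bytes are compressed
// Leaving compress_type as UNKNOWN keeps the old behavior, where compression
// is implied by TFileFormatType (e.g. FORMAT_CSV_GZ).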
struct THdfsConf {
1: required string key
2: required string value
@ -245,45 +260,50 @@ struct TFileAttributes {
struct TFileScanRangeParams {
1: optional Types.TFileType file_type;
2: optional TFileFormatType format_type;
3: optional TFileCompressType compress_type;
// If this is for a load job, src points to the source table and dest points to the doris table.
// If this is for query, only dest_tuple_id is set, including both file slot and partition slot.
3: optional Types.TTupleId src_tuple_id;
4: optional Types.TTupleId dest_tuple_id
4: optional Types.TTupleId src_tuple_id;
5: optional Types.TTupleId dest_tuple_id
// num_of_columns_from_file can split the all_file_slot and all_partition_slot
5: optional i32 num_of_columns_from_file;
6: optional i32 num_of_columns_from_file;
// all selected slots, which may be composed of file and partition values.
6: optional list<TFileScanSlotInfo> required_slots;
7: optional list<TFileScanSlotInfo> required_slots;
7: optional THdfsParams hdfs_params;
8: optional THdfsParams hdfs_params;
// properties for file such as s3 information
8: optional map<string, string> properties;
9: optional map<string, string> properties;
// The convert expr map for the load job
// dest slot id -> expr
9: optional map<Types.TSlotId, Exprs.TExpr> expr_of_dest_slot
10: optional map<Types.TSlotId, Exprs.TExpr> default_value_of_src_slot
10: optional map<Types.TSlotId, Exprs.TExpr> expr_of_dest_slot
11: optional map<Types.TSlotId, Exprs.TExpr> default_value_of_src_slot
// This is the mapping of dest slot id and src slot id in load expr
// It excludes the slot id which has the transform expr
11: optional map<Types.TSlotId, Types.TSlotId> dest_sid_to_src_sid_without_trans
12: optional map<Types.TSlotId, Types.TSlotId> dest_sid_to_src_sid_without_trans
// strictMode is a boolean
// if strict mode is true, the incorrect data (the result of cast is null) will not be loaded
12: optional bool strict_mode
13: optional bool strict_mode
13: optional list<Types.TNetworkAddress> broker_addresses
14: optional TFileAttributes file_attributes
15: optional Exprs.TExpr pre_filter_exprs
14: optional list<Types.TNetworkAddress> broker_addresses
15: optional TFileAttributes file_attributes
16: optional Exprs.TExpr pre_filter_exprs
}
struct TFileRangeDesc {
// If load_id is set, this is for stream/routine load.
// If path is set, this is for bulk load.
1: optional Types.TUniqueId load_id
// Path of this range
1: optional string path;
2: optional string path;
// Offset of this file start
2: optional i64 start_offset;
3: optional i64 start_offset;
// Size of this range. If size = -1, it means reading to the end of the file.
3: optional i64 size;
4: optional i64 size;
5: optional i64 file_size;
// columns parsed from file path should be after the columns read from file
4: optional list<string> columns_from_path;
6: optional list<string> columns_from_path;
}
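For clarity, a hypothetical sketch (thrift-generated Java assumed) of the two mutually exclusive ways this struct is filled, mirroring FileGroupInfo.createFileRangeDesc above:
// bulk load: a byte range of a concrete file
TFileRangeDesc bulkRange = new TFileRangeDesc();
bulkRange.setPath("hdfs://nn/warehouse/part-0000.csv"); // hypothetical path
bulkRange.setStartOffset(0);
bulkRange.setSize(64 * 1024 * 1024L);
// stream load: no path, the BE locates the StreamLoadPipe by load id
TFileRangeDesc streamRange = new TFileRangeDesc();
streamRange.setLoadId(new TUniqueId(1L, 2L)); // hypothetical load id
streamRange.setSize(-1);                      // read until the stream ends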
// TFileScanRange represents a set of descriptions of a file and the rules for reading and converting it.

View File

@ -20,11 +20,11 @@
// **Note**: default db will be create if not exist
defaultDb = "regression_test"
jdbcUrl = "jdbc:mysql://127.0.0.1:9030/?"
jdbcUrl = "jdbc:mysql://127.0.0.1:9033/?"
jdbcUser = "root"
jdbcPassword = ""
feHttpAddress = "127.0.0.1:8030"
feHttpAddress = "127.0.0.1:8033"
feHttpUser = "root"
feHttpPassword = ""

File diff suppressed because it is too large

View File

@ -3,3 +3,51 @@
-2 -51 \N 1 \N \N \N \N \N \N \N 2 \N \N
-2 -50 \N 1 \N \N \N \N \N \N \N \N j \N
-- !sql1 --
2019 9 9 9 7.7 a 2019-09-09 1970-01-01T08:33:39 k7 9.0 9.0
-- !all11 --
2500
-- !all12 --
11
-- !all21 --
2500
-- !all22 --
0
-- !all23 --
2500
-- !all24 --
2500
-- !all31 --
11
-- !all32 --
11
-- !all33 --
11
-- !all41 --
2500
-- !all51 --
0
-- !all61 --
0
-- !all71 --
1 2 1025 1028
-- !all81 --
2
-- !all91 --
1

View File

@ -0,0 +1,6 @@
1,2,1,0,1
2,2,1,0,2
3,2,1,0,3
1,2,2,1,4
2,2,1,1,5
3,2,2,0,0

View File

@ -1,64 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
import org.codehaus.groovy.runtime.IOGroovyMethods
import java.nio.charset.StandardCharsets
suite("load_nullable_to_not_nullable") {
def tableName = "load_nullable_to_not_nullable"
def dbName = "test_query_db"
sql "CREATE DATABASE IF NOT EXISTS ${dbName}"
sql "USE $dbName"
sql "DROP TABLE IF EXISTS ${tableName} "
sql """
CREATE TABLE `${tableName}` (
k1 int(32) NOT NULL,
k2 smallint NOT NULL,
k3 int NOT NULL,
k4 bigint NOT NULL,
k5 decimal(9, 3) NOT NULL,
k6 char(5) NOT NULL,
k10 date NOT NULL,
k11 datetime NOT NULL,
k7 varchar(20) NOT NULL,
k8 double max NOT NULL,
k9 float sum NOT NULL )
AGGREGATE KEY(k1,k2,k3,k4,k5,k6,k10,k11,k7)
PARTITION BY RANGE(k2) (
PARTITION partition_a VALUES LESS THAN MAXVALUE
)
DISTRIBUTED BY HASH(k1, k2, k5)
BUCKETS 3
PROPERTIES ( "replication_allocation" = "tag.location.default: 1");
"""
StringBuilder commandBuilder = new StringBuilder()
commandBuilder.append("""curl -v --location-trusted -u ${context.config.feHttpUser}:${context.config.feHttpPassword}""")
commandBuilder.append(""" -H columns:col,k1=year(col),k2=month(col),k3=month(col),k4=day(col),k5=7.7,k6='a',k10=date(col),k11=FROM_UNIXTIME(2019,'%Y-%m-%dT%H:%i:%s'),k7='k7',k8=month(col),k9=day(col) -T ${context.file.parent}/data/test_time.data http://${context.config.feHttpAddress}/api/""" + dbName + "/" + tableName + "/_stream_load")
String command = commandBuilder.toString()
def process = command.execute()
int code = process.waitFor()
String err = IOGroovyMethods.getText(new BufferedReader(new InputStreamReader(process.getErrorStream())));
String out = process.getText()
logger.info("Run command: command=" + command + ",code=" + code + ", out=" + out + ", err=" + err)
assertEquals(code, 0)
sql "sync"
qt_sql " SELECT * FROM ${tableName} "
sql "DROP TABLE ${tableName} "
}

View File

@ -132,4 +132,422 @@ suite("test_stream_load", "p0") {
sql "sync"
rowCount = sql "select count(1) from ${tableName}"
assertEquals(3, rowCount[0][0])
// test load_nullable_to_not_nullable
def tableName2 = "load_nullable_to_not_nullable"
sql """ DROP TABLE IF EXISTS ${tableName2} """
sql """
CREATE TABLE `${tableName2}` (
k1 int(32) NOT NULL,
k2 smallint NOT NULL,
k3 int NOT NULL,
k4 bigint NOT NULL,
k5 decimal(9, 3) NOT NULL,
k6 char(5) NOT NULL,
k10 date NOT NULL,
k11 datetime NOT NULL,
k7 varchar(20) NOT NULL,
k8 double max NOT NULL,
k9 float sum NOT NULL )
AGGREGATE KEY(k1,k2,k3,k4,k5,k6,k10,k11,k7)
PARTITION BY RANGE(k2) (
PARTITION partition_a VALUES LESS THAN MAXVALUE
)
DISTRIBUTED BY HASH(k1, k2, k5)
BUCKETS 3
PROPERTIES ( "replication_allocation" = "tag.location.default: 1");
"""
streamLoad {
table "${tableName2}"
set 'column_separator', '\t'
set 'columns', 'col,k1=year(col),k2=month(col),k3=month(col),k4=day(col),k5=7.7,k6="a",k10=date(col),k11=FROM_UNIXTIME(2019,"%Y-%m-%dT%H:%i:%s"),k7="k7",k8=month(col),k9=day(col)'
file 'test_time.data'
time 10000 // limit inflight 10s
check { result, exception, startTime, endTime ->
if (exception != null) {
throw exception
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
assertEquals("success", json.Status.toLowerCase())
assertEquals(1, json.NumberTotalRows)
assertEquals(0, json.NumberFilteredRows)
}
}
order_qt_sql1 " SELECT * FROM ${tableName2}"
// test common case
def tableName3 = "test_all"
def tableName4 = "test_less_col"
def tableName5 = "test_bitmap_and_hll"
def tableName6 = "test_unique_key"
def tableName7 = "test_unique_key_with_delete"
sql """ DROP TABLE IF EXISTS ${tableName3} """
sql """ DROP TABLE IF EXISTS ${tableName4} """
sql """ DROP TABLE IF EXISTS ${tableName5} """
sql """ DROP TABLE IF EXISTS ${tableName6} """
sql """ DROP TABLE IF EXISTS ${tableName7} """
sql """
CREATE TABLE ${tableName3} (
`k1` int(11) NULL,
`k2` tinyint(4) NULL,
`k3` smallint(6) NULL,
`k4` bigint(20) NULL,
`k5` largeint(40) NULL,
`k6` float NULL,
`k7` double NULL,
`k8` decimal(9, 0) NULL,
`k9` char(10) NULL,
`k10` varchar(1024) NULL,
`k11` text NULL,
`k12` date NULL,
`k13` datetime NULL
) ENGINE=OLAP
DISTRIBUTED BY HASH(`k1`) BUCKETS 3
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
"""
sql """
CREATE TABLE ${tableName4} (
`k1` int(11) NULL,
`k2` tinyint(4) NULL,
`k3` smallint(6) NULL,
`k4` bigint(20) NULL,
`k5` largeint(40) NULL
) ENGINE=OLAP
DISTRIBUTED BY HASH(`k1`) BUCKETS 3
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
"""
sql """
CREATE TABLE ${tableName5} (
`k1` int(11) NULL,
`k2` tinyint(4) NULL,
`v1` bitmap bitmap_union,
`v2` hll hll_union
) ENGINE=OLAP
DISTRIBUTED BY HASH(`k1`) BUCKETS 3
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
"""
sql """
CREATE TABLE ${tableName6} (
`k1` int(11) NULL,
`k2` tinyint(4) NULL,
`v1` varchar(1024)
) ENGINE=OLAP
UNIQUE KEY(k1, k2)
DISTRIBUTED BY HASH(`k1`) BUCKETS 3
PROPERTIES (
"replication_allocation" = "tag.location.default: 1"
);
"""
sql """
CREATE TABLE ${tableName7} (
`k1` int(11) NULL,
`k2` tinyint(4) NULL,
`v1` varchar(1024)
) ENGINE=OLAP
UNIQUE KEY(k1, k2)
DISTRIBUTED BY HASH(`k1`) BUCKETS 3
PROPERTIES (
"function_column.sequence_type" = "int",
"replication_allocation" = "tag.location.default: 1"
);
"""
// load all columns
streamLoad {
table "${tableName3}"
set 'column_separator', ','
file 'all_types.csv'
time 10000 // limit inflight 10s
check { result, exception, startTime, endTime ->
if (exception != null) {
throw exception
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
assertEquals("success", json.Status.toLowerCase())
assertEquals(2500, json.NumberTotalRows)
assertEquals(0, json.NumberFilteredRows)
}
}
sql "sync"
order_qt_all11 "SELECT count(*) FROM ${tableName3}" // 2500
order_qt_all12 "SELECT count(*) FROM ${tableName3} where k1 <= 10" // 11
sql """truncate table ${tableName3}"""
sql """sync"""
// load part of columns
streamLoad {
table "${tableName3}"
set 'column_separator', ','
set 'columns', 'k1, k2'
file 'all_types.csv'
time 10000 // limit inflight 10s
check { result, exception, startTime, endTime ->
if (exception != null) {
throw exception
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
assertEquals("fail", json.Status.toLowerCase())
assertEquals(0, json.NumberLoadedRows)
}
}
// load while skipping 2 columns, with gzip
streamLoad {
table "${tableName3}"
set 'column_separator', ','
set 'columns', 'k1, k2, k3, k4, tmp1, tmp2, k7, k8, k9, k10, k11, k12, k13'
set 'compress_type', 'gz'
file 'all_types.csv.gz'
time 10000 // limit inflight 10s
check { result, exception, startTime, endTime ->
if (exception != null) {
throw exception
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
assertEquals("success", json.Status.toLowerCase())
assertEquals(2500, json.NumberTotalRows)
assertEquals(2500, json.NumberLoadedRows)
assertEquals(0, json.NumberFilteredRows)
assertEquals(0, json.NumberUnselectedRows)
}
}
sql "sync"
order_qt_all21 "SELECT count(*) FROM ${tableName3}" // 2500
order_qt_all22 "SELECT count(*) FROM ${tableName3} where k1 is null" // 0
order_qt_all23 "SELECT count(*) FROM ${tableName3} where k5 is null" // 2500
order_qt_all24 "SELECT count(*) FROM ${tableName3} where k6 is null" // 2500
sql """truncate table ${tableName3}"""
sql """sync"""
// load with column mapping and where predicate
streamLoad {
table "${tableName3}"
set 'column_separator', ','
set 'columns', 'k1, k2, k3, k4, tmp5, k6, tmpk7, k8, k9, k10, k11, k12, k13, k7=tmpk7+1'
set 'where', 'k1 <= 10'
file 'all_types.csv'
time 10000 // limit inflight 10s
check { result, exception, startTime, endTime ->
if (exception != null) {
throw exception
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
assertEquals("success", json.Status.toLowerCase())
assertEquals(2500, json.NumberTotalRows)
assertEquals(11, json.NumberLoadedRows)
assertEquals(0, json.NumberFilteredRows)
assertEquals(2489, json.NumberUnselectedRows)
}
}
sql "sync"
order_qt_all31 "SELECT count(*) FROM ${tableName3}" // 11
order_qt_all32 "SELECT count(*) FROM ${tableName3} where k7 >= 7" // 11
order_qt_all33 "SELECT count(*) FROM ${tableName3} where k5 is null" // 11
sql """truncate table ${tableName3}"""
sql """sync"""
// load without strict_mode
streamLoad {
table "${tableName3}"
set 'column_separator', ','
set 'columns', 'tmpk1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11, k12, k13, k1=k13'
file 'all_types.csv'
time 10000 // limit inflight 10s
check { result, exception, startTime, endTime ->
if (exception != null) {
throw exception
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
assertEquals("success", json.Status.toLowerCase())
assertEquals(2500, json.NumberTotalRows)
assertEquals(2500, json.NumberLoadedRows)
assertEquals(0, json.NumberFilteredRows)
assertEquals(0, json.NumberUnselectedRows)
}
}
sql "sync"
order_qt_all41 "SELECT count(*) FROM ${tableName3} where k1 is null" // 2500
sql """truncate table ${tableName3}"""
sql """sync"""
// load with strict_mode true and max_filter_ratio 1
streamLoad {
table "${tableName4}"
set 'column_separator', ','
set 'columns', 'k1, k2, k3, k4, tmpk5, tmpk6, tmpk7, tmpk8, tmpk9, tmpk10, tmpk11, tmpk12, k5'
set 'max_filter_ratio', '1'
set 'strict_mode', 'true'
file 'all_types.csv'
time 10000 // limit inflight 10s
check { result, exception, startTime, endTime ->
if (exception != null) {
throw exception
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
assertEquals("success", json.Status.toLowerCase())
assertEquals(2500, json.NumberTotalRows)
assertEquals(0, json.NumberLoadedRows)
assertEquals(2500, json.NumberFilteredRows)
assertEquals(0, json.NumberUnselectedRows)
}
}
sql "sync"
order_qt_all51 "SELECT count(*) FROM ${tableName4}" // 0
sql """truncate table ${tableName4}"""
sql """sync"""
// load with strict_mode true and max_filter_ratio 0
streamLoad {
table "${tableName4}"
set 'column_separator', ','
set 'columns', 'k1, k2, k3, k4, tmpk5, tmpk6, tmpk7, tmpk8, tmpk9, tmpk10, tmpk11, tmpk12, k5'
set 'max_filter_ratio', '0'
set 'strict_mode', 'true'
file 'all_types.csv'
time 10000 // limit inflight 10s
check { result, exception, startTime, endTime ->
if (exception != null) {
throw exception
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
assertEquals("fail", json.Status.toLowerCase())
assertEquals(0, json.NumberLoadedRows)
}
}
sql "sync"
order_qt_all61 "SELECT count(*) FROM ${tableName4}" // 0
sql """truncate table ${tableName4}"""
sql """sync"""
// load bitmap and hll with bzip2
streamLoad {
table "${tableName5}"
set 'column_separator', ','
set 'columns', 'k1, k2, tmp1, tmp2, v1=to_bitmap(tmp1), v2=hll_hash(tmp2)'
set 'compress_type', 'bz2'
file 'bitmap_hll.csv.bz2'
time 10000 // limit inflight 10s
check { result, exception, startTime, endTime ->
if (exception != null) {
throw exception
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
assertEquals("success", json.Status.toLowerCase())
assertEquals(1025, json.NumberTotalRows)
assertEquals(1025, json.NumberLoadedRows)
assertEquals(0, json.NumberFilteredRows)
assertEquals(0, json.NumberUnselectedRows)
}
}
sql "sync"
order_qt_all71 "SELECT k1, k2, bitmap_union_count(v1), HLL_UNION_AGG(v2) FROM ${tableName5} group by k1, k2" // 1,2,1025,1028
sql """truncate table ${tableName5}"""
sql """sync"""
// load unique key
streamLoad {
table "${tableName6}"
set 'column_separator', ','
set 'compress_type', 'lz4'
file 'unique_key.csv.lz4'
time 10000 // limit inflight 10s
check { result, exception, startTime, endTime ->
if (exception != null) {
throw exception
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
assertEquals("success", json.Status.toLowerCase())
assertEquals(8001, json.NumberTotalRows)
assertEquals(8001, json.NumberLoadedRows)
assertEquals(0, json.NumberFilteredRows)
assertEquals(0, json.NumberUnselectedRows)
}
}
sql "sync"
order_qt_all81 "SELECT count(*) from ${tableName6}" // 2
sql """truncate table ${tableName6}"""
sql """sync"""
// load unique key with delete and sequence
streamLoad {
table "${tableName7}"
set 'column_separator', ','
set 'columns', 'k1,k2,v1,del,seq'
set 'delete', 'del=1'
set 'merge_type', 'merge'
set 'function_column.sequence_col', 'seq'
file 'unique_key_with_delete.csv'
time 10000 // limit inflight 10s
check { result, exception, startTime, endTime ->
if (exception != null) {
throw exception
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
assertEquals("success", json.Status.toLowerCase())
assertEquals(6, json.NumberTotalRows)
assertEquals(6, json.NumberLoadedRows)
assertEquals(0, json.NumberFilteredRows)
assertEquals(0, json.NumberUnselectedRows)
}
}
sql "sync"
order_qt_all91 "SELECT count(*) from ${tableName7}" // 1
sql """truncate table ${tableName7}"""
sql """sync"""
}