From 39f59f554ad5ff5459b47b0d201fe47d5c0e06a5 Mon Sep 17 00:00:00 2001
From: Mingyu Chen <morningman@163.com>
Date: Thu, 2 Mar 2023 16:51:27 +0800
Subject: [PATCH] [improvement](dry-run)(tvf) support csv schema in tvf and add
 "dry_run_query" variable (#16983)

This CL mainly changes:

Support specifying csv schema manually in s3/hdfs table valued function

s3 (
'URI' = 'https://bucket1/inventory.dat',
'ACCESS_KEY'= 'ak',
'SECRET_KEY' = 'sk',
'FORMAT' = 'csv',
'column_separator' = '|',
'csv_schema' = 'k1:int;k2:int;k3:int;k4:decimal(38,10)',
'use_path_style'='true'
)
Add new session variable dry_run_query

If set to true, the real query result will not be returned, instead, it will only return the number of returned rows.

mysql> select * from bigtable;
+--------------+
| ReturnedRows |
+--------------+
| 10000000     |
+--------------+
This can avoid large result set transmission time and focus on real execution time of query engine.
For debug and analysis purpose.
---
 be/src/vec/sink/vmysql_result_writer.cpp      |  14 ++-
 be/src/vec/sink/vmysql_result_writer.h        |   2 +
 docs/en/docs/advanced/variables.md            |  17 +++
 docs/en/docs/lakehouse/file.md                |  43 +++++++
 docs/zh-CN/docs/advanced/variables.md         |  17 +++
 docs/zh-CN/docs/lakehouse/file.md             |  43 +++++++
 fe/check/checkstyle/import-control.xml        |   2 +
 .../org/apache/doris/qe/CommonResultSet.java  |   1 -
 .../java/org/apache/doris/qe/Coordinator.java |   5 +
 .../org/apache/doris/qe/SessionVariable.java  |  11 ++
 .../org/apache/doris/qe/StmtExecutor.java     |  18 ++-
 .../ExternalFileTableValuedFunction.java      | 103 ++++++++++++++-
 .../ExternalFileTableValuedFunctionTest.java  | 117 ++++++++++++++++++
 gensrc/thrift/PaloInternalService.thrift      |   1 +
 .../test_segcompaction_unique_keys_mow.groovy |   2 +-
 15 files changed, 384 insertions(+), 12 deletions(-)
 create mode 100644 fe/fe-core/src/test/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunctionTest.java

diff --git a/be/src/vec/sink/vmysql_result_writer.cpp b/be/src/vec/sink/vmysql_result_writer.cpp
index bade808912..2fe215d6f9 100644
--- a/be/src/vec/sink/vmysql_result_writer.cpp
+++ b/be/src/vec/sink/vmysql_result_writer.cpp
@@ -54,6 +54,7 @@ Status VMysqlResultWriter<is_binary_format>::init(RuntimeState* state) {
         return Status::InternalError("sinker is NULL pointer.");
     }
     set_output_object_data(state->return_object_data_as_binary());
+    _is_dry_run = state->query_options().dry_run_query;
     return Status::OK();
 }
 
@@ -801,13 +802,14 @@ Status VMysqlResultWriter<is_binary_format>::append_block(Block& input_block) {
 
     if (status) {
         SCOPED_TIMER(_result_send_timer);
-        // push this batch to back
-        if (_sinker) {
-            status = _sinker->add_batch(result);
-        } else {
-            _results.push_back(std::move(result));
+        // If this is a dry run task, no need to send data block
+        if (!_is_dry_run) {
+            if (_sinker) {
+                status = _sinker->add_batch(result);
+            } else {
+                _results.push_back(std::move(result));
+            }
         }
-
         if (status.ok()) {
             _written_rows += num_rows;
         } else {
diff --git a/be/src/vec/sink/vmysql_result_writer.h b/be/src/vec/sink/vmysql_result_writer.h
index a62e1f3420..07a44fa0af 100644
--- a/be/src/vec/sink/vmysql_result_writer.h
+++ b/be/src/vec/sink/vmysql_result_writer.h
@@ -74,6 +74,8 @@ private:
     RuntimeProfile::Counter* _sent_rows_counter = nullptr;
     // for synchronized results
     ResultList _results;
+    // If true, no block will be sent
+    bool _is_dry_run = false;
 };
 } // namespace vectorized
 } // namespace doris
diff --git a/docs/en/docs/advanced/variables.md b/docs/en/docs/advanced/variables.md
index 2ccd506c44..960a1d637a 100644
--- a/docs/en/docs/advanced/variables.md
+++ b/docs/en/docs/advanced/variables.md
@@ -598,3 +598,20 @@ Translated with www.DeepL.com/Translator (free version)
 * `use_fix_replica`
 
     Use a fixed replica to query. If use_fix_replica is 1, the smallest one is used, if use_fix_replica is 2, the second smallest one is used, and so on. The default value is -1, which means it is not enabled.
+
+* `dry_run_query`
+
+    <version since="dev"></version>
+
+    If set to true, for query requests, the actual result set will no longer be returned, but only the number of rows. The default is false.
+
+    This parameter can be used to avoid the time-consuming result set transmission when testing a large number of data sets, and focus on the time-consuming underlying query execution.
+
+    ```
+    mysql> select * from bigtable;
+    +--------------+
+    | ReturnedRows |
+    +--------------+
+    | 10000000     |
+    +--------------+
+    ```
diff --git a/docs/en/docs/lakehouse/file.md b/docs/en/docs/lakehouse/file.md
index 4d12847fc6..4fd0176c9d 100644
--- a/docs/en/docs/lakehouse/file.md
+++ b/docs/en/docs/lakehouse/file.md
@@ -85,6 +85,49 @@ As can be seen, Doris is able to automatically infer column types based on the m
 
 Besides Parquet, Doris supports analysis and auto column type inference of ORC, CSV, and Json files.
 
+**CSV Schema**
+
+<version since="dev"></version>
+
+By default, for CSV format files, all columns are of type String. Column names and column types can be specified individually via the `csv_schema` attribute. Doris will use the specified column type for file reading. The format is as follows:
+
+`name1:type1;name2:type2;...`
+
+For columns with mismatched formats (such as string in the file and int defined by the user), or missing columns (such as 4 columns in the file and 5 columns defined by the user), these columns will return null.
+
+Currently supported column types are:
+
+| name | mapping type |
+| --- | --- |
+|tinyint |tinyint |
+|smallint |smallint |
+|int |int |
+| bigint | bigint |
+| largeint | largeint |
+| float| float |
+| double| double|
+| decimal(p,s) | decimalv3(p,s) |
+| date | datev2 |
+| datetime | datetimev2 |
+| char |string |
+|varchar |string |
+|string|string |
+|boolean| boolean |
+
+Example:
+
+```
+s3 (
+    'URI' = 'https://bucket1/inventory.dat',
+    'ACCESS_KEY'= 'ak',
+    'SECRET_KEY' = 'sk',
+    'FORMAT' = 'csv',
+    'column_separator' = '|',
+    'csv_schema' = 'k1:int;k2:int;k3:int;k4:decimal(38,10)',
+    'use_path_style'='true'
+)
+```
+
 ### Query and Analysis
 
 You can conduct queries and analysis on this Parquet file using any SQL statements:
diff --git a/docs/zh-CN/docs/advanced/variables.md b/docs/zh-CN/docs/advanced/variables.md
index 4606376e75..fab01ff959 100644
--- a/docs/zh-CN/docs/advanced/variables.md
+++ b/docs/zh-CN/docs/advanced/variables.md
@@ -585,3 +585,20 @@ SELECT /*+ SET_VAR(query_timeout = 1, enable_partition_cache=true) */ sleep(3);
 * `use_fix_replica`
 
     使用固定的replica进行查询，该值表示固定使用第几小的replica，默认为-1表示不启用。
+
+* `dry_run_query`
+
+    <version since="dev"></version>
+
+    如果设置为true，对于查询请求，将不再返回实际结果集，而仅返回行数。默认为 false。
+
+    该参数可以用于测试返回大量数据集时，规避结果集传输的耗时，重点关注底层查询执行的耗时。
+
+    ```
+    mysql> select * from bigtable;
+    +--------------+
+    | ReturnedRows |
+    +--------------+
+    | 10000000     |
+    +--------------+
+    ```
diff --git a/docs/zh-CN/docs/lakehouse/file.md b/docs/zh-CN/docs/lakehouse/file.md
index 701d914de7..abb1061573 100644
--- a/docs/zh-CN/docs/lakehouse/file.md
+++ b/docs/zh-CN/docs/lakehouse/file.md
@@ -85,6 +85,49 @@ s3(
 
 目前支持对 Parquet、ORC、CSV、Json 格式进行分析和列类型推断。
 
+**CSV Schema**
+
+<version since="dev"></version>
+
+在默认情况下，对 CSV 格式文件，所有列类型均为 String。可以通过 `csv_schema` 属性单独指定列名和列类型。Doris 会使用指定的列类型进行文件读取。格式如下：
+
+`name1:type1;name2:type2;...`
+
+对于格式不匹配的列（比如文件中为字符串，用户定义为 int），或缺失列（比如文件中有4列，用户定义了5列），则这些列将返回null。
+
+当前支持的列类型为：
+
+| 名称 | 映射类型 |
+| --- | --- |
+|tinyint |tinyint |
+|smallint |smallint |
+|int |int |
+| bigint | bigint |
+| largeint | largeint |
+| float| float |
+| double| double|
+| decimal(p,s) | decimalv3(p,s) |
+| date | datev2 |
+| datetime | datetimev2 |
+| char |string |
+|varchar |string |
+|string|string |
+|boolean| boolean |
+
+示例：
+
+```
+s3 (
+    'URI' = 'https://bucket1/inventory.dat',
+    'ACCESS_KEY'= 'ak',
+    'SECRET_KEY' = 'sk',
+    'FORMAT' = 'csv',
+    'column_separator' = '|',
+    'csv_schema' = 'k1:int;k2:int;k3:int;k4:decimal(38,10)',
+    'use_path_style'='true'
+)
+```
+
 ### 查询分析
 
 你可以使用任意的 SQL 语句对这个文件进行分析
diff --git a/fe/check/checkstyle/import-control.xml b/fe/check/checkstyle/import-control.xml
index f38810a364..8f06378337 100644
--- a/fe/check/checkstyle/import-control.xml
+++ b/fe/check/checkstyle/import-control.xml
@@ -31,6 +31,8 @@ under the License.
     <disallow pkg="org.checkerframework.com.google" />
     <disallow pkg="org.apache.iceberg.relocated" />
     <disallow pkg="com.alibaba.fastjson2" />
+    <disallow pkg="org.apache.log4j.LogManager" />
+    <disallow pkg="org.apache.log4j.Logger" />
     <subpackage name="nereids">
         <allow pkg="org.junit.jupiter"/>
         <disallow pkg="org.junit"/>
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/CommonResultSet.java b/fe/fe-core/src/main/java/org/apache/doris/qe/CommonResultSet.java
index 2735739e65..1729811a4e 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/CommonResultSet.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/CommonResultSet.java
@@ -32,5 +32,4 @@ public class CommonResultSet extends AbstractResultSet {
             super(columns);
         }
     }
-
 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java
index 518742ae1c..3ffade3aeb 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java
@@ -1213,6 +1213,10 @@ public class Coordinator {
                 LOG.debug("no block query, return num >= limit rows, need cancel");
                 cancelInternal(Types.PPlanFragmentCancelReason.LIMIT_REACH);
             }
+            if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().dryRunQuery) {
+                numReceivedRows = 0;
+                numReceivedRows += resultBatch.getQueryStatistics().getReturnedRows();
+            }
         } else if (resultBatch.getBatch() != null) {
             numReceivedRows += resultBatch.getBatch().getRowsSize();
         }
@@ -3324,3 +3328,4 @@ public class Coordinator {
     }
 }
 
+
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
index 1c9baee38b..2ece4ede37 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java
@@ -283,6 +283,8 @@ public class SessionVariable implements Serializable, Writable {
     // fix replica to query. If num = 1, query the smallest replica, if 2 is the second smallest replica.
     public static final String USE_FIX_REPLICA = "use_fix_replica";
 
+    public static final String DRY_RUN_QUERY = "dry_run_query";
+
     // session origin value
     public Map<Field, String> sessionOriginValue = new HashMap<Field, String>();
     // check stmt is or not [select /*+ SET_VAR(...)*/ ...]
@@ -752,6 +754,11 @@ public class SessionVariable implements Serializable, Writable {
     @VariableMgr.VarAttr(name = USE_FIX_REPLICA)
     public int useFixReplica = -1;
 
+    // If set to true, all query will be executed without returning result
+    @VariableMgr.VarAttr(name = DRY_RUN_QUERY, needForward = true)
+    public boolean dryRunQuery = false;
+
+
     // If this fe is in fuzzy mode, then will use initFuzzyModeVariables to generate some variables,
     // not the default value set in the code.
     public void initFuzzyModeVariables() {
@@ -1617,6 +1624,10 @@ public class SessionVariable implements Serializable, Writable {
 
         tResult.setEnableFileCache(enableFileCache);
 
+        if (dryRunQuery) {
+            tResult.setDryRunQuery(true);
+        }
+
         return tResult;
     }
 
diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
index dbf7226344..f09c62e03d 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java
@@ -109,6 +109,7 @@ import org.apache.doris.planner.ScanNode;
 import org.apache.doris.proto.Data;
 import org.apache.doris.proto.InternalService;
 import org.apache.doris.proto.Types;
+import org.apache.doris.qe.CommonResultSet.CommonResultSetMetaData;
 import org.apache.doris.qe.QueryState.MysqlStateType;
 import org.apache.doris.qe.cache.Cache;
 import org.apache.doris.qe.cache.CacheAnalyzer;
@@ -198,6 +199,11 @@ public class StmtExecutor implements ProfileWriter {
     private String stmtName;
     private PrepareStmt prepareStmt;
 
+    // The result schema if "dry_run_query" is true.
+    // Only one column to indicate the real return row numbers.
+    private static final CommonResultSetMetaData DRY_RUN_QUERY_METADATA = new CommonResultSetMetaData(
+            Lists.newArrayList(new Column("ReturnedRows", PrimitiveType.STRING)));
+
     // this constructor is mainly for proxy
     public StmtExecutor(ConnectContext context, OriginStatement originStmt, boolean isProxy) {
         this.context = context;
@@ -1289,7 +1295,17 @@ public class StmtExecutor implements ProfileWriter {
             }
             if (!isSendFields) {
                 if (!isOutfileQuery) {
-                    sendFields(queryStmt.getColLabels(), exprToType(queryStmt.getResultExprs()));
+                    if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().dryRunQuery) {
+                        // Return a one row one column result set, with the real result number
+                        List<String> data = Lists.newArrayList(batch.getQueryStatistics() == null ? "0"
+                                : batch.getQueryStatistics().getReturnedRows() + "");
+                        ResultSet resultSet = new CommonResultSet(DRY_RUN_QUERY_METADATA,
+                                Collections.singletonList(data));
+                        sendResultSet(resultSet);
+                        return;
+                    } else {
+                        sendFields(queryStmt.getColLabels(), exprToType(queryStmt.getResultExprs()));
+                    }
                 } else {
                     sendFields(OutFileClause.RESULT_COL_NAMES, OutFileClause.RESULT_COL_TYPES);
                 }
diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
index 3cc34f34e7..e9f4203436 100644
--- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
+++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java
@@ -23,8 +23,10 @@ import org.apache.doris.catalog.Column;
 import org.apache.doris.catalog.Env;
 import org.apache.doris.catalog.HdfsResource;
 import org.apache.doris.catalog.PrimitiveType;
+import org.apache.doris.catalog.ScalarType;
 import org.apache.doris.common.AnalysisException;
 import org.apache.doris.common.FeConstants;
+import org.apache.doris.common.FeNameFormat;
 import org.apache.doris.common.UserException;
 import org.apache.doris.common.util.BrokerUtil;
 import org.apache.doris.planner.PlanNodeId;
@@ -51,11 +53,12 @@ import org.apache.doris.thrift.TNetworkAddress;
 import org.apache.doris.thrift.TPrimitiveType;
 import org.apache.doris.thrift.TStatusCode;
 
+import com.google.common.base.Strings;
 import com.google.common.collect.ImmutableSet;
 import com.google.common.collect.Lists;
 import com.google.protobuf.ByteString;
-import org.apache.log4j.LogManager;
-import org.apache.log4j.Logger;
+import org.apache.logging.log4j.LogManager;
+import org.apache.logging.log4j.Logger;
 import org.apache.thrift.TException;
 import org.apache.thrift.TSerializer;
 
@@ -63,6 +66,8 @@ import java.util.List;
 import java.util.Map;
 import java.util.concurrent.ExecutionException;
 import java.util.concurrent.Future;
+import java.util.regex.Matcher;
+import java.util.regex.Pattern;
 
 /**
  * ExternalFileTableValuedFunction is used for S3/HDFS/LOCAL table-valued-function
@@ -82,6 +87,11 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio
     protected static final String FUZZY_PARSE = "fuzzy_parse";
     protected static final String TRIM_DOUBLE_QUOTES = "trim_double_quotes";
     protected static final String SKIP_LINES = "skip_lines";
+    protected static final String CSV_SCHEMA = "csv_schema";
+    // decimal(p,s)
+    private static final Pattern DECIMAL_TYPE_PATTERN = Pattern.compile("decimal\\((\\d+),(\\d+)\\)");
+    // datetime(p)
+    private static final Pattern DATETIME_TYPE_PATTERN = Pattern.compile("datetime\\((\\d+)\\)");
 
     protected static final ImmutableSet<String> FILE_FORMAT_PROPERTIES = new ImmutableSet.Builder<String>()
             .add(FORMAT)
@@ -95,10 +105,14 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio
             .add(LINE_DELIMITER)
             .add(TRIM_DOUBLE_QUOTES)
             .add(SKIP_LINES)
+            .add(CSV_SCHEMA)
             .build();
 
-
+    // Columns got from file
     protected List<Column> columns = null;
+    // User specified csv columns, it will override columns got from file
+    private List<Column> csvSchema = Lists.newArrayList();
+
     protected List<TBrokerFileStatus> fileStatuses = Lists.newArrayList();
     protected Map<String, String> locationProperties;
 
@@ -130,6 +144,10 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio
         return locationProperties;
     }
 
+    public List<Column> getCsvSchema() {
+        return csvSchema;
+    }
+
     public String getFsName() {
         TFileType fileType = getTFileType();
         if (fileType == TFileType.FILE_HDFS) {
@@ -187,6 +205,82 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio
         fuzzyParse = Boolean.valueOf(validParams.get(FUZZY_PARSE)).booleanValue();
         trimDoubleQuotes = Boolean.valueOf(validParams.get(TRIM_DOUBLE_QUOTES)).booleanValue();
         skipLines = Integer.valueOf(validParams.getOrDefault(SKIP_LINES, "0")).intValue();
+
+        if (formatString.equals("csv") || formatString.equals("csv_with_names")
+                || formatString.equals("csv_with_names_and_types")) {
+            parseCsvSchema(csvSchema, validParams);
+        }
+    }
+
+    // public for unit test
+    public static void parseCsvSchema(List<Column> csvSchema, Map<String, String> validParams)
+            throws AnalysisException {
+        String csvSchemaStr = validParams.get(CSV_SCHEMA);
+        if (Strings.isNullOrEmpty(csvSchemaStr)) {
+            return;
+        }
+        // the schema str is like: "k1:int;k2:bigint;k3:varchar(20);k4:datetime(6)"
+        String[] schemaStrs = csvSchemaStr.split(";");
+        try {
+            for (String schemaStr : schemaStrs) {
+                String[] kv = schemaStr.replace(" ", "").split(":");
+                if (kv.length != 2) {
+                    throw new AnalysisException("invalid csv schema: " + csvSchemaStr);
+                }
+                Column column = null;
+                String name = kv[0].toLowerCase();
+                FeNameFormat.checkColumnName(name);
+                String type = kv[1].toLowerCase();
+                if (type.equals("tinyint")) {
+                    column = new Column(name, PrimitiveType.TINYINT, true);
+                } else if (type.equals("smallint")) {
+                    column = new Column(name, PrimitiveType.SMALLINT, true);
+                } else if (type.equals("int")) {
+                    column = new Column(name, PrimitiveType.INT, true);
+                } else if (type.equals("bigint")) {
+                    column = new Column(name, PrimitiveType.BIGINT, true);
+                } else if (type.equals("largeint")) {
+                    column = new Column(name, PrimitiveType.LARGEINT, true);
+                } else if (type.equals("float")) {
+                    column = new Column(name, PrimitiveType.FLOAT, true);
+                } else if (type.equals("double")) {
+                    column = new Column(name, PrimitiveType.DOUBLE, true);
+                } else if (type.startsWith("decimal")) {
+                    // regex decimal(p, s)
+                    Matcher matcher = DECIMAL_TYPE_PATTERN.matcher(type);
+                    if (!matcher.find()) {
+                        throw new AnalysisException("invalid decimal type: " + type);
+                    }
+                    int precision = Integer.parseInt(matcher.group(1));
+                    int scale = Integer.parseInt(matcher.group(2));
+                    column = new Column(name, ScalarType.createDecimalV3Type(precision, scale), false, null, true, null,
+                            "");
+                } else if (type.equals("date")) {
+                    column = new Column(name, ScalarType.createDateType(), false, null, true, null, "");
+                } else if (type.startsWith("datetime")) {
+                    int scale = 0;
+                    if (!type.equals("datetime")) {
+                        // regex datetime(s)
+                        Matcher matcher = DATETIME_TYPE_PATTERN.matcher(type);
+                        if (!matcher.find()) {
+                            throw new AnalysisException("invalid datetime type: " + type);
+                        }
+                        scale = Integer.parseInt(matcher.group(1));
+                    }
+                    column = new Column(name, ScalarType.createDatetimeV2Type(scale), false, null, true, null, "");
+                } else if (type.equals("string")) {
+                    column = new Column(name, PrimitiveType.STRING, true);
+                } else if (type.equals("boolean")) {
+                    column = new Column(name, PrimitiveType.BOOLEAN, true);
+                } else {
+                    throw new AnalysisException("unsupported column type: " + type);
+                }
+                csvSchema.add(column);
+            }
+            LOG.debug("get csv schema: {}", csvSchema);
+        } catch (Exception e) {
+            throw new AnalysisException("invalid csv schema: " + e.getMessage());
+        }
     }
 
     public List<TBrokerFileStatus> getFileStatuses() {
@@ -221,6 +315,9 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio
 
     @Override
     public List<Column> getTableColumns() throws AnalysisException {
+        if (!csvSchema.isEmpty()) {
+            return csvSchema;
+        }
         if (this.columns != null) {
             return columns;
         }
diff --git a/fe/fe-core/src/test/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunctionTest.java b/fe/fe-core/src/test/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunctionTest.java
new file mode 100644
index 0000000000..f664415e6d
--- /dev/null
+++ b/fe/fe-core/src/test/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunctionTest.java
@@ -0,0 +1,117 @@
+// Licensed to the Apache Software Foundation (ASF) under one
+// or more contributor license agreements.  See the NOTICE file
+// distributed with this work for additional information
+// regarding copyright ownership.  The ASF licenses this file
+// to you under the Apache License, Version 2.0 (the
+// "License"); you may not use this file except in compliance
+// with the License.  You may obtain a copy of the License at
+//
+//   http://www.apache.org/licenses/LICENSE-2.0
+//
+// Unless required by applicable law or agreed to in writing,
+// software distributed under the License is distributed on an
+// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+// KIND, either express or implied.  See the License for the
+// specific language governing permissions and limitations
+// under the License.
+
+package org.apache.doris.tablefunction;
+
+import org.apache.doris.catalog.Column;
+import org.apache.doris.catalog.PrimitiveType;
+import org.apache.doris.common.AnalysisException;
+import org.apache.doris.common.Config;
+
+import com.google.common.collect.Lists;
+import com.google.common.collect.Maps;
+import org.junit.Assert;
+import org.junit.Test;
+
+import java.util.List;
+import java.util.Map;
+
+public class ExternalFileTableValuedFunctionTest {
+    @Test
+    public void testCsvSchemaParse() {
+        Config.enable_date_conversion = true;
+        Map<String, String> properties = Maps.newHashMap();
+        properties.put(ExternalFileTableValuedFunction.CSV_SCHEMA,
+                "k1:int;k2:bigint;k3:float;k4:double;k5:smallint;k6:tinyint;k7:bool;"
+                        + "k8:char(10);k9:varchar(20);k10:date;k11:datetime;k12:decimal(10,2)");
+        List<Column> csvSchema = Lists.newArrayList();
+        try {
+            ExternalFileTableValuedFunction.parseCsvSchema(csvSchema, properties);
+            Assert.fail();
+        } catch (AnalysisException e) {
+            e.printStackTrace();
+            Assert.assertTrue(e.getMessage().contains("unsupported column type: bool"));
+        }
+
+        csvSchema.clear();
+        properties.put(ExternalFileTableValuedFunction.CSV_SCHEMA,
+                "k1:int;k2:bigint;k3:float;k4:double;k5:smallint;k6:tinyint;k7:boolean;"
+                        + "k8:string;k9:date;k10:datetime;k11:decimal(10, 2);k12:decimal( 38,10); k13:datetime(5)");
+        try {
+            ExternalFileTableValuedFunction.parseCsvSchema(csvSchema, properties);
+            Assert.assertEquals(13, csvSchema.size());
+            Column decimalCol = csvSchema.get(10);
+            Assert.assertEquals(10, decimalCol.getPrecision());
+            Assert.assertEquals(2, decimalCol.getScale());
+            decimalCol = csvSchema.get(11);
+            Assert.assertEquals(38, decimalCol.getPrecision());
+            Assert.assertEquals(10, decimalCol.getScale());
+            Column datetimeCol = csvSchema.get(12);
+            Assert.assertEquals(5, datetimeCol.getScale());
+
+            for (int i = 0; i < csvSchema.size(); i++) {
+                Column col = csvSchema.get(i);
+                switch (col.getName()) {
+                    case "k1":
+                        Assert.assertEquals(PrimitiveType.INT, col.getType().getPrimitiveType());
+                        break;
+                    case "k2":
+                        Assert.assertEquals(PrimitiveType.BIGINT, col.getType().getPrimitiveType());
+                        break;
+                    case "k3":
+                        Assert.assertEquals(PrimitiveType.FLOAT, col.getType().getPrimitiveType());
+                        break;
+                    case "k4":
+                        Assert.assertEquals(PrimitiveType.DOUBLE, col.getType().getPrimitiveType());
+                        break;
+                    case "k5":
+                        Assert.assertEquals(PrimitiveType.SMALLINT, col.getType().getPrimitiveType());
+                        break;
+                    case "k6":
+                        Assert.assertEquals(PrimitiveType.TINYINT, col.getType().getPrimitiveType());
+                        break;
+                    case "k7":
+                        Assert.assertEquals(PrimitiveType.BOOLEAN, col.getType().getPrimitiveType());
+                        break;
+                    case "k8":
+                        Assert.assertEquals(PrimitiveType.STRING, col.getType().getPrimitiveType());
+                        break;
+                    case "k9":
+                        Assert.assertEquals(PrimitiveType.DATEV2, col.getType().getPrimitiveType());
+                        break;
+                    case "k10":
+                        Assert.assertEquals(PrimitiveType.DATETIMEV2, col.getType().getPrimitiveType());
+                        break;
+                    case "k11":
+                        Assert.assertEquals(PrimitiveType.DECIMAL64, col.getType().getPrimitiveType());
+                        break;
+                    case "k12":
+                        Assert.assertEquals(PrimitiveType.DECIMAL128, col.getType().getPrimitiveType());
+                        break;
+                    case "k13":
+                        Assert.assertEquals(PrimitiveType.DATETIMEV2, col.getType().getPrimitiveType());
+                        break;
+                    default:
+                        Assert.fail("unknown column name: " + col.getName());
+                }
+            }
+        } catch (AnalysisException e) {
+            e.printStackTrace();
+            Assert.fail();
+        }
+    }
+}
diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift
index ac8fcb111b..d49bb72f3a 100644
--- a/gensrc/thrift/PaloInternalService.thrift
+++ b/gensrc/thrift/PaloInternalService.thrift
@@ -201,6 +201,7 @@ struct TQueryOptions {
 
   // For debug purpose, skip delete bitmap when reading data
   63: optional bool skip_delete_bitmap = false
+  64: optional bool dry_run_query = false
 }
     
 
diff --git a/regression-test/suites/segcompaction_p1/test_segcompaction_unique_keys_mow.groovy b/regression-test/suites/segcompaction_p1/test_segcompaction_unique_keys_mow.groovy
index a043345999..39ac856ae0 100644
--- a/regression-test/suites/segcompaction_p1/test_segcompaction_unique_keys_mow.groovy
+++ b/regression-test/suites/segcompaction_p1/test_segcompaction_unique_keys_mow.groovy
@@ -76,7 +76,7 @@ suite("test_segcompaction_unique_keys_mow") {
                 )
             UNIQUE KEY(`col_0`) DISTRIBUTED BY HASH(`col_0`) BUCKETS 1
             PROPERTIES (
-                "replication_num" = "1",
+                "replication_num" = "1"
                 );
         """
         //      "enable_unique_key_merge_on_write" = "true"