From 39f59f554ad5ff5459b47b0d201fe47d5c0e06a5 Mon Sep 17 00:00:00 2001 From: Mingyu Chen Date: Thu, 2 Mar 2023 16:51:27 +0800 Subject: [PATCH] [improvement](dry-run)(tvf) support csv schema in tvf and add "dry_run_query" variable (#16983) This CL mainly changes: Support specifying csv schema manually in s3/hdfs table valued function s3 ( 'URI' = 'https://bucket1/inventory.dat', 'ACCESS_KEY'= 'ak', 'SECRET_KEY' = 'sk', 'FORMAT' = 'csv', 'column_separator' = '|', 'csv_schema' = 'k1:int;k2:int;k3:int;k4:decimal(38,10)', 'use_path_style'='true' ) Add new session variable dry_run_query If set to true, the real query result will not be returned, instead, it will only return the number of returned rows. mysql> select * from bigtable; +--------------+ | ReturnedRows | +--------------+ | 10000000 | +--------------+ This can avoid large result set transmission time and focus on real execution time of query engine. For debug and analysis purpose. --- be/src/vec/sink/vmysql_result_writer.cpp | 14 ++- be/src/vec/sink/vmysql_result_writer.h | 2 + docs/en/docs/advanced/variables.md | 17 +++ docs/en/docs/lakehouse/file.md | 43 +++++++ docs/zh-CN/docs/advanced/variables.md | 17 +++ docs/zh-CN/docs/lakehouse/file.md | 43 +++++++ fe/check/checkstyle/import-control.xml | 2 + .../org/apache/doris/qe/CommonResultSet.java | 1 - .../java/org/apache/doris/qe/Coordinator.java | 5 + .../org/apache/doris/qe/SessionVariable.java | 11 ++ .../org/apache/doris/qe/StmtExecutor.java | 18 ++- .../ExternalFileTableValuedFunction.java | 103 ++++++++++++++- .../ExternalFileTableValuedFunctionTest.java | 117 ++++++++++++++++++ gensrc/thrift/PaloInternalService.thrift | 1 + .../test_segcompaction_unique_keys_mow.groovy | 2 +- 15 files changed, 384 insertions(+), 12 deletions(-) create mode 100644 fe/fe-core/src/test/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunctionTest.java diff --git a/be/src/vec/sink/vmysql_result_writer.cpp b/be/src/vec/sink/vmysql_result_writer.cpp index bade808912..2fe215d6f9 100644 --- a/be/src/vec/sink/vmysql_result_writer.cpp +++ b/be/src/vec/sink/vmysql_result_writer.cpp @@ -54,6 +54,7 @@ Status VMysqlResultWriter::init(RuntimeState* state) { return Status::InternalError("sinker is NULL pointer."); } set_output_object_data(state->return_object_data_as_binary()); + _is_dry_run = state->query_options().dry_run_query; return Status::OK(); } @@ -801,13 +802,14 @@ Status VMysqlResultWriter::append_block(Block& input_block) { if (status) { SCOPED_TIMER(_result_send_timer); - // push this batch to back - if (_sinker) { - status = _sinker->add_batch(result); - } else { - _results.push_back(std::move(result)); + // If this is a dry run task, no need to send data block + if (!_is_dry_run) { + if (_sinker) { + status = _sinker->add_batch(result); + } else { + _results.push_back(std::move(result)); + } } - if (status.ok()) { _written_rows += num_rows; } else { diff --git a/be/src/vec/sink/vmysql_result_writer.h b/be/src/vec/sink/vmysql_result_writer.h index a62e1f3420..07a44fa0af 100644 --- a/be/src/vec/sink/vmysql_result_writer.h +++ b/be/src/vec/sink/vmysql_result_writer.h @@ -74,6 +74,8 @@ private: RuntimeProfile::Counter* _sent_rows_counter = nullptr; // for synchronized results ResultList _results; + // If true, no block will be sent + bool _is_dry_run = false; }; } // namespace vectorized } // namespace doris diff --git a/docs/en/docs/advanced/variables.md b/docs/en/docs/advanced/variables.md index 2ccd506c44..960a1d637a 100644 --- a/docs/en/docs/advanced/variables.md +++ b/docs/en/docs/advanced/variables.md @@ -598,3 +598,20 @@ Translated with www.DeepL.com/Translator (free version) * `use_fix_replica` Use a fixed replica to query. If use_fix_replica is 1, the smallest one is used, if use_fix_replica is 2, the second smallest one is used, and so on. The default value is -1, which means it is not enabled. + +* `dry_run_query` + + + + If set to true, for query requests, the actual result set will no longer be returned, but only the number of rows. The default is false. + + This parameter can be used to avoid the time-consuming result set transmission when testing a large number of data sets, and focus on the time-consuming underlying query execution. + + ``` + mysql> select * from bigtable; + +--------------+ + | ReturnedRows | + +--------------+ + | 10000000 | + +--------------+ + ``` diff --git a/docs/en/docs/lakehouse/file.md b/docs/en/docs/lakehouse/file.md index 4d12847fc6..4fd0176c9d 100644 --- a/docs/en/docs/lakehouse/file.md +++ b/docs/en/docs/lakehouse/file.md @@ -85,6 +85,49 @@ As can be seen, Doris is able to automatically infer column types based on the m Besides Parquet, Doris supports analysis and auto column type inference of ORC, CSV, and Json files. +**CSV Schema** + + + +By default, for CSV format files, all columns are of type String. Column names and column types can be specified individually via the `csv_schema` attribute. Doris will use the specified column type for file reading. The format is as follows: + +`name1:type1;name2:type2;...` + +For columns with mismatched formats (such as string in the file and int defined by the user), or missing columns (such as 4 columns in the file and 5 columns defined by the user), these columns will return null. + +Currently supported column types are: + +| name | mapping type | +| --- | --- | +|tinyint |tinyint | +|smallint |smallint | +|int |int | +| bigint | bigint | +| largeint | largeint | +| float| float | +| double| double| +| decimal(p,s) | decimalv3(p,s) | +| date | datev2 | +| datetime | datetimev2 | +| char |string | +|varchar |string | +|string|string | +|boolean| boolean | + +Example: + +``` +s3 ( + 'URI' = 'https://bucket1/inventory.dat', + 'ACCESS_KEY'= 'ak', + 'SECRET_KEY' = 'sk', + 'FORMAT' = 'csv', + 'column_separator' = '|', + 'csv_schema' = 'k1:int;k2:int;k3:int;k4:decimal(38,10)', + 'use_path_style'='true' +) +``` + ### Query and Analysis You can conduct queries and analysis on this Parquet file using any SQL statements: diff --git a/docs/zh-CN/docs/advanced/variables.md b/docs/zh-CN/docs/advanced/variables.md index 4606376e75..fab01ff959 100644 --- a/docs/zh-CN/docs/advanced/variables.md +++ b/docs/zh-CN/docs/advanced/variables.md @@ -585,3 +585,20 @@ SELECT /*+ SET_VAR(query_timeout = 1, enable_partition_cache=true) */ sleep(3); * `use_fix_replica` 使用固定的replica进行查询,该值表示固定使用第几小的replica,默认为-1表示不启用。 + +* `dry_run_query` + + + + 如果设置为true,对于查询请求,将不再返回实际结果集,而仅返回行数。默认为 false。 + + 该参数可以用于测试返回大量数据集时,规避结果集传输的耗时,重点关注底层查询执行的耗时。 + + ``` + mysql> select * from bigtable; + +--------------+ + | ReturnedRows | + +--------------+ + | 10000000 | + +--------------+ + ``` diff --git a/docs/zh-CN/docs/lakehouse/file.md b/docs/zh-CN/docs/lakehouse/file.md index 701d914de7..abb1061573 100644 --- a/docs/zh-CN/docs/lakehouse/file.md +++ b/docs/zh-CN/docs/lakehouse/file.md @@ -85,6 +85,49 @@ s3( 目前支持对 Parquet、ORC、CSV、Json 格式进行分析和列类型推断。 +**CSV Schema** + + + +在默认情况下,对 CSV 格式文件,所有列类型均为 String。可以通过 `csv_schema` 属性单独指定列名和列类型。Doris 会使用指定的列类型进行文件读取。格式如下: + +`name1:type1;name2:type2;...` + +对于格式不匹配的列(比如文件中为字符串,用户定义为 int),或缺失列(比如文件中有4列,用户定义了5列),则这些列将返回null。 + +当前支持的列类型为: + +| 名称 | 映射类型 | +| --- | --- | +|tinyint |tinyint | +|smallint |smallint | +|int |int | +| bigint | bigint | +| largeint | largeint | +| float| float | +| double| double| +| decimal(p,s) | decimalv3(p,s) | +| date | datev2 | +| datetime | datetimev2 | +| char |string | +|varchar |string | +|string|string | +|boolean| boolean | + +示例: + +``` +s3 ( + 'URI' = 'https://bucket1/inventory.dat', + 'ACCESS_KEY'= 'ak', + 'SECRET_KEY' = 'sk', + 'FORMAT' = 'csv', + 'column_separator' = '|', + 'csv_schema' = 'k1:int;k2:int;k3:int;k4:decimal(38,10)', + 'use_path_style'='true' +) +``` + ### 查询分析 你可以使用任意的 SQL 语句对这个文件进行分析 diff --git a/fe/check/checkstyle/import-control.xml b/fe/check/checkstyle/import-control.xml index f38810a364..8f06378337 100644 --- a/fe/check/checkstyle/import-control.xml +++ b/fe/check/checkstyle/import-control.xml @@ -31,6 +31,8 @@ under the License. + + diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/CommonResultSet.java b/fe/fe-core/src/main/java/org/apache/doris/qe/CommonResultSet.java index 2735739e65..1729811a4e 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/CommonResultSet.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/CommonResultSet.java @@ -32,5 +32,4 @@ public class CommonResultSet extends AbstractResultSet { super(columns); } } - } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java index 518742ae1c..3ffade3aeb 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/Coordinator.java @@ -1213,6 +1213,10 @@ public class Coordinator { LOG.debug("no block query, return num >= limit rows, need cancel"); cancelInternal(Types.PPlanFragmentCancelReason.LIMIT_REACH); } + if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().dryRunQuery) { + numReceivedRows = 0; + numReceivedRows += resultBatch.getQueryStatistics().getReturnedRows(); + } } else if (resultBatch.getBatch() != null) { numReceivedRows += resultBatch.getBatch().getRowsSize(); } @@ -3324,3 +3328,4 @@ public class Coordinator { } } + diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java index 1c9baee38b..2ece4ede37 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/SessionVariable.java @@ -283,6 +283,8 @@ public class SessionVariable implements Serializable, Writable { // fix replica to query. If num = 1, query the smallest replica, if 2 is the second smallest replica. public static final String USE_FIX_REPLICA = "use_fix_replica"; + public static final String DRY_RUN_QUERY = "dry_run_query"; + // session origin value public Map sessionOriginValue = new HashMap(); // check stmt is or not [select /*+ SET_VAR(...)*/ ...] @@ -752,6 +754,11 @@ public class SessionVariable implements Serializable, Writable { @VariableMgr.VarAttr(name = USE_FIX_REPLICA) public int useFixReplica = -1; + // If set to true, all query will be executed without returning result + @VariableMgr.VarAttr(name = DRY_RUN_QUERY, needForward = true) + public boolean dryRunQuery = false; + + // If this fe is in fuzzy mode, then will use initFuzzyModeVariables to generate some variables, // not the default value set in the code. public void initFuzzyModeVariables() { @@ -1617,6 +1624,10 @@ public class SessionVariable implements Serializable, Writable { tResult.setEnableFileCache(enableFileCache); + if (dryRunQuery) { + tResult.setDryRunQuery(true); + } + return tResult; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java index dbf7226344..f09c62e03d 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java +++ b/fe/fe-core/src/main/java/org/apache/doris/qe/StmtExecutor.java @@ -109,6 +109,7 @@ import org.apache.doris.planner.ScanNode; import org.apache.doris.proto.Data; import org.apache.doris.proto.InternalService; import org.apache.doris.proto.Types; +import org.apache.doris.qe.CommonResultSet.CommonResultSetMetaData; import org.apache.doris.qe.QueryState.MysqlStateType; import org.apache.doris.qe.cache.Cache; import org.apache.doris.qe.cache.CacheAnalyzer; @@ -198,6 +199,11 @@ public class StmtExecutor implements ProfileWriter { private String stmtName; private PrepareStmt prepareStmt; + // The result schema if "dry_run_query" is true. + // Only one column to indicate the real return row numbers. + private static final CommonResultSetMetaData DRY_RUN_QUERY_METADATA = new CommonResultSetMetaData( + Lists.newArrayList(new Column("ReturnedRows", PrimitiveType.STRING))); + // this constructor is mainly for proxy public StmtExecutor(ConnectContext context, OriginStatement originStmt, boolean isProxy) { this.context = context; @@ -1289,7 +1295,17 @@ public class StmtExecutor implements ProfileWriter { } if (!isSendFields) { if (!isOutfileQuery) { - sendFields(queryStmt.getColLabels(), exprToType(queryStmt.getResultExprs())); + if (ConnectContext.get() != null && ConnectContext.get().getSessionVariable().dryRunQuery) { + // Return a one row one column result set, with the real result number + List data = Lists.newArrayList(batch.getQueryStatistics() == null ? "0" + : batch.getQueryStatistics().getReturnedRows() + ""); + ResultSet resultSet = new CommonResultSet(DRY_RUN_QUERY_METADATA, + Collections.singletonList(data)); + sendResultSet(resultSet); + return; + } else { + sendFields(queryStmt.getColLabels(), exprToType(queryStmt.getResultExprs())); + } } else { sendFields(OutFileClause.RESULT_COL_NAMES, OutFileClause.RESULT_COL_TYPES); } diff --git a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java index 3cc34f34e7..e9f4203436 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java +++ b/fe/fe-core/src/main/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunction.java @@ -23,8 +23,10 @@ import org.apache.doris.catalog.Column; import org.apache.doris.catalog.Env; import org.apache.doris.catalog.HdfsResource; import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.catalog.ScalarType; import org.apache.doris.common.AnalysisException; import org.apache.doris.common.FeConstants; +import org.apache.doris.common.FeNameFormat; import org.apache.doris.common.UserException; import org.apache.doris.common.util.BrokerUtil; import org.apache.doris.planner.PlanNodeId; @@ -51,11 +53,12 @@ import org.apache.doris.thrift.TNetworkAddress; import org.apache.doris.thrift.TPrimitiveType; import org.apache.doris.thrift.TStatusCode; +import com.google.common.base.Strings; import com.google.common.collect.ImmutableSet; import com.google.common.collect.Lists; import com.google.protobuf.ByteString; -import org.apache.log4j.LogManager; -import org.apache.log4j.Logger; +import org.apache.logging.log4j.LogManager; +import org.apache.logging.log4j.Logger; import org.apache.thrift.TException; import org.apache.thrift.TSerializer; @@ -63,6 +66,8 @@ import java.util.List; import java.util.Map; import java.util.concurrent.ExecutionException; import java.util.concurrent.Future; +import java.util.regex.Matcher; +import java.util.regex.Pattern; /** * ExternalFileTableValuedFunction is used for S3/HDFS/LOCAL table-valued-function @@ -82,6 +87,11 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio protected static final String FUZZY_PARSE = "fuzzy_parse"; protected static final String TRIM_DOUBLE_QUOTES = "trim_double_quotes"; protected static final String SKIP_LINES = "skip_lines"; + protected static final String CSV_SCHEMA = "csv_schema"; + // decimal(p,s) + private static final Pattern DECIMAL_TYPE_PATTERN = Pattern.compile("decimal\\((\\d+),(\\d+)\\)"); + // datetime(p) + private static final Pattern DATETIME_TYPE_PATTERN = Pattern.compile("datetime\\((\\d+)\\)"); protected static final ImmutableSet FILE_FORMAT_PROPERTIES = new ImmutableSet.Builder() .add(FORMAT) @@ -95,10 +105,14 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio .add(LINE_DELIMITER) .add(TRIM_DOUBLE_QUOTES) .add(SKIP_LINES) + .add(CSV_SCHEMA) .build(); - + // Columns got from file protected List columns = null; + // User specified csv columns, it will override columns got from file + private List csvSchema = Lists.newArrayList(); + protected List fileStatuses = Lists.newArrayList(); protected Map locationProperties; @@ -130,6 +144,10 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio return locationProperties; } + public List getCsvSchema() { + return csvSchema; + } + public String getFsName() { TFileType fileType = getTFileType(); if (fileType == TFileType.FILE_HDFS) { @@ -187,6 +205,82 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio fuzzyParse = Boolean.valueOf(validParams.get(FUZZY_PARSE)).booleanValue(); trimDoubleQuotes = Boolean.valueOf(validParams.get(TRIM_DOUBLE_QUOTES)).booleanValue(); skipLines = Integer.valueOf(validParams.getOrDefault(SKIP_LINES, "0")).intValue(); + + if (formatString.equals("csv") || formatString.equals("csv_with_names") + || formatString.equals("csv_with_names_and_types")) { + parseCsvSchema(csvSchema, validParams); + } + } + + // public for unit test + public static void parseCsvSchema(List csvSchema, Map validParams) + throws AnalysisException { + String csvSchemaStr = validParams.get(CSV_SCHEMA); + if (Strings.isNullOrEmpty(csvSchemaStr)) { + return; + } + // the schema str is like: "k1:int;k2:bigint;k3:varchar(20);k4:datetime(6)" + String[] schemaStrs = csvSchemaStr.split(";"); + try { + for (String schemaStr : schemaStrs) { + String[] kv = schemaStr.replace(" ", "").split(":"); + if (kv.length != 2) { + throw new AnalysisException("invalid csv schema: " + csvSchemaStr); + } + Column column = null; + String name = kv[0].toLowerCase(); + FeNameFormat.checkColumnName(name); + String type = kv[1].toLowerCase(); + if (type.equals("tinyint")) { + column = new Column(name, PrimitiveType.TINYINT, true); + } else if (type.equals("smallint")) { + column = new Column(name, PrimitiveType.SMALLINT, true); + } else if (type.equals("int")) { + column = new Column(name, PrimitiveType.INT, true); + } else if (type.equals("bigint")) { + column = new Column(name, PrimitiveType.BIGINT, true); + } else if (type.equals("largeint")) { + column = new Column(name, PrimitiveType.LARGEINT, true); + } else if (type.equals("float")) { + column = new Column(name, PrimitiveType.FLOAT, true); + } else if (type.equals("double")) { + column = new Column(name, PrimitiveType.DOUBLE, true); + } else if (type.startsWith("decimal")) { + // regex decimal(p, s) + Matcher matcher = DECIMAL_TYPE_PATTERN.matcher(type); + if (!matcher.find()) { + throw new AnalysisException("invalid decimal type: " + type); + } + int precision = Integer.parseInt(matcher.group(1)); + int scale = Integer.parseInt(matcher.group(2)); + column = new Column(name, ScalarType.createDecimalV3Type(precision, scale), false, null, true, null, + ""); + } else if (type.equals("date")) { + column = new Column(name, ScalarType.createDateType(), false, null, true, null, ""); + } else if (type.startsWith("datetime")) { + int scale = 0; + if (!type.equals("datetime")) { + // regex datetime(s) + Matcher matcher = DATETIME_TYPE_PATTERN.matcher(type); + if (!matcher.find()) { + throw new AnalysisException("invalid datetime type: " + type); + } + scale = Integer.parseInt(matcher.group(1)); + } + column = new Column(name, ScalarType.createDatetimeV2Type(scale), false, null, true, null, ""); + } else if (type.equals("string")) { + column = new Column(name, PrimitiveType.STRING, true); + } else if (type.equals("boolean")) { + column = new Column(name, PrimitiveType.BOOLEAN, true); + } else { + throw new AnalysisException("unsupported column type: " + type); + } + csvSchema.add(column); + } + LOG.debug("get csv schema: {}", csvSchema); + } catch (Exception e) { + throw new AnalysisException("invalid csv schema: " + e.getMessage()); + } } public List getFileStatuses() { @@ -221,6 +315,9 @@ public abstract class ExternalFileTableValuedFunction extends TableValuedFunctio @Override public List getTableColumns() throws AnalysisException { + if (!csvSchema.isEmpty()) { + return csvSchema; + } if (this.columns != null) { return columns; } diff --git a/fe/fe-core/src/test/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunctionTest.java b/fe/fe-core/src/test/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunctionTest.java new file mode 100644 index 0000000000..f664415e6d --- /dev/null +++ b/fe/fe-core/src/test/java/org/apache/doris/tablefunction/ExternalFileTableValuedFunctionTest.java @@ -0,0 +1,117 @@ +// Licensed to the Apache Software Foundation (ASF) under one +// or more contributor license agreements. See the NOTICE file +// distributed with this work for additional information +// regarding copyright ownership. The ASF licenses this file +// to you under the Apache License, Version 2.0 (the +// "License"); you may not use this file except in compliance +// with the License. You may obtain a copy of the License at +// +// http://www.apache.org/licenses/LICENSE-2.0 +// +// Unless required by applicable law or agreed to in writing, +// software distributed under the License is distributed on an +// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +// KIND, either express or implied. See the License for the +// specific language governing permissions and limitations +// under the License. + +package org.apache.doris.tablefunction; + +import org.apache.doris.catalog.Column; +import org.apache.doris.catalog.PrimitiveType; +import org.apache.doris.common.AnalysisException; +import org.apache.doris.common.Config; + +import com.google.common.collect.Lists; +import com.google.common.collect.Maps; +import org.junit.Assert; +import org.junit.Test; + +import java.util.List; +import java.util.Map; + +public class ExternalFileTableValuedFunctionTest { + @Test + public void testCsvSchemaParse() { + Config.enable_date_conversion = true; + Map properties = Maps.newHashMap(); + properties.put(ExternalFileTableValuedFunction.CSV_SCHEMA, + "k1:int;k2:bigint;k3:float;k4:double;k5:smallint;k6:tinyint;k7:bool;" + + "k8:char(10);k9:varchar(20);k10:date;k11:datetime;k12:decimal(10,2)"); + List csvSchema = Lists.newArrayList(); + try { + ExternalFileTableValuedFunction.parseCsvSchema(csvSchema, properties); + Assert.fail(); + } catch (AnalysisException e) { + e.printStackTrace(); + Assert.assertTrue(e.getMessage().contains("unsupported column type: bool")); + } + + csvSchema.clear(); + properties.put(ExternalFileTableValuedFunction.CSV_SCHEMA, + "k1:int;k2:bigint;k3:float;k4:double;k5:smallint;k6:tinyint;k7:boolean;" + + "k8:string;k9:date;k10:datetime;k11:decimal(10, 2);k12:decimal( 38,10); k13:datetime(5)"); + try { + ExternalFileTableValuedFunction.parseCsvSchema(csvSchema, properties); + Assert.assertEquals(13, csvSchema.size()); + Column decimalCol = csvSchema.get(10); + Assert.assertEquals(10, decimalCol.getPrecision()); + Assert.assertEquals(2, decimalCol.getScale()); + decimalCol = csvSchema.get(11); + Assert.assertEquals(38, decimalCol.getPrecision()); + Assert.assertEquals(10, decimalCol.getScale()); + Column datetimeCol = csvSchema.get(12); + Assert.assertEquals(5, datetimeCol.getScale()); + + for (int i = 0; i < csvSchema.size(); i++) { + Column col = csvSchema.get(i); + switch (col.getName()) { + case "k1": + Assert.assertEquals(PrimitiveType.INT, col.getType().getPrimitiveType()); + break; + case "k2": + Assert.assertEquals(PrimitiveType.BIGINT, col.getType().getPrimitiveType()); + break; + case "k3": + Assert.assertEquals(PrimitiveType.FLOAT, col.getType().getPrimitiveType()); + break; + case "k4": + Assert.assertEquals(PrimitiveType.DOUBLE, col.getType().getPrimitiveType()); + break; + case "k5": + Assert.assertEquals(PrimitiveType.SMALLINT, col.getType().getPrimitiveType()); + break; + case "k6": + Assert.assertEquals(PrimitiveType.TINYINT, col.getType().getPrimitiveType()); + break; + case "k7": + Assert.assertEquals(PrimitiveType.BOOLEAN, col.getType().getPrimitiveType()); + break; + case "k8": + Assert.assertEquals(PrimitiveType.STRING, col.getType().getPrimitiveType()); + break; + case "k9": + Assert.assertEquals(PrimitiveType.DATEV2, col.getType().getPrimitiveType()); + break; + case "k10": + Assert.assertEquals(PrimitiveType.DATETIMEV2, col.getType().getPrimitiveType()); + break; + case "k11": + Assert.assertEquals(PrimitiveType.DECIMAL64, col.getType().getPrimitiveType()); + break; + case "k12": + Assert.assertEquals(PrimitiveType.DECIMAL128, col.getType().getPrimitiveType()); + break; + case "k13": + Assert.assertEquals(PrimitiveType.DATETIMEV2, col.getType().getPrimitiveType()); + break; + default: + Assert.fail("unknown column name: " + col.getName()); + } + } + } catch (AnalysisException e) { + e.printStackTrace(); + Assert.fail(); + } + } +} diff --git a/gensrc/thrift/PaloInternalService.thrift b/gensrc/thrift/PaloInternalService.thrift index ac8fcb111b..d49bb72f3a 100644 --- a/gensrc/thrift/PaloInternalService.thrift +++ b/gensrc/thrift/PaloInternalService.thrift @@ -201,6 +201,7 @@ struct TQueryOptions { // For debug purpose, skip delete bitmap when reading data 63: optional bool skip_delete_bitmap = false + 64: optional bool dry_run_query = false } diff --git a/regression-test/suites/segcompaction_p1/test_segcompaction_unique_keys_mow.groovy b/regression-test/suites/segcompaction_p1/test_segcompaction_unique_keys_mow.groovy index a043345999..39ac856ae0 100644 --- a/regression-test/suites/segcompaction_p1/test_segcompaction_unique_keys_mow.groovy +++ b/regression-test/suites/segcompaction_p1/test_segcompaction_unique_keys_mow.groovy @@ -76,7 +76,7 @@ suite("test_segcompaction_unique_keys_mow") { ) UNIQUE KEY(`col_0`) DISTRIBUTED BY HASH(`col_0`) BUCKETS 1 PROPERTIES ( - "replication_num" = "1", + "replication_num" = "1" ); """ // "enable_unique_key_merge_on_write" = "true"