From e489b60ea39d8ec71924674b069caf42c1bf5f7a Mon Sep 17 00:00:00 2001 From: Siyang Tang <82279870+TangSiyang2001@users.noreply.github.com> Date: Fri, 21 Jul 2023 19:31:19 +0800 Subject: [PATCH] [feature](load) support line delimiter for old broker load (#22030) --- .../Load/BROKER-LOAD.md | 5 + .../Load/BROKER-LOAD.md | 5 + fe/fe-core/src/main/cup/sql_parser.cup | 3 +- .../doris/analysis/DataDescription.java | 26 ++++ .../apache/doris/analysis/S3TvfLoadStmt.java | 24 ++-- .../ddl/csv_s3_case_line_delimiter_create.sql | 25 ++++ .../ddl/csv_s3_case_line_delimiter_drop.sql | 1 + .../broker_load/test_broker_load.groovy | 33 +++-- .../test_tvf_based_broker_load.groovy | 123 +++--------------- 9 files changed, 123 insertions(+), 122 deletions(-) create mode 100644 regression-test/suites/load_p2/broker_load/ddl/csv_s3_case_line_delimiter_create.sql create mode 100644 regression-test/suites/load_p2/broker_load/ddl/csv_s3_case_line_delimiter_drop.sql diff --git a/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md b/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md index 50b859113a..59956abc76 100644 --- a/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md +++ b/docs/en/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md @@ -65,6 +65,7 @@ WITH BROKER broker_name INTO TABLE `table_name` [PARTITION (p1, p2, ...)] [COLUMNS TERMINATED BY "column_separator"] + [LINES TERMINATED BY "line_delimiter"] [FORMAT AS "file_type"] [(column_list)] [COLUMNS FROM PATH AS (c1, c2, ...)] @@ -96,6 +97,10 @@ WITH BROKER broker_name Specifies the column separator. Only valid in CSV format. Only single-byte delimiters can be specified. + - `LINES TERMINATED BY` + + Specifies the line delimiter. Only valid in CSV format. Only single-byte delimiters can be specified. + - `FORMAT AS` Specifies the file type, CSV, PARQUET and ORC formats are supported. Default is CSV. diff --git a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md index 68bab36c72..993fa65386 100644 --- a/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md +++ b/docs/zh-CN/docs/sql-manual/sql-reference/Data-Manipulation-Statements/Load/BROKER-LOAD.md @@ -65,6 +65,7 @@ WITH BROKER broker_name INTO TABLE `table_name` [PARTITION (p1, p2, ...)] [COLUMNS TERMINATED BY "column_separator"] + [LINES TERMINATED BY "line_delimiter"] [FORMAT AS "file_type"] [(column_list)] [COLUMNS FROM PATH AS (c1, c2, ...)] @@ -96,6 +97,10 @@ WITH BROKER broker_name 指定列分隔符。仅在 CSV 格式下有效。仅能指定单字节分隔符。 + - `LINES TERMINATED BY` + + 指定行分隔符。仅在 CSV 格式下有效。仅能指定单字节分隔符。 + - `FORMAT AS` 指定文件类型,支持 CSV、PARQUET 和 ORC 格式。默认为 CSV。 diff --git a/fe/fe-core/src/main/cup/sql_parser.cup b/fe/fe-core/src/main/cup/sql_parser.cup index ef95f1a5a5..cc6d49eac1 100644 --- a/fe/fe-core/src/main/cup/sql_parser.cup +++ b/fe/fe-core/src/main/cup/sql_parser.cup @@ -2397,6 +2397,7 @@ data_desc ::= KW_INTO KW_TABLE ident:tableName opt_partition_names:partitionNames opt_field_term:colSep + opt_line_term:lineDelimiter opt_file_format:fileFormat opt_col_list:colList opt_columns_from_path:columnsFromPath @@ -2407,7 +2408,7 @@ data_desc ::= sequence_col_clause:sequenceColName opt_properties:properties {: - RESULT = new DataDescription(tableName, partitionNames, files, colList, colSep, fileFormat, + RESULT = new DataDescription(tableName, partitionNames, files, colList, colSep, lineDelimiter, fileFormat, columnsFromPath, isNeg, colMappingList, preFilterExpr, whereExpr, mergeType, deleteExpr, sequenceColName, properties); :} | opt_merge_type:mergeType KW_DATA KW_FROM KW_TABLE ident:srcTableName diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/DataDescription.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/DataDescription.java index 658605abc5..88044394fe 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/DataDescription.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/DataDescription.java @@ -168,11 +168,32 @@ public class DataDescription implements InsertStmt.DataDesc { isNegative, columnMappingList, null, null, LoadTask.MergeType.APPEND, null, null, null); } + public DataDescription(String tableName, + PartitionNames partitionNames, + List filePaths, + List columns, + Separator columnSeparator, + String fileFormat, + List columnsFromPath, + boolean isNegative, + List columnMappingList, + Expr fileFilterExpr, + Expr whereExpr, + LoadTask.MergeType mergeType, + Expr deleteCondition, + String sequenceColName, + Map properties) { + this(tableName, partitionNames, filePaths, columns, columnSeparator, null, + fileFormat, columnsFromPath, isNegative, columnMappingList, fileFilterExpr, whereExpr, + mergeType, deleteCondition, sequenceColName, properties); + } + public DataDescription(String tableName, PartitionNames partitionNames, List filePaths, List columns, Separator columnSeparator, + Separator lineDelimiter, String fileFormat, List columnsFromPath, boolean isNegative, @@ -188,6 +209,7 @@ public class DataDescription implements InsertStmt.DataDesc { this.filePaths = filePaths; this.fileFieldNames = columns; this.columnSeparator = columnSeparator; + this.lineDelimiter = lineDelimiter; this.fileFormat = fileFormat; this.columnsFromPath = columnsFromPath; this.isNegative = isNegative; @@ -597,6 +619,10 @@ public class DataDescription implements InsertStmt.DataDesc { return lineDelimiter.getSeparator(); } + public Separator getLineDelimiterObj() { + return lineDelimiter; + } + public void setLineDelimiter(Separator lineDelimiter) { this.lineDelimiter = lineDelimiter; } diff --git a/fe/fe-core/src/main/java/org/apache/doris/analysis/S3TvfLoadStmt.java b/fe/fe-core/src/main/java/org/apache/doris/analysis/S3TvfLoadStmt.java index e10b9893b8..60ed5de036 100644 --- a/fe/fe-core/src/main/java/org/apache/doris/analysis/S3TvfLoadStmt.java +++ b/fe/fe-core/src/main/java/org/apache/doris/analysis/S3TvfLoadStmt.java @@ -154,16 +154,10 @@ public class S3TvfLoadStmt extends NativeInsertStmt { final String format = Optional.ofNullable(dataDescription.getFileFormat()).orElse(DEFAULT_FORMAT); params.put(ExternalFileTableValuedFunction.FORMAT, format); + if (isCsvFormat(format)) { - final Separator separator = dataDescription.getColumnSeparatorObj(); - if (separator != null) { - try { - separator.analyze(); - } catch (AnalysisException e) { - throw new DdlException("failed to create s3 tvf ref", e); - } - params.put(ExternalFileTableValuedFunction.COLUMN_SEPARATOR, dataDescription.getColumnSeparator()); - } + parseSeparator(dataDescription.getColumnSeparatorObj(), params); + parseSeparator(dataDescription.getLineDelimiterObj(), params); } List columnsFromPath = dataDescription.getColumnsFromPath(); @@ -187,6 +181,18 @@ public class S3TvfLoadStmt extends NativeInsertStmt { } } + private static void parseSeparator(Separator separator, Map tvfParams) throws DdlException { + if (separator == null) { + return; + } + try { + separator.analyze(); + } catch (AnalysisException e) { + throw new DdlException(String.format("failed to parse separator:%s", separator), e); + } + tvfParams.put(ExternalFileTableValuedFunction.COLUMN_SEPARATOR, separator.getSeparator()); + } + private static boolean isCsvFormat(String format) { return Strings.isNullOrEmpty(format) || StringUtils.equalsIgnoreCase(format, FORMAT_CSV); } diff --git a/regression-test/suites/load_p2/broker_load/ddl/csv_s3_case_line_delimiter_create.sql b/regression-test/suites/load_p2/broker_load/ddl/csv_s3_case_line_delimiter_create.sql new file mode 100644 index 0000000000..8042453190 --- /dev/null +++ b/regression-test/suites/load_p2/broker_load/ddl/csv_s3_case_line_delimiter_create.sql @@ -0,0 +1,25 @@ +CREATE TABLE csv_s3_case_line_delimiter ( + l_shipdate DATE NOT NULL, + l_orderkey bigint NOT NULL, + l_linenumber int not null, + l_partkey int NOT NULL, + l_suppkey int not null, + l_quantity decimal(15, 2) NOT NULL, + l_extendedprice decimal(15, 2) NOT NULL, + l_discount decimal(15, 2) NOT NULL, + l_tax decimal(15, 2) NOT NULL, + l_returnflag VARCHAR(1) NOT NULL, + l_linestatus VARCHAR(1) NOT NULL, + l_commitdate DATE NOT NULL, + l_receiptdate DATE NOT NULL, + l_shipinstruct VARCHAR(25) NOT NULL, + l_shipmode VARCHAR(10) NOT NULL, + l_comment VARCHAR(44) NOT NULL +)ENGINE=OLAP +DUPLICATE KEY(`l_shipdate`, `l_orderkey`) +COMMENT "OLAP" +DISTRIBUTED BY HASH(`l_orderkey`) BUCKETS 96 +PROPERTIES ( + "replication_num" = "1", + "colocate_with" = "lineitem_orders" +); \ No newline at end of file diff --git a/regression-test/suites/load_p2/broker_load/ddl/csv_s3_case_line_delimiter_drop.sql b/regression-test/suites/load_p2/broker_load/ddl/csv_s3_case_line_delimiter_drop.sql new file mode 100644 index 0000000000..f3ab15d05e --- /dev/null +++ b/regression-test/suites/load_p2/broker_load/ddl/csv_s3_case_line_delimiter_drop.sql @@ -0,0 +1 @@ +DROP TABLE IF EXISTS csv_s3_case_line_delimiter \ No newline at end of file diff --git a/regression-test/suites/load_p2/broker_load/test_broker_load.groovy b/regression-test/suites/load_p2/broker_load/test_broker_load.groovy index d9b4cfea37..0aaa95f6c0 100644 --- a/regression-test/suites/load_p2/broker_load/test_broker_load.groovy +++ b/regression-test/suites/load_p2/broker_load/test_broker_load.groovy @@ -48,6 +48,7 @@ suite("test_broker_load_p2", "p2") { "orc_s3_case7", // table column uppercase * load column lowercase * orc file lowercase "orc_s3_case8", // table column uppercase * load column uppercase * orc file uppercase "orc_s3_case9", // table column uppercase * load column lowercase * orc file uppercase + "csv_s3_case_line_delimiter" // csv format table with special line delimiter ] def paths = ["s3://doris-build-1308700295/regression/load/data/part*", "s3://doris-build-1308700295/regression/load/data/part*", @@ -80,6 +81,7 @@ suite("test_broker_load_p2", "p2") { "s3://doris-build-1308700295/regression/load/data/orc/hits_10k_rows_lowercase.orc", "s3://doris-build-1308700295/regression/load/data/orc/hits_10k_rows_uppercase.orc", "s3://doris-build-1308700295/regression/load/data/orc/hits_10k_rows_uppercase.orc", + "s3://doris-build-1308700295/regression/line_delimiter/lineitem_0x7.csv.gz" ] def columns_list = ["""p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", @@ -113,6 +115,7 @@ suite("test_broker_load_p2", "p2") { """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid,isartifical,windowclientwidth,windowclientheight,clienttimezone,clienteventtime,silverlightversion1,silverlightversion2,silverlightversion3,silverlightversion4,pagecharset,codeversion,islink,isdownload,isnotbounce,funiqid,originalurl,hid,isoldcounter,isevent,isparameter,dontcounthits,withhash,hitcolor,localeventtime,age,sex,income,interests,robotness,remoteip,windowname,openername,historylength,browserlanguage,browsercountry,socialnetwork,socialaction,httperror,sendtiming,dnstiming,connecttiming,responsestarttiming,responseendtiming,fetchtiming,socialsourcenetworkid,socialsourcepage,paramprice,paramorderid,paramcurrency,paramcurrencyid,openstatservicename,openstatcampaignid,openstatadid,openstatsourceid,utmsource,utmmedium,utmcampaign,utmcontent,utmterm,fromtag,hasgclid,refererhash,urlhash,clid""", """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid,isartifical,windowclientwidth,windowclientheight,clienttimezone,clienteventtime,silverlightversion1,silverlightversion2,silverlightversion3,silverlightversion4,pagecharset,codeversion,islink,isdownload,isnotbounce,funiqid,originalurl,hid,isoldcounter,isevent,isparameter,dontcounthits,withhash,hitcolor,localeventtime,age,sex,income,interests,robotness,remoteip,windowname,openername,historylength,browserlanguage,browsercountry,socialnetwork,socialaction,httperror,sendtiming,dnstiming,connecttiming,responsestarttiming,responseendtiming,fetchtiming,socialsourcenetworkid,socialsourcepage,paramprice,paramorderid,paramcurrency,paramcurrencyid,openstatservicename,openstatcampaignid,openstatadid,openstatsourceid,utmsource,utmmedium,utmcampaign,utmcontent,utmterm,fromtag,hasgclid,refererhash,urlhash,clid""", """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid,isartifical,windowclientwidth,windowclientheight,clienttimezone,clienteventtime,silverlightversion1,silverlightversion2,silverlightversion3,silverlightversion4,pagecharset,codeversion,islink,isdownload,isnotbounce,funiqid,originalurl,hid,isoldcounter,isevent,isparameter,dontcounthits,withhash,hitcolor,localeventtime,age,sex,income,interests,robotness,remoteip,windowname,openername,historylength,browserlanguage,browsercountry,socialnetwork,socialaction,httperror,sendtiming,dnstiming,connecttiming,responsestarttiming,responseendtiming,fetchtiming,socialsourcenetworkid,socialsourcepage,paramprice,paramorderid,paramcurrency,paramcurrencyid,openstatservicename,openstatcampaignid,openstatadid,openstatsourceid,utmsource,utmmedium,utmcampaign,utmcontent,utmterm,fromtag,hasgclid,refererhash,urlhash,clid""", + """""" //TODO: uncomment blow 8 rows after jibing fix // """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid,isartifical,windowclientwidth,windowclientheight,clienttimezone,clienteventtime,silverlightversion1,silverlightversion2,silverlightversion3,silverlightversion4,pagecharset,codeversion,islink,isdownload,isnotbounce,funiqid,originalurl,hid,isoldcounter,isevent,isparameter,dontcounthits,withhash,hitcolor,localeventtime,age,sex,income,interests,robotness,remoteip,windowname,openername,historylength,browserlanguage,browsercountry,socialnetwork,socialaction,httperror,sendtiming,dnstiming,connecttiming,responsestarttiming,responseendtiming,fetchtiming,socialsourcenetworkid,socialsourcepage,paramprice,paramorderid,paramcurrency,paramcurrencyid,openstatservicename,openstatcampaignid,openstatadid,openstatsourceid,utmsource,utmmedium,utmcampaign,utmcontent,utmterm,fromtag,hasgclid,refererhash,urlhash,clid""", // """WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE,ADVENGINEID,ISARTIFICAL,WINDOWCLIENTWIDTH,WINDOWCLIENTHEIGHT,CLIENTTIMEZONE,CLIENTEVENTTIME,SILVERLIGHTVERSION1,SILVERLIGHTVERSION2,SILVERLIGHTVERSION3,SILVERLIGHTVERSION4,PAGECHARSET,CODEVERSION,ISLINK,ISDOWNLOAD,ISNOTBOUNCE,FUNIQID,ORIGINALURL,HID,ISOLDCOUNTER,ISEVENT,ISPARAMETER,DONTCOUNTHITS,WITHHASH,HITCOLOR,LOCALEVENTTIME,AGE,SEX,INCOME,INTERESTS,ROBOTNESS,REMOTEIP,WINDOWNAME,OPENERNAME,HISTORYLENGTH,BROWSERLANGUAGE,BROWSERCOUNTRY,SOCIALNETWORK,SOCIALACTION,HTTPERROR,SENDTIMING,DNSTIMING,CONNECTTIMING,RESPONSESTARTTIMING,RESPONSEENDTIMING,FETCHTIMING,SOCIALSOURCENETWORKID,SOCIALSOURCEPAGE,PARAMPRICE,PARAMORDERID,PARAMCURRENCY,PARAMCURRENCYID,OPENSTATSERVICENAME,OPENSTATCAMPAIGNID,OPENSTATADID,OPENSTATSOURCEID,UTMSOURCE,UTMMEDIUM,UTMCAMPAIGN,UTMCONTENT,UTMTERM,FROMTAG,HASGCLID,REFERERHASH,URLHASH,CLID""", @@ -123,8 +126,8 @@ suite("test_broker_load_p2", "p2") { // """WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE,ADVENGINEID,ISARTIFICAL,WINDOWCLIENTWIDTH,WINDOWCLIENTHEIGHT,CLIENTTIMEZONE,CLIENTEVENTTIME,SILVERLIGHTVERSION1,SILVERLIGHTVERSION2,SILVERLIGHTVERSION3,SILVERLIGHTVERSION4,PAGECHARSET,CODEVERSION,ISLINK,ISDOWNLOAD,ISNOTBOUNCE,FUNIQID,ORIGINALURL,HID,ISOLDCOUNTER,ISEVENT,ISPARAMETER,DONTCOUNTHITS,WITHHASH,HITCOLOR,LOCALEVENTTIME,AGE,SEX,INCOME,INTERESTS,ROBOTNESS,REMOTEIP,WINDOWNAME,OPENERNAME,HISTORYLENGTH,BROWSERLANGUAGE,BROWSERCOUNTRY,SOCIALNETWORK,SOCIALACTION,HTTPERROR,SENDTIMING,DNSTIMING,CONNECTTIMING,RESPONSESTARTTIMING,RESPONSEENDTIMING,FETCHTIMING,SOCIALSOURCENETWORKID,SOCIALSOURCEPAGE,PARAMPRICE,PARAMORDERID,PARAMCURRENCY,PARAMCURRENCYID,OPENSTATSERVICENAME,OPENSTATCAMPAIGNID,OPENSTATADID,OPENSTATSOURCEID,UTMSOURCE,UTMMEDIUM,UTMCAMPAIGN,UTMCONTENT,UTMTERM,FROMTAG,HASGCLID,REFERERHASH,URLHASH,CLID""", // """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid,isartifical,windowclientwidth,windowclientheight,clienttimezone,clienteventtime,silverlightversion1,silverlightversion2,silverlightversion3,silverlightversion4,pagecharset,codeversion,islink,isdownload,isnotbounce,funiqid,originalurl,hid,isoldcounter,isevent,isparameter,dontcounthits,withhash,hitcolor,localeventtime,age,sex,income,interests,robotness,remoteip,windowname,openername,historylength,browserlanguage,browsercountry,socialnetwork,socialaction,httperror,sendtiming,dnstiming,connecttiming,responsestarttiming,responseendtiming,fetchtiming,socialsourcenetworkid,socialsourcepage,paramprice,paramorderid,paramcurrency,paramcurrencyid,openstatservicename,openstatcampaignid,openstatadid,openstatsourceid,utmsource,utmmedium,utmcampaign,utmcontent,utmterm,fromtag,hasgclid,refererhash,urlhash,clid""", ] - def column_in_paths = ["", "", "", "", "", "", "", "", "", "", "", "", "COLUMNS FROM PATH AS (city)", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] - def preceding_filters = ["", "", "", "", "", "", "", "", "", "", "", "preceding filter p_size < 10", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] + def column_in_paths = ["", "", "", "", "", "", "", "", "", "", "", "", "COLUMNS FROM PATH AS (city)", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] + def preceding_filters = ["", "", "", "", "", "", "", "", "", "", "", "preceding filter p_size < 10", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] def set_values = ["", "", "SET(comment=p_comment, retailprice=p_retailprice, container=p_container, size=p_size, type=p_type, brand=p_brand, mfgr=p_mfgr, name=p_name, partkey=p_partkey)", @@ -155,9 +158,12 @@ suite("test_broker_load_p2", "p2") { "", "", "", + "", "" ] - def where_exprs = ["", "", "", "", "", "", "", "", "", "", "", "where p_partkey>10", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] + def where_exprs = ["", "", "", "", "", "", "", "", "", "", "", "where p_partkey>10", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] + + def line_delimiters = ["", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "\u0007"] def etl_info = ["unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", @@ -190,6 +196,7 @@ suite("test_broker_load_p2", "p2") { "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", + "\\N" ] def task_info = ["cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", @@ -223,6 +230,7 @@ suite("test_broker_load_p2", "p2") { "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", + "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0" ] def error_msg = ["", @@ -255,6 +263,7 @@ suite("test_broker_load_p2", "p2") { "", "", "", + "" ] String ak = getS3AK() @@ -262,11 +271,18 @@ suite("test_broker_load_p2", "p2") { String enabled = context.config.otherConfigs.get("enableBrokerLoad") def do_load_job = {uuid, path, table, columns, column_in_path, preceding_filter, - set_value, where_expr -> - String columns_str = ("$columns" != "") ? "($columns)" : ""; - String format_str = table.startsWith("orc_s3_case") ? "ORC" : "PARQUET" + set_value, where_expr, line_delimiter -> + String columns_str = ("$columns" != "") ? "($columns)" : ""; + String format_str + if (table.startsWith("orc_s3_case")) { + format_str = "ORC" + } else if (table.startsWith("csv")) { + format_str = "CSV" + } else { + format_str = "PARQUET" + } sql """ - LOAD LABEL $uuid ( + LOAD LABEL $uuid ( DATA INFILE("$path") INTO TABLE $table FORMAT AS $format_str @@ -275,6 +291,7 @@ suite("test_broker_load_p2", "p2") { $preceding_filter $set_value $where_expr + $line_delimiter ) WITH S3 ( "AWS_ACCESS_KEY" = "$ak", @@ -300,7 +317,7 @@ suite("test_broker_load_p2", "p2") { def uuid = UUID.randomUUID().toString().replace("-", "0") uuids.add(uuid) do_load_job.call(uuid, paths[i], table, columns_list[i], column_in_paths[i], preceding_filters[i], - set_values[i], where_exprs[i]) + set_values[i], where_exprs[i], line_delimiters[i]) i++ } diff --git a/regression-test/suites/load_p2/broker_load/test_tvf_based_broker_load.groovy b/regression-test/suites/load_p2/broker_load/test_tvf_based_broker_load.groovy index 288e459823..a6af747de0 100644 --- a/regression-test/suites/load_p2/broker_load/test_tvf_based_broker_load.groovy +++ b/regression-test/suites/load_p2/broker_load/test_tvf_based_broker_load.groovy @@ -48,6 +48,7 @@ suite("test_tvf_based_broker_load_p2", "p2") { "orc_s3_case7", // table column uppercase * load column lowercase * orc file lowercase "orc_s3_case8", // table column uppercase * load column uppercase * orc file uppercase "orc_s3_case9", // table column uppercase * load column lowercase * orc file uppercase + "csv_s3_case_line_delimiter" // csv format table with special line delimiter ] def paths = ["s3://doris-build-1308700295/regression/load/data/part*", "s3://doris-build-1308700295/regression/load/data/part*", @@ -80,6 +81,7 @@ suite("test_tvf_based_broker_load_p2", "p2") { "s3://doris-build-1308700295/regression/load/data/orc/hits_10k_rows_lowercase.orc", "s3://doris-build-1308700295/regression/load/data/orc/hits_10k_rows_uppercase.orc", "s3://doris-build-1308700295/regression/load/data/orc/hits_10k_rows_uppercase.orc", + "s3://doris-build-1308700295/regression/line_delimiter/lineitem_0x7.csv.gz" ] def columns_list = ["""p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", """p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment""", @@ -113,6 +115,7 @@ suite("test_tvf_based_broker_load_p2", "p2") { """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid,isartifical,windowclientwidth,windowclientheight,clienttimezone,clienteventtime,silverlightversion1,silverlightversion2,silverlightversion3,silverlightversion4,pagecharset,codeversion,islink,isdownload,isnotbounce,funiqid,originalurl,hid,isoldcounter,isevent,isparameter,dontcounthits,withhash,hitcolor,localeventtime,age,sex,income,interests,robotness,remoteip,windowname,openername,historylength,browserlanguage,browsercountry,socialnetwork,socialaction,httperror,sendtiming,dnstiming,connecttiming,responsestarttiming,responseendtiming,fetchtiming,socialsourcenetworkid,socialsourcepage,paramprice,paramorderid,paramcurrency,paramcurrencyid,openstatservicename,openstatcampaignid,openstatadid,openstatsourceid,utmsource,utmmedium,utmcampaign,utmcontent,utmterm,fromtag,hasgclid,refererhash,urlhash,clid""", """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid,isartifical,windowclientwidth,windowclientheight,clienttimezone,clienteventtime,silverlightversion1,silverlightversion2,silverlightversion3,silverlightversion4,pagecharset,codeversion,islink,isdownload,isnotbounce,funiqid,originalurl,hid,isoldcounter,isevent,isparameter,dontcounthits,withhash,hitcolor,localeventtime,age,sex,income,interests,robotness,remoteip,windowname,openername,historylength,browserlanguage,browsercountry,socialnetwork,socialaction,httperror,sendtiming,dnstiming,connecttiming,responsestarttiming,responseendtiming,fetchtiming,socialsourcenetworkid,socialsourcepage,paramprice,paramorderid,paramcurrency,paramcurrencyid,openstatservicename,openstatcampaignid,openstatadid,openstatsourceid,utmsource,utmmedium,utmcampaign,utmcontent,utmterm,fromtag,hasgclid,refererhash,urlhash,clid""", """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid,isartifical,windowclientwidth,windowclientheight,clienttimezone,clienteventtime,silverlightversion1,silverlightversion2,silverlightversion3,silverlightversion4,pagecharset,codeversion,islink,isdownload,isnotbounce,funiqid,originalurl,hid,isoldcounter,isevent,isparameter,dontcounthits,withhash,hitcolor,localeventtime,age,sex,income,interests,robotness,remoteip,windowname,openername,historylength,browserlanguage,browsercountry,socialnetwork,socialaction,httperror,sendtiming,dnstiming,connecttiming,responsestarttiming,responseendtiming,fetchtiming,socialsourcenetworkid,socialsourcepage,paramprice,paramorderid,paramcurrency,paramcurrencyid,openstatservicename,openstatcampaignid,openstatadid,openstatsourceid,utmsource,utmmedium,utmcampaign,utmcontent,utmterm,fromtag,hasgclid,refererhash,urlhash,clid""", + """""" //TODO: uncomment blow 8 rows after jibing fix // """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid,isartifical,windowclientwidth,windowclientheight,clienttimezone,clienteventtime,silverlightversion1,silverlightversion2,silverlightversion3,silverlightversion4,pagecharset,codeversion,islink,isdownload,isnotbounce,funiqid,originalurl,hid,isoldcounter,isevent,isparameter,dontcounthits,withhash,hitcolor,localeventtime,age,sex,income,interests,robotness,remoteip,windowname,openername,historylength,browserlanguage,browsercountry,socialnetwork,socialaction,httperror,sendtiming,dnstiming,connecttiming,responsestarttiming,responseendtiming,fetchtiming,socialsourcenetworkid,socialsourcepage,paramprice,paramorderid,paramcurrency,paramcurrencyid,openstatservicename,openstatcampaignid,openstatadid,openstatsourceid,utmsource,utmmedium,utmcampaign,utmcontent,utmterm,fromtag,hasgclid,refererhash,urlhash,clid""", // """WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE,ADVENGINEID,ISARTIFICAL,WINDOWCLIENTWIDTH,WINDOWCLIENTHEIGHT,CLIENTTIMEZONE,CLIENTEVENTTIME,SILVERLIGHTVERSION1,SILVERLIGHTVERSION2,SILVERLIGHTVERSION3,SILVERLIGHTVERSION4,PAGECHARSET,CODEVERSION,ISLINK,ISDOWNLOAD,ISNOTBOUNCE,FUNIQID,ORIGINALURL,HID,ISOLDCOUNTER,ISEVENT,ISPARAMETER,DONTCOUNTHITS,WITHHASH,HITCOLOR,LOCALEVENTTIME,AGE,SEX,INCOME,INTERESTS,ROBOTNESS,REMOTEIP,WINDOWNAME,OPENERNAME,HISTORYLENGTH,BROWSERLANGUAGE,BROWSERCOUNTRY,SOCIALNETWORK,SOCIALACTION,HTTPERROR,SENDTIMING,DNSTIMING,CONNECTTIMING,RESPONSESTARTTIMING,RESPONSEENDTIMING,FETCHTIMING,SOCIALSOURCENETWORKID,SOCIALSOURCEPAGE,PARAMPRICE,PARAMORDERID,PARAMCURRENCY,PARAMCURRENCYID,OPENSTATSERVICENAME,OPENSTATCAMPAIGNID,OPENSTATADID,OPENSTATSOURCEID,UTMSOURCE,UTMMEDIUM,UTMCAMPAIGN,UTMCONTENT,UTMTERM,FROMTAG,HASGCLID,REFERERHASH,URLHASH,CLID""", @@ -123,8 +126,8 @@ suite("test_tvf_based_broker_load_p2", "p2") { // """WATCHID,JAVAENABLE,TITLE,GOODEVENT,EVENTTIME,EVENTDATE,COUNTERID,CLIENTIP,REGIONID,USERID,COUNTERCLASS,OS,USERAGENT,URL,REFERER,ISREFRESH,REFERERCATEGORYID,REFERERREGIONID,URLCATEGORYID,URLREGIONID,RESOLUTIONWIDTH,RESOLUTIONHEIGHT,RESOLUTIONDEPTH,FLASHMAJOR,FLASHMINOR,FLASHMINOR2,NETMAJOR,NETMINOR,USERAGENTMAJOR,USERAGENTMINOR,COOKIEENABLE,JAVASCRIPTENABLE,ISMOBILE,MOBILEPHONE,MOBILEPHONEMODEL,PARAMS,IPNETWORKID,TRAFICSOURCEID,SEARCHENGINEID,SEARCHPHRASE,ADVENGINEID,ISARTIFICAL,WINDOWCLIENTWIDTH,WINDOWCLIENTHEIGHT,CLIENTTIMEZONE,CLIENTEVENTTIME,SILVERLIGHTVERSION1,SILVERLIGHTVERSION2,SILVERLIGHTVERSION3,SILVERLIGHTVERSION4,PAGECHARSET,CODEVERSION,ISLINK,ISDOWNLOAD,ISNOTBOUNCE,FUNIQID,ORIGINALURL,HID,ISOLDCOUNTER,ISEVENT,ISPARAMETER,DONTCOUNTHITS,WITHHASH,HITCOLOR,LOCALEVENTTIME,AGE,SEX,INCOME,INTERESTS,ROBOTNESS,REMOTEIP,WINDOWNAME,OPENERNAME,HISTORYLENGTH,BROWSERLANGUAGE,BROWSERCOUNTRY,SOCIALNETWORK,SOCIALACTION,HTTPERROR,SENDTIMING,DNSTIMING,CONNECTTIMING,RESPONSESTARTTIMING,RESPONSEENDTIMING,FETCHTIMING,SOCIALSOURCENETWORKID,SOCIALSOURCEPAGE,PARAMPRICE,PARAMORDERID,PARAMCURRENCY,PARAMCURRENCYID,OPENSTATSERVICENAME,OPENSTATCAMPAIGNID,OPENSTATADID,OPENSTATSOURCEID,UTMSOURCE,UTMMEDIUM,UTMCAMPAIGN,UTMCONTENT,UTMTERM,FROMTAG,HASGCLID,REFERERHASH,URLHASH,CLID""", // """watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid,isartifical,windowclientwidth,windowclientheight,clienttimezone,clienteventtime,silverlightversion1,silverlightversion2,silverlightversion3,silverlightversion4,pagecharset,codeversion,islink,isdownload,isnotbounce,funiqid,originalurl,hid,isoldcounter,isevent,isparameter,dontcounthits,withhash,hitcolor,localeventtime,age,sex,income,interests,robotness,remoteip,windowname,openername,historylength,browserlanguage,browsercountry,socialnetwork,socialaction,httperror,sendtiming,dnstiming,connecttiming,responsestarttiming,responseendtiming,fetchtiming,socialsourcenetworkid,socialsourcepage,paramprice,paramorderid,paramcurrency,paramcurrencyid,openstatservicename,openstatcampaignid,openstatadid,openstatsourceid,utmsource,utmmedium,utmcampaign,utmcontent,utmterm,fromtag,hasgclid,refererhash,urlhash,clid""", ] - def column_in_paths = ["", "", "", "", "", "", "", "", "", "", "", "", "COLUMNS FROM PATH AS (city)", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] - def preceding_filters = ["", "", "", "", "", "", "", "", "", "", "", "preceding filter p_size < 10", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] + def column_in_paths = ["", "", "", "", "", "", "", "", "", "", "", "", "COLUMNS FROM PATH AS (city)", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] + def preceding_filters = ["", "", "", "", "", "", "", "", "", "", "", "preceding filter p_size < 10", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] def set_values = ["", "", "SET(comment=p_comment, retailprice=p_retailprice, container=p_container, size=p_size, type=p_type, brand=p_brand, mfgr=p_mfgr, name=p_name, partkey=p_partkey)", @@ -155,116 +158,27 @@ suite("test_tvf_based_broker_load_p2", "p2") { "", "", "", + "", "" ] - def where_exprs = ["", "", "", "", "", "", "", "", "", "", "", "where p_partkey>10", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", ""] - - def etl_info = ["unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=163706; dpp.abnorm.ALL=0; dpp.norm.ALL=36294", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "\\N", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "\\N", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=200000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=4096", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=100000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", - "unselected.rows=0; dpp.abnorm.ALL=0; dpp.norm.ALL=10000", - ] - - def task_info = ["cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - "cluster:cos.ap-beijing.myqcloud.com; timeout(s):14400; max_filter_ratio:0.0", - ] - - def error_msg = ["", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "[INTERNAL_ERROR]failed to find default value expr for slot: x1", - "", - "", - "[INTERNAL_ERROR]failed to find default value expr for slot: x1", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - "", - ] + def where_exprs = ["", "", "", "", "", "", "", "", "", "", "", "where p_partkey>10", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "",""] + def line_delimiters = ["", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "", "\u0007"] String ak = getS3AK() String sk = getS3SK() String enabled = context.config.otherConfigs.get("enableBrokerLoad") def do_load_job = { uuid, path, table, columns, column_in_path, preceding_filter, - set_value, where_expr -> + set_value, where_expr, line_delimiter -> String columns_str = ("$columns" != "") ? "($columns)" : ""; - String format_str = table.startsWith("orc_s3_case") ? "ORC" : "PARQUET" + String format_str + if (table.startsWith("orc_s3_case")) { + format_str = "ORC" + } else if (table.startsWith("csv")) { + format_str = "CSV" + } else { + format_str = "PARQUET" + } sql """ LOAD LABEL $uuid ( DATA INFILE("$path") @@ -275,6 +189,7 @@ suite("test_tvf_based_broker_load_p2", "p2") { $preceding_filter $set_value $where_expr + $line_delimiter ) WITH S3 ( "AWS_ACCESS_KEY" = "$ak", @@ -299,7 +214,7 @@ suite("test_tvf_based_broker_load_p2", "p2") { def uuid = UUID.randomUUID().toString().replace("-", "0") uuids.add(uuid) do_load_job.call(uuid, paths[i], table, columns_list[i], column_in_paths[i], preceding_filters[i], - set_values[i], where_exprs[i]) + set_values[i], where_exprs[i], line_delimiters[i]) i++ }