[fix](regression-test) disable test for hdfs and fix double type with null (#9080)

1. Add a new config, enableHdfs (default: false), to regression-conf.groovy so that
   regression tests which require HDFS can be skipped when no HDFS cluster is available.
2. Fix a bug in result checking: when a FLOAT or DOUBLE column result is NULL, an
   exception was thrown while parsing the cell (a sketch of the null-tolerant comparison
   follows below).
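
For the second point, here is a minimal Groovy sketch of the null-tolerant comparison that OutputUtils.checkCell now performs for FLOAT/DOUBLE cells. compareDoubleCell is a hypothetical standalone helper written only for illustration; it is not a method of the regression-test framework.

// Hedged sketch: mirrors the new checkCell logic, assuming a NULL cell is rendered
// as the same "\\\\N" literal that checkCell compares against.
String compareDoubleCell(String expectCell, String realCell) {
    boolean expectNull = expectCell.equals("\\\\N")
    boolean actualNull = realCell.equals("\\\\N")
    if (expectNull != actualNull) {
        // one side is NULL and the other is not: always a mismatch
        return "mismatch: expect ${expectCell}, but real is ${realCell}"
    }
    if (expectNull) {
        return null // both NULL, nothing to parse
    }
    // both sides are non-null, so parsing can no longer throw for NULL cells
    double expectDouble = Double.parseDouble(expectCell)
    double realDouble = Double.parseDouble(realCell)
    double relativeError = Math.abs(expectDouble - realDouble) / realDouble
    return relativeError > 1e-10 ? "relative error ${relativeError} is bigger than 1e-10" : null
}

// a null return means the two cells are considered equal
assert compareDoubleCell("\\\\N", "\\\\N") == null
assert compareDoubleCell("1.5", "1.5") == null
assert compareDoubleCell("\\\\N", "1.5") != null
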
Mingyu Chen
2022-04-18 19:37:37 +08:00
committed by GitHub
parent 0f86fed547
commit 2cecb5dc82
9 changed files with 259 additions and 245 deletions

View File

@@ -44,7 +44,8 @@ testDirectories = ""
 customConf1 = "test_custom_conf_value"
 // for test csv with header
+enableHdfs=false // set to true if hdfs is ready
 hdfsFs = "hdfs://127.0.0.1:9000"
-hdfsUser = "palo-qa"
+hdfsUser = "doris-test"
 hdfsPasswd = ""
 brokerName = "broker_name"

View File

@@ -199,6 +199,7 @@ class StreamLoadAction implements SuiteAction {
         def file = new File(fileName)
         if (!file.exists()) {
             log.warn("Stream load input file not exists: ${file}".toString())
+            throw new IllegalStateException("Stream load input file not exists: ${file}");
         }
         log.info("Set stream load input: ${file.canonicalPath}".toString())
         entity = new FileEntity(file)
@@ -286,4 +287,4 @@ class StreamLoadAction implements SuiteAction {
}
}
}
}
}

View File

@@ -281,6 +281,11 @@ class Suite implements GroovyInterceptable {
         return hdfs.genRemoteDataDir()
     }
 
+    boolean enableHdfs() {
+        String enableHdfs = context.config.otherConfigs.get("enableHdfs");
+        return enableHdfs.equals("true");
+    }
+
     String uploadToHdfs(String localFile) {
         String dataDir = context.config.dataPath + "/" + group + "/"
         localFile = dataDir + localFile

View File

@@ -54,15 +54,23 @@ class OutputUtils {
     }
 
     static String checkCell(String info, int line, String expectCell, String realCell, String dataType) {
-        if(dataType == "FLOAT" || dataType == "DOUBLE") {
-            double expectDouble = Double.parseDouble(expectCell)
-            double realDouble = Double.parseDouble(realCell)
-            double realRelativeError = Math.abs(expectDouble - realDouble) / realDouble
-            double expectRelativeError = 1e-10
-            if(expectRelativeError < realRelativeError) {
-                return "${info}, line ${line}, ${dataType} result mismatch.\nExpect cell is: ${expectCell}\nBut real is: ${realCell}\nrelative error is: ${realRelativeError}, bigger than ${expectRelativeError}"
+        if (dataType == "FLOAT" || dataType == "DOUBLE") {
+            Boolean expectNull = expectCell.equals("\\\\N")
+            Boolean actualNull = realCell.equals("\\\\N")
+            if (expectNull != actualNull) {
+                return "${info}, line ${line}, ${dataType} result mismatch.\nExpect cell: ${expectCell}\nBut real is: ${realCell}"
+            } else if (!expectNull) {
+                // both are not null
+                double expectDouble = Double.parseDouble(expectCell)
+                double realDouble = Double.parseDouble(realCell)
+                double realRelativeError = Math.abs(expectDouble - realDouble) / realDouble
+                double expectRelativeError = 1e-10
+                if(expectRelativeError < realRelativeError) {
+                    return "${info}, line ${line}, ${dataType} result mismatch.\nExpect cell is: ${expectCell}\nBut real is: ${realCell}\nrelative error is: ${realRelativeError}, bigger than ${expectRelativeError}"
+                }
+            }
         } else if(dataType == "DATE" || dataType =="DATETIME") {
             expectCell = expectCell.replace("T", " ")

View File

@@ -0,0 +1,235 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
suite("test_csv_with_header", "csv_with_header") {
//define format
def format_csv = "csv"
def format_csv_with_names = "csv_with_names"
def format_csv_with_names_and_types = "csv_with_names_and_types"
//define format data file
def format_csv_file = "csv.txt"
def format_csv_with_names_file = "csv_with_names.txt"
def format_csv_with_names_and_types_file = "csv_with_names_and_types.txt"
def expect_rows = 10
def testTable = "test_csv_with_header"
def test_stream_load = {testTablex, label, format, file_path, exect_rows->
streamLoad {
table testTablex
set 'label',label
set 'column_separator',','
set 'format',format
file file_path
time 10000
check { result, exception, startTime, endTime ->
if (exception != null) {
throw exception
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
assertEquals("success", json.Status.toLowerCase())
assertEquals(json.NumberTotalRows, exect_rows)
assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
}
}
}
def import_from_hdfs = {testTable1, label1, hdfsFilePath, format1, brokerName, hdfsUser, hdfsPasswd->
sql """
LOAD LABEL ${label1} (
DATA INFILE("${hdfsFilePath}")
INTO TABLE ${testTable1} COLUMNS TERMINATED BY ","
FORMAT as "${format1}" )
with BROKER "${brokerName}"
("username"="${hdfsUser}", "password"="${hdfsPasswd}")
"""
}
def check_import_result = {checklabel, testTable4, expected_rows->
max_try_secs = 100000
while(max_try_secs--) {
result = sql "show load where label = '${checklabel}'"
if(result[0][2] == "FINISHED") {
result_count = sql "select count(*) from ${testTable4}"
assertEquals(result_count[0][0], expected_rows)
break
} else {
sleep(1000)
max_try_secs = max_try_secs - 1000
if(max_try_secs < 0) {
assertEquals(1, 2)
}
}
}
}
sql "DROP TABLE IF EXISTS ${testTable}"
def result1 = sql """
CREATE TABLE `${testTable}` (
`event_day` date NULL COMMENT "",
`siteid` int(11) NULL DEFAULT "10" COMMENT "",
`citycode` smallint(6) NULL COMMENT "",
`username` varchar(32) NULL DEFAULT "" COMMENT "",
`pv` bigint(20) NULL DEFAULT "0" COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`event_day`, `siteid`, `citycode`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`siteid`) BUCKETS 10
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
)
"""
//[stream load] format=csv
def label = UUID.randomUUID().toString()
test_stream_load.call(testTable, label, format_csv, format_csv_file, expect_rows)
//[stream load] format=csv_with_names
label = UUID.randomUUID().toString()
test_stream_load.call(testTable, label, format_csv_with_names, format_csv_with_names_file, expect_rows)
//[stream load] format=csv_with_names_and_types
label = UUID.randomUUID().toString()
test_stream_load.call(testTable, label, format_csv_with_names_and_types, format_csv_with_names_and_types_file, expect_rows)
// check total rows
def result_count = sql "select count(*) from ${testTable}"
assertEquals(result_count[0][0], expect_rows*3)
if (enableHdfs()) {
//test import data from hdfs
hdfsUser = getHdfsUser()
brokerName =getBrokerName()
hdfsPasswd = getHdfsPasswd()
hdfsFs = getHdfsFs()
//[broker load] test normal
label = UUID.randomUUID().toString().replaceAll("-", "")
remote_csv_file = uploadToHdfs format_csv_file
export_result = import_from_hdfs.call(testTable, label, remote_csv_file, format_csv, brokerName, hdfsUser, hdfsPasswd)
check_import_result.call(label, testTable, expect_rows * 4)
//[broker load] csv_with_names
label = UUID.randomUUID().toString().replaceAll("-", "")
remote_csv_file = uploadToHdfs format_csv_with_names_file
export_result = import_from_hdfs.call(testTable, label, remote_csv_file, format_csv_with_names, brokerName, hdfsUser, hdfsPasswd)
check_import_result.call(label, testTable, expect_rows * 5)
//[broker load] csv_with_names_and_types
label = UUID.randomUUID().toString().replaceAll("-", "")
remote_csv_file = uploadToHdfs format_csv_with_names_and_types_file
export_result = import_from_hdfs.call(testTable, label, remote_csv_file, format_csv_with_names_and_types, brokerName, hdfsUser, hdfsPasswd)
check_import_result.call(label, testTable, expect_rows * 6)
def export_to_hdfs = {exportTable, exportLable, hdfsPath, exportFormat, exportBrokerName, exportUserName, exportPasswd->
sql """ EXPORT TABLE ${exportTable}
TO "${hdfsPath}"
PROPERTIES ("label" = "${exportLable}", "column_separator"=",","format"="${exportFormat}")
WITH BROKER "${exportBrokerName}" ("username"="${exportUserName}", "password"="${exportPasswd}")
"""
}
def check_export_result = {checklabel1->
max_try_secs = 100000
while(max_try_secs--) {
result = sql "show export where label='${checklabel1}'"
if(result[0][2] == "FINISHED") {
break
} else {
sleep(1000)
max_try_secs = max_try_secs - 1000
if(max_try_secs < 0) {
assertEquals(1,2)
}
}
}
}
def check_download_result={resultlist, fileFormat, expectedTotalRows->
int totalLines = 0
if(fileFormat == format_csv_with_names) {
expectedTotalRows += resultlist.size()
}else if(fileFormat == format_csv_with_names_and_types) {
expectedTotalRows += resultlist.size() * 2
}
for(String oneFile :resultlist) {
totalLines += getTotalLine(oneFile)
deleteFile(oneFile)
}
assertEquals(expectedTotalRows,totalLines)
}
resultCount = sql "select count(*) from ${testTable}"
currentTotalRows = resultCount[0][0]
// export table to hdfs format=csv
hdfsDataDir = getHdfsDataDir()
label = UUID.randomUUID().toString().replaceAll("-", "")
export_to_hdfs.call(testTable, label, hdfsDataDir + "/" + label, format_csv, brokerName, hdfsUser, hdfsPasswd)
check_export_result(label)
result = downloadExportFromHdfs(label + "/export-data")
check_download_result(result, format_csv, currentTotalRows)
// export table to hdfs format=csv_with_names
label = UUID.randomUUID().toString().replaceAll("-", "")
export_to_hdfs.call(testTable, label, hdfsDataDir + "/" + label, format_csv_with_names, brokerName, hdfsUser, hdfsPasswd)
check_export_result(label)
result = downloadExportFromHdfs(label + "/export-data")
check_download_result(result, format_csv_with_names, currentTotalRows)
// export table to hdfs format=csv_with_names_and_types
label = UUID.randomUUID().toString().replaceAll("-", "")
export_to_hdfs.call(testTable, label, hdfsDataDir + "/" + label, format_csv_with_names_and_types, brokerName, hdfsUser, hdfsPasswd)
check_export_result(label)
result = downloadExportFromHdfs(label + "/export-data")
check_download_result(result, format_csv_with_names_and_types, currentTotalRows)
// select out file to hdfs
select_out_file = {outTable, outHdfsPath, outFormat, outHdfsFs, outBroker, outHdfsUser, outPasswd->
sql """
SELECT * FROM ${outTable}
INTO OUTFILE "${outHdfsPath}"
FORMAT AS "${outFormat}"
PROPERTIES
(
"broker.name" = "${outBroker}",
"column_separator" = ",",
"line_delimiter" = "\n",
"max_file_size" = "5MB",
"broker.username"="${hdfsUser}",
"broker.password"="${outPasswd}"
)
"""
}
// select out file to hdfs format=csv
label = UUID.randomUUID().toString().replaceAll("-", "")
select_out_file(testTable, hdfsDataDir + "/" + label + "/csv", format_csv, hdfsFs, brokerName, hdfsUser, hdfsPasswd)
result = downloadExportFromHdfs(label + "/csv")
check_download_result(result, format_csv, currentTotalRows)
// select out file to hdfs format=csv_with_names
label = UUID.randomUUID().toString().replaceAll("-", "")
select_out_file(testTable, hdfsDataDir + "/" + label + "/csv", format_csv_with_names, hdfsFs, brokerName, hdfsUser, hdfsPasswd)
result = downloadExportFromHdfs(label + "/csv")
check_download_result(result, format_csv_with_names, currentTotalRows)
// select out file to hdfs format=csv_with_names_and_types
label = UUID.randomUUID().toString().replaceAll("-", "")
select_out_file(testTable, hdfsDataDir + "/" + label + "/csv", format_csv_with_names_and_types, hdfsFs, brokerName, hdfsUser, hdfsPasswd)
result = downloadExportFromHdfs(label + "/csv")
check_download_result(result, format_csv_with_names_and_types, currentTotalRows)
}
}

View File

@@ -1,236 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
suite("test_csv_with_header", "demo") {
//define format
def format_csv = "csv"
def format_csv_with_names = "csv_with_names"
def format_csv_with_names_and_types = "csv_with_names_and_types"
//define format data file
def format_csv_file = "testheader/csv.txt"
def format_csv_with_names_file = "testheader/csv_with_names.txt"
def format_csv_with_names_and_types_file = "testheader/csv_with_names_and_types.txt"
def expect_rows = 10
def testTable = "test_csv_with_header"
def test_stream_load = {testTablex, label, format, file_path, exect_rows->
streamLoad {
table testTablex
set 'label',label
set 'column_separator',','
set 'format',format
file file_path
time 10000
check { result, exception, startTime, endTime ->
if (exception != null) {
throw exception
}
log.info("Stream load result: ${result}".toString())
def json = parseJson(result)
assertEquals("success", json.Status.toLowerCase())
assertEquals(json.NumberTotalRows, exect_rows)
assertTrue(json.NumberLoadedRows > 0 && json.LoadBytes > 0)
}
}
}
def import_from_hdfs = {testTable1, label1, hdfsFilePath, format1, brokerName, hdfsUser, hdfsPasswd->
sql """
LOAD LABEL ${label1} (
DATA INFILE("${hdfsFilePath}")
INTO TABLE ${testTable1} COLUMNS TERMINATED BY ","
FORMAT as "${format1}" )
with BROKER "${brokerName}"
("username"="${hdfsUser}", "password"="${hdfsPasswd}")
"""
}
def check_import_result = {checklabel, testTable4, expected_rows->
max_try_secs = 100000
while(max_try_secs--) {
result = sql "show load where label = '${checklabel}'"
if(result[0][2] == "FINISHED") {
result_count = sql "select count(*) from ${testTable4}"
assertEquals(result_count[0][0], expected_rows)
break
} else {
sleep(1000)
max_try_secs = max_try_secs - 1000
if(max_try_secs < 0) {
assertEquals(1, 2)
}
}
}
}
sql "DROP TABLE IF EXISTS ${testTable}"
def result1 = sql """
CREATE TABLE `${testTable}` (
`event_day` date NULL COMMENT "",
`siteid` int(11) NULL DEFAULT "10" COMMENT "",
`citycode` smallint(6) NULL COMMENT "",
`username` varchar(32) NULL DEFAULT "" COMMENT "",
`pv` bigint(20) NULL DEFAULT "0" COMMENT ""
) ENGINE=OLAP
DUPLICATE KEY(`event_day`, `siteid`, `citycode`)
COMMENT "OLAP"
DISTRIBUTED BY HASH(`siteid`) BUCKETS 10
PROPERTIES (
"replication_allocation" = "tag.location.default: 1",
"in_memory" = "false",
"storage_format" = "V2"
)
"""
//[stream load] format=csv
def label = UUID.randomUUID().toString()
test_stream_load.call(testTable, label, format_csv, format_csv_file, expect_rows)
//[stream load] format=csv_with_names
label = UUID.randomUUID().toString()
test_stream_load.call(testTable, label, format_csv_with_names, format_csv_with_names_file, expect_rows)
//[stream load] format=csv_with_names_and_types
label = UUID.randomUUID().toString()
test_stream_load.call(testTable, label, format_csv_with_names_and_types, format_csv_with_names_and_types_file, expect_rows)
// check total rows
def result_count = sql "select count(*) from ${testTable}"
assertEquals(result_count[0][0], expect_rows*3)
//test import data from hdfs
hdfsUser = getHdfsUser()
brokerName =getBrokerName()
hdfsPasswd = getHdfsPasswd()
hdfsFs = getHdfsFs()
//[broker load] test normal
label = UUID.randomUUID().toString().replaceAll("-", "")
remote_csv_file = uploadToHdfs format_csv_file
export_result = import_from_hdfs.call(testTable, label, remote_csv_file, format_csv, brokerName, hdfsUser, hdfsPasswd)
check_import_result.call(label, testTable, expect_rows * 4)
//[broker load] csv_with_names
label = UUID.randomUUID().toString().replaceAll("-", "")
remote_csv_file = uploadToHdfs format_csv_with_names_file
export_result = import_from_hdfs.call(testTable, label, remote_csv_file, format_csv_with_names, brokerName, hdfsUser, hdfsPasswd)
check_import_result.call(label, testTable, expect_rows * 5)
//[broker load] csv_with_names_and_types
label = UUID.randomUUID().toString().replaceAll("-", "")
remote_csv_file = uploadToHdfs format_csv_with_names_and_types_file
export_result = import_from_hdfs.call(testTable, label, remote_csv_file, format_csv_with_names_and_types, brokerName, hdfsUser, hdfsPasswd)
check_import_result.call(label, testTable, expect_rows * 6)
def export_to_hdfs = {exportTable, exportLable, hdfsPath, exportFormat, exportBrokerName, exportUserName, exportPasswd->
sql """ EXPORT TABLE ${exportTable}
TO "${hdfsPath}"
PROPERTIES ("label" = "${exportLable}", "column_separator"=",","format"="${exportFormat}")
WITH BROKER "${exportBrokerName}" ("username"="${exportUserName}", "password"="${exportPasswd}")
"""
}
def check_export_result = {checklabel1->
max_try_secs = 100000
while(max_try_secs--) {
result = sql "show export where label='${checklabel1}'"
if(result[0][2] == "FINISHED") {
break
} else {
sleep(1000)
max_try_secs = max_try_secs - 1000
if(max_try_secs < 0) {
assertEquals(1,2)
}
}
}
}
def check_download_result={resultlist, fileFormat, expectedTotalRows->
int totalLines = 0
if(fileFormat == format_csv_with_names) {
expectedTotalRows += resultlist.size()
}else if(fileFormat == format_csv_with_names_and_types) {
expectedTotalRows += resultlist.size() * 2
}
for(String oneFile :resultlist) {
totalLines += getTotalLine(oneFile)
deleteFile(oneFile)
}
assertEquals(expectedTotalRows,totalLines)
}
resultCount = sql "select count(*) from ${testTable}"
currentTotalRows = resultCount[0][0]
// export table to hdfs format=csv
hdfsDataDir = getHdfsDataDir()
label = UUID.randomUUID().toString().replaceAll("-", "")
export_to_hdfs.call(testTable, label, hdfsDataDir + "/" + label, format_csv, brokerName, hdfsUser, hdfsPasswd)
check_export_result(label)
result = downloadExportFromHdfs(label + "/export-data")
check_download_result(result, format_csv, currentTotalRows)
// export table to hdfs format=csv_with_names
label = UUID.randomUUID().toString().replaceAll("-", "")
export_to_hdfs.call(testTable, label, hdfsDataDir + "/" + label, format_csv_with_names, brokerName, hdfsUser, hdfsPasswd)
check_export_result(label)
result = downloadExportFromHdfs(label + "/export-data")
check_download_result(result, format_csv_with_names, currentTotalRows)
// export table to hdfs format=csv_with_names_and_types
label = UUID.randomUUID().toString().replaceAll("-", "")
export_to_hdfs.call(testTable, label, hdfsDataDir + "/" + label, format_csv_with_names_and_types, brokerName, hdfsUser, hdfsPasswd)
check_export_result(label)
result = downloadExportFromHdfs(label + "/export-data")
check_download_result(result, format_csv_with_names_and_types, currentTotalRows)
// select out file to hdfs
select_out_file = {outTable, outHdfsPath, outFormat, outHdfsFs, outBroker, outHdfsUser, outPasswd->
sql """
SELECT * FROM ${outTable}
INTO OUTFILE "${outHdfsPath}"
FORMAT AS "${outFormat}"
PROPERTIES
(
"broker.name" = "${outBroker}",
"column_separator" = ",",
"line_delimiter" = "\n",
"max_file_size" = "5MB",
"broker.username"="${hdfsUser}",
"broker.password"="${outPasswd}"
)
"""
}
// select out file to hdfs format=csv
label = UUID.randomUUID().toString().replaceAll("-", "")
select_out_file(testTable, hdfsDataDir + "/" + label + "/csv", format_csv, hdfsFs, brokerName, hdfsUser, hdfsPasswd)
result = downloadExportFromHdfs(label + "/csv")
check_download_result(result, format_csv, currentTotalRows)
// select out file to hdfs format=csv_with_names
label = UUID.randomUUID().toString().replaceAll("-", "")
select_out_file(testTable, hdfsDataDir + "/" + label + "/csv", format_csv_with_names, hdfsFs, brokerName, hdfsUser, hdfsPasswd)
result = downloadExportFromHdfs(label + "/csv")
check_download_result(result, format_csv_with_names, currentTotalRows)
// select out file to hdfs format=csv_with_names_and_types
label = UUID.randomUUID().toString().replaceAll("-", "")
select_out_file(testTable, hdfsDataDir + "/" + label + "/csv", format_csv_with_names_and_types, hdfsFs, brokerName, hdfsUser, hdfsPasswd)
result = downloadExportFromHdfs(label + "/csv")
check_download_result(result, format_csv_with_names_and_types, currentTotalRows)
}