[improvement](regression-test) add compression algorithm regression test (#22303)

This commit is contained in:
HHoflittlefish777
2023-07-28 17:28:52 +08:00
committed by GitHub
parent 3eeca7ee55
commit 05abfbc5ef
8 changed files with 566 additions and 46 deletions

View File

@@ -611,6 +611,8 @@ public class PropertyAnalyzer {
return TCompressionType.LZ4;
} else if (compressionType.equalsIgnoreCase("lz4f")) {
return TCompressionType.LZ4F;
} else if (compressionType.equalsIgnoreCase("lz4hc")) {
return TCompressionType.LZ4HC;
} else if (compressionType.equalsIgnoreCase("zlib")) {
return TCompressionType.ZLIB;
} else if (compressionType.equalsIgnoreCase("zstd")) {

View File

@@ -103,7 +103,8 @@ enum TCompressionType {
LZ4 = 4,
LZ4F = 5,
ZLIB = 6,
ZSTD = 7
ZSTD = 7,
LZ4HC = 8
}

View File

@@ -0,0 +1,5 @@
1, 2, 3, 4, 5, 6, 2023-1-1, 2023-1-1, test, test, 1.0
1, 2, 3, 4, 5, 6, 2023-1-1, 2023-1-1, test, test, 1.0
1, 2, 3, 4, 5, 6, 2023-1-1, 2023-1-1, test, test, 1.0
1, 2, 3, 4, 5, 6, 2023-1-1, 2023-1-1, test, test, 1.0
1, 2, 3, 4, 5, 6, 2023-1-1, 2023-1-1, test, test, 1.0
1 1 2 3 4 5 6 2023-1-1 2023-1-1 test test 1.0
2 1 2 3 4 5 6 2023-1-1 2023-1-1 test test 1.0
3 1 2 3 4 5 6 2023-1-1 2023-1-1 test test 1.0
4 1 2 3 4 5 6 2023-1-1 2023-1-1 test test 1.0
5 1 2 3 4 5 6 2023-1-1 2023-1-1 test test 1.0

View File

@@ -0,0 +1,43 @@
-- This file is automatically generated. You should know what you did if you want to edit this
-- !sql1 --
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
-- !sql2 --
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
-- !sql3 --
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
-- !sql4 --
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
-- !sql5 --
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
-- !sql6 --
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000
1 2 3 4 5 6 2023-01-01T00:00 2023-01-01 test test 1.000000000

View File

@@ -1,45 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Regression test: create a duplicate-key table with snappy compression,
// stream-load the ipv4.csv fixture into it, and verify the loaded row count.
suite("test_snappy", "p1") {
    def tableName = "test_snappy"
    // create a single-varchar-column table using the snappy codec
    sql """ DROP TABLE IF EXISTS ${tableName} """
    sql """
        CREATE TABLE IF NOT EXISTS ${tableName} (
            `k1` varchar(40) NULL
        ) ENGINE=OLAP
        DUPLICATE KEY(`k1`)
        COMMENT 'OLAP'
        DISTRIBUTED BY HASH(`k1`) BUCKETS 1
        PROPERTIES ("replication_allocation" = "tag.location.default: 1",
        "compression" = "snappy");
    """
    // NOTE(review): the previous comment here ("skip 3 lines and file have
    // 4 lines") did not match the assertion below — the load uses default
    // stream-load options and the fixture is expected to yield 82845 rows.
    streamLoad {
        table "${tableName}"
        file 'ipv4.csv'
    }
    sql "sync"
    // count(*) returns a single row; [0][0] is the loaded row count
    def count = sql "select count(*) from ${tableName} limit 10"
    assertEquals(82845, count[0][0])
}

View File

@@ -0,0 +1,335 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Regression test for every supported table compression codec: for each codec,
// create an 11-column duplicate-key table, stream-load load.csv (5 rows) into
// it, and snapshot the table contents via an order_qt_* tag.
suite("test_compression", "p0") {
    // Creates a duplicate-key table with the given compression codec and
    // stream-loads load.csv into it, asserting that all 5 rows load cleanly.
    // tName: table to (re)create; codec: value for the "compression" property.
    def loadWithCompression = { tName, codec ->
        sql """ DROP TABLE IF EXISTS ${tName} """
        sql """
            CREATE TABLE IF NOT EXISTS ${tName} (
                `k1` bigint(20) NULL,
                `k2` tinyint(4) NULL,
                `k3` smallint(6) NULL,
                `k4` int(11) NULL,
                `k5` bigint(20) NULL,
                `k6` largeint(40) NULL,
                `k7` datetime NULL,
                `k8` date NULL,
                `k9` char(10) NULL,
                `k10` varchar(6) NULL,
                `k11` decimal(27, 9) NULL
            ) ENGINE=OLAP
            DUPLICATE KEY(`k1`, `k2`)
            COMMENT 'OLAP'
            DISTRIBUTED BY HASH(`k1`, `k2`) BUCKETS 3
            PROPERTIES (
                "replication_allocation" = "tag.location.default: 1",
                "compression" = "${codec}"
            );
        """
        streamLoad {
            table "${tName}"
            set 'column_separator', ','
            set 'columns', 'k1, k2, k3, k4, k5, k6, k7, k8, k9, k10, k11'
            file 'load.csv'
            time 10000 // limit inflight 10s
            check { result, exception, startTime, endTime ->
                if (exception != null) {
                    throw exception
                }
                log.info("Stream load result: ${result}".toString())
                def json = parseJson(result)
                assertEquals("success", json.Status.toLowerCase())
                assertEquals(5, json.NumberTotalRows)
                assertEquals(0, json.NumberFilteredRows)
                assertEquals(0, json.NumberUnselectedRows)
            }
        }
        sql "sync"
    }

    // One case per codec. The order_qt_sql1..sql6 tags and their order must
    // stay exactly as before so the existing .out file still matches.
    loadWithCompression("test_snappy", "snappy")
    order_qt_sql1 "select * from test_snappy order by k1, k2"

    loadWithCompression("test_LZ4", "LZ4")
    order_qt_sql2 "select * from test_LZ4 order by k1, k2"

    loadWithCompression("test_LZ4F", "LZ4F")
    order_qt_sql3 "select * from test_LZ4F order by k1, k2"

    loadWithCompression("test_LZ4HC", "LZ4HC")
    order_qt_sql4 "select * from test_LZ4HC order by k1, k2"

    loadWithCompression("test_ZLIB", "ZLIB")
    order_qt_sql5 "select * from test_ZLIB order by k1, k2"

    loadWithCompression("test_ZSTD", "ZSTD")
    order_qt_sql6 "select * from test_ZSTD order by k1, k2"
}

View File

@@ -0,0 +1,179 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
// Larger-fixture regression test for every supported compression codec: for
// each codec, create a single-varchar-column duplicate-key table, stream-load
// ipv4.csv into it, and verify the loaded row count.
suite("test_compression_p1", "p1") {
    // Creates a duplicate-key table with the given compression codec,
    // stream-loads ipv4.csv into it, and asserts the expected row count.
    // tName: table to (re)create; codec: value for the "compression" property.
    def checkCompression = { tName, codec ->
        sql """ DROP TABLE IF EXISTS ${tName} """
        sql """
            CREATE TABLE IF NOT EXISTS ${tName} (
                `k1` varchar(40) NULL
            ) ENGINE=OLAP
            DUPLICATE KEY(`k1`)
            COMMENT 'OLAP'
            DISTRIBUTED BY HASH(`k1`) BUCKETS 1
            PROPERTIES ("replication_allocation" = "tag.location.default: 1",
            "compression" = "${codec}");
        """
        // default stream-load options; the fixture is expected to yield
        // 82845 rows regardless of the table's on-disk codec
        streamLoad {
            table "${tName}"
            file 'ipv4.csv'
        }
        sql "sync"
        def count = sql "select count(*) from ${tName} limit 10"
        assertEquals(82845, count[0][0])
    }

    // Every table is named "test_" + codec, so a single loop covers all
    // supported compression algorithms.
    for (codec in ["snappy", "LZ4", "LZ4F", "LZ4HC", "ZLIB", "ZSTD"]) {
        checkCompression("test_${codec}", codec)
    }
}