[improvement](test) add tpch1 orc for hive catalog and refactor some test dir (#14669)

Add tpch 1g orc test case in hive docker

Refactor some suites dir of catalog test cases.

Also append "-internal" to the DLF endpoint, to support accessing OSS within an Aliyun VPC.
This commit is contained in:
Mingyu Chen
2022-11-30 10:03:58 +08:00
committed by GitHub
parent 4faca56819
commit dd7ec8f4ca
66 changed files with 21402 additions and 214 deletions

View File

@ -72,7 +72,9 @@ header:
- "regression-test/script/README"
- "regression-test/suites/load_p0/stream_load/data"
- "docker/thirdparties/docker-compose/hive/scripts/README"
- "docker/thirdparties/docker-compose/hive/scripts/create.hql"
- "docker/thirdparties/docker-compose/hive/scripts/data_case/*/*"
- "docker/thirdparties/docker-compose/hive/scripts/create_preinstalled_table.hql"
- "docker/thirdparties/docker-compose/hive/scripts/create_tpch1_orc.hql"
- "docker/thirdparties/docker-compose/hive/scripts/create_tpch1_parquet.hql"
- "docker/thirdparties/docker-compose/hive/scripts/preinstalled_data/"
comment: on-failure

View File

@ -1,162 +1,5 @@
use default;
CREATE TABLE `customer`(
`c_custkey` int,
`c_name` string,
`c_address` string,
`c_nationkey` int,
`c_phone` string,
`c_acctbal` decimal(12,2),
`c_mktsegment` string,
`c_comment` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/tpch1.db/customer/'
TBLPROPERTIES (
'transient_lastDdlTime'='1661955829');
CREATE TABLE `lineitem`(
`l_orderkey` int,
`l_partkey` int,
`l_suppkey` int,
`l_linenumber` int,
`l_quantity` decimal(12,2),
`l_extendedprice` decimal(12,2),
`l_discount` decimal(12,2),
`l_tax` decimal(12,2),
`l_returnflag` string,
`l_linestatus` string,
`l_shipdate` date,
`l_commitdate` date,
`l_receiptdate` date,
`l_shipinstruct` string,
`l_shipmode` string,
`l_comment` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/tpch1.db/lineitem'
TBLPROPERTIES (
'transient_lastDdlTime'='1661955829');
CREATE TABLE `nation`(
`n_nationkey` int,
`n_name` string,
`n_regionkey` int,
`n_comment` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/tpch1.db/nation'
TBLPROPERTIES (
'transient_lastDdlTime'='1661955829');
CREATE TABLE `orders`(
`o_orderkey` int,
`o_custkey` int,
`o_orderstatus` string,
`o_totalprice` decimal(12,2),
`o_orderdate` date,
`o_orderpriority` string,
`o_clerk` string,
`o_shippriority` int,
`o_comment` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/tpch1.db/orders'
TBLPROPERTIES (
'transient_lastDdlTime'='1661955829');
CREATE TABLE `part`(
`p_partkey` int,
`p_name` string,
`p_mfgr` string,
`p_brand` string,
`p_type` string,
`p_size` int,
`p_container` string,
`p_retailprice` decimal(12,2),
`p_comment` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/tpch1.db/part'
TBLPROPERTIES (
'transient_lastDdlTime'='1661955829');
CREATE TABLE `partsupp`(
`ps_partkey` int,
`ps_suppkey` int,
`ps_availqty` int,
`ps_supplycost` decimal(12,2),
`ps_comment` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/tpch1.db/partsupp'
TBLPROPERTIES (
'transient_lastDdlTime'='1661955829');
CREATE TABLE `region`(
`r_regionkey` int,
`r_name` string,
`r_comment` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/tpch1.db/region'
TBLPROPERTIES (
'transient_lastDdlTime'='1661955829');
CREATE TABLE `supplier`(
`s_suppkey` int,
`s_name` string,
`s_address` string,
`s_nationkey` int,
`s_phone` string,
`s_acctbal` decimal(12,2),
`s_comment` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/tpch1.db/supplier'
TBLPROPERTIES (
'transient_lastDdlTime'='1661955829');
CREATE TABLE `partition_table`(
`l_orderkey` int,
`l_partkey` int,
@ -182,7 +25,7 @@ STORED AS INPUTFORMAT
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/parquet/partition_table'
'/user/doris/preinstalled_data/parquet/partition_table'
TBLPROPERTIES (
'transient_lastDdlTime'='1661955829');
@ -215,7 +58,7 @@ STORED AS INPUTFORMAT
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
'/user/doris/orc/orc_all_types';
'/user/doris/preinstalled_data/orc/orc_all_types';
msck repair table orc_all_types;
@ -228,7 +71,7 @@ CREATE TABLE `student` (
phone varchar(50)
)
ROW FORMAT DELIMITED FIELDS TERMINATED by ','
LOCATION '/user/doris/data_case/student'
LOCATION '/user/doris/preinstalled_data/data_case/student'
TBLPROPERTIES ('transient_lastDdlTime'='1658816839');
CREATE TABLE `lineorder` (
@ -251,7 +94,7 @@ CREATE TABLE `lineorder` (
`lo_shipmode` varchar(11)
)
ROW FORMAT DELIMITED FIELDS TERMINATED by ','
LOCATION '/user/doris/data_case/lineorder'
LOCATION '/user/doris/preinstalled_data/data_case/lineorder'
TBLPROPERTIES ('transient_lastDdlTime'='1658816839');
CREATE TABLE `test1` (
@ -262,7 +105,7 @@ CREATE TABLE `test1` (
col_5 varchar(20)
)
ROW FORMAT DELIMITED FIELDS TERMINATED by ','
LOCATION '/user/doris/data_case/test1'
LOCATION '/user/doris/preinstalled_data/data_case/test1'
TBLPROPERTIES ('transient_lastDdlTime'='1658816839');
CREATE TABLE `string_table` (
@ -277,7 +120,7 @@ CREATE TABLE `string_table` (
p_comment string
)
ROW FORMAT DELIMITED FIELDS TERMINATED by ','
LOCATION '/user/doris/data_case/string_table'
LOCATION '/user/doris/preinstalled_data/data_case/string_table'
TBLPROPERTIES ('transient_lastDdlTime'='1658816839');
CREATE TABLE `account_fund` (
@ -298,7 +141,7 @@ STORED AS INPUTFORMAT
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/data_case/account_fund'
'/user/doris/preinstalled_data/data_case/account_fund'
TBLPROPERTIES ('transient_lastDdlTime'='1658816839');
create table sale_table (
@ -315,7 +158,7 @@ STORED AS INPUTFORMAT
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
'/user/doris/data_case/sale_table'
'/user/doris/preinstalled_data/data_case/sale_table'
TBLPROPERTIES ('transient_lastDdlTime'='1658816839');
create table t_hive (
@ -331,7 +174,7 @@ STORED AS INPUTFORMAT
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/data_case/t_hive'
'/user/doris/preinstalled_data/data_case/t_hive'
TBLPROPERTIES ('transient_lastDdlTime'='1658816839');
create table hive01 (
@ -342,7 +185,7 @@ create table hive01 (
)
ROW FORMAT DELIMITED FIELDS TERMINATED by ','
LOCATION
'/user/doris/data_case/hive01'
'/user/doris/preinstalled_data/data_case/hive01'
TBLPROPERTIES ('transient_lastDdlTime'='1658816839');
CREATE TABLE test2 (
@ -354,7 +197,7 @@ dt date
)
row format delimited fields terminated by ','
stored as textfile
LOCATION '/user/doris/data_case/test2'
LOCATION '/user/doris/preinstalled_data/data_case/test2'
TBLPROPERTIES ('transient_lastDdlTime'='1658816839');
create table test_hive_doris(
@ -363,7 +206,7 @@ age varchar(100)
)
row format delimited fields terminated by ','
stored as textfile
LOCATION '/user/doris/data_case/test_hive_doris'
LOCATION '/user/doris/preinstalled_data/data_case/test_hive_doris'
TBLPROPERTIES ('transient_lastDdlTime'='1658816839');
CREATE external TABLE `table_with_vertical_line`(
@ -394,7 +237,7 @@ STORED AS INPUTFORMAT
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'/user/doris/csv_partition_table/table_with_vertical_line/'
'/user/doris/preinstalled_data/csv_partition_table/table_with_vertical_line/'
TBLPROPERTIES (
'transient_lastDdlTime'='1669304897');
@ -425,11 +268,10 @@ STORED AS INPUTFORMAT
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'/user/doris/csv_partition_table/table_with_x01/'
'/user/doris/preinstalled_data/csv_partition_table/table_with_x01/'
TBLPROPERTIES (
'transient_lastDdlTime'='1669360080');
msck repair table table_with_x01;
show tables;

View File

@ -0,0 +1,183 @@
-- Creates the TPC-H scale-factor-1 test database backed by snappy-compressed
-- ORC files preinstalled in HDFS for the Hive docker regression environment.
-- Each table reads its data from /user/doris/tpch1.db/tpch1_orc/<table>.
-- NOTE: the 'path' serde property now matches each table's own LOCATION
-- (previously every table was copy-pasted to point at .../customer).
create database tpch1_orc;
use tpch1_orc;
CREATE TABLE `customer`(
  `c_custkey` int,
  `c_name` string,
  `c_address` string,
  `c_nationkey` int,
  `c_phone` string,
  `c_acctbal` decimal(12,2),
  `c_mktsegment` string,
  `c_comment` string)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
WITH SERDEPROPERTIES (
  'orc.compress'='snappy',
  'path'='/user/doris/tpch1.db/tpch1_orc/customer')
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
  '/user/doris/tpch1.db/tpch1_orc/customer/'
TBLPROPERTIES (
  'transient_lastDdlTime'='1661955829');
CREATE TABLE `lineitem`(
  `l_orderkey` int,
  `l_partkey` int,
  `l_suppkey` int,
  `l_linenumber` int,
  `l_quantity` decimal(12,2),
  `l_extendedprice` decimal(12,2),
  `l_discount` decimal(12,2),
  `l_tax` decimal(12,2),
  `l_returnflag` string,
  `l_linestatus` string,
  `l_shipdate` date,
  `l_commitdate` date,
  `l_receiptdate` date,
  `l_shipinstruct` string,
  `l_shipmode` string,
  `l_comment` string)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
WITH SERDEPROPERTIES (
  'orc.compress'='snappy',
  'path'='/user/doris/tpch1.db/tpch1_orc/lineitem')
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
  '/user/doris/tpch1.db/tpch1_orc/lineitem'
TBLPROPERTIES (
  'transient_lastDdlTime'='1661955829');
CREATE TABLE `nation`(
  `n_nationkey` int,
  `n_name` string,
  `n_regionkey` int,
  `n_comment` string)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
WITH SERDEPROPERTIES (
  'orc.compress'='snappy',
  'path'='/user/doris/tpch1.db/tpch1_orc/nation')
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
  '/user/doris/tpch1.db/tpch1_orc/nation'
TBLPROPERTIES (
  'transient_lastDdlTime'='1661955829');
CREATE TABLE `orders`(
  `o_orderkey` int,
  `o_custkey` int,
  `o_orderstatus` string,
  `o_totalprice` decimal(12,2),
  `o_orderdate` date,
  `o_orderpriority` string,
  `o_clerk` string,
  `o_shippriority` int,
  `o_comment` string)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
WITH SERDEPROPERTIES (
  'orc.compress'='snappy',
  'path'='/user/doris/tpch1.db/tpch1_orc/orders')
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
  '/user/doris/tpch1.db/tpch1_orc/orders'
TBLPROPERTIES (
  'transient_lastDdlTime'='1661955829');
CREATE TABLE `part`(
  `p_partkey` int,
  `p_name` string,
  `p_mfgr` string,
  `p_brand` string,
  `p_type` string,
  `p_size` int,
  `p_container` string,
  `p_retailprice` decimal(12,2),
  `p_comment` string)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
WITH SERDEPROPERTIES (
  'orc.compress'='snappy',
  'path'='/user/doris/tpch1.db/tpch1_orc/part')
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
  '/user/doris/tpch1.db/tpch1_orc/part'
TBLPROPERTIES (
  'transient_lastDdlTime'='1661955829');
CREATE TABLE `partsupp`(
  `ps_partkey` int,
  `ps_suppkey` int,
  `ps_availqty` int,
  `ps_supplycost` decimal(12,2),
  `ps_comment` string)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
WITH SERDEPROPERTIES (
  'orc.compress'='snappy',
  'path'='/user/doris/tpch1.db/tpch1_orc/partsupp')
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
  '/user/doris/tpch1.db/tpch1_orc/partsupp'
TBLPROPERTIES (
  'transient_lastDdlTime'='1661955829');
CREATE TABLE `region`(
  `r_regionkey` int,
  `r_name` string,
  `r_comment` string)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
WITH SERDEPROPERTIES (
  'orc.compress'='snappy',
  'path'='/user/doris/tpch1.db/tpch1_orc/region')
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
  '/user/doris/tpch1.db/tpch1_orc/region'
TBLPROPERTIES (
  'transient_lastDdlTime'='1661955829');
CREATE TABLE `supplier`(
  `s_suppkey` int,
  `s_name` string,
  `s_address` string,
  `s_nationkey` int,
  `s_phone` string,
  `s_acctbal` decimal(12,2),
  `s_comment` string)
ROW FORMAT SERDE
  'org.apache.hadoop.hive.ql.io.orc.OrcSerde'
WITH SERDEPROPERTIES (
  'orc.compress'='snappy',
  'path'='/user/doris/tpch1.db/tpch1_orc/supplier')
STORED AS INPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcInputFormat'
OUTPUTFORMAT
  'org.apache.hadoop.hive.ql.io.orc.OrcOutputFormat'
LOCATION
  '/user/doris/tpch1.db/tpch1_orc/supplier'
TBLPROPERTIES (
  'transient_lastDdlTime'='1661955829');

View File

@ -0,0 +1,160 @@
create database tpch1_parquet;
use tpch1_parquet;
CREATE TABLE `customer`(
`c_custkey` int,
`c_name` string,
`c_address` string,
`c_nationkey` int,
`c_phone` string,
`c_acctbal` decimal(12,2),
`c_mktsegment` string,
`c_comment` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/tpch1.db/tpch1_parquet/customer/'
TBLPROPERTIES (
'transient_lastDdlTime'='1661955829');
CREATE TABLE `lineitem`(
`l_orderkey` int,
`l_partkey` int,
`l_suppkey` int,
`l_linenumber` int,
`l_quantity` decimal(12,2),
`l_extendedprice` decimal(12,2),
`l_discount` decimal(12,2),
`l_tax` decimal(12,2),
`l_returnflag` string,
`l_linestatus` string,
`l_shipdate` date,
`l_commitdate` date,
`l_receiptdate` date,
`l_shipinstruct` string,
`l_shipmode` string,
`l_comment` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/tpch1.db/tpch1_parquet/lineitem'
TBLPROPERTIES (
'transient_lastDdlTime'='1661955829');
CREATE TABLE `nation`(
`n_nationkey` int,
`n_name` string,
`n_regionkey` int,
`n_comment` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/tpch1.db/tpch1_parquet/nation'
TBLPROPERTIES (
'transient_lastDdlTime'='1661955829');
CREATE TABLE `orders`(
`o_orderkey` int,
`o_custkey` int,
`o_orderstatus` string,
`o_totalprice` decimal(12,2),
`o_orderdate` date,
`o_orderpriority` string,
`o_clerk` string,
`o_shippriority` int,
`o_comment` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/tpch1.db/tpch1_parquet/orders'
TBLPROPERTIES (
'transient_lastDdlTime'='1661955829');
CREATE TABLE `part`(
`p_partkey` int,
`p_name` string,
`p_mfgr` string,
`p_brand` string,
`p_type` string,
`p_size` int,
`p_container` string,
`p_retailprice` decimal(12,2),
`p_comment` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/tpch1.db/tpch1_parquet/part'
TBLPROPERTIES (
'transient_lastDdlTime'='1661955829');
CREATE TABLE `partsupp`(
`ps_partkey` int,
`ps_suppkey` int,
`ps_availqty` int,
`ps_supplycost` decimal(12,2),
`ps_comment` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/tpch1.db/tpch1_parquet/partsupp'
TBLPROPERTIES (
'transient_lastDdlTime'='1661955829');
CREATE TABLE `region`(
`r_regionkey` int,
`r_name` string,
`r_comment` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/tpch1.db/tpch1_parquet/region'
TBLPROPERTIES (
'transient_lastDdlTime'='1661955829');
CREATE TABLE `supplier`(
`s_suppkey` int,
`s_name` string,
`s_address` string,
`s_nationkey` int,
`s_phone` string,
`s_acctbal` decimal(12,2),
`s_comment` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/tpch1.db/tpch1_parquet/supplier'
TBLPROPERTIES (
'transient_lastDdlTime'='1661955829');

View File

@ -24,31 +24,25 @@ if [[ ! -d "/mnt/scripts/tpch1.db" ]]; then
fi
# put data file
## put tpch1
echo "hadoop fs -mkdir /user/doris/"
hadoop fs -mkdir -p /user/doris/
echo "hadoop fs -put /mnt/scripts/tpch1.db /user/doris/"
hadoop fs -put /mnt/scripts/tpch1.db /user/doris/
echo "hadoop fs -put /mnt/scripts/json_format_test /user/doris/"
hadoop fs -put /mnt/scripts/json_format_test /user/doris/
echo "hadoop fs -put /mnt/scripts/parquet /user/doris/"
hadoop fs -put /mnt/scripts/parquet /user/doris/
echo "hadoop fs -put /mnt/scripts/orc /user/doris/"
hadoop fs -put /mnt/scripts/orc /user/doris/
echo "hadoop fs -put /mnt/scripts/csv_format_test /user/doris/"
hadoop fs -put /mnt/scripts/csv_format_test /user/doris/
echo "hadoop fs -put /mnt/scripts/data_case /user/doris/"
hadoop fs -put /mnt/scripts/data_case /user/doris/
echo "hadoop fs -mkdir -p /user/doris/csv_partition_table"
hadoop fs -mkdir -p /user/doris/csv_partition_table
echo "hadoop fs -put /mnt/scripts/csv_partition_table/table_with_vertical_line/ /user/doris/csv_partition_table/"
hadoop fs -put /mnt/scripts/csv_partition_table/table_with_vertical_line/ /user/doris/csv_partition_table/
echo "hadoop fs -put /mnt/scripts/csv_partition_table/table_with_x01/ /user/doris/csv_partition_table/"
hadoop fs -put /mnt/scripts/csv_partition_table/table_with_x01/ /user/doris/csv_partition_table/
## put other preinstalled data
echo "hadoop fs -put /mnt/scripts/preinstalled_data /user/doris/"
hadoop fs -put /mnt/scripts/preinstalled_data /user/doris/
# create table
echo "hive -f /mnt/scripts/create.hql"
hive -f /mnt/scripts/create.hql
echo "hive -f /mnt/scripts/create_tpch1_orc.hql"
hive -f /mnt/scripts/create_tpch1_orc.hql
echo "hive -f /mnt/scripts/create_tpch1_parquet.hql"
hive -f /mnt/scripts/create_tpch1_parquet.hql
echo "hive -f /mnt/scripts/create_preinstalled_table.hql"
hive -f /mnt/scripts/create_preinstalled_table.hql
echo "touch /mnt/SUCCESS"
touch /mnt/SUCCESS

View File

@ -645,7 +645,11 @@ Doris 支持一些外部署数据源的查询。所以回归框架也提供了
4. Elasticsearch
TODO
包括 ES6,ES7,ES8 三个版本的 docker 镜像。docker/thirdparties/docker-compose/elasticsearch/ 下。
* `es.yaml`:Docker compose文件。包括 ES6,ES7,ES8 三个版本。无需修改。
* `es.env`:配置文件,需配置 ES 的端口号。
* `scripts` 目录下存放了启动镜像后的初始化脚本。
2. 运行回归测试
@ -655,6 +659,10 @@ Doris 支持一些外部署数据源的查询。所以回归框架也提供了
* `mysql_57_port` 和 `pg_14_port` 分别对应 MySQL 和 Postgresql 的对外端口,默认为 3316 和 5442。
* `enableHiveTest`:开启 hive 外表测试,需要启动 hive 的 container。
* `hms_port` 对应 hive metastore 的对外端口,默认为 9183。
* `enableEsTest`:开启 es 外表测试。需要启动 es 的 container。
* `es_6_port`:ES6 的端口。
* `es_7_port`:ES7 的端口。
* `es_8_port`:ES8 的端口。
3. 如何在同一台机器上启动多套 Container。

View File

@ -897,8 +897,11 @@ public class HiveMetaStoreClientHelper {
// 1. region and endpoint. eg: cn-beijing
String region = hiveConf.get("dlf.catalog.region");
if (!Strings.isNullOrEmpty(region)) {
// See: https://help.aliyun.com/document_detail/31837.html
// And add "-internal" to access oss within vpc
// TODO: find to way to access oss on public?
res.put(HiveTable.AWS_REGION, "oss-" + region);
res.put(HiveTable.S3_ENDPOINT, "http://oss-" + region + ".aliyuncs.com");
res.put(HiveTable.S3_ENDPOINT, "http://oss-" + region + "-internal.aliyuncs.com");
}
// 2. ak and sk

File diff suppressed because it is too large Load Diff

View File

@ -29,7 +29,7 @@ suite("test_hdfs_tvf") {
sql """ADMIN SET FRONTEND CONFIG ("enable_new_load_scan_node" = "true");"""
// test csv format
uri = "${defaultFS}" + "/user/doris/csv_format_test/all_types.csv"
uri = "${defaultFS}" + "/user/doris/preinstalled_data/csv_format_test/all_types.csv"
format = "csv"
qt_csv_all_types """ select * from HDFS(
"uri" = "${uri}",
@ -38,7 +38,7 @@ suite("test_hdfs_tvf") {
"format" = "${format}") order by c1; """
uri = "${defaultFS}" + "/user/doris/csv_format_test/student.csv"
uri = "${defaultFS}" + "/user/doris/preinstalled_data/csv_format_test/student.csv"
format = "csv"
qt_csv_student """ select cast(c1 as INT) as id, c2 as name, c3 as age from HDFS(
"uri" = "${uri}",
@ -46,7 +46,7 @@ suite("test_hdfs_tvf") {
"hadoop.username" = "${hdfsUserName}",
"format" = "${format}") order by id; """
uri = "${defaultFS}" + "/user/doris/csv_format_test/array_malformat.csv"
uri = "${defaultFS}" + "/user/doris/preinstalled_data/csv_format_test/array_malformat.csv"
format = "csv"
qt_csv_array_malformat """ select * from HDFS(
"URI" = "${uri}",
@ -55,7 +55,7 @@ suite("test_hdfs_tvf") {
"format" = "csv",
"column_separator" = "|") order by c1; """
uri = "${defaultFS}" + "/user/doris/csv_format_test/array_normal.csv"
uri = "${defaultFS}" + "/user/doris/preinstalled_data/csv_format_test/array_normal.csv"
format = "csv"
qt_csv_array_normal """ select * from HDFS("URI" = "${uri}",
"fs.defaultFS"= "${defaultFS}",
@ -64,7 +64,7 @@ suite("test_hdfs_tvf") {
"column_separator" = "|") order by c1; """
// test csv_with_names file format
uri = "${defaultFS}" + "/user/doris/csv_format_test/student_with_names.csv"
uri = "${defaultFS}" + "/user/doris/preinstalled_data/csv_format_test/student_with_names.csv"
format = "csv_with_names"
qt_csv_names """ select cast(id as INT) as id, name, age from HDFS(
"uri" = "${uri}",
@ -73,7 +73,7 @@ suite("test_hdfs_tvf") {
"format" = "${format}") order by id; """
// test csv_with_names_and_types file format
uri = "${defaultFS}" + "/user/doris/csv_format_test/student_with_names_and_types.csv"
uri = "${defaultFS}" + "/user/doris/preinstalled_data/csv_format_test/student_with_names_and_types.csv"
format = "csv_with_names_and_types"
qt_csv_names_types """ select cast(id as INT) as id, name, age from HDFS(
"uri" = "${uri}",
@ -83,7 +83,7 @@ suite("test_hdfs_tvf") {
// test parquet
uri = "${defaultFS}" + "/user/doris/tpch1.db/hdfs_tvf/test_parquet.snappy.parquet"
uri = "${defaultFS}" + "/user/doris/preinstalled_data/hdfs_tvf/test_parquet.snappy.parquet"
format = "parquet"
qt_parquet """ select * from HDFS(
"uri" = "${uri}",
@ -92,7 +92,7 @@ suite("test_hdfs_tvf") {
"format" = "${format}") order by s_suppkey limit 20; """
// test orc
uri = "${defaultFS}" + "/user/doris/tpch1.db/hdfs_tvf/test_orc.snappy.orc"
uri = "${defaultFS}" + "/user/doris/preinstalled_data/hdfs_tvf/test_orc.snappy.orc"
format = "orc"
qt_orc """ select * from HDFS(
"uri" = "${uri}",
@ -102,7 +102,7 @@ suite("test_hdfs_tvf") {
// test json format
uri = "${defaultFS}" + "/user/doris/json_format_test/simple_object_json.json"
uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/simple_object_json.json"
format = "json"
qt_json """ select * from HDFS(
"uri" = "${uri}",
@ -113,7 +113,7 @@ suite("test_hdfs_tvf") {
"read_json_by_line" = "true") order by id; """
// test json root
uri = "${defaultFS}" + "/user/doris/json_format_test/nest_json.json"
uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/nest_json.json"
format = "json"
qt_json_root """ select cast(id as INT) as id, city, cast(code as INT) as code from HDFS(
"uri" = "${uri}",
@ -125,7 +125,7 @@ suite("test_hdfs_tvf") {
"json_root" = "\$.item") order by id; """
// test json paths
uri = "${defaultFS}" + "/user/doris/json_format_test/simple_object_json.json"
uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/simple_object_json.json"
format = "json"
qt_json_paths """ select cast(id as INT) as id, cast(code as INT) as code from HDFS(
"uri" = "${uri}",
@ -137,7 +137,7 @@ suite("test_hdfs_tvf") {
"jsonpaths" = "[\\"\$.id\\", \\"\$.code\\"]") order by id; """
// test non read_json_by_line
uri = "${defaultFS}" + "/user/doris/json_format_test/one_array_json.json"
uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/one_array_json.json"
format = "json"
qt_one_array """ select cast(id as INT) as id, city, cast(code as INT) as code from HDFS(
"uri" = "${uri}",
@ -149,7 +149,7 @@ suite("test_hdfs_tvf") {
// test cast to int
uri = "${defaultFS}" + "/user/doris/json_format_test/simple_object_json.json"
uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/simple_object_json.json"
format = "json"
qt_cast """ select cast(id as INT) as id, city, cast(code as INT) as code from HDFS(
"uri" = "${uri}",
@ -176,7 +176,7 @@ suite("test_hdfs_tvf") {
assertTrue(result1[0].size() == 1)
assertTrue(result1[0][0] == 0, "Create table should update 0 rows")
uri = "${defaultFS}" + "/user/doris/json_format_test/nest_json.json"
uri = "${defaultFS}" + "/user/doris/preinstalled_data/json_format_test/nest_json.json"
format = "json"
def result2 = sql """ insert into ${testTable}(id,city,code)
select cast (id as INT) as id, city, cast (code as INT) as code
@ -196,4 +196,4 @@ suite("test_hdfs_tvf") {
sql """ADMIN SET FRONTEND CONFIG ("enable_new_load_scan_node" = "false");"""
}
}
}
}

View File

@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
suite("hive_catalog_case", "p0") {
suite("test_hive_other", "p0") {
def q01 = {
qt_q24 """ select name, count(1) as c from student group by name order by c desc;"""

View File

@ -47,7 +47,7 @@ suite("test_hdfs_json_load", "p0") {
// should be delete after new_load_scan is ready
sql """ADMIN SET FRONTEND CONFIG ("enable_new_load_scan_node" = "${new_json_reader_flag}");"""
def hdfsFilePath = "${fsPath}/user/doris/json_format_test/${fileName}"
def hdfsFilePath = "${fsPath}/user/doris/preinstalled_data/json_format_test/${fileName}"
def result1= sql """
LOAD LABEL ${label} (
DATA INFILE("${hdfsFilePath}")

View File

@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
suite("test_catalog_hive", "p0") {
suite("test_catalog_hive_orc", "p0") {
def q01 = {
sql """set exec_mem_limit=8589934592"""
@ -835,8 +835,7 @@ order by
);
"""
sql """switch hive"""
sql """use `default`"""
// order_qt_show_tables """show tables"""
sql """use `tpch1_orc`"""
q01()
q02()

View File

@ -0,0 +1,866 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
suite("test_catalog_hive_parquet", "p0") {
def q01 = {
sql """set exec_mem_limit=8589934592"""
sql """set parallel_fragment_exec_instance_num=8"""
sql """set disable_join_reorder=false"""
sql """set enable_cost_based_join_reorder=false"""
qt_q01 """
select
l_returnflag,
l_linestatus,
sum(l_quantity) as sum_qty,
sum(l_extendedprice) as sum_base_price,
sum(l_extendedprice * (1 - l_discount)) as sum_disc_price,
sum(l_extendedprice * (1 - l_discount) * (1 + l_tax)) as sum_charge,
avg(l_quantity) as avg_qty,
avg(l_extendedprice) as avg_price,
avg(l_discount) as avg_disc,
count(*) as count_order
from
lineitem
where
l_shipdate <= date '1998-12-01' - interval '90' day
group by
l_returnflag,
l_linestatus
order by
l_returnflag,
l_linestatus;
"""
}
def q02 = {
sql """set exec_mem_limit=8589934592"""
sql """set parallel_fragment_exec_instance_num=2"""
sql """set disable_join_reorder=true"""
sql """set enable_cost_based_join_reorder=false"""
sql """set enable_projection=true"""
qt_q02 """
select
s_acctbal,
s_name,
n_name,
p_partkey,
p_mfgr,
s_address,
s_phone,
s_comment
from
partsupp,
(
select ps_partkey, min(ps_supplycost) as ps_s from
partsupp, supplier, nation, region
where s_suppkey = ps_suppkey
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = 'EUROPE'
group by ps_partkey
) t1,
supplier,
part,
nation,
region
where p_partkey = t1.ps_partkey
and p_partkey = partsupp.ps_partkey
and s_suppkey = ps_suppkey
and p_size = 15
and p_type like '%BRASS'
and s_nationkey = n_nationkey
and n_regionkey = r_regionkey
and r_name = 'EUROPE'
and ps_supplycost = t1.ps_s
order by
s_acctbal desc,
n_name,
s_name,
p_partkey
limit 100;
"""
}
def q03 = {
sql """set exec_mem_limit=8589934592"""
sql """set parallel_fragment_exec_instance_num=8"""
sql """set disable_join_reorder=true"""
sql """set enable_cost_based_join_reorder=false"""
sql """set enable_projection=true"""
qt_q03 """
select
l_orderkey,
sum(l_extendedprice * (1 - l_discount)) as revenue,
o_orderdate,
o_shippriority
from
(
select l_orderkey, l_extendedprice, l_discount, o_orderdate, o_shippriority, o_custkey from
lineitem join[shuffle] orders
where l_orderkey = o_orderkey
and o_orderdate < date '1995-03-15'
and l_shipdate > date '1995-03-15'
) t1 join[shuffle] customer c
on c.c_custkey = t1.o_custkey
where c_mktsegment = 'BUILDING'
group by
l_orderkey,
o_orderdate,
o_shippriority
order by
revenue desc,
o_orderdate
limit 10;
"""
}
def q04 = {
sql """set exec_mem_limit=8589934592"""
sql """set parallel_fragment_exec_instance_num=1"""
sql """set disable_join_reorder=true"""
sql """set enable_cost_based_join_reorder=false"""
qt_q04 """
select
o_orderpriority,
count(*) as order_count
from
(
select
*
from
lineitem
where l_commitdate < l_receiptdate
) t1
right semi join orders
on t1.l_orderkey = o_orderkey
where
o_orderdate >= date '1993-07-01'
and o_orderdate < date '1993-07-01' + interval '3' month
group by
o_orderpriority
order by
o_orderpriority;
"""
}
// TPC-H Q5 (local supplier volume) for the 'ASIA' region, 1994, written
// as an explicit join chain with shuffle hints on the two largest joins.
def q05 = {
    sql """set exec_mem_limit=8589934592"""
    sql """set parallel_fragment_exec_instance_num=8"""
    sql """set disable_join_reorder=true"""
    sql """set enable_cost_based_join_reorder=false"""
    qt_q05 """
select
    n_name,
    sum(l_extendedprice * (1 - l_discount)) as revenue
from
    lineitem
    join[shuffle] orders on l_orderkey = o_orderkey and o_orderdate >= date '1994-01-01' and o_orderdate < date '1994-01-01' + interval '1' year
    join[shuffle] customer on c_custkey = o_custkey
    join supplier on l_suppkey = s_suppkey and c_nationkey = s_nationkey
    join nation on s_nationkey = n_nationkey
    join region on n_regionkey = r_regionkey and r_name = 'ASIA'
group by n_name
order by revenue desc;
"""
}
// TPC-H Q6 (forecasting revenue change): single-table scan/aggregate on
// lineitem with half-open date range; no joins, so reorder flags are moot.
def q06 = {
    sql """set exec_mem_limit=8589934592"""
    sql """set parallel_fragment_exec_instance_num=1"""
    sql """set disable_join_reorder=false"""
    sql """set enable_cost_based_join_reorder=false"""
    qt_q06 """
select
    sum(l_extendedprice * l_discount) as revenue
from
    lineitem
where
    l_shipdate >= date '1994-01-01'
    and l_shipdate < date '1994-01-01' + interval '1' year
    and l_discount between .06 - 0.01 and .06 + 0.01
    and l_quantity < 24;
"""
}
// TPC-H Q7 (volume shipping) between FRANCE and GERMANY, 1995-1996.
// Join order is pinned; the nation-pair disjunction is attached to the
// n2 join's ON clause.
def q07 = {
    sql """set exec_mem_limit=8589934592"""
    sql """set parallel_fragment_exec_instance_num=4"""
    sql """set disable_join_reorder=true"""
    sql """set enable_cost_based_join_reorder=false"""
    qt_q07 """
select
    supp_nation,
    cust_nation,
    l_year,
    sum(volume) as revenue
from
    (
        select
            n1.n_name as supp_nation,
            n2.n_name as cust_nation,
            extract(year from l_shipdate) as l_year,
            l_extendedprice * (1 - l_discount) as volume
        from
            lineitem join[shuffle] orders on o_orderkey = l_orderkey and l_shipdate between date '1995-01-01' and date '1996-12-31'
            join[shuffle] customer on c_custkey = o_custkey
            join[shuffle] supplier on s_suppkey = l_suppkey
            join nation n1 on s_nationkey = n1.n_nationkey
            join nation n2 on c_nationkey = n2.n_nationkey
            and (
                (n1.n_name = 'FRANCE' and n2.n_name = 'GERMANY')
                or (n1.n_name = 'GERMANY' and n2.n_name = 'FRANCE')
            )
    ) as shipping
group by
    supp_nation,
    cust_nation,
    l_year
order by
    supp_nation,
    cust_nation,
    l_year;
"""
}
// TPC-H Q8 (national market share): BRAZIL's share of ECONOMY ANODIZED
// STEEL volume in AMERICA, kept in the benchmark's original comma-join
// form (join reorder disabled, single instance).
def q08 = {
    sql """set exec_mem_limit=8589934592"""
    sql """set parallel_fragment_exec_instance_num=1"""
    sql """set disable_join_reorder=true"""
    sql """set enable_cost_based_join_reorder=false"""
    qt_q08 """
select
    o_year,
    sum(case
        when nation = 'BRAZIL' then volume
        else 0
    end) / sum(volume) as mkt_share
from
    (
        select
            extract(year from o_orderdate) as o_year,
            l_extendedprice * (1 - l_discount) as volume,
            n2.n_name as nation
        from
            lineitem,
            orders,
            customer,
            supplier,
            part,
            nation n1,
            nation n2,
            region
        where
            p_partkey = l_partkey
            and s_suppkey = l_suppkey
            and l_orderkey = o_orderkey
            and o_custkey = c_custkey
            and c_nationkey = n1.n_nationkey
            and n1.n_regionkey = r_regionkey
            and r_name = 'AMERICA'
            and s_nationkey = n2.n_nationkey
            and o_orderdate between date '1995-01-01' and date '1996-12-31'
            and p_type = 'ECONOMY ANODIZED STEEL'
    ) as all_nations
group by
    o_year
order by
    o_year;
"""
}
// TPC-H Q9 (product type profit) for parts matching '%green%', with a
// hand-ordered shuffle-join chain (reorder disabled).
def q09 = {
    sql """set exec_mem_limit=8589934592"""
    sql """set parallel_fragment_exec_instance_num=4"""
    sql """set disable_join_reorder=true"""
    sql """set enable_cost_based_join_reorder=false"""
    qt_q09 """
select
    nation,
    o_year,
    sum(amount) as sum_profit
from
    (
        select
            n_name as nation,
            extract(year from o_orderdate) as o_year,
            l_extendedprice * (1 - l_discount) - ps_supplycost * l_quantity as amount
        from
            lineitem join[shuffle] orders on o_orderkey = l_orderkey
            join[shuffle] partsupp on ps_suppkey = l_suppkey and ps_partkey = l_partkey
            join[shuffle] part on p_partkey = l_partkey and p_name like '%green%'
            join supplier on s_suppkey = l_suppkey
            join nation on s_nationkey = n_nationkey
    ) as profit
group by
    nation,
    o_year
order by
    nation,
    o_year desc;
"""
}
// TPC-H Q10 (returned item reporting): top 20 customers by lost revenue
// on returned ('R') items in 1993Q4. The lineitem/orders join is
// pre-filtered in a derived table before joining customer and nation.
def q10 = {
    sql """set exec_mem_limit=8589934592"""
    sql """set parallel_fragment_exec_instance_num=8"""
    sql """set disable_join_reorder=true"""
    sql """set enable_cost_based_join_reorder=false"""
    qt_q10 """
select
    c_custkey,
    c_name,
    sum(t1.l_extendedprice * (1 - t1.l_discount)) as revenue,
    c_acctbal,
    n_name,
    c_address,
    c_phone,
    c_comment
from
    customer join[shuffle]
    (
        select o_custkey,l_extendedprice,l_discount from lineitem join[shuffle] orders
        where l_orderkey = o_orderkey
        and o_orderdate >= date '1993-10-01'
        and o_orderdate < date '1993-10-01' + interval '3' month
        and l_returnflag = 'R'
    ) t1,
    nation
where
    c_custkey = t1.o_custkey
    and c_nationkey = n_nationkey
group by
    c_custkey,
    c_name,
    c_acctbal,
    c_phone,
    n_name,
    c_address,
    c_comment
order by
    revenue desc
limit 20;
"""
}
// TPC-H Q11 (important stock identification): GERMANY partsupp value
// above a global-fraction threshold computed by an uncorrelated scalar
// subquery in HAVING. Optimizer reordering is left enabled here.
def q11 = {
    sql """set exec_mem_limit=8589934592"""
    sql """set parallel_fragment_exec_instance_num=2"""
    sql """set disable_join_reorder=false"""
    sql """set enable_cost_based_join_reorder=false"""
    qt_q11 """
select
    ps_partkey,
    sum(ps_supplycost * ps_availqty) as value
from
    partsupp,
    supplier,
    nation
where
    ps_suppkey = s_suppkey
    and s_nationkey = n_nationkey
    and n_name = 'GERMANY'
group by
    ps_partkey having
        sum(ps_supplycost * ps_availqty) > (
            select
                sum(ps_supplycost * ps_availqty) * 0.0001000000
            from
                partsupp,
                supplier,
                nation
            where
                ps_suppkey = s_suppkey
                and s_nationkey = n_nationkey
                and n_name = 'GERMANY'
        )
order by
    value desc;
"""
}
// TPC-H Q12 (shipping modes and order priority): conditional counts of
// high/low priority lines received in 1994 via MAIL or SHIP. This one
// exercises the cost-based join reorder path.
def q12 = {
    sql """set exec_mem_limit=8589934592"""
    sql """set parallel_fragment_exec_instance_num=2"""
    sql """set disable_join_reorder=false"""
    sql """set enable_cost_based_join_reorder=true"""
    qt_q12 """
select
    l_shipmode,
    sum(case
        when o_orderpriority = '1-URGENT'
            or o_orderpriority = '2-HIGH'
            then 1
        else 0
    end) as high_line_count,
    sum(case
        when o_orderpriority <> '1-URGENT'
            and o_orderpriority <> '2-HIGH'
            then 1
        else 0
    end) as low_line_count
from
    orders join[shuffle] lineitem
where
    o_orderkey = l_orderkey
    and l_shipmode in ('MAIL', 'SHIP')
    and l_commitdate < l_receiptdate
    and l_shipdate < l_commitdate
    and l_receiptdate >= date '1994-01-01'
    and l_receiptdate < date '1994-01-01' + interval '1' year
group by
    l_shipmode
order by
    l_shipmode;
"""
}
// TPC-H Q13 (customer distribution): order counts per customer
// (including zero-order customers via the right outer join, with the
// comment filter in the ON clause so those customers are kept).
def q13 = {
    sql """set exec_mem_limit=8589934592"""
    sql """set parallel_fragment_exec_instance_num=4"""
    sql """set disable_join_reorder=true"""
    sql """set enable_cost_based_join_reorder=false"""
    qt_q13 """
select
    c_count,
    count(*) as custdist
from
    (
        select
            c_custkey,
            count(o_orderkey) as c_count
        from
            orders right outer join customer on
                c_custkey = o_custkey
                and o_comment not like '%special%requests%'
        group by
            c_custkey
    ) as c_orders
group by
    c_count
order by
    custdist desc,
    c_count desc;
"""
}
// TPC-H Q14 (promotion effect): percentage of September-1995 revenue
// from 'PROMO%' parts, via conditional aggregation over part x lineitem.
def q14 = {
    sql """set exec_mem_limit=8589934592"""
    sql """set parallel_fragment_exec_instance_num=8"""
    sql """set disable_join_reorder=true"""
    sql """set enable_cost_based_join_reorder=false"""
    qt_q14 """
select
    100.00 * sum(case
        when p_type like 'PROMO%'
            then l_extendedprice * (1 - l_discount)
        else 0
    end) / sum(l_extendedprice * (1 - l_discount)) as promo_revenue
from
    part,
    lineitem
where
    l_partkey = p_partkey
    and l_shipdate >= date '1995-09-01'
    and l_shipdate < date '1995-09-01' + interval '1' month;
"""
}
// TPC-H Q15 (top supplier): uses the revenue0 CTE twice — once as the
// driving relation and once in a scalar max() subquery — instead of the
// spec's CREATE VIEW wrapper.
def q15 = {
    sql """set exec_mem_limit=8589934592"""
    sql """set parallel_fragment_exec_instance_num=8"""
    sql """set disable_join_reorder=false"""
    sql """set enable_cost_based_join_reorder=true"""
    qt_q15 """
with revenue0 as
(select
    l_suppkey as supplier_no,
    sum(l_extendedprice * (1 - l_discount)) as total_revenue
from
    lineitem
where
    l_shipdate >= date '1996-01-01'
    and l_shipdate < date '1996-01-01' + interval '3' month
group by
    l_suppkey)
select
    s_suppkey,
    s_name,
    s_address,
    s_phone,
    total_revenue
from
    supplier,
    revenue0
where
    s_suppkey = supplier_no
    and total_revenue = (
        select
            max(total_revenue)
        from
            revenue0
    )
order by
    s_suppkey;
"""
}
// TPC-H Q16 (parts/supplier relationship): distinct supplier counts per
// brand/type/size excluding complaint suppliers. Note the `not in`
// subquery — s_suppkey is the supplier PK in this schema, so the
// NULL-in-subquery trap does not apply.
def q16 = {
    sql """set exec_mem_limit=8589934592"""
    sql """set parallel_fragment_exec_instance_num=8"""
    sql """set disable_join_reorder=false"""
    sql """set enable_cost_based_join_reorder=true"""
    qt_q16 """
select
    p_brand,
    p_type,
    p_size,
    count(distinct ps_suppkey) as supplier_cnt
from
    partsupp,
    part
where
    p_partkey = ps_partkey
    and p_brand <> 'Brand#45'
    and p_type not like 'MEDIUM POLISHED%'
    and p_size in (49, 14, 23, 45, 19, 3, 36, 9)
    and ps_suppkey not in (
        select
            s_suppkey
        from
            supplier
        where
            s_comment like '%Customer%Complaints%'
    )
group by
    p_brand,
    p_type,
    p_size
order by
    supplier_cnt desc,
    p_brand,
    p_type,
    p_size;
"""
}
// TPC-H Q17 (small-quantity-order revenue): broadcast-join hints force
// part to be replicated to the lineitem scan in both the outer query and
// the correlated average-quantity subquery.
def q17 = {
    sql """set exec_mem_limit=8589934592"""
    sql """set parallel_fragment_exec_instance_num=1"""
    sql """set disable_join_reorder=false"""
    sql """set enable_cost_based_join_reorder=true"""
    qt_q17 """
select
    sum(l_extendedprice) / 7.0 as avg_yearly
from
    lineitem join [broadcast]
    part p1 on p1.p_partkey = l_partkey
where
    p1.p_brand = 'Brand#23'
    and p1.p_container = 'MED BOX'
    and l_quantity < (
        select
            0.2 * avg(l_quantity)
        from
            lineitem join [broadcast]
            part p2 on p2.p_partkey = l_partkey
        where
            l_partkey = p1.p_partkey
            and p2.p_brand = 'Brand#23'
            and p2.p_container = 'MED BOX'
    );
"""
}
// TPC-H Q18 (large volume customer): orders totaling > 300 units, found
// via a left semi join against the aggregated lineitem subquery.
def q18 = {
    sql """set exec_mem_limit=8589934592"""
    sql """set parallel_fragment_exec_instance_num=8"""
    sql """set disable_join_reorder=true"""
    sql """set enable_cost_based_join_reorder=false"""
    // NOTE(review): the result tag below is qt_q01, which duplicates the
    // tag used by the q01 closure instead of being qt_q18 — this looks
    // like a copy-paste slip. Renaming it requires regenerating the
    // suite's .out file in the same change, so it is flagged here rather
    // than silently renamed.
    qt_q01 """
select
    c_name,
    c_custkey,
    t3.o_orderkey,
    t3.o_orderdate,
    t3.o_totalprice,
    sum(t3.l_quantity)
from
customer join
(
    select * from
    lineitem join
    (
        select * from
        orders left semi join
        (
            select
                l_orderkey
            from
                lineitem
            group by
                l_orderkey having sum(l_quantity) > 300
        ) t1
        on o_orderkey = t1.l_orderkey
    ) t2
    on t2.o_orderkey = l_orderkey
) t3
on c_custkey = t3.o_custkey
group by
    c_name,
    c_custkey,
    t3.o_orderkey,
    t3.o_orderdate,
    t3.o_totalprice
order by
    t3.o_totalprice desc,
    t3.o_orderdate
limit 100;
"""
}
// TPC-H Q19 (discounted revenue): three OR'd brand/container/quantity
// predicate groups over part x lineitem; each disjunct repeats the join
// predicate p_partkey = l_partkey as the spec writes it.
def q19 = {
    sql """set exec_mem_limit=8589934592"""
    sql """set parallel_fragment_exec_instance_num=8"""
    sql """set disable_join_reorder=false"""
    sql """set enable_cost_based_join_reorder=false"""
    qt_q19 """
select
    sum(l_extendedprice* (1 - l_discount)) as revenue
from
    lineitem,
    part
where
    (
        p_partkey = l_partkey
        and p_brand = 'Brand#12'
        and p_container in ('SM CASE', 'SM BOX', 'SM PACK', 'SM PKG')
        and l_quantity >= 1 and l_quantity <= 1 + 10
        and p_size between 1 and 5
        and l_shipmode in ('AIR', 'AIR REG')
        and l_shipinstruct = 'DELIVER IN PERSON'
    )
    or
    (
        p_partkey = l_partkey
        and p_brand = 'Brand#23'
        and p_container in ('MED BAG', 'MED BOX', 'MED PKG', 'MED PACK')
        and l_quantity >= 10 and l_quantity <= 10 + 10
        and p_size between 1 and 10
        and l_shipmode in ('AIR', 'AIR REG')
        and l_shipinstruct = 'DELIVER IN PERSON'
    )
    or
    (
        p_partkey = l_partkey
        and p_brand = 'Brand#34'
        and p_container in ('LG CASE', 'LG BOX', 'LG PACK', 'LG PKG')
        and l_quantity >= 20 and l_quantity <= 20 + 10
        and p_size between 1 and 15
        and l_shipmode in ('AIR', 'AIR REG')
        and l_shipinstruct = 'DELIVER IN PERSON'
    );
"""
}
// TPC-H Q20 (potential part promotion): CANADA suppliers of 'forest%'
// parts with surplus stock, rewritten from nested IN subqueries into
// explicit semi joins over pre-aggregated derived tables.
def q20 = {
    sql """set exec_mem_limit=8589934592"""
    sql """set parallel_fragment_exec_instance_num=8"""
    sql """set disable_join_reorder=true"""
    sql """set enable_cost_based_join_reorder=true"""
    qt_q20 """
select
s_name, s_address from
supplier left semi join
(
    select * from
    (
        select l_partkey,l_suppkey, 0.5 * sum(l_quantity) as l_q
        from lineitem
        where l_shipdate >= date '1994-01-01'
            and l_shipdate < date '1994-01-01' + interval '1' year
        group by l_partkey,l_suppkey
    ) t2 join
    (
        select ps_partkey, ps_suppkey, ps_availqty
        from partsupp left semi join part
        on ps_partkey = p_partkey and p_name like 'forest%'
    ) t1
    on t2.l_partkey = t1.ps_partkey and t2.l_suppkey = t1.ps_suppkey
    and t1.ps_availqty > t2.l_q
) t3
on s_suppkey = t3.ps_suppkey
join nation
where s_nationkey = n_nationkey
and n_name = 'CANADA'
order by s_name;
"""
}
// TPC-H Q21 (suppliers who kept orders waiting): the spec's EXISTS /
// NOT EXISTS pair is rewritten as a right semi join (some other supplier
// shipped the order) layered on a right anti join (no other supplier was
// also late), driven from SAUDI ARABIA suppliers' late lineitems.
def q21 = {
    sql """set exec_mem_limit=8589934592"""
    sql """set parallel_fragment_exec_instance_num=8"""
    sql """set disable_join_reorder=true"""
    sql """set enable_cost_based_join_reorder=true"""
    qt_q21 """
select
s_name, count(*) as numwait
from orders join
(
    select * from
    lineitem l2 right semi join
    (
        select * from
        lineitem l3 right anti join
        (
            select * from
            lineitem l1 join
            (
                select * from
                supplier join nation
                where s_nationkey = n_nationkey
                and n_name = 'SAUDI ARABIA'
            ) t1
            where t1.s_suppkey = l1.l_suppkey and l1.l_receiptdate > l1.l_commitdate
        ) t2
        on l3.l_orderkey = t2.l_orderkey and l3.l_suppkey <> t2.l_suppkey and l3.l_receiptdate > l3.l_commitdate
    ) t3
    on l2.l_orderkey = t3.l_orderkey and l2.l_suppkey <> t3.l_suppkey
) t4
on o_orderkey = t4.l_orderkey and o_orderstatus = 'F'
group by
    t4.s_name
order by
    numwait desc,
    t4.s_name
limit 100;
"""
}
// TPC-H Q22 (global sales opportunity): customers with above-average
// positive balances in seven phone country codes and no orders,
// combining a scalar avg() subquery with NOT EXISTS.
def q22 = {
    sql """set exec_mem_limit=8589934592"""
    sql """set parallel_fragment_exec_instance_num=8"""
    sql """set disable_join_reorder=false"""
    sql """set enable_cost_based_join_reorder=true"""
    qt_q22 """
select
    cntrycode,
    count(*) as numcust,
    sum(c_acctbal) as totacctbal
from
    (
        select
            substring(c_phone, 1, 2) as cntrycode,
            c_acctbal
        from
            customer
        where
            substring(c_phone, 1, 2) in
                ('13', '31', '23', '29', '30', '18', '17')
            and c_acctbal > (
                select
                    avg(c_acctbal)
                from
                    customer
                where
                    c_acctbal > 0.00
                    and substring(c_phone, 1, 2) in
                        ('13', '31', '23', '29', '30', '18', '17')
            )
            and not exists (
                select
                    *
                from
                    orders
                where
                    o_custkey = c_custkey
            )
    ) as custsale
group by
    cntrycode
order by
    cntrycode;
"""
}
// Sanity-checks that at least one backend is registered, then iterates
// them. The per-backend config POST is deliberately retained as a
// commented-out sample (see the in-loop note) — no BE config change is
// required for these cases anymore, so the loop body is intentionally
// a no-op.
def set_be_config = { ->
    // `show backends` returns one row per BE; backend[2] is expected to
    // be the host and backend[5] the HTTP port in the sample below —
    // TODO(review): confirm column positions against the current
    // `show backends` schema before reviving the sample.
    String[][] backends = sql """ show backends; """
    assertTrue(backends.size() > 0)
    for (String[] backend in backends) {
        // No need to set this config anymore, but leave this code sample here
        // StringBuilder setConfigCommand = new StringBuilder();
        // setConfigCommand.append("curl -X POST http://")
        // setConfigCommand.append(backend[2])
        // setConfigCommand.append(":")
        // setConfigCommand.append(backend[5])
        // setConfigCommand.append("/api/update_config?")
        // String command1 = setConfigCommand.toString() + "enable_new_load_scan_node=true"
        // logger.info(command1)
        // String command2 = setConfigCommand.toString() + "enable_new_file_scanner=true"
        // logger.info(command2)
        // def process1 = command1.execute()
        // int code = process1.waitFor()
        // assertEquals(code, 0)
        // def process2 = command2.execute()
        // code = process1.waitFor()
        // assertEquals(code, 0)
    }
}
// Driver: only runs when the regression config enables the dockerized
// Hive environment. It (re)creates an HMS-backed `hive` catalog pointing
// at the local metastore port from the test config, switches to the
// tpch1_parquet database, and runs all 22 TPC-H query closures in order
// (q01/q02 are defined earlier in this suite, above this section).
String enabled = context.config.otherConfigs.get("enableHiveTest")
if (enabled != null && enabled.equalsIgnoreCase("true")) {
    String hms_port = context.config.otherConfigs.get("hms_port")
    set_be_config.call()
    // Multi-catalog must be switched on at the FE before `create catalog`.
    sql """admin set frontend config ("enable_multi_catalog" = "true")"""
    // Drop first so re-runs always pick up the current hms_port.
    sql """drop catalog if exists hive"""
    sql """
        create catalog hive properties (
            "type"="hms",
            'hive.metastore.uris' = 'thrift://127.0.0.1:${hms_port}'
        );
    """
    sql """switch hive"""
    sql """use `tpch1_parquet`"""
    q01()
    q02()
    q03()
    q04()
    q05()
    q06()
    q07()
    q08()
    q09()
    q10()
    q11()
    q12()
    q13()
    q14()
    q15()
    q16()
    q17()
    q18()
    q19()
    q20()
    q21()
    q22()
}
}