[test](migrate) move 2 cases from p2 to p0 for 2.1 (#37139)

pick #37004
wuwenchi
2024-07-02 22:50:53 +08:00
committed by GitHub
parent b445c783eb
commit e7e1e967cf
40 changed files with 1085 additions and 404 deletions
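
Every migrated suite below swaps the remote-Hive (p2) configuration keys for the dockerized-Hive (p0) ones and runs against both the hive2 and hive3 containers. A minimal sketch of that shared header, assembled from the hunks in this commit (the suite name and tag list are placeholders):

suite("my_suite", "p0,external,hive,external_docker,external_docker_hive") {
    // Skip unless the dockerized Hive environment is enabled in the regression config.
    String enabled = context.config.otherConfigs.get("enableHiveTest")
    if (enabled == null || !enabled.equalsIgnoreCase("true")) {
        logger.info("disable Hive test.")
        return
    }
    // Run the same body against both the Hive 2 and Hive 3 docker instances.
    for (String hivePrefix : ["hive2", "hive3"]) {
        String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp")
        String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort")
        String catalog_name = hivePrefix + "_my_suite"
        sql """drop catalog if exists ${catalog_name};"""
        sql """
            create catalog if not exists ${catalog_name} properties (
                'type'='hms',
                'hadoop.username' = 'hadoop',
                'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
            );
        """
        sql """switch ${catalog_name};"""
        // ... suite-specific queries and assertions ...
        sql """drop catalog ${catalog_name}"""
    }
}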

View File

@ -101,7 +101,7 @@ services:
- "${PG_PORT}:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres"]
interval: 5s
interval: 10s
timeout: 60s
retries: 120

View File

@ -101,7 +101,7 @@ services:
- "${PG_PORT}:5432"
healthcheck:
test: ["CMD-SHELL", "pg_isready -U postgres"]
interval: 5s
interval: 10s
timeout: 60s
retries: 120

View File

@ -0,0 +1,27 @@
create database if not exists multi_catalog;
use multi_catalog;
CREATE TABLE IF NOT EXISTS `hive_textfile_array_all_types`(
`col1` array<tinyint>,
`col2` array<smallint>,
`col3` array<int>,
`col4` array<bigint>,
`col5` array<boolean>,
`col6` array<float>,
`col7` array<double>,
`col8` array<string>,
`col9` array<timestamp>,
`col10` array<date>,
`col11` array<decimal(10,3)>,
`col12` array<char(1)>,
`col13` array<varchar(10)>)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'/user/doris/suites/multi_catalog/hive_textfile_array_all_types';
msck repair table hive_textfile_array_all_types;

View File

@ -0,0 +1,13 @@
#!/bin/bash
set -x
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
## mkdir and put data to hdfs
cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
# create table
hive -f "${CUR_DIR}"/create_table.hql

View File

@ -0,0 +1,32 @@
create database if not exists multi_catalog;
use multi_catalog;
CREATE TABLE IF NOT EXISTS `hive_textfile_array_delimiter`(
`col1` array<tinyint>,
`col2` array<smallint>,
`col3` array<int>,
`col4` array<bigint>,
`col5` array<boolean>,
`col6` array<float>,
`col7` array<double>,
`col8` array<string>,
`col9` array<timestamp>,
`col10` array<date>,
`col11` array<decimal(10,3)>,
`col12` int,
`col13` array<array<array<int>>>)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'colelction.delim'=',',
'field.delim'='\t',
'line.delim'='\n',
'serialization.format'='\t')
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'/user/doris/suites/multi_catalog/hive_textfile_array_delimiter';
msck repair table hive_textfile_array_delimiter;

View File

@ -0,0 +1,13 @@
#!/bin/bash
set -x
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
## mkdir and put data to hdfs
cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
# create table
hive -f "${CUR_DIR}"/create_table.hql

View File

@ -0,0 +1,16 @@
create database if not exists multi_catalog;
use multi_catalog;
CREATE TABLE IF NOT EXISTS `hive_textfile_nestedarray`(
`col1` int,
`col2` array<array<array<int>>>)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'/user/doris/suites/multi_catalog/hive_textfile_nestedarray';
msck repair table hive_textfile_nestedarray;

View File

@ -0,0 +1,13 @@
#!/bin/bash
set -x
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
## mkdir and put data to hdfs
cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
# create table
hive -f "${CUR_DIR}"/create_table.hql

View File

@ -0,0 +1,39 @@
create database if not exists multi_catalog;
use multi_catalog;
CREATE TABLE IF NOT EXISTS `logs1_parquet`(
`log_time` timestamp,
`machine_name` varchar(128),
`machine_group` varchar(128),
`cpu_idle` float,
`cpu_nice` float,
`cpu_system` float,
`cpu_user` float,
`cpu_wio` float,
`disk_free` float,
`disk_total` float,
`part_max_used` float,
`load_fifteen` float,
`load_five` float,
`load_one` float,
`mem_buffers` float,
`mem_cached` float,
`mem_free` float,
`mem_shared` float,
`swap_free` float,
`bytes_in` float,
`bytes_out` float)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
WITH SERDEPROPERTIES (
'field.delim'=',',
'serialization.format'=',')
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/suites/multi_catalog/logs1_parquet';
msck repair table logs1_parquet;

View File

@ -0,0 +1,21 @@
#!/bin/bash
set -x
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
if [[ ! -d "${CUR_DIR}/data" ]]; then
echo "${CUR_DIR}/data does not exist"
cd "${CUR_DIR}" && rm -f data.tar.gz \
&& curl -O https://s3BucketName.s3Endpoint/regression/datalake/pipeline_data/multi_catalog/logs1_parquet/data.tar.gz \
&& tar xzf data.tar.gz
cd -
else
echo "${CUR_DIR}/data exist, continue !"
fi
## mkdir and put data to hdfs
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
# create table
hive -f "${CUR_DIR}"/create_table.hql

View File

@ -0,0 +1,22 @@
create database if not exists multi_catalog;
use multi_catalog;
CREATE TABLE IF NOT EXISTS `one_partition`(
`id` int)
PARTITIONED BY (
`part1` int)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'field.delim'='|',
'serialization.format'='|')
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'/user/doris/suites/multi_catalog/one_partition';
msck repair table one_partition;

View File

@ -0,0 +1,13 @@
#!/bin/bash
set -x
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
## mkdir and put data to hdfs
cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
# create table
hive -f "${CUR_DIR}/create_table.hql"

View File

@ -0,0 +1,68 @@
create database if not exists multi_catalog;
use multi_catalog;
CREATE TABLE IF NOT EXISTS `test_csv_format_error`(
`device_id` string COMMENT 'Unique device ID ',
`user_id` bigint COMMENT 'Unique device ID, HASH of DEVICE_ID ',
`user_app_id` int COMMENT 'ID of the user of the sampled application ',
`standard_app_id` int COMMENT 'Standard application ID ',
`standard_app_name` string COMMENT 'Standard application name ',
`standard_package_name` string COMMENT 'Standard application package name ',
`device_brand_id` int COMMENT 'Device brand ID ',
`device_brand_name` string COMMENT 'Device brand name ',
`device_eqp_id` int COMMENT 'Device model ID ',
`device_eqp_name` string COMMENT 'Device model name ',
`os_version_id` int COMMENT 'OS version ID ',
`os_version_name` string COMMENT 'OS version name ',
`os_type_id` int COMMENT 'OS type ID: 0 Android, 1 iOS ',
`os_type_name` string COMMENT 'OS type name: 0 Android, 1 iOS ',
`os_name` string COMMENT 'Operating system name ',
`oem_os_version` string COMMENT 'Vendor-customized OS version ',
`oem_os_name` string COMMENT 'Vendor-customized OS name ',
`app_version` string COMMENT 'Sampled application version ',
`app_key` string COMMENT 'Sampled application key ',
`app_channel` string COMMENT 'Promotion channel ',
`package_name` string COMMENT 'Host app package name ',
`app_name` string COMMENT 'Host app name',
`sdk_version` string COMMENT 'SDK version ',
`api_level` string COMMENT 'API level ',
`carrier_id` int COMMENT 'Carrier ID ',
`carrier_name` string COMMENT 'Carrier name ',
`phone_num` string COMMENT 'Phone number ',
`ip` string COMMENT 'IP address ',
`country_id` int COMMENT 'Country ID',
`country_name` string COMMENT 'Country name',
`province_id` int COMMENT 'Province ID ',
`province_name` string COMMENT 'Province name ',
`city_id` int COMMENT 'Prefecture-level city ID ',
`city_name` string COMMENT 'Prefecture-level city name ',
`county_id` int COMMENT 'County-level city ID ',
`county_name` string COMMENT 'County-level city name ',
`mac_address` string COMMENT 'MAC address ',
`network_id` int COMMENT 'Network type ID ',
`network_name` string COMMENT 'Network type ',
`org_package_name` string COMMENT 'Original application package name ',
`org_app_name` string COMMENT 'Original application name ',
`org_app_version` string COMMENT 'Original application version ',
`app_flag` int COMMENT 'Install, update, or uninstall',
`action_time` string COMMENT 'Time the action occurred',
`day_realy` string COMMENT 'Date the action occurred',
`memo` map<string,string> COMMENT 'Remarks')
COMMENT 'ods-App_Installed'
PARTITIONED BY (
`day` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'colelction.delim'=',',
'field.delim'='\t',
'mapkey.delim'=':',
'serialization.format'='\t')
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'/user/doris/suites/multi_catalog/test_csv_format_error';
msck repair table test_csv_format_error;

View File

@ -0,0 +1,13 @@
#!/bin/bash
set -x
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
## mkdir and put data to hdfs
cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
# create table
hive -f "${CUR_DIR}/create_table.hql"

View File

@ -0,0 +1,25 @@
create database if not exists multi_catalog;
use multi_catalog;
CREATE TABLE IF NOT EXISTS `test_date_string_partition`(
`k1` int)
PARTITIONED BY (
`day1` string,
`day2` date)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'field.delim'=',',
'serialization.format'=',')
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'/user/doris/suites/multi_catalog/test_date_string_partition';
msck repair table test_date_string_partition;

View File

@ -0,0 +1,13 @@
#!/bin/bash
set -x
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
## mkdir and put data to hdfs
cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
# create table
hive -f "${CUR_DIR}/create_table.hql"

View File

@ -0,0 +1,25 @@
create database if not exists multi_catalog;
use multi_catalog;
CREATE TABLE IF NOT EXISTS `two_partition`(
`id` int)
PARTITIONED BY (
`part1` int,
`part2` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.serde2.lazy.LazySimpleSerDe'
WITH SERDEPROPERTIES (
'field.delim'='|',
'serialization.format'='|')
STORED AS INPUTFORMAT
'org.apache.hadoop.mapred.TextInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.HiveIgnoreKeyTextOutputFormat'
LOCATION
'/user/doris/suites/multi_catalog/two_partition';
msck repair table two_partition;

View File

@ -0,0 +1,13 @@
#!/bin/bash
set -x
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
## mkdir and put data to hdfs
cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
hadoop fs -mkdir -p /user/doris/suites/multi_catalog/
hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/multi_catalog/
# create table
hive -f "${CUR_DIR}/create_table.hql"

View File

@ -0,0 +1,33 @@
create database if not exists statistics;
use statistics;
CREATE TABLE IF NOT EXISTS `statistics`(
`lo_orderkey` int,
`lo_linenumber` int,
`lo_custkey` int,
`lo_partkey` int,
`lo_suppkey` int,
`lo_orderdate` int,
`lo_orderpriority` string,
`lo_shippriority` int,
`lo_quantity` int,
`lo_extendedprice` int,
`lo_ordtotalprice` int,
`lo_discount` int,
`lo_revenue` int,
`lo_supplycost` int,
`lo_tax` int,
`lo_commitdate` int,
`lo_shipmode` string)
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/suites/statistics/statistics';
msck repair table statistics;

View File

@ -0,0 +1,13 @@
#!/bin/bash
set -x
CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
## mkdir and put data to hdfs
cd "${CUR_DIR}" && rm -rf data/ && tar xzf data.tar.gz
hadoop fs -mkdir -p /user/doris/suites/statistics/
hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/statistics/
# create table
hive -f "${CUR_DIR}/create_table.hql"

View File

@ -0,0 +1,24 @@
create database if not exists tpch_1000_parquet;
use tpch_1000_parquet;
CREATE TABLE IF NOT EXISTS `part`(
`p_partkey` int,
`p_name` varchar(55),
`p_mfgr` char(25),
`p_brand` char(10),
`p_type` varchar(25),
`p_size` int,
`p_container` char(10),
`p_retailprice` decimal(15,2),
`p_comment` varchar(23))
ROW FORMAT SERDE
'org.apache.hadoop.hive.ql.io.parquet.serde.ParquetHiveSerDe'
STORED AS INPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetInputFormat'
OUTPUTFORMAT
'org.apache.hadoop.hive.ql.io.parquet.MapredParquetOutputFormat'
LOCATION
'/user/doris/suites/tpch_1000_parquet/part';
msck repair table part;

View File

@ -0,0 +1,22 @@
#!/bin/bash
set -x
# CUR_DIR="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
# if [[ ! -d "${CUR_DIR}/data" ]]; then
# echo "${CUR_DIR}/data does not exist"
# cd "${CUR_DIR}" && rm -f data.tar.gz \
# && curl -O https://s3BucketName.s3Endpoint/regression/datalake/pipeline_data/tpch_1000_parquet/part/data.tar.gz \
# && tar xzf data.tar.gz
# cd -
# else
# echo "${CUR_DIR}/data exist, continue !"
# fi
# ## mkdir and put data to hdfs
# hadoop fs -mkdir -p /user/doris/suites/tpch_1000_parquet/
# hadoop fs -put "${CUR_DIR}"/data/* /user/doris/suites/tpch_1000_parquet/
# # create table
# hive -f "${CUR_DIR}"/create_table.hql

View File

@ -173,3 +173,177 @@
-- !string_part_prune5 --
2 2023-08-16 2023-08-16
-- !one_partition1 --
1 1
2 1
3 2
4 2
5 \N
6 \N
-- !one_partition2 --
5 \N
6 \N
-- !one_partition3 --
1
2
3
4
-- !one_partition4 --
1
1
2
2
-- !one_partition5 --
4 2
5 \N
6 \N
-- !one_partition6 --
3 2
4 2
5 \N
6 \N
-- !two_partition1 --
1 \N one
2 \N one
3 2 \N
4 2 \N
5 3 three
6 3 three
7 \N \N
8 \N \N
-- !two_partition2 --
1 \N one
2 \N one
7 \N \N
8 \N \N
-- !two_partition3 --
3 2 \N
4 2 \N
5 3 three
6 3 three
-- !two_partition4 --
3 2 \N
4 2 \N
7 \N \N
8 \N \N
-- !two_partition5 --
1 \N one
2 \N one
5 3 three
6 3 three
-- !two_partition6 --
5 3 three
6 3 three
-- !two_partition7 --
1 \N one
2 \N one
-- !two_partition8 --
3 2 \N
4 2 \N
-- !two_partition9 --
7 \N \N
8 \N \N
-- !two_partition10 --
1 \N one
2 \N one
3 2 \N
4 2 \N
5 3 three
6 3 three
-- !two_partition11 --
1 \N one
2 \N one
5 3 three
6 3 three
7 \N \N
8 \N \N
-- !two_partition12 --
3 2 \N
4 2 \N
5 3 three
6 3 three
7 \N \N
8 \N \N
-- !two_partition13 --
1 \N one
2 \N one
3 2 \N
4 2 \N
7 \N \N
8 \N \N
-- !two_partition14 --
1 \N one
2 \N one
3 2 \N
4 2 \N
5 3 three
6 3 three
-- !two_partition15 --
6 3 three
7 \N \N
8 \N \N
-- !two_partition16 --
3 2 \N
4 2 \N
5 3 three
6 3 three
-- !two_partition17 --
1 \N one
2 \N one
-- !two_partition18 --
5 3 three
6 3 three
-- !string_part_prune1 --
3 2023-08-17 2023-08-17
-- !string_part_prune2 --
-- !string_part_prune3 --
3 2023-08-17 2023-08-17
-- !string_part_prune4 --
1 2023-08-15 2023-08-15
1 2023-8-15 2023-08-15
2 2023-08-16 2023-08-16
3 2023-08-17 2023-08-17
-- !string_part_prune5 --
3 2023-08-17 2023-08-17
-- !string_part_prune5 --
1 2023-08-15 2023-08-15
2 2023-08-16 2023-08-16
-- !string_part_prune5 --
2 2023-08-16 2023-08-16
-- !string_part_prune5 --
2 2023-08-16 2023-08-16
-- !string_part_prune5 --
2 2023-08-16 2023-08-16

View File

@ -20,3 +20,24 @@
[4, 5, 6, 7] [2100, 3100, 4100, 5100] [110000, 220000, 330000] [60000000000000, 60000000000000, 60000000000000] [1] [120.301, 450.602, 780.9001] [100.0000001, 200.0000002, 300.0000003] ["hive", "text", "file", "format"] ["2023-07-09 12:00:00.000000", "2023-07-09 12:00:00.000000", "2023-07-09 12:00:00.000000"] ["2021-07-09", "2021-07-09", "2021-07-09"] [3311111.111, 2211111.111, 3121111.111] 5 [[[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]], [[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]]]
[40, 50, 60, 70] [210, 310, 410, 510] [110000, 210000, 310000] [400000000000000, 50000000000000, 60000000000000] [1] [120.301, 450.602, 780.9001] [100.0000001, 200.0000002, 300.0000003] ["hello", "world"] ["2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000"] ["2021-07-06", "2021-07-06", "2021-07-06"] [3311111.111, 2211111.111, 3121111.111] 4 [[[1]]]
-- !types --
[1, 2, 3, 4] [10, 20, 30, 40] [100, 200, 300] [100000000000000, 20000000000000, 30000000000000, 40000000000000] [1, 0, 1] [1.23, 4.56, 7.89] [10.1, 20.2, 30.3] ["apple", "banana", "orange"] ["2023-07-04 12:00:00.000000", "2023-07-05 12:00:00.000000", "2023-07-06 12:00:00.000000"] ["2023-07-04", "2023-07-05", "2023-07-06"] [1111111.111, 2111111.111, 3111111.111] ["a", "b", "c"] ["aa", "bb", "cc"]
[10, 20, 30] [100, 200, 300, 400] [1000, 2000, 3000] [1000000000000000, 200000000000000, 300000000000000, 400000000000000] [1, 1, 1, 1] [12.3, 45.6, 78.9] [100.1, 200.2, 300.3] ["grapes", "watermelon", "kiwi"] ["2023-07-03 12:00:00.000000", "2023-07-03 12:00:00.000000", "2023-07-03 12:00:00.000000"] ["2021-07-05", "2021-07-05", "2021-07-05"] [2222222.111, 2222222.112, 2222222.113] \N \N
[20, 30, 40, 50] [200, 300, 400, 500] [10000, 20000, 30000] [100000000000000, 20000000000000, 30000000000000] [1, 1, 1, 1, 0, 0] [120.3, 450.6, 780.9] [100.001, 200.002, 300.003] ["melon", "strawberry", "blueberry"] ["2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000"] ["2021-07-06", "2021-07-06", "2021-07-06"] [1111111.111, 2111111.111, 3111111.111] \N \N
[4, 5, 6, 7] [2100, 3100, 4100, 5100] [110000, 220000, 330000] [60000000000000, 60000000000000, 60000000000000] [1] [120.301, 450.602, 780.9001] [100.0000001, 200.0000002, 300.0000003] ["hive", "text", "file", "format"] ["2023-07-09 12:00:00.000000", "2023-07-09 12:00:00.000000", "2023-07-09 12:00:00.000000"] ["2021-07-09", "2021-07-09", "2021-07-09"] [3311111.111, 2211111.111, 3121111.111] ["d", "d", "d", "d"] ["ffffffff"]
[40, 50, 60, 70] [210, 310, 410, 510] [110000, 210000, 310000] [400000000000000, 50000000000000, 60000000000000] [1] [120.301, 450.602, 780.9001] [100.0000001, 200.0000002, 300.0000003] ["hello", "world"] ["2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000"] ["2021-07-06", "2021-07-06", "2021-07-06"] [3311111.111, 2211111.111, 3121111.111] ["1"] ["hello", "world"]
-- !array --
1 [[[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]]]
2 [[[19, 20, 21], [22, 23, 24], [25, 26, 27]], [[28], [31], [34]], [[28, 29], [31, 32], [34, 35]]]
3 [[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]
4 [[[1]]]
5 [[[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]], [[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]]]
-- !delimiter --
[1, 2, 3, 4] [10, 20, 30, 40] [100, 200, 300] [100000000000000, 20000000000000, 30000000000000, 40000000000000] [1, 0, 1] [1.23, 4.56, 7.89] [10.1, 20.2, 30.3] ["apple", "banana", "orange"] ["2023-07-04 12:00:00.000000", "2023-07-05 12:00:00.000000", "2023-07-06 12:00:00.000000"] ["2023-07-04", "2023-07-05", "2023-07-06"] [1111111.111, 2111111.111, 3111111.111] 1 [[[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]]]
[10, 20, 30] [100, 200, 300, 400] [1000, 2000, 3000] [1000000000000000, 200000000000000, 300000000000000, 400000000000000] [1, 1, 1, 1] [12.3, 45.6, 78.9] [100.1, 200.2, 300.3] ["grapes", "watermelon", "kiwi"] ["2023-07-03 12:00:00.000000", "2023-07-03 12:00:00.000000", "2023-07-03 12:00:00.000000"] ["2021-07-05", "2021-07-05", "2021-07-05"] [2222222.111, 2222222.112, 2222222.113] 2 [[[19, 20, 21], [22, 23, 24], [25, 26, 27]], [[28], [31], [34]], [[28, 29], [31, 32], [34, 35]]]
[20, 30, 40, 50] [200, 300, 400, 500] [10000, 20000, 30000] [100000000000000, 20000000000000, 30000000000000] [1, 1, 1, 1, 0, 0] [120.3, 450.6, 780.9] [100.001, 200.002, 300.003] ["melon", "strawberry", "blueberry"] ["2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000"] ["2021-07-06", "2021-07-06", "2021-07-06"] [1111111.111, 2111111.111, 3111111.111] 3 [[[1, 2, 3], [4, 5, 6], [7, 8, 9]]]
[4, 5, 6, 7] [2100, 3100, 4100, 5100] [110000, 220000, 330000] [60000000000000, 60000000000000, 60000000000000] [1] [120.301, 450.602, 780.9001] [100.0000001, 200.0000002, 300.0000003] ["hive", "text", "file", "format"] ["2023-07-09 12:00:00.000000", "2023-07-09 12:00:00.000000", "2023-07-09 12:00:00.000000"] ["2021-07-09", "2021-07-09", "2021-07-09"] [3311111.111, 2211111.111, 3121111.111] 5 [[[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]], [[1, 2, 3], [4, 5, 6], [7, 8, 9]], [[10, 11, 12], [13, 14, 15], [16, 17, 18]]]
[40, 50, 60, 70] [210, 310, 410, 510] [110000, 210000, 310000] [400000000000000, 50000000000000, 60000000000000] [1] [120.301, 450.602, 780.9001] [100.0000001, 200.0000002, 300.0000003] ["hello", "world"] ["2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000", "2023-07-02 12:00:00.000000"] ["2021-07-06", "2021-07-06", "2021-07-06"] [3311111.111, 2211111.111, 3121111.111] 4 [[[1]]]

View File

@ -15,7 +15,7 @@
// specific language governing permissions and limitations
// under the License.
suite("test_hive_default_partition", "p2,external,hive,external_remote,external_remote_hive") {
suite("test_hive_default_partition", "p0,external,hive,external_docker,external_docker_hive") {
def one_partition1 = """select * from one_partition order by id;"""
def one_partition2 = """select id, part1 from one_partition where part1 is null order by id;"""
def one_partition3 = """select id from one_partition where part1 is not null order by id;"""
@ -52,11 +52,16 @@ suite("test_hive_default_partition", "p2,external,hive,external_remote,external_
def string_part_prune8 = """select * from test_date_string_partition where cast(day1 as date) in ("2023-08-16", "2023-08-18");"""
def string_part_prune9 = """select * from test_date_string_partition where cast(day1 as date) in (cast("2023-08-16" as date), "2023-08-18");"""
String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
if (enabled != null && enabled.equalsIgnoreCase("true")) {
String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost")
String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort")
String catalog_name = "hive_default_partition"
String enabled = context.config.otherConfigs.get("enableHiveTest")
if (enabled == null || !enabled.equalsIgnoreCase("true")) {
logger.info("disable hive test.")
return;
}
for (String hivePrefix : ["hive2", "hive3"]) {
String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp")
String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort")
String catalog_name = hivePrefix + "_hive_default_partition"
sql """drop catalog if exists ${catalog_name};"""
sql """
create catalog if not exists ${catalog_name} properties (

View File

@ -0,0 +1,344 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
suite("test_hive_statistic", "p0,external,hive,external_docker,external_docker_hive") {
String enabled = context.config.otherConfigs.get("enableHiveTest")
if (enabled == null || !enabled.equalsIgnoreCase("true")) {
logger.info("disable Hive test.")
return
}
for (String hivePrefix : ["hive2", "hive3"]) {
String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp")
String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort")
String catalog_name = hivePrefix + "_test_hive_statistic"
sql """drop catalog if exists ${catalog_name};"""
sql """
create catalog if not exists ${catalog_name} properties (
'type'='hms',
'hadoop.username' = 'hadoop',
'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
);
"""
logger.info("catalog " + catalog_name + " created")
sql """switch ${catalog_name};"""
// TODO will be supported in future
// Test analyze table without init.
// sql """analyze table ${catalog_name}.tpch_1000_parquet.region with sync"""
// logger.info("switched to catalog " + catalog_name)
// sql """use statistics;"""
// sql """analyze table `statistics` with sync"""
// def result = sql """show column stats `statistics` (lo_quantity)"""
// assertEquals(result.size(), 1)
// assertEquals(result[0][0], "lo_quantity")
// assertEquals(result[0][2], "100.0")
// assertEquals(result[0][3], "46.0")
// assertEquals(result[0][4], "0.0")
// assertEquals(result[0][5], "400.0")
// assertEquals(result[0][6], "4.0")
// assertEquals(result[0][7], "1")
// assertEquals(result[0][8], "50")
// result = sql """show column stats `statistics` (lo_orderkey)"""
// assertEquals(result.size(), 1)
// assertEquals(result[0][0], "lo_orderkey")
// assertEquals(result[0][2], "100.0")
// assertEquals(result[0][3], "26.0")
// assertEquals(result[0][4], "0.0")
// assertEquals(result[0][5], "400.0")
// assertEquals(result[0][6], "4.0")
// assertEquals(result[0][7], "1")
// assertEquals(result[0][8], "98")
// result = sql """show column stats `statistics` (lo_linenumber)"""
// assertEquals(result.size(), 1)
// assertEquals(result[0][0], "lo_linenumber")
// assertEquals(result[0][2], "100.0")
// assertEquals(result[0][3], "7.0")
// assertEquals(result[0][4], "0.0")
// assertEquals(result[0][5], "400.0")
// assertEquals(result[0][6], "4.0")
// assertEquals(result[0][7], "1")
// assertEquals(result[0][8], "7")
// result = sql """show column stats `statistics` (lo_custkey)"""
// assertEquals(result.size(), 1)
// assertEquals(result[0][0], "lo_custkey")
// assertEquals(result[0][2], "100.0")
// assertEquals(result[0][3], "26.0")
// assertEquals(result[0][4], "0.0")
// assertEquals(result[0][5], "400.0")
// assertEquals(result[0][6], "4.0")
// assertEquals(result[0][7], "67423")
// assertEquals(result[0][8], "2735521")
// result = sql """show column stats `statistics` (lo_partkey)"""
// assertEquals(result.size(), 1)
// assertEquals(result[0][0], "lo_partkey")
// assertEquals(result[0][2], "100.0")
// assertEquals(result[0][3], "100.0")
// assertEquals(result[0][4], "0.0")
// assertEquals(result[0][5], "400.0")
// assertEquals(result[0][6], "4.0")
// assertEquals(result[0][7], "2250")
// assertEquals(result[0][8], "989601")
// result = sql """show column stats `statistics` (lo_suppkey)"""
// assertEquals(result.size(), 1)
// assertEquals(result[0][0], "lo_suppkey")
// assertEquals(result[0][2], "100.0")
// assertEquals(result[0][3], "100.0")
// assertEquals(result[0][4], "0.0")
// assertEquals(result[0][5], "400.0")
// assertEquals(result[0][6], "4.0")
// assertEquals(result[0][7], "4167")
// assertEquals(result[0][8], "195845")
// result = sql """show column stats `statistics` (lo_orderdate)"""
// assertEquals(result.size(), 1)
// assertEquals(result[0][0], "lo_orderdate")
// assertEquals(result[0][2], "100.0")
// assertEquals(result[0][3], "26.0")
// assertEquals(result[0][4], "0.0")
// assertEquals(result[0][5], "400.0")
// assertEquals(result[0][6], "4.0")
// assertEquals(result[0][7], "19920221")
// assertEquals(result[0][8], "19980721")
// result = sql """show column stats `statistics` (lo_orderpriority)"""
// assertEquals(result.size(), 1)
// assertEquals(result[0][0], "lo_orderpriority")
// assertEquals(result[0][2], "100.0")
// assertEquals(result[0][3], "5.0")
// assertEquals(result[0][4], "0.0")
// assertEquals(result[0][5], "880.0")
// assertEquals(result[0][6], "8.8")
// assertEquals(result[0][7], "'1-URGENT'")
// assertEquals(result[0][8], "'5-LOW'")
// result = sql """show column stats `statistics` (lo_shippriority)"""
// assertEquals(result.size(), 1)
// assertEquals(result[0][0], "lo_shippriority")
// assertEquals(result[0][2], "100.0")
// assertEquals(result[0][3], "1.0")
// assertEquals(result[0][4], "0.0")
// assertEquals(result[0][5], "400.0")
// assertEquals(result[0][6], "4.0")
// assertEquals(result[0][7], "0")
// assertEquals(result[0][8], "0")
// result = sql """show column stats `statistics` (lo_extendedprice)"""
// assertEquals(result.size(), 1)
// assertEquals(result[0][0], "lo_extendedprice")
// assertEquals(result[0][2], "100.0")
// assertEquals(result[0][3], "100.0")
// assertEquals(result[0][4], "0.0")
// assertEquals(result[0][5], "400.0")
// assertEquals(result[0][6], "4.0")
// assertEquals(result[0][7], "104300")
// assertEquals(result[0][8], "9066094")
// result = sql """show column stats `statistics` (lo_ordtotalprice)"""
// assertEquals(result.size(), 1)
// assertEquals(result[0][0], "lo_ordtotalprice")
// assertEquals(result[0][2], "100.0")
// assertEquals(result[0][3], "26.0")
// assertEquals(result[0][4], "0.0")
// assertEquals(result[0][5], "400.0")
// assertEquals(result[0][6], "4.0")
// assertEquals(result[0][7], "3428256")
// assertEquals(result[0][8], "36771805")
// result = sql """show column stats `statistics` (lo_discount)"""
// assertEquals(result.size(), 1)
// assertEquals(result[0][0], "lo_discount")
// assertEquals(result[0][2], "100.0")
// assertEquals(result[0][3], "11.0")
// assertEquals(result[0][4], "0.0")
// assertEquals(result[0][5], "400.0")
// assertEquals(result[0][6], "4.0")
// assertEquals(result[0][7], "0")
// assertEquals(result[0][8], "10")
// result = sql """show column stats `statistics` (lo_revenue)"""
// assertEquals(result.size(), 1)
// assertEquals(result[0][0], "lo_revenue")
// assertEquals(result[0][2], "100.0")
// assertEquals(result[0][3], "100.0")
// assertEquals(result[0][4], "0.0")
// assertEquals(result[0][5], "400.0")
// assertEquals(result[0][6], "4.0")
// assertEquals(result[0][7], "101171")
// assertEquals(result[0][8], "8703450")
// result = sql """show column stats `statistics` (lo_supplycost)"""
// assertEquals(result.size(), 1)
// assertEquals(result[0][0], "lo_supplycost")
// assertEquals(result[0][2], "100.0")
// assertEquals(result[0][3], "100.0")
// assertEquals(result[0][4], "0.0")
// assertEquals(result[0][5], "400.0")
// assertEquals(result[0][6], "4.0")
// assertEquals(result[0][7], "58023")
// assertEquals(result[0][8], "121374")
// result = sql """show column stats `statistics` (lo_tax)"""
// assertEquals(result.size(), 1)
// assertEquals(result[0][0], "lo_tax")
// assertEquals(result[0][2], "100.0")
// assertEquals(result[0][3], "9.0")
// assertEquals(result[0][4], "0.0")
// assertEquals(result[0][5], "400.0")
// assertEquals(result[0][6], "4.0")
// assertEquals(result[0][7], "0")
// assertEquals(result[0][8], "8")
// result = sql """show column stats `statistics` (lo_commitdate)"""
// assertEquals(result.size(), 1)
// assertEquals(result[0][0], "lo_commitdate")
// assertEquals(result[0][2], "100.0")
// assertEquals(result[0][3], "95.0")
// assertEquals(result[0][4], "0.0")
// assertEquals(result[0][5], "400.0")
// assertEquals(result[0][6], "4.0")
// assertEquals(result[0][7], "19920515")
// assertEquals(result[0][8], "19981016")
// result = sql """show column stats `statistics` (lo_shipmode)"""
// assertEquals(result.size(), 1)
// assertEquals(result[0][0], "lo_shipmode")
// assertEquals(result[0][2], "100.0")
// assertEquals(result[0][3], "7.0")
// assertEquals(result[0][4], "0.0")
// assertEquals(result[0][5], "421.0")
// assertEquals(result[0][6], "4.21")
// assertEquals(result[0][7], "'AIR'")
// assertEquals(result[0][8], "'TRUCK'")
// sql """ALTER TABLE statistics MODIFY COLUMN lo_shipmode SET STATS ('row_count'='6001215')"""
// result = sql "show column stats `statistics` (lo_shipmode)"
// assertEquals(result.size(), 1)
// assertEquals(result[0][0], "lo_shipmode")
// assertEquals(result[0][2], "6001215.0")
// sql """drop stats statistics"""
// result = sql """show column stats statistics"""
// assertEquals(result.size(), 0)
// sql """analyze database `statistics` with sync"""
// result = sql """show table stats statistics"""
// assertEquals(result.size(), 1)
// assertEquals(result[0][2], "100")
// result = sql """show table cached stats statistics"""
// assertEquals(result.size(), 1)
// assertEquals(result[0][2], "100")
// sql """drop stats statistics"""
// result = sql """show column cached stats statistics"""
// assertEquals(result.size(), 0)
sql """use multi_catalog"""
sql """analyze table logs1_parquet (log_time) with sync"""
def ctlId
def dbId
def tblId
result = sql """show catalogs"""
for (int i = 0; i < result.size(); i++) {
if (result[i][1] == catalog_name) {
ctlId = result[i][0]
}
}
result = sql """show proc '/catalogs/$ctlId'"""
for (int i = 0; i < result.size(); i++) {
if (result[i][1] == 'multi_catalog') {
dbId = result[i][0]
}
}
result = sql """show proc '/catalogs/$ctlId/$dbId'"""
for (int i = 0; i < result.size(); i++) {
if (result[i][1] == 'logs1_parquet') {
tblId = result[i][0]
}
}
result = sql """select * from internal.__internal_schema.column_statistics where id = '${tblId}--1-log_time'"""
assertEquals(result.size(), 1)
def id = result[0][0]
def catalog_id = result[0][1]
def db_id = result[0][2]
def tbl_id = result[0][3]
def idx_id = result[0][4]
def col_id = result[0][5]
def count = result[0][7]
def ndv = result[0][8]
def null_count = result[0][9]
def data_size_in_bytes = result[0][12]
def update_time = result[0][13]
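// Re-insert the statistics row with a NULL partition id and empty min/max fields
// (column positions assumed from the internal column_statistics schema), so that
// SHOW COLUMN STATS below reports "N/A" for log_time's min and max.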
sql """insert into internal.__internal_schema.column_statistics values ('$id', '$catalog_id', '$db_id', '$tbl_id', '$idx_id', '$col_id', NULL, $count, $ndv, $null_count, '', '', '$data_size_in_bytes', '$update_time')"""
result = sql """show column stats logs1_parquet (log_time)"""
assertEquals(result.size(), 1)
assertEquals(result[0][7], "N/A")
assertEquals(result[0][8], "N/A")
sql """use tpch1_parquet;"""
sql """drop stats region"""
sql """alter table region modify column r_comment set stats ('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='330.0', 'min_value'='ges. thinly even pinto beans ca', 'max_value'='uickly special accounts cajole carefully blithely close requests. carefully final asymptotes haggle furiousl');"""
sql """alter table region modify column r_name set stats ('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='34.0', 'min_value'='AFRICA', 'max_value'='MIDDLE EAST');"""
sql """alter table region modify column r_regionkey set stats ('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='20.0', 'min_value'='0', 'max_value'='4');"""
result = sql """show column stats region(r_regionkey)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "r_regionkey")
assertEquals(result[0][2], "5.0")
assertEquals(result[0][3], "5.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "20.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "0")
assertEquals(result[0][8], "4")
result = sql """show column stats region(r_comment)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "r_comment")
assertEquals(result[0][2], "5.0")
assertEquals(result[0][3], "5.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "330.0")
assertEquals(result[0][6], "66.0")
assertEquals(result[0][7], "\'ges. thinly even pinto beans ca\'")
assertEquals(result[0][8], "\'uickly special accounts cajole carefully blithely close requests. carefully final asymptotes haggle furiousl\'")
result = sql """show column stats region(r_name)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "r_name")
assertEquals(result[0][2], "5.0")
assertEquals(result[0][3], "5.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "34.0")
assertEquals(result[0][6], "6.8")
assertEquals(result[0][7], "\'AFRICA\'")
assertEquals(result[0][8], "\'MIDDLE EAST\'")
sql """drop catalog ${catalog_name}"""
}
}

View File

@ -15,12 +15,21 @@
// specific language governing permissions and limitations
// under the License.
suite("test_hive_statistic_timeout", "p2,external,hive,external_remote,external_remote_hive, nonConcurrent") {
String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
if (enabled != null && enabled.equalsIgnoreCase("true")) {
String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost")
String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort")
String catalog_name = "test_hive_statistic_timeout"
suite("test_hive_statistic_timeout", "p0,external,hive,external_docker,external_docker_hive") {
String enabled = context.config.otherConfigs.get("enableHiveTest")
if (enabled == null || !enabled.equalsIgnoreCase("true")) {
logger.info("disable Hive test.")
return
}
for (String hivePrefix : ["hive2", "hive3"]) {
String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp")
String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort")
String catalog_name = hivePrefix + "_test_hive_statistic_timeout"
// TODO tpch will be supported in future
return
sql """drop catalog if exists ${catalog_name};"""
sql """
create catalog if not exists ${catalog_name} properties (
@ -33,6 +42,7 @@ suite("test_hive_statistic_timeout", "p2,external,hive,external_remote,external_
sql """use ${catalog_name}.tpch_1000_parquet"""
sql """set global analyze_timeout=1"""
try {
test {
sql """analyze table part (p_partkey, p_container, p_type, p_retailprice) with sync with full;"""
@ -41,6 +51,7 @@ suite("test_hive_statistic_timeout", "p2,external,hive,external_remote,external_
} finally {
sql """set global analyze_timeout=43200"""
}
sql """drop catalog ${catalog_name}""";
}
}

View File

@ -15,12 +15,17 @@
// specific language governing permissions and limitations
// under the License.
suite("test_hive_to_array", "p2,external,hive,external_remote,external_remote_hive") {
String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
if (enabled != null && enabled.equalsIgnoreCase("true")) {
String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost")
String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort")
String catalog_name = "test_hive_to_array"
suite("test_hive_to_array", "p0,external,hive,external_docker,external_docker_hive") {
String enabled = context.config.otherConfigs.get("enableHiveTest")
if (enabled == null || !enabled.equalsIgnoreCase("true")) {
logger.info("disable Hive test.")
return
}
for (String hivePrefix : ["hive2", "hive3"]) {
String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp")
String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort")
String catalog_name = hivePrefix + "_test_hive_to_array"
sql """drop catalog if exists ${catalog_name};"""
sql """
create catalog if not exists ${catalog_name} properties (

View File

@ -0,0 +1,47 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
suite("test_text_garbled_file", "p0,external,hive,external_docker,external_docker_hive") {
// test hive garbled files, to prevent BE from hanging
String enabled = context.config.otherConfigs.get("enableHiveTest")
if (enabled != null && enabled.equalsIgnoreCase("true")) {
for (String hivePrefix : ["hive2", "hive3"]) {
String extHiveHmsHost = context.config.otherConfigs.get("externalEnvIp")
String extHiveHmsPort = context.config.otherConfigs.get(hivePrefix + "HmsPort")
String catalog_name = hivePrefix + "_test_text_garbled_file"
sql """drop catalog if exists ${catalog_name};"""
sql """
create catalog if not exists ${catalog_name} properties (
'type'='hms',
'hadoop.username' = 'hadoop',
'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
);
"""
logger.info("catalog " + catalog_name + " created")
sql """switch ${catalog_name};"""
logger.info("switched to catalog " + catalog_name)
order_qt_garbled_file """
select * from ${catalog_name}.multi_catalog.test_csv_format_error;
"""
}
}
}

View File

@ -1,338 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
suite("test_hive_statistic", "p2,external,hive,external_remote,external_remote_hive") {
String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
if (enabled != null && enabled.equalsIgnoreCase("true")) {
String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost")
String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort")
String catalog_name = "test_hive_statistic"
sql """drop catalog if exists ${catalog_name};"""
sql """
create catalog if not exists ${catalog_name} properties (
'type'='hms',
'hadoop.username' = 'hadoop',
'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
);
"""
logger.info("catalog " + catalog_name + " created")
// Test analyze table without init.
sql """analyze table ${catalog_name}.tpch_1000_parquet.region with sync"""
sql """switch ${catalog_name};"""
logger.info("switched to catalog " + catalog_name)
sql """use statistics;"""
sql """analyze table `statistics` with sync"""
def result = sql """show column stats `statistics` (lo_quantity)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "lo_quantity")
assertEquals(result[0][2], "100.0")
assertEquals(result[0][3], "46.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "400.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "1")
assertEquals(result[0][8], "50")
result = sql """show column stats `statistics` (lo_orderkey)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "lo_orderkey")
assertEquals(result[0][2], "100.0")
assertEquals(result[0][3], "26.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "400.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "1")
assertEquals(result[0][8], "98")
result = sql """show column stats `statistics` (lo_linenumber)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "lo_linenumber")
assertEquals(result[0][2], "100.0")
assertEquals(result[0][3], "7.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "400.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "1")
assertEquals(result[0][8], "7")
result = sql """show column stats `statistics` (lo_custkey)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "lo_custkey")
assertEquals(result[0][2], "100.0")
assertEquals(result[0][3], "26.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "400.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "67423")
assertEquals(result[0][8], "2735521")
result = sql """show column stats `statistics` (lo_partkey)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "lo_partkey")
assertEquals(result[0][2], "100.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "400.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "2250")
assertEquals(result[0][8], "989601")
result = sql """show column stats `statistics` (lo_suppkey)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "lo_suppkey")
assertEquals(result[0][2], "100.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "400.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "4167")
assertEquals(result[0][8], "195845")
result = sql """show column stats `statistics` (lo_orderdate)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "lo_orderdate")
assertEquals(result[0][2], "100.0")
assertEquals(result[0][3], "26.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "400.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "19920221")
assertEquals(result[0][8], "19980721")
result = sql """show column stats `statistics` (lo_orderpriority)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "lo_orderpriority")
assertEquals(result[0][2], "100.0")
assertEquals(result[0][3], "5.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "880.0")
assertEquals(result[0][6], "8.8")
assertEquals(result[0][7], "'1-URGENT'")
assertEquals(result[0][8], "'5-LOW'")
result = sql """show column stats `statistics` (lo_shippriority)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "lo_shippriority")
assertEquals(result[0][2], "100.0")
assertEquals(result[0][3], "1.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "400.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "0")
assertEquals(result[0][8], "0")
result = sql """show column stats `statistics` (lo_extendedprice)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "lo_extendedprice")
assertEquals(result[0][2], "100.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "400.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "104300")
assertEquals(result[0][8], "9066094")
result = sql """show column stats `statistics` (lo_ordtotalprice)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "lo_ordtotalprice")
assertEquals(result[0][2], "100.0")
assertEquals(result[0][3], "26.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "400.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "3428256")
assertEquals(result[0][8], "36771805")
result = sql """show column stats `statistics` (lo_discount)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "lo_discount")
assertEquals(result[0][2], "100.0")
assertEquals(result[0][3], "11.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "400.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "0")
assertEquals(result[0][8], "10")
result = sql """show column stats `statistics` (lo_revenue)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "lo_revenue")
assertEquals(result[0][2], "100.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "400.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "101171")
assertEquals(result[0][8], "8703450")
result = sql """show column stats `statistics` (lo_supplycost)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "lo_supplycost")
assertEquals(result[0][2], "100.0")
assertEquals(result[0][3], "100.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "400.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "58023")
assertEquals(result[0][8], "121374")
result = sql """show column stats `statistics` (lo_tax)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "lo_tax")
assertEquals(result[0][2], "100.0")
assertEquals(result[0][3], "9.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "400.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "0")
assertEquals(result[0][8], "8")
result = sql """show column stats `statistics` (lo_commitdate)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "lo_commitdate")
assertEquals(result[0][2], "100.0")
assertEquals(result[0][3], "95.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "400.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "19920515")
assertEquals(result[0][8], "19981016")
result = sql """show column stats `statistics` (lo_shipmode)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "lo_shipmode")
assertEquals(result[0][2], "100.0")
assertEquals(result[0][3], "7.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "421.0")
assertEquals(result[0][6], "4.21")
assertEquals(result[0][7], "'AIR'")
assertEquals(result[0][8], "'TRUCK'")
sql """ALTER TABLE statistics MODIFY COLUMN lo_shipmode SET STATS ('row_count'='6001215')"""
result = sql "show column stats `statistics` (lo_shipmode)"
assertEquals(result.size(), 1)
assertEquals(result[0][0], "lo_shipmode")
assertEquals(result[0][2], "6001215.0")
sql """drop stats statistics"""
result = sql """show column stats statistics"""
assertEquals(result.size(), 0)
sql """analyze database `statistics` with sync"""
result = sql """show table stats statistics"""
assertEquals(result.size(), 1)
assertEquals(result[0][2], "100")
result = sql """show table cached stats statistics"""
assertEquals(result.size(), 1)
assertEquals(result[0][2], "100")
sql """drop stats statistics"""
result = sql """show column cached stats statistics"""
assertEquals(result.size(), 0)
sql """use multi_catalog"""
sql """analyze table logs1_parquet (log_time) with sync"""
def ctlId
def dbId
def tblId
result = sql """show catalogs"""
for (int i = 0; i < result.size(); i++) {
if (result[i][1] == catalog_name) {
ctlId = result[i][0]
}
}
result = sql """show proc '/catalogs/$ctlId'"""
for (int i = 0; i < result.size(); i++) {
if (result[i][1] == 'multi_catalog') {
dbId = result[i][0]
}
}
result = sql """show proc '/catalogs/$ctlId/$dbId'"""
for (int i = 0; i < result.size(); i++) {
if (result[i][1] == 'logs1_parquet') {
tblId = result[i][0]
}
}
result = sql """select * from internal.__internal_schema.column_statistics where id = '${tblId}--1-log_time'"""
assertEquals(result.size(), 1)
def id = result[0][0]
def catalog_id = result[0][1]
def db_id = result[0][2]
def tbl_id = result[0][3]
def idx_id = result[0][4]
def col_id = result[0][5]
def count = result[0][7]
def ndv = result[0][8]
def null_count = result[0][9]
def data_size_in_bytes = result[0][12]
def update_time = result[0][13]
sql """insert into internal.__internal_schema.column_statistics values ('$id', '$catalog_id', '$db_id', '$tbl_id', '$idx_id', '$col_id', NULL, $count, $ndv, $null_count, '', '', '$data_size_in_bytes', '$update_time')"""
result = sql """show column stats logs1_parquet (log_time)"""
assertEquals(result.size(), 1)
assertEquals(result[0][7], "N/A")
assertEquals(result[0][8], "N/A")
sql """use tpch1_parquet;"""
sql """drop stats region"""
sql """alter table region modify column r_comment set stats ('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='330.0', 'min_value'='ges. thinly even pinto beans ca', 'max_value'='uickly special accounts cajole carefully blithely close requests. carefully final asymptotes haggle furiousl');"""
sql """alter table region modify column r_name set stats ('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='34.0', 'min_value'='AFRICA', 'max_value'='MIDDLE EAST');"""
sql """alter table region modify column r_regionkey set stats ('row_count'='5.0', 'ndv'='5.0', 'num_nulls'='0.0', 'data_size'='20.0', 'min_value'='0', 'max_value'='4');"""
result = sql """show column stats region(r_regionkey)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "r_regionkey")
assertEquals(result[0][2], "5.0")
assertEquals(result[0][3], "5.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "20.0")
assertEquals(result[0][6], "4.0")
assertEquals(result[0][7], "0")
assertEquals(result[0][8], "4")
result = sql """show column stats region(r_comment)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "r_comment")
assertEquals(result[0][2], "5.0")
assertEquals(result[0][3], "5.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "330.0")
assertEquals(result[0][6], "66.0")
assertEquals(result[0][7], "\'ges. thinly even pinto beans ca\'")
assertEquals(result[0][8], "\'uickly special accounts cajole carefully blithely close requests. carefully final asymptotes haggle furiousl\'")
result = sql """show column stats region(r_name)"""
assertEquals(result.size(), 1)
assertEquals(result[0][0], "r_name")
assertEquals(result[0][2], "5.0")
assertEquals(result[0][3], "5.0")
assertEquals(result[0][4], "0.0")
assertEquals(result[0][5], "34.0")
assertEquals(result[0][6], "6.8")
assertEquals(result[0][7], "\'AFRICA\'")
assertEquals(result[0][8], "\'MIDDLE EAST\'")
sql """drop catalog ${catalog_name}"""
}
}

View File

@ -1,46 +0,0 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
suite("test_text_garbled_file", "p2,external,hive,external_remote,external_remote_hive") {
//test hive garbled files , prevent be hanged
String enabled = context.config.otherConfigs.get("enableExternalHiveTest")
if (enabled != null && enabled.equalsIgnoreCase("true")) {
String extHiveHmsHost = context.config.otherConfigs.get("extHiveHmsHost")
String extHiveHmsPort = context.config.otherConfigs.get("extHiveHmsPort")
String catalog_name = "test_text_garbled_file"
sql """drop catalog if exists ${catalog_name};"""
sql """
create catalog if not exists ${catalog_name} properties (
'type'='hms',
'hadoop.username' = 'hadoop',
'hive.metastore.uris' = 'thrift://${extHiveHmsHost}:${extHiveHmsPort}'
);
"""
logger.info("catalog " + catalog_name + " created")
sql """switch ${catalog_name};"""
logger.info("switched to catalog " + catalog_name)
order_qt_garbled_file """
select * from ${catalog_name}.multi_catalog.test_csv_format_error;
"""
}
}