[Enhance](external) change hive docker to host network and add hive case (#24401)
1. Change the external Hive docker setup from bridge networking to host networking, so external tests can run against a multi-node Doris cluster.
2. Add more Hive test data in various formats.
3. Add a test case that runs against Hive.
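With host networking the containers bind their services directly on the machine's interfaces, so Doris backends on other nodes can reach HDFS and the metastore without per-port bridge mappings. A quick sanity check after the stack is up (a sketch; the container name assumes the default doris-- prefix):

    # Print the network mode of the namenode container; "host" is expected after this change.
    docker inspect -f '{{.HostConfig.NetworkMode}}' doris--namenode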
@@ -23,9 +23,8 @@
 set -eo pipefail

 ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"

-FS_PORT=8120
-HMS_PORT=9183
-
+FS_PORT=8020
+HMS_PORT=9083
 cp "${ROOT}"/hadoop-hive.env.tpl "${ROOT}"/hadoop-hive.env
 # Need to set hostname of container to same as host machine's.
@@ -35,6 +34,8 @@ HOST_NAME="doris--"
 {
     echo "FS_PORT=${FS_PORT}"
     echo "HMS_PORT=${HMS_PORT}"
-    echo "CORE_CONF_fs_defaultFS=hdfs://doris--namenode:${FS_PORT}"
+    echo "CORE_CONF_fs_defaultFS=hdfs://${externalEnvIp}:${FS_PORT}"
     echo "HOST_NAME=${HOST_NAME}"
+    echo "externalEnvIp=${externalEnvIp}"
+
 } >>"${ROOT}"/hadoop-hive.env
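The generated hadoop-hive.env is plain KEY=value lines, so the resolved endpoints can be inspected by sourcing it in a shell. A minimal sketch, assuming the file was produced by gen_env.sh as above:

    # Source the generated env file and echo the resolved HDFS endpoint.
    set -a
    . ./hadoop-hive.env
    set +a
    echo "fs.defaultFS -> hdfs://${externalEnvIp}:${FS_PORT}"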
@@ -15,12 +15,12 @@
 # limitations under the License.
 #

-HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://doris--hive-metastore-postgresql:5432/metastore
+HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://externalEnvIp:5432/metastore
 HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver
 HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive
 HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive
 HIVE_SITE_CONF_datanucleus_autoCreateSchema=false
-HIVE_SITE_CONF_hive_metastore_uris=thrift://doris--hive-metastore:9083
+HIVE_SITE_CONF_hive_metastore_uris=thrift://externalEnvIp:9083
 HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
 HIVE_SITE_CONF_hive_server2_thrift_bind_host=0.0.0.0
 HIVE_SITE_CONF_hive_server2_thrift_port=10000
@@ -49,4 +49,3 @@ YARN_CONF_yarn_timeline___service_hostname=historyserver
 YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
 YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
 YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031
-
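Since every URI now targets the host IP instead of container DNS names, it helps to confirm the host ports answer before pointing a test at them. A sketch, assuming the default ports used above:

    # Probe the metastore (9083), NameNode RPC (8020), and NameNode web UI (50070) on the host.
    for port in 9083 8020 50070; do
        nc -z "${IP_HOST:-127.0.0.1}" "${port}" && echo "port ${port} reachable"
    done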
@@ -18,10 +18,6 @@

 version: "3.8"

-networks:
-  doris--network:
-    driver: bridge
-
 services:
   doris--namenode:
     image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8
@@ -29,30 +25,24 @@ services:
       - CLUSTER_NAME=test
     env_file:
       - ./hadoop-hive.env
-    hostname: doris--namenode
     container_name: doris--namenode
     expose:
       - "50070"
       - "8020"
       - "9000"
-      - "${FS_PORT}"
-    ports:
-      - "${FS_PORT}:${FS_PORT}"
     healthcheck:
       test: [ "CMD", "curl", "http://localhost:50070/" ]
       interval: 5s
       timeout: 120s
       retries: 120
-    networks:
-      - doris--network
+    network_mode: "host"

   doris--datanode:
     image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8
     env_file:
       - ./hadoop-hive.env
     environment:
-      SERVICE_PRECONDITION: "doris--namenode:50070"
-    hostname: doris--datanode
+      SERVICE_PRECONDITION: "externalEnvIp:50070"
     container_name: doris--datanode
     expose:
       - "50075"
@@ -61,17 +51,15 @@ services:
       interval: 5s
       timeout: 60s
       retries: 120
-    networks:
-      - doris--network
+    network_mode: "host"

   doris--hive-server:
     image: bde2020/hive:2.3.2-postgresql-metastore
     env_file:
       - ./hadoop-hive.env
     environment:
-      HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://doris--hive-metastore-postgresql:5432/metastore"
-      SERVICE_PRECONDITION: "doris--hive-metastore:9083"
-    hostname: doris--hive-server
+      HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://externalEnvIp:5432/metastore"
+      SERVICE_PRECONDITION: "externalEnvIp:9083"
     container_name: doris--hive-server
     expose:
       - "10000"
@@ -83,8 +71,7 @@ services:
       interval: 10s
       timeout: 120s
       retries: 120
-    networks:
-      - doris--network
+    network_mode: "host"

   doris--hive-metastore:
@@ -94,24 +81,19 @@ services:
     command: /bin/bash /mnt/scripts/hive-metastore.sh
     # command: /opt/hive/bin/hive --service metastore
     environment:
-      SERVICE_PRECONDITION: "doris--namenode:50070 doris--datanode:50075 doris--hive-metastore-postgresql:5432"
-    hostname: doris--hive-metastore
+      SERVICE_PRECONDITION: "externalEnvIp:50070 externalEnvIp:50075 externalEnvIp:5432"
     container_name: doris--hive-metastore
     expose:
       - "9083"
-    ports:
-      - "${HMS_PORT}:9083"
     volumes:
       - ./scripts:/mnt/scripts
     depends_on:
       - doris--hive-metastore-postgresql
-    networks:
-      - doris--network
+    network_mode: "host"

   doris--hive-metastore-postgresql:
     image: bde2020/hive-metastore-postgresql:2.3.0
     restart: always
-    hostname: doris--hive-metastore-postgresql
     container_name: doris--hive-metastore-postgresql
     expose:
       - "5432"
@@ -120,5 +102,4 @@ services:
       interval: 5s
       timeout: 60s
       retries: 120
-    networks:
-      - doris--network
+    network_mode: "host"
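Under network_mode: "host" the remaining expose entries are inert; the daemons listen on the host's interfaces directly. One way to confirm once the services are healthy (a sketch using iproute2's ss):

    # List listening TCP sockets for the HDFS/Hive ports on the host.
    ss -ltn | grep -E ':(8020|9083|10000|50070)'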
File diff suppressed because it is too large
@@ -24,13 +24,13 @@ sleep 10s

 # If you are testing locally, it may be better to comment out the tpch1.db section below.
 if [[ ! -d "/mnt/scripts/tpch1.db" ]]; then
-    echo "/mnt/scripts/tpch1.db does not exist"
-    exit 1
-else
-    wget -P /mnt/scripts https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/load/tpch1_parquet/tpch1.db.tar.gz
     cd /mnt/scripts/
+    wget -P /mnt/scripts https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/load/tpch1_parquet/tpch1.db.tar.gz
     tar -zxf tpch1.db.tar.gz
     rm -rf tpch1.db.tar.gz
     cd -
+else
+    echo "/mnt/scripts/tpch1.db exist, continue !"
 fi

 # put data file
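Because the script now downloads tpch1.db only when the directory is missing, the data can be pre-seeded to skip the download on slow networks. A sketch, assuming the tarball is already available locally:

    # Pre-extract the TPC-H data so hive-metastore.sh takes the fast path.
    mkdir -p /mnt/scripts
    tar -zxf tpch1.db.tar.gz -C /mnt/scripts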
Binary file not shown.
@@ -0,0 +1,10 @@
"","test"
"","test"
"","test"
"","test"
"","test"
"","test"
"","test"
"","test"
"","test"
"","test"
File diff suppressed because it is too large
File diff suppressed because it is too large
File diff suppressed because one or more lines are too long
Binary files not shown.
File diff suppressed because one or more lines are too long
@@ -260,6 +260,14 @@ fi

 if [[ "${RUN_HIVE}" -eq 1 ]]; then
     # hive
+    # If the Doris cluster under test is a single node, the default value works; if it consists of multiple nodes, set IP_HOST according to the actual network of your machine.
+    # default value
+    IP_HOST="127.0.0.1"
+    eth0_num=$(ifconfig -a | grep flags= | grep -n ^eth0 | awk -F ':' '{print $1}')
+    IP_HOST=$(ifconfig -a | grep inet | grep -v 127.0.0.1 | grep -v inet6 | awk '{print $2}' | tr -d "addr:" | tail -n +${eth0_num} | head -n 1)
+    if [ "_${IP_HOST}" == "_" ]; then
+        echo "please set IP_HOST according to your actual situation"
+    fi
     # before starting it, you need to download the parquet file package; see "README" in "docker-compose/hive/scripts/"
     cp "${ROOT}"/docker-compose/hive/gen_env.sh.tpl "${ROOT}"/docker-compose/hive/gen_env.sh
     sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/hive/gen_env.sh
@@ -267,12 +275,13 @@ if [[ "${RUN_HIVE}" -eq 1 ]]; then
     cp "${ROOT}"/docker-compose/hive/hadoop-hive.env.tpl.tpl "${ROOT}"/docker-compose/hive/hadoop-hive.env.tpl
     sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/hive/hive-2x.yaml
     sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/hive/hadoop-hive.env.tpl
+    sed -i "s/externalEnvIp/${IP_HOST}/g" "${ROOT}"/docker-compose/hive/hive-2x.yaml
+    sed -i "s/externalEnvIp/${IP_HOST}/g" "${ROOT}"/docker-compose/hive/hadoop-hive.env.tpl
+    sed -i "s/\${externalEnvIp}/${IP_HOST}/g" "${ROOT}"/docker-compose/hive/gen_env.sh
     sudo bash "${ROOT}"/docker-compose/hive/gen_env.sh
     sudo docker compose -f "${ROOT}"/docker-compose/hive/hive-2x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive.env down
+    sudo sed -i '/${CONTAINER_UID}namenode/d' /etc/hosts
     if [[ "${STOP}" -ne 1 ]]; then
         sudo docker compose -f "${ROOT}"/docker-compose/hive/hive-2x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive.env up --build --remove-orphans -d
+        sudo echo "127.0.0.1 ${CONTAINER_UID}namenode" >> /etc/hosts
     fi
 fi
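The ifconfig pipeline above takes the first non-loopback IPv4 address starting at eth0, which can pick the wrong interface on multi-homed hosts or fail where eth0 does not exist. A more robust alternative sketch (not what the script uses) based on the default route:

    # Resolve the primary outbound IPv4 address via the default route.
    IP_HOST=$(ip -4 route get 1.1.1.1 | awk '{ for (i = 1; i < NF; i++) if ($i == "src") { print $(i + 1); exit } }')
    echo "IP_HOST=${IP_HOST}"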
File diff suppressed because one or more lines are too long
@@ -0,0 +1,111 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
//   http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.

suite("test_hive_basic_type", "external_docker,hive,external_docker_hive,p0,external") {
    String enabled = context.config.otherConfigs.get("enableHiveTest")
    if (enabled != null && enabled.equalsIgnoreCase("true")) {
        String catalog_name = "test_hive_basic_type"
        String ex_db_name = "`default`"
        String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
        String hms_port = context.config.otherConfigs.get("hms_port")
        String hdfs_port = context.config.otherConfigs.get("hdfs_port")

        sql """drop catalog if exists ${catalog_name} """

        sql """CREATE CATALOG ${catalog_name} PROPERTIES (
            'type'='hms',
            'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}',
            'hadoop.username' = 'hive'
        );"""

        sql """switch ${catalog_name}"""

        order_qt_2 """select * from ${catalog_name}.${ex_db_name}.parquet_partition_table order by l_orderkey limit 1;"""
        order_qt_3 """select * from ${catalog_name}.${ex_db_name}.parquet_delta_binary_packed order by int_value limit 1;"""
        order_qt_4 """select * from ${catalog_name}.${ex_db_name}.parquet_alltypes_tiny_pages order by id desc limit 5;"""
        order_qt_5 """select * from ${catalog_name}.${ex_db_name}.orc_all_types_partition order by bigint_col desc limit 3;"""
        order_qt_6 """select * from ${catalog_name}.${ex_db_name}.csv_partition_table order by k1 limit 1;"""
        order_qt_9 """select * from ${catalog_name}.${ex_db_name}.csv_all_types limit 1;"""
        order_qt_10 """select * from ${catalog_name}.${ex_db_name}.text_all_types limit 1;"""

        // parquet bloom filter
        order_qt_11 """select * from ${catalog_name}.${ex_db_name}.bloom_parquet_table limit 1;"""

        // orc bloom filter
        order_qt_12 """select * from ${catalog_name}.${ex_db_name}.bloom_orc_table limit 1;"""

        // orc predicate
        order_qt_13 """select * from ${catalog_name}.${ex_db_name}.orc_predicate_table where column_primitive_bigint = 6 limit 10;"""
        order_qt_14 """select count(1) from ${catalog_name}.${ex_db_name}.orc_predicate_table where column_primitive_bigint = 6;"""
        order_qt_15 """select * from ${catalog_name}.${ex_db_name}.orc_predicate_table where column_primitive_bigint = 1 limit 10;"""
        order_qt_16 """select count(1) from ${catalog_name}.${ex_db_name}.orc_predicate_table where column_primitive_bigint = 1;"""
        order_qt_17 """select * from ${catalog_name}.${ex_db_name}.orc_predicate_table where column_primitive_integer = 3 and column_primitive_bigint = 6 limit 10;"""

        // parquet predicate
        order_qt_18 """select * from ${catalog_name}.${ex_db_name}.parquet_predicate_table where column_primitive_bigint = 1 limit 10;"""
        order_qt_19 """select count(1) from ${catalog_name}.${ex_db_name}.parquet_predicate_table where column_primitive_bigint = 1;"""
        order_qt_20 """select * from ${catalog_name}.${ex_db_name}.parquet_predicate_table where column_primitive_integer = 3 limit 10;"""
        order_qt_21 """select count(1) from ${catalog_name}.${ex_db_name}.parquet_predicate_table where column_primitive_integer = 3;"""
        order_qt_22 """select * from ${catalog_name}.${ex_db_name}.parquet_predicate_table where column_primitive_integer = 1 limit 10;"""
        order_qt_23 """select count(1) from ${catalog_name}.${ex_db_name}.parquet_predicate_table where column_primitive_integer = 1;"""

        // only-null parquet file test
        order_qt_24 """select * from ${catalog_name}.${ex_db_name}.only_null;"""
        order_qt_25 """select * from ${catalog_name}.${ex_db_name}.only_null where x is null;"""
        order_qt_26 """select * from ${catalog_name}.${ex_db_name}.only_null where x is not null;"""

        // parquet timestamp millis test
        order_qt_27 """desc ${catalog_name}.${ex_db_name}.parquet_timestamp_millis;"""
        order_qt_28 """select * from ${catalog_name}.${ex_db_name}.parquet_timestamp_millis order by test;"""

        // parquet timestamp micros test
        order_qt_29 """desc ${catalog_name}.${ex_db_name}.parquet_timestamp_micros;"""
        order_qt_30 """select * from ${catalog_name}.${ex_db_name}.parquet_timestamp_micros order by test;"""

        // parquet timestamp nanos test
        order_qt_31 """desc ${catalog_name}.${ex_db_name}.parquet_timestamp_nanos;"""
        order_qt_32 """select * from ${catalog_name}.${ex_db_name}.parquet_timestamp_nanos order by test;"""

        order_qt_7 """select * from ${catalog_name}.${ex_db_name}.orc_all_types_t limit 1;"""

        // parquet predicate
        order_qt_38 """select * from ${catalog_name}.${ex_db_name}.parquet_predicate_table where column_primitive_bigint = 6 limit 10;"""
        order_qt_39 """select count(1) from ${catalog_name}.${ex_db_name}.parquet_predicate_table where column_primitive_bigint = 6;"""
        order_qt_40 """select * from ${catalog_name}.${ex_db_name}.parquet_predicate_table where column_primitive_integer = 3 and column_primitive_bigint = 6 limit 10;"""

        order_qt_33 """select * from ${catalog_name}.${ex_db_name}.parquet_all_types limit 1;"""

        order_qt_36 """select * from ${catalog_name}.${ex_db_name}.parquet_gzip_all_types limit 1;"""

        // Hive tables stored as JSON do not reliably parse column separators, which can cause errors
        //order_qt_8 """select * from ${catalog_name}.${ex_db_name}.json_all_types limit 1;"""

        // At present, Doris only supports the orc, parquet, and textfile formats; the formats below are not supported

        // hive tables in avro format are not supported
        //order_qt_34 """select * from ${catalog_name}.${ex_db_name}.avro_all_types limit 1;"""

        // hive tables in SEQUENCEFILE format are not supported
        //order_qt_35 """select * from ${catalog_name}.${ex_db_name}.sequence_all_types limit 1;"""

        // hive tables in rcbinary format are not supported
        //order_qt_37 """select * from ${catalog_name}.${ex_db_name}.rcbinary_all_types limit 1;"""

        //sql """drop catalog if exists ${catalog_name} """
    }
}
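To run just this suite, the regression driver can be invoked with the suite name; a sketch assuming the standard run-regression-test.sh entry point at the repo root and that enableHiveTest, externalEnvIp, and hms_port are set in regression-conf.groovy:

    # Execute only the new Hive case against the external docker environment.
    sh run-regression-test.sh --run test_hive_basic_type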