[Enhance](external) change hive docker to host network and add hive case (#24401)

1. Change the external Hive Docker network from bridge mode to host mode, so that external tests can run against a multi-node Doris cluster (a minimal sketch of the difference follows this list)
2. Add more Hive test data in a variety of formats
3. Add a Hive test case
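
For context, a minimal sketch of what the bridge-to-host switch means in practice (the image name below is a placeholder, not something this commit uses): on the default bridge network a container is reachable from other machines only through explicitly published ports, and container DNS names resolve only inside the Docker network; with host networking the container binds the host's interfaces directly, so a remote Doris node can reach the services at the host IP.

    # bridge mode: reachable from other hosts only via published ports
    docker run -d --name hms-bridge -p 9083:9083 example/hive-metastore

    # host mode: shares the host's network stack; the metastore listens on
    # the host IP directly, so no -p mapping is needed (ports are ignored)
    docker run -d --name hms-host --network host example/hive-metastore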
zhangguoqiang
2023-09-15 17:46:24 +08:00
committed by GitHub
parent 4816ca6679
commit dc0c39f1d8
53 changed files with 3399 additions and 40 deletions


@@ -23,9 +23,8 @@
set -eo pipefail
ROOT="$(cd "$(dirname "${BASH_SOURCE[0]}")" &>/dev/null && pwd)"
-FS_PORT=8120
-HMS_PORT=9183
+FS_PORT=8020
+HMS_PORT=9083
cp "${ROOT}"/hadoop-hive.env.tpl "${ROOT}"/hadoop-hive.env
# Need to set the container's hostname to be the same as the host machine's.
@@ -35,6 +34,8 @@ HOST_NAME="doris--"
{
    echo "FS_PORT=${FS_PORT}"
    echo "HMS_PORT=${HMS_PORT}"
-    echo "CORE_CONF_fs_defaultFS=hdfs://doris--namenode:${FS_PORT}"
+    echo "CORE_CONF_fs_defaultFS=hdfs://${externalEnvIp}:${FS_PORT}"
    echo "HOST_NAME=${HOST_NAME}"
+    echo "externalEnvIp=${externalEnvIp}"
} >>"${ROOT}"/hadoop-hive.env
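
Assuming the host's externalEnvIp resolves to, say, 172.20.32.136 (a made-up address for illustration), the block above would append something like the following to hadoop-hive.env (the doris-- prefix is replaced with the container UID by the run script before this template is used):

    FS_PORT=8020
    HMS_PORT=9083
    CORE_CONF_fs_defaultFS=hdfs://172.20.32.136:8020
    HOST_NAME=doris--
    externalEnvIp=172.20.32.136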


@@ -15,12 +15,12 @@
# limitations under the License.
#
-HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://doris--hive-metastore-postgresql:5432/metastore
+HIVE_SITE_CONF_javax_jdo_option_ConnectionURL=jdbc:postgresql://externalEnvIp:5432/metastore
HIVE_SITE_CONF_javax_jdo_option_ConnectionDriverName=org.postgresql.Driver
HIVE_SITE_CONF_javax_jdo_option_ConnectionUserName=hive
HIVE_SITE_CONF_javax_jdo_option_ConnectionPassword=hive
HIVE_SITE_CONF_datanucleus_autoCreateSchema=false
-HIVE_SITE_CONF_hive_metastore_uris=thrift://doris--hive-metastore:9083
+HIVE_SITE_CONF_hive_metastore_uris=thrift://externalEnvIp:9083
HDFS_CONF_dfs_namenode_datanode_registration_ip___hostname___check=false
HIVE_SITE_CONF_hive_server2_thrift_bind_host=0.0.0.0
HIVE_SITE_CONF_hive_server2_thrift_port=10000
@@ -49,4 +49,3 @@ YARN_CONF_yarn_timeline___service_hostname=historyserver
YARN_CONF_yarn_resourcemanager_address=resourcemanager:8032
YARN_CONF_yarn_resourcemanager_scheduler_address=resourcemanager:8030
YARN_CONF_yarn_resourcemanager_resource__tracker_address=resourcemanager:8031
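
The HIVE_SITE_CONF_*/HDFS_CONF_*/YARN_CONF_* variables above are rewritten into XML configuration properties by the entrypoints of the bde2020 images. As I read those entrypoints (an assumption, not code from this commit), a triple underscore becomes a dash, a double underscore a literal underscore, and a single underscore a dot; a small bash sketch of that mapping:

    # convert an env-style key suffix to a Hadoop/Hive property name
    to_property() {
        echo "$1" | sed -e 's/___/@DASH@/g' -e 's/__/@US@/g' \
                        -e 's/_/./g' -e 's/@DASH@/-/g' -e 's/@US@/_/g'
    }
    to_property "dfs_namenode_datanode_registration_ip___hostname___check"
    # -> dfs.namenode.datanode.registration.ip-hostname-check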


@@ -18,10 +18,6 @@
version: "3.8"
-networks:
-  doris--network:
-    driver: bridge
services:
  doris--namenode:
    image: bde2020/hadoop-namenode:2.0.0-hadoop2.7.4-java8
@@ -29,30 +25,24 @@ services:
      - CLUSTER_NAME=test
    env_file:
      - ./hadoop-hive.env
-    hostname: doris--namenode
    container_name: doris--namenode
    expose:
      - "50070"
      - "8020"
      - "9000"
+      - "${FS_PORT}"
-    ports:
-      - "${FS_PORT}:${FS_PORT}"
    healthcheck:
      test: [ "CMD", "curl", "http://localhost:50070/" ]
      interval: 5s
      timeout: 120s
      retries: 120
-    networks:
-      - doris--network
+    network_mode: "host"
  doris--datanode:
    image: bde2020/hadoop-datanode:2.0.0-hadoop2.7.4-java8
    env_file:
      - ./hadoop-hive.env
    environment:
-      SERVICE_PRECONDITION: "doris--namenode:50070"
-    hostname: doris--datanode
+      SERVICE_PRECONDITION: "externalEnvIp:50070"
    container_name: doris--datanode
    expose:
      - "50075"
@@ -61,17 +51,15 @@ services:
      interval: 5s
      timeout: 60s
      retries: 120
-    networks:
-      - doris--network
+    network_mode: "host"
  doris--hive-server:
    image: bde2020/hive:2.3.2-postgresql-metastore
    env_file:
      - ./hadoop-hive.env
    environment:
-      HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://doris--hive-metastore-postgresql:5432/metastore"
-      SERVICE_PRECONDITION: "doris--hive-metastore:9083"
-    hostname: doris--hive-server
+      HIVE_CORE_CONF_javax_jdo_option_ConnectionURL: "jdbc:postgresql://externalEnvIp:5432/metastore"
+      SERVICE_PRECONDITION: "externalEnvIp:9083"
    container_name: doris--hive-server
    expose:
      - "10000"
@@ -83,8 +71,7 @@ services:
      interval: 10s
      timeout: 120s
      retries: 120
-    networks:
-      - doris--network
+    network_mode: "host"
  doris--hive-metastore:
@@ -94,24 +81,19 @@ services:
    command: /bin/bash /mnt/scripts/hive-metastore.sh
    # command: /opt/hive/bin/hive --service metastore
    environment:
-      SERVICE_PRECONDITION: "doris--namenode:50070 doris--datanode:50075 doris--hive-metastore-postgresql:5432"
-    hostname: doris--hive-metastore
+      SERVICE_PRECONDITION: "externalEnvIp:50070 externalEnvIp:50075 externalEnvIp:5432"
    container_name: doris--hive-metastore
    expose:
      - "9083"
-    ports:
-      - "${HMS_PORT}:9083"
    volumes:
      - ./scripts:/mnt/scripts
    depends_on:
      - doris--hive-metastore-postgresql
-    networks:
-      - doris--network
+    network_mode: "host"
  doris--hive-metastore-postgresql:
    image: bde2020/hive-metastore-postgresql:2.3.0
    restart: always
-    hostname: doris--hive-metastore-postgresql
    container_name: doris--hive-metastore-postgresql
    expose:
      - "5432"
@@ -120,5 +102,4 @@ services:
      interval: 5s
      timeout: 60s
      retries: 120
-    networks:
-      - doris--network
+    network_mode: "host"


@@ -24,13 +24,13 @@ sleep 10s
# if you are testing locally, it is better to use # to comment out the tpch1.db section
if [[ ! -d "/mnt/scripts/tpch1.db" ]]; then
    echo "/mnt/scripts/tpch1.db does not exist"
-    exit 1
-else
-    wget -P /mnt/scripts https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/load/tpch1_parquet/tpch1.db.tar.gz
+    cd /mnt/scripts/
+    wget -P /mnt/scripts https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/load/tpch1_parquet/tpch1.db.tar.gz
    tar -zxf tpch1.db.tar.gz
    rm -rf tpch1.db.tar.gz
+    cd -
+else
+    echo "/mnt/scripts/tpch1.db exists, continue!"
fi
# put data file
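
A slightly more defensive variant of the guard above (a sketch using the same URL, not what the commit ships) would resume interrupted downloads and only delete the archive once extraction succeeds:

    if [[ ! -d "/mnt/scripts/tpch1.db" ]]; then
        cd /mnt/scripts/
        # -c resumes a partial download instead of restarting it
        wget -c https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/load/tpch1_parquet/tpch1.db.tar.gz
        tar -zxf tpch1.db.tar.gz && rm -f tpch1.db.tar.gz
        cd -
    fi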


@@ -0,0 +1,10 @@
"","test"
"","test"
"","test"
"","test"
"","test"
"","test"
"","test"
"","test"
"","test"
"","test"

File diff suppressed because one or more lines are too long

File diff suppressed because one or more lines are too long


@@ -260,6 +260,14 @@ fi
if [[ "${RUN_HIVE}" -eq 1 ]]; then
    # hive
+    # If the Doris cluster under test is single-node, the default value works; if it is multi-node, set IP_HOST according to the actual network of your machine
+    # default value
+    IP_HOST="127.0.0.1"
+    eth0_num=$(ifconfig -a|grep flags=|grep -n ^eth0|awk -F ':' '{print $1}')
+    IP_HOST=$(ifconfig -a|grep inet|grep -v 127.0.0.1|grep -v inet6|awk '{print $2}'|tr -d "addr:"|tail -n +${eth0_num}|head -n 1)
+    if [ "_${IP_HOST}" == "_" ]; then
+        echo "please set IP_HOST according to your actual situation"
+    fi
    # before starting it, you need to download the parquet file package; see the "README" in "docker-compose/hive/scripts/"
    cp "${ROOT}"/docker-compose/hive/gen_env.sh.tpl "${ROOT}"/docker-compose/hive/gen_env.sh
    sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/hive/gen_env.sh
@@ -267,12 +275,13 @@ if [[ "${RUN_HIVE}" -eq 1 ]]; then
    cp "${ROOT}"/docker-compose/hive/hadoop-hive.env.tpl.tpl "${ROOT}"/docker-compose/hive/hadoop-hive.env.tpl
    sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/hive/hive-2x.yaml
    sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/hive/hadoop-hive.env.tpl
+    sed -i "s/externalEnvIp/${IP_HOST}/g" "${ROOT}"/docker-compose/hive/hive-2x.yaml
+    sed -i "s/externalEnvIp/${IP_HOST}/g" "${ROOT}"/docker-compose/hive/hadoop-hive.env.tpl
+    sed -i "s/\${externalEnvIp}/${IP_HOST}/g" "${ROOT}"/docker-compose/hive/gen_env.sh
    sudo bash "${ROOT}"/docker-compose/hive/gen_env.sh
    sudo docker compose -f "${ROOT}"/docker-compose/hive/hive-2x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive.env down
-    sudo sed -i '/${CONTAINER_UID}namenode/d' /etc/hosts
    if [[ "${STOP}" -ne 1 ]]; then
        sudo docker compose -f "${ROOT}"/docker-compose/hive/hive-2x.yaml --env-file "${ROOT}"/docker-compose/hive/hadoop-hive.env up --build --remove-orphans -d
-        sudo echo "127.0.0.1 ${CONTAINER_UID}namenode" >> /etc/hosts
    fi
fi
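
Two side notes on the block above. The ifconfig pipeline assumes an eth0-style interface name and the legacy net-tools output format; a more portable alternative (an assumption on my part, not what the commit uses) is to ask the kernel which source address the default route would pick:

    # print the source IP the kernel would use to reach an external address
    IP_HOST=$(ip -4 route get 8.8.8.8 2>/dev/null | awk '{for (i = 1; i < NF; i++) if ($i == "src") print $(i + 1)}')
    IP_HOST=${IP_HOST:-127.0.0.1}

Also, docker compose up -d returns before the services are healthy; a quick hypothetical check that the host-networked containers are listening, using the ports from the compose file:

    for port in 50070 50075 9083 5432 10000; do
        nc -z "${IP_HOST}" "${port}" && echo "port ${port} is up"
    done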

File diff suppressed because one or more lines are too long


@@ -0,0 +1,111 @@
// Licensed to the Apache Software Foundation (ASF) under one
// or more contributor license agreements. See the NOTICE file
// distributed with this work for additional information
// regarding copyright ownership. The ASF licenses this file
// to you under the Apache License, Version 2.0 (the
// "License"); you may not use this file except in compliance
// with the License. You may obtain a copy of the License at
//
// http://www.apache.org/licenses/LICENSE-2.0
//
// Unless required by applicable law or agreed to in writing,
// software distributed under the License is distributed on an
// "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
// KIND, either express or implied. See the License for the
// specific language governing permissions and limitations
// under the License.
suite("test_hive_basic_type", "external_docker,hive,external_docker_hive,p0,external") {
String enabled = context.config.otherConfigs.get("enableHiveTest")
if (enabled != null && enabled.equalsIgnoreCase("true")) {
String catalog_name = "test_hive_basic_type"
String ex_db_name = "`default`"
String externalEnvIp = context.config.otherConfigs.get("externalEnvIp")
String hms_port = context.config.otherConfigs.get("hms_port")
String hdfs_port = context.config.otherConfigs.get("hdfs_port")
sql """drop catalog if exists ${catalog_name} """
sql """CREATE CATALOG ${catalog_name} PROPERTIES (
'type'='hms',
'hive.metastore.uris' = 'thrift://${externalEnvIp}:${hms_port}',
'hadoop.username' = 'hive'
);"""
sql """switch ${catalog_name}"""
order_qt_2 """select * from ${catalog_name}.${ex_db_name}.parquet_partition_table order by l_orderkey limit 1;"""
order_qt_3 """select * from ${catalog_name}.${ex_db_name}.parquet_delta_binary_packed order by int_value limit 1;"""
order_qt_4 """select * from ${catalog_name}.${ex_db_name}.parquet_alltypes_tiny_pages order by id desc limit 5;"""
order_qt_5 """select * from ${catalog_name}.${ex_db_name}.orc_all_types_partition order by bigint_col desc limit 3;"""
order_qt_6 """select * from ${catalog_name}.${ex_db_name}.csv_partition_table order by k1 limit 1;"""
order_qt_9 """select * from ${catalog_name}.${ex_db_name}.csv_all_types limit 1;"""
order_qt_10 """select * from ${catalog_name}.${ex_db_name}.text_all_types limit 1;"""
// parquet bloom
order_qt_11 """select * from ${catalog_name}.${ex_db_name}.bloom_parquet_table limit 1;"""
// orc bloom
order_qt_12 """select * from ${catalog_name}.${ex_db_name}.bloom_orc_table limit 1;"""
// orc predicate
order_qt_13 """select * from ${catalog_name}.${ex_db_name}.orc_predicate_table where column_primitive_bigint = 6 limit 10;"""
order_qt_14 """select count(1) from ${catalog_name}.${ex_db_name}.orc_predicate_table where column_primitive_bigint = 6;"""
order_qt_15 """select * from ${catalog_name}.${ex_db_name}.orc_predicate_table where column_primitive_bigint = 1 limit 10;"""
order_qt_16 """select count(1) from ${catalog_name}.${ex_db_name}.orc_predicate_table where column_primitive_bigint = 1;"""
order_qt_17 """select * from ${catalog_name}.${ex_db_name}.orc_predicate_table where column_primitive_integer = 3 and column_primitive_bigint = 6 limit 10;"""
// parquet predicate
order_qt_18 """select * from ${catalog_name}.${ex_db_name}.parquet_predicate_table where column_primitive_bigint = 1 limit 10;"""
order_qt_19 """select count(1) from ${catalog_name}.${ex_db_name}.parquet_predicate_table where column_primitive_bigint = 1;"""
order_qt_20 """select * from ${catalog_name}.${ex_db_name}.parquet_predicate_table where column_primitive_integer = 3 limit 10;"""
order_qt_21 """select count(1) from ${catalog_name}.${ex_db_name}.parquet_predicate_table where column_primitive_integer = 3;"""
order_qt_22 """select * from ${catalog_name}.${ex_db_name}.parquet_predicate_table where column_primitive_integer = 1 limit 10;"""
order_qt_23 """select count(1) from ${catalog_name}.${ex_db_name}.parquet_predicate_table where column_primitive_integer = 1;"""
// only null parquet file test
order_qt_24 """select * from ${catalog_name}.${ex_db_name}.only_null;"""
order_qt_25 """select * from ${catalog_name}.${ex_db_name}.only_null where x is null;"""
order_qt_26 """select * from ${catalog_name}.${ex_db_name}.only_null where x is not null;"""
// parquet timestamp millis test
order_qt_27 """desc ${catalog_name}.${ex_db_name}.parquet_timestamp_millis;"""
order_qt_28 """select * from ${catalog_name}.${ex_db_name}.parquet_timestamp_millis order by test;"""
// parquet timestamp micros test
order_qt_29 """desc ${catalog_name}.${ex_db_name}.parquet_timestamp_micros;"""
order_qt_30 """select * from ${catalog_name}.${ex_db_name}.parquet_timestamp_micros order by test;"""
// parquet timestamp nanos test
order_qt_31 """desc ${catalog_name}.${ex_db_name}.parquet_timestamp_nanos;"""
order_qt_32 """select * from ${catalog_name}.${ex_db_name}.parquet_timestamp_nanos order by test;"""
order_qt_7 """select * from ${catalog_name}.${ex_db_name}.orc_all_types_t limit 1;"""
// parquet predicate
order_qt_38 """select * from ${catalog_name}.${ex_db_name}.parquet_predicate_table where column_primitive_bigint = 6 limit 10;"""
order_qt_39 """select count(1) from ${catalog_name}.${ex_db_name}.parquet_predicate_table where column_primitive_bigint = 6;"""
order_qt_40 """select * from ${catalog_name}.${ex_db_name}.parquet_predicate_table where column_primitive_integer = 3 and column_primitive_bigint = 6 limit 10;"""
order_qt_33 """select * from ${catalog_name}.${ex_db_name}.parquet_all_types limit 1;"""
order_qt_36 """select * from ${catalog_name}.${ex_db_name}.parquet_gzip_all_types limit 1;"""
// hive tables using JSON SerDe classes do not necessarily support column separation, which can lead to recognition errors
//order_qt_8 """select * from ${catalog_name}.${ex_db_name}.json_all_types limit 1;"""
// At present, Doris only supports the orc, parquet, and textfile formats; the formats below are not supported
// hive tables in avro format are not supported
//order_qt_34 """select * from ${catalog_name}.${ex_db_name}.avro_all_types limit 1;"""
// hive tables in SEQUENCEFILE format are not supported
//order_qt_35 """select * from ${catalog_name}.${ex_db_name}.sequence_all_types limit 1;"""
// hive tables in rcbinary format are not supported
//order_qt_37 """select * from ${catalog_name}.${ex_db_name}.rcbinary_all_types limit 1;"""
//sql """drop catalog if exists ${catalog_name} """
}
}
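
For this suite to run, the regression framework must supply the keys the test reads above (enableHiveTest, externalEnvIp, hms_port, hdfs_port) via context.config.otherConfigs. A hypothetical way to wire that up, assuming the usual regression-test/conf/regression-conf.groovy location and that custom keys placed there end up in otherConfigs:

    # values are placeholders; point them at the docker env started above
    {
        echo 'enableHiveTest = true'
        echo 'externalEnvIp = "172.20.32.136"'
        echo 'hms_port = 9083'
        echo 'hdfs_port = 8020'
    } >> regression-test/conf/regression-conf.groovy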