[fix](hive docker)Reserve host port for hive2 namenode and datanode (#47262) (#47354)

Problem Summary:

The [External hive CI](http://43.132.222.7:8111/buildConfiguration/Doris_External_Regression/612304?buildTab=log&linesState=3650&logView=flowAware)
failed because of a `namenode` error (port 50070 already in use). Docker
logs:
```txt
2025-01-21T04:22:37.955682469Z java.net.BindException: Port in use: 0.0.0.0:50070
2025-01-21T04:22:37.955686106Z 	at org.apache.hadoop.http.HttpServer2.openListeners(HttpServer2.java:940)
2025-01-21T04:22:37.955689402Z 	at org.apache.hadoop.http.HttpServer2.start(HttpServer2.java:876)
2025-01-21T04:22:37.955692708Z 	at org.apache.hadoop.hdfs.server.namenode.NameNodeHttpServer.start(NameNodeHttpServer.java:142)
2025-01-21T04:22:37.955697828Z 	at org.apache.hadoop.hdfs.server.namenode.NameNode.startHttpServer(NameNode.java:760)
2025-01-21T04:22:37.955701444Z 	at org.apache.hadoop.hdfs.server.namenode.NameNode.initialize(NameNode.java:639)
2025-01-21T04:22:37.955704831Z 	at org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:819)
2025-01-21T04:22:37.955708237Z 	at org.apache.hadoop.hdfs.server.namenode.NameNode.<init>(NameNode.java:803)
2025-01-21T04:22:37.955711674Z 	at org.apache.hadoop.hdfs.server.namenode.NameNode.createNameNode(NameNode.java:1500)
2025-01-21T04:22:37.955715090Z 	at org.apache.hadoop.hdfs.server.namenode.NameNode.main(NameNode.java:1566)
2025-01-21T04:22:37.955718446Z Caused by: java.net.BindException: Address already in use
2025-01-21T04:22:37.955722013Z 	at sun.nio.ch.Net.bind0(Native Method)
2025-01-21T04:22:37.955725460Z 	at sun.nio.ch.Net.bind(Net.java:433)
2025-01-21T04:22:37.955729227Z 	at sun.nio.ch.Net.bind(Net.java:425)
2025-01-21T04:22:37.955733074Z 	at sun.nio.ch.ServerSocketChannelImpl.bind(ServerSocketChannelImpl.java:223)
2025-01-21T04:22:37.955736600Z 	at sun.nio.ch.ServerSocketAdaptor.bind(ServerSocketAdaptor.java:74)
2025-01-21T04:22:37.955740197Z 	at org.mortbay.jetty.nio.SelectChannelConnector.open(SelectChannelConnector.java:216)
2025-01-21T04:22:37.955743884Z 	at org.apache.hadoop.http.HttpServer2.openListeners(HttpServer2.java:934)
2025-01-21T04:22:37.955747391Z 	... 8 more
2025-01-21T04:22:37.961686454Z 25/01/21 04:22:37 INFO util.ExitUtil: Exiting with status 1
```

The best choice would be to keep the services off server ports inside the
`/proc/sys/net/ipv4/ip_local_port_range` range (32768-60999). But since the
namenode [hardcodes the exposed port `50070` in its docker
image](https://hub.docker.com/layers/bde2020/hadoop-datanode/2.0.0-hadoop2.7.4-java8/images/sha256-5623fca5e36d890983cdc6cfd29744d1d65476528117975b3af6a80d99b3c62f),
we add the port to `net.ipv4.ip_local_reserved_ports` and introduce a
new flag `--reserve-ports` to control it (default false, because not
everyone wants to modify the system reserved ports).

Change-Id: I03a81e9931cb555695199436b6f0517cccf83588
This commit is contained in:
Thearas
2025-01-24 17:12:03 +09:00
committed by GitHub
parent 3aad9e5f67
commit baaf026e82
4 changed files with 37 additions and 8 deletions

View File

@ -63,8 +63,10 @@ services:
expose:
- "${HS_PORT}"
depends_on:
- datanode
- namenode
datanode:
condition: service_healthy
namenode:
condition: service_healthy
healthcheck:
test: beeline -u "jdbc:hive2://127.0.0.1:${HS_PORT}/default" -n health_check -e "show databases;"
interval: 10s
@ -86,7 +88,8 @@ services:
volumes:
- ./scripts:/mnt/scripts
depends_on:
- hive-metastore-postgresql
hive-metastore-postgresql:
condition: service_healthy
healthcheck:
test: ["CMD", "sh", "-c", "/mnt/scripts/healthy_check.sh"]
interval: 20s

View File

@ -20,7 +20,7 @@
# Do not use "_" or other special characters, only numbers and letters.
# NOTICE: change this uid will modify hive-*.yaml
export FS_PORT=8020 # should be same as hive3HmsPort in regression-conf.groovy
export FS_PORT=8020 # should be same as hive2HmsPort in regression-conf.groovy
export HMS_PORT=9083 # should be same as hive2HmsPort in regression-conf.groovy
export HS_PORT=10000 # should be same as hive2ServerPort in regression-conf.groovy
export PG_PORT=5432 # should be same as hive2PgPort in regression-conf.groovy

View File

@ -63,8 +63,10 @@ services:
expose:
- "${HS_PORT}"
depends_on:
- datanode
- namenode
datanode:
condition: service_healthy
namenode:
condition: service_healthy
healthcheck:
test: beeline -u "jdbc:hive2://127.0.0.1:${HS_PORT}/default" -n health_check -e "show databases;"
interval: 10s
@ -86,7 +88,8 @@ services:
volumes:
- ./scripts:/mnt/scripts
depends_on:
- hive-metastore-postgresql
hive-metastore-postgresql:
condition: service_healthy
healthcheck:
test: ["CMD", "sh", "-c", "/mnt/scripts/healthy_check.sh"]
interval: 20s

View File

@ -33,8 +33,9 @@ Usage: $0 <options>
[no option] start all components
--help,-h show this usage
-c mysql start MySQL
-c mysql,hive3 start MySQL and Hive3
-c mysql,hive3 start MySQL and Hive3
--stop stop the specified components
--reserve-ports reserve host ports by setting 'net.ipv4.ip_local_reserved_ports' to avoid port already bind error
All valid components:
mysql,pg,oracle,sqlserver,clickhouse,es,hive2,hive3,iceberg,hudi,trino,kafka,mariadb,db2,kerberos,oceanbase
@ -47,6 +48,7 @@ if ! OPTS="$(getopt \
-o '' \
-l 'help' \
-l 'stop' \
-l 'reserve-ports' \
-o 'hc:' \
-- "$@")"; then
usage
@ -57,6 +59,7 @@ eval set -- "${OPTS}"
COMPONENTS=""
HELP=0
STOP=0
NEED_RESERVE_PORTS=0
if [[ "$#" == 1 ]]; then
# default
@ -80,6 +83,10 @@ else
COMPONENTS=$2
shift 2
;;
--reserve-ports)
NEED_RESERVE_PORTS=1
shift
;;
--)
shift
break
@ -138,6 +145,8 @@ RUN_DB2=0
RUN_KERBEROS=0
RUN_OCENABASE=0
RESERVED_PORTS="65535"
for element in "${COMPONENTS_ARR[@]}"; do
if [[ "${element}"x == "mysql"x ]]; then
RUN_MYSQL=1
@ -153,6 +162,7 @@ for element in "${COMPONENTS_ARR[@]}"; do
RUN_ES=1
elif [[ "${element}"x == "hive2"x ]]; then
RUN_HIVE2=1
RESERVED_PORTS="${RESERVED_PORTS},50070,50075" # namenode and datanode ports
elif [[ "${element}"x == "hive3"x ]]; then
RUN_HIVE3=1
elif [[ "${element}"x == "kafka"x ]]; then
@ -179,6 +189,17 @@ for element in "${COMPONENTS_ARR[@]}"; do
fi
done
# Reserve the host ports listed in RESERVED_PORTS so the kernel does not hand
# them out as ephemeral ports before the docker services bind to them.
#
# Globals:
#   NEED_RESERVE_PORTS (read) - 0 disables the function; set by --reserve-ports
#   RESERVED_PORTS     (read) - comma separated port list to reserve
# Outputs:
#   Prints the requested ports, followed by whatever `sysctl -w` prints.
# Returns:
#   0 when nothing has to be done, otherwise the status of `sudo sysctl -w`.
reserve_ports() {
    if [[ "${NEED_RESERVE_PORTS}" -eq 0 ]]; then
        return
    fi
    if [[ "${RESERVED_PORTS}"x == ""x ]]; then
        return
    fi

    # Writing net.ipv4.ip_local_reserved_ports replaces the whole list, so read
    # the ports that are already reserved on this host and keep them.
    # If the value cannot be read (e.g. sysctl is not in PATH for this user),
    # fall back to writing only the requested ports.
    local current merged
    current="$(sysctl -n net.ipv4.ip_local_reserved_ports 2>/dev/null)" || current=""
    current="${current//[[:space:]]/}"

    merged="${RESERVED_PORTS}"
    if [[ -n "${current}" ]]; then
        merged="${current},${RESERVED_PORTS}"
    fi

    echo "Reserve ports: ${RESERVED_PORTS}"
    sudo sysctl -w net.ipv4.ip_local_reserved_ports="${merged}"
}
start_es() {
# elasticsearch
cp "${ROOT}"/docker-compose/elasticsearch/es.yaml.tpl "${ROOT}"/docker-compose/elasticsearch/es.yaml
@ -609,6 +630,8 @@ start_kerberos() {
echo "starting dockers in parrallel"
reserve_ports
declare -A pids
if [[ "${RUN_ES}" -eq 1 ]]; then