branch-2.1: [opt](iceberg docker)Use PostgreSQL as the backend for the Iceberg REST server. #46289 (#46576)
Cherry-picked from #46289 Co-authored-by: wuwenchi <wuwenchi@selectdb.com>
This commit is contained in:
committed by
GitHub
parent
26bdd23e05
commit
72cdedc47f
@ -23,24 +23,25 @@ start-worker.sh spark://doris--spark-iceberg:7077
|
||||
start-history-server.sh
|
||||
start-thriftserver.sh --driver-java-options "-Dderby.system.home=/tmp/derby"
|
||||
|
||||
# The creation of a Spark SQL client is time-consuming,
|
||||
# and reopening a new client for each SQL file execution leads to significant overhead.
|
||||
# To reduce the time spent on creating clients,
|
||||
# we group these files together and execute them using a single client.
|
||||
# This approach can reduce the time from 150s to 40s.
|
||||
|
||||
START_TIME1=$(date +%s)
|
||||
# Build one driver file that `source`s every Iceberg SQL script.
# find emits entries in unspecified directory order, so sort them to keep the
# execution order deterministic across runs and filesystems.
find /mnt/scripts/create_preinstalled_scripts/iceberg -name '*.sql' | sort | sed 's|^|source |' | sed 's|$|;|'> iceberg_total.sql
|
||||
spark-sql --master spark://doris--spark-iceberg:7077 --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions -f iceberg_total.sql
|
||||
END_TIME1=$(date +%s)
|
||||
EXECUTION_TIME1=$((END_TIME1 - START_TIME1))
|
||||
# Report the wall-clock time of the batched Iceberg run.
# No xargs substitution happens at this level, so the message carries no {} placeholder.
echo "Script iceberg total: executed in $EXECUTION_TIME1 seconds"
|
||||
|
||||
ls /mnt/scripts/create_preinstalled_scripts/iceberg/*.sql | xargs -n 1 -I {} bash -c '
|
||||
START_TIME=$(date +%s)
|
||||
spark-sql --master spark://doris--spark-iceberg:7077 --conf spark.sql.extensions=org.apache.iceberg.spark.extensions.IcebergSparkSessionExtensions -f {}
|
||||
END_TIME=$(date +%s)
|
||||
EXECUTION_TIME=$((END_TIME - START_TIME))
|
||||
echo "Script: {} executed in $EXECUTION_TIME seconds"
|
||||
'
|
||||
|
||||
ls /mnt/scripts/create_preinstalled_scripts/paimon/*.sql | xargs -n 1 -I {} bash -c '
|
||||
START_TIME=$(date +%s)
|
||||
spark-sql --master spark://doris--spark-iceberg:7077 --conf spark.sql.extensions=org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions -f {}
|
||||
END_TIME=$(date +%s)
|
||||
EXECUTION_TIME=$((END_TIME - START_TIME))
|
||||
echo "Script: {} executed in $EXECUTION_TIME seconds"
|
||||
'
|
||||
|
||||
START_TIME2=$(date +%s)
|
||||
# Build one driver file that `source`s every Paimon SQL script.
# find emits entries in unspecified directory order, so sort them to keep the
# execution order deterministic across runs and filesystems.
find /mnt/scripts/create_preinstalled_scripts/paimon -name '*.sql' | sort | sed 's|^|source |' | sed 's|$|;|'> paimon_total.sql
|
||||
spark-sql --master spark://doris--spark-iceberg:7077 --conf spark.sql.extensions=org.apache.paimon.spark.extensions.PaimonSparkSessionExtensions -f paimon_total.sql
|
||||
END_TIME2=$(date +%s)
|
||||
EXECUTION_TIME2=$((END_TIME2 - START_TIME2))
|
||||
# Report the wall-clock time of the batched Paimon run.
# No xargs substitution happens at this level, so the message carries no {} placeholder.
echo "Script paimon total: executed in $EXECUTION_TIME2 seconds"
|
||||
|
||||
touch /mnt/SUCCESS;
|
||||
|
||||
|
||||
@ -18,6 +18,7 @@
|
||||
version: "3"
|
||||
|
||||
services:
|
||||
|
||||
spark-iceberg:
|
||||
image: tabulario/spark-iceberg
|
||||
container_name: doris--spark-iceberg
|
||||
@ -47,6 +48,19 @@ services:
|
||||
interval: 5s
|
||||
timeout: 120s
|
||||
retries: 120
|
||||
|
||||
postgres:
|
||||
image: postgis/postgis:14-3.3
|
||||
container_name: doris--postgres
|
||||
environment:
|
||||
# Quoted so the all-digit password is a YAML string rather than an integer.
# Must match CATALOG_JDBC_PASSWORD on the rest service.
POSTGRES_PASSWORD: "123456"
|
||||
POSTGRES_USER: root
|
||||
POSTGRES_DB: iceberg
|
||||
volumes:
|
||||
- ./data/input/pgdata:/var/lib/postgresql/data
|
||||
networks:
|
||||
- doris--iceberg
|
||||
|
||||
rest:
|
||||
image: tabulario/iceberg-rest
|
||||
container_name: doris--iceberg-rest
|
||||
@ -54,6 +68,8 @@ services:
|
||||
# Host port comes from REST_CATALOG_PORT; quoted so the HOST:CONTAINER mapping
# is always parsed as a string whatever value is substituted.
- "${REST_CATALOG_PORT}:8181"
|
||||
volumes:
|
||||
- ./data:/mnt/data
|
||||
depends_on:
|
||||
- postgres
|
||||
environment:
|
||||
- AWS_ACCESS_KEY_ID=admin
|
||||
- AWS_SECRET_ACCESS_KEY=password
|
||||
@ -61,6 +77,9 @@ services:
|
||||
- CATALOG_WAREHOUSE=s3a://warehouse/wh/
|
||||
- CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
|
||||
- CATALOG_S3_ENDPOINT=http://minio:9000
|
||||
- CATALOG_URI=jdbc:postgresql://postgres:5432/iceberg
|
||||
- CATALOG_JDBC_USER=root
|
||||
- CATALOG_JDBC_PASSWORD=123456
|
||||
networks:
|
||||
- doris--iceberg
|
||||
entrypoint: /bin/bash /mnt/data/input/script/rest_init.sh
|
||||
|
||||
Reference in New Issue
Block a user