[feature](docker)add docker-iceberg init tables (#25424)
Add some init tables for docker-iceberg.
This commit is contained in:
25
docker/thirdparties/docker-compose/iceberg/README.md
Normal file
25
docker/thirdparties/docker-compose/iceberg/README.md
Normal file
@ -0,0 +1,25 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
|
||||
|
||||
```
|
||||
tools:
|
||||
gen_data.py: generate insert_table_values.sql, an INSERT statement with 1000 rows of random data
|
||||
save_docker.sh: save the current docker state (copies the REST catalog state file and the MinIO warehouse bucket into /mnt/data/input)
|
||||
```
|
||||
@ -19,6 +19,6 @@
|
||||
NOTEBOOK_SERVER_PORT=8888
|
||||
SPARK_DRIVER_UI_PORT=8080
|
||||
SPARK_HISTORY_UI_PORT=10000
|
||||
REST_CATALOG_PORT=8181
|
||||
REST_CATALOG_PORT=18181
|
||||
MINIO_UI_PORT=9000
|
||||
MINIO_API_PORT=9001
|
||||
|
||||
@ -18,64 +18,59 @@
|
||||
version: "3"
|
||||
|
||||
services:
|
||||
doris--spark-iceberg:
|
||||
spark-iceberg:
|
||||
image: tabulario/spark-iceberg
|
||||
container_name: doris--spark-iceberg
|
||||
hostname: doris--spark-iceberg
|
||||
build: spark/
|
||||
depends_on:
|
||||
- doris--rest
|
||||
- doris--minio
|
||||
- rest
|
||||
- minio
|
||||
volumes:
|
||||
- ./warehouse:/home/iceberg/warehouse
|
||||
- ./notebooks:/home/iceberg/notebooks/notebooks
|
||||
- ./entrypoint.sh:/opt/spark/entrypoint.sh
|
||||
- ./spark-defaults.conf:/opt/spark/conf/spark-defaults.conf
|
||||
- ./data/output/spark-warehouse:/home/iceberg/warehouse
|
||||
- ./data/output/spark-notebooks:/home/iceberg/notebooks/notebooks
|
||||
- ./data:/mnt/data
|
||||
environment:
|
||||
- AWS_ACCESS_KEY_ID=admin
|
||||
- AWS_SECRET_ACCESS_KEY=password
|
||||
- AWS_REGION=us-east-1
|
||||
ports:
|
||||
- ${NOTEBOOK_SERVER_PORT}:8888
|
||||
- ${SPARK_DRIVER_UI_PORT}:8080
|
||||
- ${SPARK_HISTORY_UI_PORT}:10000
|
||||
links:
|
||||
- doris--rest:rest
|
||||
- doris--minio:minio
|
||||
networks:
|
||||
- doris--iceberg
|
||||
entrypoint:
|
||||
- /opt/spark/entrypoint.sh
|
||||
|
||||
doris--rest:
|
||||
image: tabulario/iceberg-rest:0.2.0
|
||||
rest:
|
||||
image: tabulario/iceberg-rest
|
||||
container_name: doris--iceberg-rest
|
||||
ports:
|
||||
- ${REST_CATALOG_PORT}:8181
|
||||
volumes:
|
||||
- ./data:/mnt/data
|
||||
environment:
|
||||
- AWS_ACCESS_KEY_ID=admin
|
||||
- AWS_SECRET_ACCESS_KEY=password
|
||||
- AWS_REGION=us-east-1
|
||||
- CATALOG_WAREHOUSE=s3a://warehouse/wh/
|
||||
- CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
|
||||
- CATALOG_S3_ENDPOINT=http://doris--minio:9000
|
||||
- CATALOG_S3_ENDPOINT=http://minio:9000
|
||||
networks:
|
||||
- doris--iceberg
|
||||
doris--minio:
|
||||
entrypoint: /bin/bash /mnt/data/input/script/rest_init.sh
|
||||
|
||||
minio:
|
||||
image: minio/minio
|
||||
container_name: doris--minio
|
||||
hostname: doris--minio
|
||||
environment:
|
||||
- MINIO_ROOT_USER=admin
|
||||
- MINIO_ROOT_PASSWORD=password
|
||||
ports:
|
||||
- ${MINIO_UI_PORT}:9001
|
||||
- ${MINIO_API_PORT}:9000
|
||||
- MINIO_DOMAIN=minio
|
||||
networks:
|
||||
- doris--iceberg
|
||||
doris--iceberg:
|
||||
aliases:
|
||||
- warehouse.minio
|
||||
command: ["server", "/data", "--console-address", ":9001"]
|
||||
doris--mc:
|
||||
|
||||
mc:
|
||||
depends_on:
|
||||
- doris--minio
|
||||
- minio
|
||||
image: minio/mc
|
||||
container_name: doris--mc
|
||||
environment:
|
||||
@ -84,12 +79,16 @@ services:
|
||||
- AWS_REGION=us-east-1
|
||||
networks:
|
||||
- doris--iceberg
|
||||
volumes:
|
||||
- ./data:/mnt/data
|
||||
entrypoint: >
|
||||
/bin/sh -c "
|
||||
until (/usr/bin/mc config host add minio http://doris--minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
|
||||
until (/usr/bin/mc config host add minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
|
||||
/usr/bin/mc rm -r --force minio/warehouse;
|
||||
/usr/bin/mc mb minio/warehouse;
|
||||
/usr/bin/mc policy set public minio/warehouse;
|
||||
echo 'copy data';
|
||||
mc cp -r /mnt/data/input/minio/warehouse/* minio/warehouse/;
|
||||
tail -f /dev/null
|
||||
"
|
||||
networks:
|
||||
|
||||
106
docker/thirdparties/docker-compose/iceberg/tools/gen_data.py
Normal file
106
docker/thirdparties/docker-compose/iceberg/tools/gen_data.py
Normal file
@ -0,0 +1,106 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
import random
|
||||
import string
|
||||
|
||||
# Fully qualified name of the table the generated INSERT statement targets.
table_name = "demo.format_v1.sample_parquet"

# Character pool sampled (without replacement) for the CHAR column.
alphabet = 'abcdefghijklmnopqrstuvwxyz!@#$%^&*()'
# Pool of '0'/'1' characters sampled (without replacement) for the BINARY column.
binary_alphabet = '11111111111110000000000000000000'
# SQL date literals used for the DATE column.
data_choice = ["date('2000-12-31')", "date('1969-09-21')", "date('2969-02-03')"]
# SQL literals used for the TIMESTAMP column.
timestamp_choice = [
    "TIMESTAMP '1970-01-01 00:00:01.000001 UTC+00:00'",
    "TIMESTAMP '1970-01-02 00:00:01.000001 UTC+00:00'",
    "TIMESTAMP '1970-01-03 00:00:01.000001 UTC+00:00'",
    "TIMESTAMP '1970-01-04 00:00:01.000001 UTC+00:00'"]
# SQL literals used for the TIMESTAMP_NTZ column.
timestamp_ntz_choice = [
    "TIMESTAMP_NTZ '2017-12-01 10:12:55.038194 UTC'",
    "TIMESTAMP_NTZ '2017-12-02 10:12:55.038194 UTC'",
    "TIMESTAMP_NTZ '2017-12-03 10:12:55.038194 UTC'",
    "TIMESTAMP_NTZ '2017-12-04 10:12:55.038194 UTC'",
]
# Already-quoted SQL string literals used for the trailing city column.
city_choice = [
    "'Shanghai'", "'Hefei'", "'Beijing'", "'Hangzhou'"
]


def get_one_data():
    """Build the value list for one random row.

    Returns:
        str: 17 comma-separated SQL literals (no surrounding parentheses),
        in the order id, boolean, short, byte, integer, long, float, double,
        date, timestamp, timestamp_ntz, char, varchar, string, binary,
        decimal, city.
    """
    # Named row_id rather than id so the builtin id() is not shadowed.
    row_id = random.randint(-100000000, 100000000)

    # True for an odd draw, False for an even one.
    col_boolean = random.randint(-1000000, 1000000) % 2 != 0

    col_short = random.randint(-32700, 32700)

    col_byte = random.randint(-128, 127)

    # NOTE(review): the bounds are asymmetric (-21474836 vs 2147483); kept
    # as-is, confirm whether this is intentional.
    col_integer = random.randint(-21474836, 2147483)

    col_long = random.randint(-92233720368547758, 92233720368547758)

    col_float = random.random() * 10

    col_double = random.random() * 10

    col_date = random.choice(data_choice)

    col_timestamp = random.choice(timestamp_choice)

    col_timestamp_ntz = random.choice(timestamp_ntz_choice)

    # random.sample draws without replacement, so a character is used at most
    # as often as it appears in its pool.
    col_char = "".join(random.sample(alphabet, random.randint(1, 18)))

    col_varchar = ''.join(random.sample(string.ascii_letters + string.digits, random.randint(1, 20)))

    col_string = ''.join(random.sample(string.ascii_letters + string.digits, random.randint(1, 20)))

    col_binary = ''.join(random.sample(binary_alphabet, random.randint(1, 30)))

    col_decimal = random.random() * 10000

    city = random.choice(city_choice)

    # char/varchar/string are quoted here; date, timestamp and city choices
    # already carry their own SQL syntax.
    out = "{},{},{},{},{},{},{},{},{},{},{},'{}','{}','{}',CAST('{}' AS BINARY),{},{}".format(
        row_id,
        col_boolean,
        col_short,
        col_byte,
        col_integer,
        col_long,
        col_float,
        col_double,
        col_date,
        col_timestamp,
        col_timestamp_ntz,
        col_char,
        col_varchar,
        col_string,
        col_binary,
        col_decimal,
        city
    )
    return out


def write_insert_statement(out, row_count=1000):
    """Write one multi-row INSERT statement for ``table_name`` to ``out``.

    Args:
        out: writable text file object.
        row_count: number of random rows to emit; must be at least 1.

    Raises:
        ValueError: if ``row_count`` is less than 1 (an INSERT with no rows
            would not be valid SQL).
    """
    if row_count < 1:
        raise ValueError("row_count must be at least 1")
    out.write("INSERT INTO {} VALUES\n".format(table_name))
    # The first row has no leading comma; every later row does.
    out.write(" ({})\n".format(get_one_data()))
    for _ in range(1, row_count):
        out.write(", ({})\n".format(get_one_data()))
    out.write(";\n")


# Only generate the file when run as a script, so importing this module has
# no side effects.
if __name__ == "__main__":
    with open('insert_table_values.sql', 'w') as f:
        write_insert_statement(f)
|
||||
|
||||
@ -0,0 +1,23 @@
|
||||
#!/bin/bash
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# Snapshot the running Iceberg docker environment into /mnt/data/input inside
# the containers, so that it can be reused as initial data.
#
# Container names are "<prefix>iceberg-rest" and "<prefix>mc". The compose file
# names them "doris--iceberg-rest" / "doris--mc", and the launcher rewrites the
# "doris--" prefix to ${CONTAINER_UID}; export CONTAINER_UID to target those
# containers. When it is unset the bare names are used.
CONTAINER_PREFIX="${CONTAINER_UID:-}"

# save sqlite
# NOTE(review): presumably /tmp/iceberg_rest_mode=memory is the REST catalog's
# SQLite backing file -- confirm.
docker exec "${CONTAINER_PREFIX}iceberg-rest" bash -c 'cp /tmp/iceberg_rest_mode\=memory /mnt/data/input/'

# save iceberg from s3
# Copies the whole "warehouse" bucket from the "minio" mc alias.
docker exec "${CONTAINER_PREFIX}mc" bash -c 'mc cp -r minio/warehouse /mnt/data/input/minio'
|
||||
@ -60,7 +60,7 @@ STOP=0
|
||||
|
||||
if [[ "$#" == 1 ]]; then
|
||||
# default
|
||||
COMPONENTS="mysql,es,hive,pg,oracle,sqlserver,clickhouse,mariadb"
|
||||
COMPONENTS="mysql,es,hive,pg,oracle,sqlserver,clickhouse,mariadb,iceberg"
|
||||
else
|
||||
while true; do
|
||||
case "$1" in
|
||||
@ -332,13 +332,12 @@ if [[ "${RUN_ICEBERG}" -eq 1 ]]; then
|
||||
sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/iceberg/entrypoint.sh
|
||||
sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/iceberg/spark-defaults.conf
|
||||
sudo docker compose -f "${ROOT}"/docker-compose/iceberg/iceberg.yaml --env-file "${ROOT}"/docker-compose/iceberg/iceberg.env down
|
||||
sudo rm -rf "${ROOT}"/docker-compose/iceberg/data
|
||||
if [[ "${STOP}" -ne 1 ]]; then
|
||||
sudo rm -rf "${ROOT}"/docker-compose/iceberg/notebooks
|
||||
sudo mkdir "${ROOT}"/docker-compose/iceberg/notebooks
|
||||
sudo rm -rf "${ROOT}"/docker-compose/iceberg/spark
|
||||
sudo mkdir "${ROOT}"/docker-compose/iceberg/spark
|
||||
sudo rm -rf "${ROOT}"/docker-compose/iceberg/warehouse
|
||||
sudo mkdir "${ROOT}"/docker-compose/iceberg/warehouse
|
||||
wget -P ${ROOT}/docker-compose/iceberg https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/iceberg/iceberg_data.zip
|
||||
sudo unzip -d "${ROOT}"/docker-compose/iceberg -q ${ROOT}/docker-compose/iceberg/iceberg_data.zip
|
||||
sudo mv "${ROOT}"/docker-compose/iceberg/iceberg_data "${ROOT}"/docker-compose/iceberg/data
|
||||
sudo rm -rf ${ROOT}/docker-compose/iceberg/iceberg_data.zip
|
||||
sudo docker compose -f "${ROOT}"/docker-compose/iceberg/iceberg.yaml --env-file "${ROOT}"/docker-compose/iceberg/iceberg.env up -d
|
||||
fi
|
||||
fi
|
||||
|
||||
@ -177,6 +177,9 @@ s3Endpoint = "cos.ap-hongkong.myqcloud.com"
|
||||
s3BucketName = "doris-build-hk-1308700295"
|
||||
s3Region = "ap-hongkong"
|
||||
|
||||
// iceberg rest catalog config
|
||||
iceberg_rest_uri_port=18181
|
||||
|
||||
// If the failure suite num exceeds this config
|
||||
// all following suite will be skipped to fast quit the run.
|
||||
// <=0 means no limit.
|
||||
|
||||
@ -92,6 +92,9 @@ hiveServerPort=10000
|
||||
enableKafkaTest=true
|
||||
kafka_port=19193
|
||||
|
||||
// iceberg test config
|
||||
iceberg_rest_uri_port=18181
|
||||
|
||||
enableEsTest=false
|
||||
es_6_port=19200
|
||||
es_7_port=29200
|
||||
|
||||
Reference in New Issue
Block a user