[feature](docker) add docker-iceberg init tables (#25424)

Add initial tables and seed data for the docker-iceberg environment.
Author: wuwenchi
Date: 2023-10-24 19:29:57 +08:00 (committed by GitHub)
Parent: b16af25f7b
Commit: 10f1957379
8 changed files with 194 additions and 36 deletions

View File

@@ -0,0 +1,25 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
```
tools:
    gen_data.py: generates random data and writes it to insert_table_values.sql
    save_docker.sh: saves the current docker state (REST catalog db + minio data) into ./data/input
```
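
Taken together with the compose file further down, the intended workflow is roughly the following. This is a sketch: the in-container SQL path and the spark-sql invocation are assumptions, since gen_data.py simply writes to its working directory.

```bash
# 1. regenerate the seed SQL (writes insert_table_values.sql, 1000 rows)
python3 gen_data.py

# 2. replay it inside the Spark container (assumes the file was placed
#    under the mounted ./data directory)
docker exec doris--spark-iceberg spark-sql -f /mnt/data/insert_table_values.sql

# 3. snapshot the resulting catalog + object-store state into ./data/input
bash save_docker.sh
```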

View File: docker-compose/iceberg/iceberg.env

@@ -19,6 +19,6 @@
 NOTEBOOK_SERVER_PORT=8888
 SPARK_DRIVER_UI_PORT=8080
 SPARK_HISTORY_UI_PORT=10000
-REST_CATALOG_PORT=8181
+REST_CATALOG_PORT=18181
 MINIO_UI_PORT=9000
 MINIO_API_PORT=9001
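
The host-side REST catalog port moves from 8181 to 18181, presumably to avoid collisions on shared CI hosts; the matching iceberg_rest_uri_port=18181 appears in the regression configs below. A quick smoke test against the remapped port (GET /v1/config is part of the standard Iceberg REST catalog spec):

```bash
# should return the catalog defaults/overrides as JSON once the stack is up
curl -s http://localhost:18181/v1/config
```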

View File: docker-compose/iceberg/iceberg.yaml

@@ -18,64 +18,59 @@
version: "3"
services:
doris--spark-iceberg:
spark-iceberg:
image: tabulario/spark-iceberg
container_name: doris--spark-iceberg
hostname: doris--spark-iceberg
build: spark/
depends_on:
- doris--rest
- doris--minio
- rest
- minio
volumes:
- ./warehouse:/home/iceberg/warehouse
- ./notebooks:/home/iceberg/notebooks/notebooks
- ./entrypoint.sh:/opt/spark/entrypoint.sh
- ./spark-defaults.conf:/opt/spark/conf/spark-defaults.conf
- ./data/output/spark-warehouse:/home/iceberg/warehouse
- ./data/output/spark-notebooks:/home/iceberg/notebooks/notebooks
- ./data:/mnt/data
environment:
- AWS_ACCESS_KEY_ID=admin
- AWS_SECRET_ACCESS_KEY=password
- AWS_REGION=us-east-1
ports:
- ${NOTEBOOK_SERVER_PORT}:8888
- ${SPARK_DRIVER_UI_PORT}:8080
- ${SPARK_HISTORY_UI_PORT}:10000
links:
- doris--rest:rest
- doris--minio:minio
networks:
- doris--iceberg
entrypoint:
- /opt/spark/entrypoint.sh
doris--rest:
image: tabulario/iceberg-rest:0.2.0
rest:
image: tabulario/iceberg-rest
container_name: doris--iceberg-rest
ports:
- ${REST_CATALOG_PORT}:8181
volumes:
- ./data:/mnt/data
environment:
- AWS_ACCESS_KEY_ID=admin
- AWS_SECRET_ACCESS_KEY=password
- AWS_REGION=us-east-1
- CATALOG_WAREHOUSE=s3a://warehouse/wh/
- CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
- CATALOG_S3_ENDPOINT=http://doris--minio:9000
- CATALOG_S3_ENDPOINT=http://minio:9000
networks:
- doris--iceberg
doris--minio:
entrypoint: /bin/bash /mnt/data/input/script/rest_init.sh
minio:
image: minio/minio
container_name: doris--minio
hostname: doris--minio
environment:
- MINIO_ROOT_USER=admin
- MINIO_ROOT_PASSWORD=password
ports:
- ${MINIO_UI_PORT}:9001
- ${MINIO_API_PORT}:9000
- MINIO_DOMAIN=minio
networks:
- doris--iceberg
doris--iceberg:
aliases:
- warehouse.minio
command: ["server", "/data", "--console-address", ":9001"]
doris--mc:
mc:
depends_on:
- doris--minio
- minio
image: minio/mc
container_name: doris--mc
environment:
@@ -84,12 +79,16 @@ services:
       - AWS_REGION=us-east-1
     networks:
       - doris--iceberg
+    volumes:
+      - ./data:/mnt/data
     entrypoint: >
       /bin/sh -c "
-      until (/usr/bin/mc config host add minio http://doris--minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
+      until (/usr/bin/mc config host add minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
       /usr/bin/mc rm -r --force minio/warehouse;
       /usr/bin/mc mb minio/warehouse;
       /usr/bin/mc policy set public minio/warehouse;
+      echo 'copy data';
+      mc cp -r /mnt/data/input/minio/warehouse/* minio/warehouse/;
       tail -f /dev/null
       "
 networks:
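
The stack can also be brought up by hand, mirroring what run-thirdparties-docker.sh does further down:

```bash
cd docker-compose/iceberg

# start: minio comes up first, mc seeds the warehouse bucket from
# ./data/input, and the REST catalog restores its state via rest_init.sh
sudo docker compose -f iceberg.yaml --env-file iceberg.env up -d

# stop
sudo docker compose -f iceberg.yaml --env-file iceberg.env down
```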

View File: tools/gen_data.py (new file)

@@ -0,0 +1,106 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
import random
import string

table_name = "demo.format_v1.sample_parquet"
alphabet = 'abcdefghijklmnopqrstuvwxyz!@#$%^&*()'
binary_alphabet = '11111111111110000000000000000000'
date_choice = ["date('2000-12-31')", "date('1969-09-21')", "date('2969-02-03')"]
timestamp_choice = [
    "TIMESTAMP '1970-01-01 00:00:01.000001 UTC+00:00'",
    "TIMESTAMP '1970-01-02 00:00:01.000001 UTC+00:00'",
    "TIMESTAMP '1970-01-03 00:00:01.000001 UTC+00:00'",
    "TIMESTAMP '1970-01-04 00:00:01.000001 UTC+00:00'"]
timestamp_ntz_choice = [
    "TIMESTAMP_NTZ '2017-12-01 10:12:55.038194 UTC'",
    "TIMESTAMP_NTZ '2017-12-02 10:12:55.038194 UTC'",
    "TIMESTAMP_NTZ '2017-12-03 10:12:55.038194 UTC'",
    "TIMESTAMP_NTZ '2017-12-04 10:12:55.038194 UTC'",
]
city_choice = [
    "'Shanghai'", "'Hefei'", "'Beijing'", "'Hangzhou'"
]


def get_one_data():
    # Build one random row; each value matches a column of
    # demo.format_v1.sample_parquet, rendered as a SQL literal.
    id = random.randint(-100000000, 100000000)
    col_boolean = True
    if random.randint(-1000000, 1000000) % 2 == 0:
        col_boolean = False
    col_short = random.randint(-32700, 32700)
    col_byte = random.randint(-128, 127)
    col_integer = random.randint(-21474836, 2147483)
    col_long = random.randint(-92233720368547758, 92233720368547758)
    col_float = random.random() * 10
    col_double = random.random() * 10
    col_date = random.choice(date_choice)
    col_timestamp = random.choice(timestamp_choice)
    col_timestamp_ntz = random.choice(timestamp_ntz_choice)
    col_char = "".join(random.sample(alphabet, random.randint(1, 18)))
    col_varchar = ''.join(random.sample(string.ascii_letters + string.digits, random.randint(1, 20)))
    col_string = ''.join(random.sample(string.ascii_letters + string.digits, random.randint(1, 20)))
    col_binary = ''.join(random.sample(binary_alphabet, random.randint(1, 30)))
    col_decimal = random.random() * 10000
    city = random.choice(city_choice)
    out = "{},{},{},{},{},{},{},{},{},{},{},'{}','{}','{}',CAST('{}' AS BINARY),{},{}".format(
        id,
        col_boolean,
        col_short,
        col_byte,
        col_integer,
        col_long,
        col_float,
        col_double,
        col_date,
        col_timestamp,
        col_timestamp_ntz,
        col_char,
        col_varchar,
        col_string,
        col_binary,
        col_decimal,
        city
    )
    return out


# Emit a single multi-row INSERT with 1000 generated rows.
with open('insert_table_values.sql', 'w') as f:
    f.write("INSERT INTO {} VALUES\n".format(table_name))
    f.write("  ({})\n".format(get_one_data()))
    for i in range(1, 1000):
        f.write(", ({})\n".format(get_one_data()))
    f.write(";\n")

View File: tools/save_docker.sh (new file)

@@ -0,0 +1,23 @@
#!/bin/bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# save the REST catalog's sqlite database
docker exec iceberg-rest bash -c 'cp /tmp/iceberg_rest_mode\=memory /mnt/data/input/'
# save the iceberg warehouse data from s3 (minio)
docker exec mc bash -c 'mc cp -r minio/warehouse /mnt/data/input/minio'
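
The counterpart of this snapshot sits in the compose file above: the rest service replays the saved sqlite database through rest_init.sh, and the mc container copies ./data/input/minio/warehouse back into the bucket on startup. After running save_docker.sh you can verify the snapshot landed (paths inferred from the two cp commands):

```bash
ls 'data/input/iceberg_rest_mode=memory'   # REST catalog's sqlite database
ls data/input/minio/warehouse/             # Iceberg data + metadata files
```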

View File: run-thirdparties-docker.sh

@@ -60,7 +60,7 @@ STOP=0
 if [[ "$#" == 1 ]]; then
     # default
-    COMPONENTS="mysql,es,hive,pg,oracle,sqlserver,clickhouse,mariadb"
+    COMPONENTS="mysql,es,hive,pg,oracle,sqlserver,clickhouse,mariadb,iceberg"
 else
     while true; do
         case "$1" in
@@ -332,13 +332,12 @@ if [[ "${RUN_ICEBERG}" -eq 1 ]]; then
     sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/iceberg/entrypoint.sh
     sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/iceberg/spark-defaults.conf
     sudo docker compose -f "${ROOT}"/docker-compose/iceberg/iceberg.yaml --env-file "${ROOT}"/docker-compose/iceberg/iceberg.env down
+    sudo rm -rf "${ROOT}"/docker-compose/iceberg/data
     if [[ "${STOP}" -ne 1 ]]; then
-        sudo rm -rf "${ROOT}"/docker-compose/iceberg/notebooks
-        sudo mkdir "${ROOT}"/docker-compose/iceberg/notebooks
-        sudo rm -rf "${ROOT}"/docker-compose/iceberg/spark
-        sudo mkdir "${ROOT}"/docker-compose/iceberg/spark
-        sudo rm -rf "${ROOT}"/docker-compose/iceberg/warehouse
-        sudo mkdir "${ROOT}"/docker-compose/iceberg/warehouse
+        wget -P ${ROOT}/docker-compose/iceberg https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/iceberg/iceberg_data.zip
+        sudo unzip -d "${ROOT}"/docker-compose/iceberg -q ${ROOT}/docker-compose/iceberg/iceberg_data.zip
+        sudo mv "${ROOT}"/docker-compose/iceberg/iceberg_data "${ROOT}"/docker-compose/iceberg/data
+        sudo rm -rf ${ROOT}/docker-compose/iceberg/iceberg_data.zip
         sudo docker compose -f "${ROOT}"/docker-compose/iceberg/iceberg.yaml --env-file "${ROOT}"/docker-compose/iceberg/iceberg.env up -d
     fi
 fi
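
With iceberg now in the default component list, the environment can also be started on its own. Flag names below follow the script's existing usage and are shown for illustration:

```bash
# start only the iceberg environment (downloads iceberg_data.zip on first run)
sh run-thirdparties-docker.sh -c iceberg

# tear it down; with --stop the data set is not re-downloaded
sh run-thirdparties-docker.sh -c iceberg --stop
```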

View File

@@ -177,6 +177,9 @@ s3Endpoint = "cos.ap-hongkong.myqcloud.com"
 s3BucketName = "doris-build-hk-1308700295"
 s3Region = "ap-hongkong"
+
+// iceberg rest catalog config
+iceberg_rest_uri_port=18181
 // If the failure suite num exceeds this config
 // all following suite will be skipped to fast quit the run.
 // <=0 means no limit.
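
A suite reading iceberg_rest_uri_port would then point a Doris catalog at the dockerized REST service. A rough sketch of what that amounts to; the FE MySQL port and exact property names are assumptions, not taken from this commit:

```bash
mysql -h 127.0.0.1 -P 9030 -u root -e "
CREATE CATALOG ice_rest PROPERTIES (
  'type' = 'iceberg',
  'iceberg.catalog.type' = 'rest',
  'uri' = 'http://127.0.0.1:18181'
);"
```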

View File

@@ -92,6 +92,9 @@ hiveServerPort=10000
 enableKafkaTest=true
 kafka_port=19193
+
+// iceberg test config
+iceberg_rest_uri_port=18181
 enableEsTest=false
 es_6_port=19200
 es_7_port=29200