diff --git a/docker/thirdparties/docker-compose/iceberg/README.md b/docker/thirdparties/docker-compose/iceberg/README.md
new file mode 100644
index 0000000000..7458c71703
--- /dev/null
+++ b/docker/thirdparties/docker-compose/iceberg/README.md
@@ -0,0 +1,25 @@
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements. See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership. The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License. You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied. See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+
+
+```
+tools:
+    gen_data.py: generate random data for the sample Iceberg table
+    save_docker.sh: save the current docker state (REST catalog sqlite file and minio warehouse)
+```
diff --git a/docker/thirdparties/docker-compose/iceberg/iceberg.env b/docker/thirdparties/docker-compose/iceberg/iceberg.env
index e4a95c99ce..4cc8b42eaf 100644
--- a/docker/thirdparties/docker-compose/iceberg/iceberg.env
+++ b/docker/thirdparties/docker-compose/iceberg/iceberg.env
@@ -19,6 +19,6 @@
 NOTEBOOK_SERVER_PORT=8888
 SPARK_DRIVER_UI_PORT=8080
 SPARK_HISTORY_UI_PORT=10000
-REST_CATALOG_PORT=8181
+REST_CATALOG_PORT=18181
 MINIO_UI_PORT=9000
 MINIO_API_PORT=9001
diff --git a/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl b/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
index 1cd3d4acf9..343c8dd2e5 100644
--- a/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
+++ b/docker/thirdparties/docker-compose/iceberg/iceberg.yaml.tpl
@@ -18,64 +18,59 @@
 version: "3"
 
 services:
-  doris--spark-iceberg:
+  spark-iceberg:
     image: tabulario/spark-iceberg
     container_name: doris--spark-iceberg
     hostname: doris--spark-iceberg
     build: spark/
     depends_on:
-      - doris--rest
-      - doris--minio
+      - rest
+      - minio
     volumes:
-      - ./warehouse:/home/iceberg/warehouse
-      - ./notebooks:/home/iceberg/notebooks/notebooks
-      - ./entrypoint.sh:/opt/spark/entrypoint.sh
-      - ./spark-defaults.conf:/opt/spark/conf/spark-defaults.conf
+      - ./data/output/spark-warehouse:/home/iceberg/warehouse
+      - ./data/output/spark-notebooks:/home/iceberg/notebooks/notebooks
+      - ./data:/mnt/data
     environment:
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=password
      - AWS_REGION=us-east-1
-    ports:
-      - ${NOTEBOOK_SERVER_PORT}:8888
-      - ${SPARK_DRIVER_UI_PORT}:8080
-      - ${SPARK_HISTORY_UI_PORT}:10000
-    links:
-      - doris--rest:rest
-      - doris--minio:minio
     networks:
       - doris--iceberg
-    entrypoint:
-      - /opt/spark/entrypoint.sh
-  doris--rest:
-    image: tabulario/iceberg-rest:0.2.0
+  rest:
+    image: tabulario/iceberg-rest
+    container_name: doris--iceberg-rest
     ports:
       - ${REST_CATALOG_PORT}:8181
+    volumes:
+      - ./data:/mnt/data
     environment:
      - AWS_ACCESS_KEY_ID=admin
      - AWS_SECRET_ACCESS_KEY=password
      - AWS_REGION=us-east-1
      - CATALOG_WAREHOUSE=s3a://warehouse/wh/
      - CATALOG_IO__IMPL=org.apache.iceberg.aws.s3.S3FileIO
-     - CATALOG_S3_ENDPOINT=http://doris--minio:9000
+     - CATALOG_S3_ENDPOINT=http://minio:9000
     networks:
       - doris--iceberg
-  doris--minio:
+    entrypoint: /bin/bash /mnt/data/input/script/rest_init.sh
+
+  minio:
     image: minio/minio
    container_name: doris--minio
-    hostname: doris--minio
    environment:
     - MINIO_ROOT_USER=admin
     - MINIO_ROOT_PASSWORD=password
-    ports:
-     - ${MINIO_UI_PORT}:9001
-     - ${MINIO_API_PORT}:9000
+    - MINIO_DOMAIN=minio
    networks:
-     - doris--iceberg
+     doris--iceberg:
+       aliases:
+         - warehouse.minio
    command: ["server", "/data", "--console-address", ":9001"]
-  doris--mc:
+
+  mc:
    depends_on:
-     - doris--minio
+     - minio
    image: minio/mc
    container_name: doris--mc
    environment:
@@ -84,12 +79,16 @@ services:
     - AWS_REGION=us-east-1
    networks:
     - doris--iceberg
+   volumes:
+     - ./data:/mnt/data
    entrypoint: >
      /bin/sh -c "
-     until (/usr/bin/mc config host add minio http://doris--minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
+     until (/usr/bin/mc config host add minio http://minio:9000 admin password) do echo '...waiting...' && sleep 1; done;
      /usr/bin/mc rm -r --force minio/warehouse;
      /usr/bin/mc mb minio/warehouse;
      /usr/bin/mc policy set public minio/warehouse;
+     echo 'copy data';
+     mc cp -r /mnt/data/input/minio/warehouse/* minio/warehouse/;
      tail -f /dev/null
      "
 networks:
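A quick way to sanity-check the relocated REST catalog once the stack is up — a minimal sketch, assuming iceberg.yaml has already been rendered from iceberg.yaml.tpl and the commands run from this directory:

```
# Bring the stack up with the rendered compose file and the env file above.
sudo docker compose -f iceberg.yaml --env-file iceberg.env up -d

# GET /v1/config is part of the Iceberg REST catalog API;
# 18181 is REST_CATALOG_PORT from iceberg.env.
curl -s http://localhost:18181/v1/config
```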
diff --git a/docker/thirdparties/docker-compose/iceberg/tools/gen_data.py b/docker/thirdparties/docker-compose/iceberg/tools/gen_data.py
new file mode 100644
index 0000000000..2bd2d7f0ff
--- /dev/null
+++ b/docker/thirdparties/docker-compose/iceberg/tools/gen_data.py
@@ -0,0 +1,106 @@
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+import random
+import string
+
+table_name = "demo.format_v1.sample_parquet"
+
+alphabet = 'abcdefghijklmnopqrstuvwxyz!@#$%^&*()'
+binary_alphabet = '11111111111110000000000000000000'
+date_choice = ["date('2000-12-31')", "date('1969-09-21')", "date('2969-02-03')"]
+timestamp_choice = [
+    "TIMESTAMP '1970-01-01 00:00:01.000001 UTC+00:00'",
+    "TIMESTAMP '1970-01-02 00:00:01.000001 UTC+00:00'",
+    "TIMESTAMP '1970-01-03 00:00:01.000001 UTC+00:00'",
+    "TIMESTAMP '1970-01-04 00:00:01.000001 UTC+00:00'"]
+timestamp_ntz_choice = [
+    "TIMESTAMP_NTZ '2017-12-01 10:12:55.038194 UTC'",
+    "TIMESTAMP_NTZ '2017-12-02 10:12:55.038194 UTC'",
+    "TIMESTAMP_NTZ '2017-12-03 10:12:55.038194 UTC'",
+    "TIMESTAMP_NTZ '2017-12-04 10:12:55.038194 UTC'",
+]
+city_choice = [
+    "'Shanghai'", "'Hefei'", "'Beijing'", "'Hangzhou'"
+]
+
+
+def get_one_data():
+    id = random.randint(-100000000, 100000000)
+
+    col_boolean = True
+    if random.randint(-1000000, 1000000) % 2 == 0:
+        col_boolean = False
+
+    col_short = random.randint(-32700, 32700)
+
+    col_byte = random.randint(-128, 127)
+
+    col_integer = random.randint(-21474836, 2147483)
+
+    col_long = random.randint(-92233720368547758, 92233720368547758)
+
+    col_float = random.random() * 10
+
+    col_double = random.random() * 10
+
+    col_date = random.choice(date_choice)
+
+    col_timestamp = random.choice(timestamp_choice)
+
+    col_timestamp_ntz = random.choice(timestamp_ntz_choice)
+
+    col_char = "".join(random.sample(alphabet, random.randint(1, 18)))
+
+    col_varchar = ''.join(random.sample(string.ascii_letters + string.digits, random.randint(1, 20)))
+
+    col_string = ''.join(random.sample(string.ascii_letters + string.digits, random.randint(1, 20)))
+
+    col_binary = ''.join(random.sample(binary_alphabet, random.randint(1, 30)))
+
+    col_decimal = random.random() * 10000
+
+    city = random.choice(city_choice)
+
+    out = "{},{},{},{},{},{},{},{},{},{},{},'{}','{}','{}',CAST('{}' AS BINARY),{},{}".format(
+        id,
+        col_boolean,
+        col_short,
+        col_byte,
+        col_integer,
+        col_long,
+        col_float,
+        col_double,
+        col_date,
+        col_timestamp,
+        col_timestamp_ntz,
+        col_char,
+        col_varchar,
+        col_string,
+        col_binary,
+        col_decimal,
+        city
+    )
+    return out
+
+
+with open('insert_table_values.sql', 'w') as f:
+    f.write("INSERT INTO {} VALUES\n".format(table_name))
+    f.write(" ({})\n".format(get_one_data()))
+    for i in range(1, 1000):
+        f.write(", ({})\n".format(get_one_data()))
+    f.write(";\n")
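gen_data.py writes a single 1000-row INSERT statement for demo.format_v1.sample_parquet into insert_table_values.sql. A hedged usage sketch — the copy destination and the spark-sql invocation are assumptions, not part of this patch; ./data is the bind mount declared in the compose template above:

```
# Generate the SQL file, then make it visible inside the containers
# via the ./data mount (mounted at /mnt/data).
python3 gen_data.py
cp insert_table_values.sql ../data/input/

# The tabulario/spark-iceberg image ships spark-sql configured
# against the REST catalog; container name assumes the doris-- default.
docker exec doris--spark-iceberg spark-sql -f /mnt/data/input/insert_table_values.sql
```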
diff --git a/docker/thirdparties/docker-compose/iceberg/tools/save_docker.sh b/docker/thirdparties/docker-compose/iceberg/tools/save_docker.sh
new file mode 100644
index 0000000000..cc149d4811
--- /dev/null
+++ b/docker/thirdparties/docker-compose/iceberg/tools/save_docker.sh
@@ -0,0 +1,23 @@
+#!/bin/bash
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied. See the License for the
+# specific language governing permissions and limitations
+# under the License.
+
+# save the REST catalog's sqlite file (container name must match the running one)
+docker exec iceberg-rest bash -c 'cp /tmp/iceberg_rest_mode\=memory /mnt/data/input/'
+
+# save the iceberg warehouse files from minio
+docker exec mc bash -c 'mc cp -r minio/warehouse /mnt/data/input/minio'
diff --git a/docker/thirdparties/run-thirdparties-docker.sh b/docker/thirdparties/run-thirdparties-docker.sh
index baa4007ac1..de1422d1af 100755
--- a/docker/thirdparties/run-thirdparties-docker.sh
+++ b/docker/thirdparties/run-thirdparties-docker.sh
@@ -60,7 +60,7 @@ STOP=0
 
 if [[ "$#" == 1 ]]; then
     # default
-    COMPONENTS="mysql,es,hive,pg,oracle,sqlserver,clickhouse,mariadb"
+    COMPONENTS="mysql,es,hive,pg,oracle,sqlserver,clickhouse,mariadb,iceberg"
 else
     while true; do
         case "$1" in
@@ -332,13 +332,12 @@ if [[ "${RUN_ICEBERG}" -eq 1 ]]; then
     sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/iceberg/entrypoint.sh
     sed -i "s/doris--/${CONTAINER_UID}/g" "${ROOT}"/docker-compose/iceberg/spark-defaults.conf
     sudo docker compose -f "${ROOT}"/docker-compose/iceberg/iceberg.yaml --env-file "${ROOT}"/docker-compose/iceberg/iceberg.env down
+    sudo rm -rf "${ROOT}"/docker-compose/iceberg/data
     if [[ "${STOP}" -ne 1 ]]; then
-        sudo rm -rf "${ROOT}"/docker-compose/iceberg/notebooks
-        sudo mkdir "${ROOT}"/docker-compose/iceberg/notebooks
-        sudo rm -rf "${ROOT}"/docker-compose/iceberg/spark
-        sudo mkdir "${ROOT}"/docker-compose/iceberg/spark
-        sudo rm -rf "${ROOT}"/docker-compose/iceberg/warehouse
-        sudo mkdir "${ROOT}"/docker-compose/iceberg/warehouse
+        wget -P "${ROOT}"/docker-compose/iceberg https://doris-build-hk-1308700295.cos.ap-hongkong.myqcloud.com/regression/iceberg/iceberg_data.zip
+        sudo unzip -d "${ROOT}"/docker-compose/iceberg -q "${ROOT}"/docker-compose/iceberg/iceberg_data.zip
+        sudo mv "${ROOT}"/docker-compose/iceberg/iceberg_data "${ROOT}"/docker-compose/iceberg/data
+        sudo rm -rf "${ROOT}"/docker-compose/iceberg/iceberg_data.zip
         sudo docker compose -f "${ROOT}"/docker-compose/iceberg/iceberg.yaml --env-file "${ROOT}"/docker-compose/iceberg/iceberg.env up -d
     fi
 fi
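With iceberg added to the default component list, the script now seeds the stack from a prebuilt iceberg_data.zip instead of recreating empty scratch directories. A hedged post-start sanity check — container names assume CONTAINER_UID kept the doris-- default:

```
# List the running iceberg containers (spark-iceberg, iceberg-rest, minio, mc).
docker ps --format '{{.Names}}' | grep 'doris--'

# Confirm the pre-loaded warehouse files landed in minio.
docker exec doris--mc mc ls -r minio/warehouse | head
```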
diff --git a/regression-test/conf/regression-conf.groovy b/regression-test/conf/regression-conf.groovy
index acf9672de1..5fdcc3692e 100644
--- a/regression-test/conf/regression-conf.groovy
+++ b/regression-test/conf/regression-conf.groovy
@@ -177,6 +177,9 @@ s3Endpoint = "cos.ap-hongkong.myqcloud.com"
 s3BucketName = "doris-build-hk-1308700295"
 s3Region = "ap-hongkong"
 
+// iceberg rest catalog config
+iceberg_rest_uri_port=18181
+
 // If the failure suite num exceeds this config
 // all following suite will be skipped to fast quit the run.
 // <=0 means no limit.
diff --git a/regression-test/pipeline/p0/conf/regression-conf.groovy b/regression-test/pipeline/p0/conf/regression-conf.groovy
index 549e28d9a8..fecda4db1f 100644
--- a/regression-test/pipeline/p0/conf/regression-conf.groovy
+++ b/regression-test/pipeline/p0/conf/regression-conf.groovy
@@ -92,6 +92,9 @@ hiveServerPort=10000
 enableKafkaTest=true
 kafka_port=19193
 
+// iceberg test config
+iceberg_rest_uri_port=18181
+
 enableEsTest=false
 es_6_port=19200
 es_7_port=29200
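The regression suites read iceberg_rest_uri_port, so it must stay in sync with REST_CATALOG_PORT in iceberg.env (both 18181 in this patch). A quick consistency check from the repo root, using only files touched here:

```
grep -n 'REST_CATALOG_PORT' docker/thirdparties/docker-compose/iceberg/iceberg.env
grep -n 'iceberg_rest_uri_port' regression-test/conf/regression-conf.groovy \
    regression-test/pipeline/p0/conf/regression-conf.groovy
```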