From 9cceddaeb3544f03372dfea3f63e886cf05c9377 Mon Sep 17 00:00:00 2001 From: Dongyang Li Date: Wed, 24 Aug 2022 14:29:38 +0800 Subject: [PATCH] [tools](ssb and tpch)optimize tools (#11975) --- tools/ssb-tools/README.md | 24 ++- tools/ssb-tools/{ => bin}/build-ssb-dbgen.sh | 21 ++- .../ssb-tools/{ => bin}/create-ssb-tables.sh | 19 ++- tools/ssb-tools/{ => bin}/gen-ssb-data.sh | 20 +-- .../load-ssb-data.sh} | 107 +++++++++++-- .../{ => bin}/run-ssb-flat-queries.sh | 22 +-- tools/ssb-tools/{ => bin}/run-ssb-queries.sh | 17 +- tools/ssb-tools/{ => conf}/doris-cluster.conf | 0 tools/ssb-tools/create-ssb-flat-table.sh | 101 ------------ tools/ssb-tools/ddl/create-ssb-flat-table.sql | 2 +- tools/ssb-tools/load-ssb-dimension-data.sh | 86 ---------- tools/ssb-tools/load-ssb-fact-data.sh | 147 ------------------ tools/tpch-tools/README.md | 12 +- .../tpch-tools/{ => bin}/build-tpch-dbgen.sh | 13 +- .../{ => bin}/create-tpch-tables.sh | 9 +- tools/tpch-tools/{ => bin}/gen-tpch-data.sh | 32 ++-- tools/tpch-tools/{ => bin}/load-tpch-data.sh | 95 ++++++----- .../tpch-tools/{ => bin}/run-tpch-queries.sh | 24 ++- .../tpch-tools/{ => conf}/doris-cluster.conf | 2 +- .../{ => ddl}/create-tpch-tables.sql | 0 20 files changed, 266 insertions(+), 487 deletions(-) rename tools/ssb-tools/{ => bin}/build-ssb-dbgen.sh (80%) rename tools/ssb-tools/{ => bin}/create-ssb-tables.sh (78%) rename tools/ssb-tools/{ => bin}/gen-ssb-data.sh (87%) rename tools/ssb-tools/{load-ssb-flat-data.sh => bin/load-ssb-data.sh} (57%) rename tools/ssb-tools/{ => bin}/run-ssb-flat-queries.sh (80%) rename tools/ssb-tools/{ => bin}/run-ssb-queries.sh (84%) rename tools/ssb-tools/{ => conf}/doris-cluster.conf (100%) delete mode 100755 tools/ssb-tools/create-ssb-flat-table.sh delete mode 100755 tools/ssb-tools/load-ssb-dimension-data.sh delete mode 100755 tools/ssb-tools/load-ssb-fact-data.sh rename tools/tpch-tools/{ => bin}/build-tpch-dbgen.sh (93%) rename tools/tpch-tools/{ => bin}/create-tpch-tables.sh (87%) rename tools/tpch-tools/{ => bin}/gen-tpch-data.sh (80%) rename tools/tpch-tools/{ => bin}/load-tpch-data.sh (69%) rename tools/tpch-tools/{ => bin}/run-tpch-queries.sh (79%) rename tools/tpch-tools/{ => conf}/doris-cluster.conf (98%) rename tools/tpch-tools/{ => ddl}/create-tpch-tables.sql (100%) diff --git a/tools/ssb-tools/README.md b/tools/ssb-tools/README.md index 5cb2a8e39e..6d8e85e97d 100644 --- a/tools/ssb-tools/README.md +++ b/tools/ssb-tools/README.md @@ -20,27 +20,21 @@ under the License. # Usage These scripts are used to make ssb and ssb flat test. - The ssb flat data comes from ssb tables by way of 'INSERT INTO ... SELECT ...', - which means ssb test steps 1 to 4 should have been done before loading ssb flat data. + The ssb flat data comes from ssb tables by way of 'INSERT INTO ... SELECT ...'. ## ssb test, follow the steps below: ### 1. build ssb dbgen tool. - ./build-ssb-dbgen.sh + ./bin/build-ssb-dbgen.sh ### 2. generate ssb data. use -h for more infomations. - ./gen-ssb-data.sh -s 1 -### 3. create ssb tables. modify `doris-cluster.conf` to specify doris info, then run script below. - ./create-ssb-tables.sh + ./bin/gen-ssb-data.sh -s 1 +### 3. create ssb tables. modify `conf/doris-cluster.conf` to specify Doris cluster info, then run script below. + ./bin/create-ssb-tables.sh ### 4. load ssb data. use -h for help. - ./load-ssb-dimension-data.sh - ./load-ssb-fact-data.sh + ./bin/load-ssb-data.sh ### 5. run ssb queries. - ./run-ssb-queries.sh + ./bin/run-ssb-queries.sh ## ssb flat test, follow the steps below: ### 1. 
prepare ssb data, which means ssb test steps 1 to 4 have been done. -### 2. create ssb flat table in the same database of ssb tables. - ./create-ssb-flat-table.sh -### 3. load ssb flat data. - ./load-ssb-flat-data.sh -### 4. run ssb flat queries. - ./run-ssb-flat-queries.sh +### 2. run ssb flat queries. + ./bin/run-ssb-flat-queries.sh diff --git a/tools/ssb-tools/build-ssb-dbgen.sh b/tools/ssb-tools/bin/build-ssb-dbgen.sh similarity index 80% rename from tools/ssb-tools/build-ssb-dbgen.sh rename to tools/ssb-tools/bin/build-ssb-dbgen.sh index 59af467441..56fd2056e4 100755 --- a/tools/ssb-tools/build-ssb-dbgen.sh +++ b/tools/ssb-tools/bin/build-ssb-dbgen.sh @@ -19,14 +19,17 @@ ############################################################## # This script is used to build ssb-dbgen # sssb-dbgen's source code is from https://github.com/electrum/ssb-dbgen.git -# Usage: +# Usage: # sh build-ssb-dbgen.sh ############################################################## set -eo pipefail -ROOT=`dirname "$0"` -ROOT=`cd "$ROOT"; pwd` +ROOT=$(dirname "$0") +ROOT=$( + cd "$ROOT" + pwd +) CURDIR=${ROOT} SSB_DBGEN_DIR=$CURDIR/ssb-dbgen/ @@ -35,17 +38,23 @@ SSB_DBGEN_DIR=$CURDIR/ssb-dbgen/ if [[ -d $SSB_DBGEN_DIR ]]; then echo "Dir $CURDIR/ssb-dbgen/ already exists. No need to download." echo "If you want to download ssb-dbgen again, please delete this dir first." + exit 1 else - curl https://palo-cloud-repo-bd.bd.bcebos.com/baidu-doris-release/ssb-dbgen-linux.tar.gz | tar xz -C $CURDIR/ + cd "$CURDIR" + wget https://palo-cloud-repo-bd.bd.bcebos.com/baidu-doris-release/ssb-dbgen-linux.tar.gz && tar -xzvf ssb-dbgen-linux.tar.gz -C $CURDIR/ fi # compile ssb-dbgen -cd $SSB_DBGEN_DIR/ && make +cd "$SSB_DBGEN_DIR/" && make cd - # check if [[ -f $CURDIR/ssb-dbgen/dbgen ]]; then - echo "Build succeed! Run $CURDIR/ssb-dbgen/dbgen -h" + echo -e " +################ +Build succeed! +################ +Run $CURDIR/ssb-dbgen/dbgen -h" exit 0 else echo "Build failed!" diff --git a/tools/ssb-tools/create-ssb-tables.sh b/tools/ssb-tools/bin/create-ssb-tables.sh similarity index 78% rename from tools/ssb-tools/create-ssb-tables.sh rename to tools/ssb-tools/bin/create-ssb-tables.sh index c7b2dd2af4..41177843e2 100755 --- a/tools/ssb-tools/create-ssb-tables.sh +++ b/tools/ssb-tools/bin/create-ssb-tables.sh @@ -29,19 +29,20 @@ ROOT=$( ) CURDIR=${ROOT} -DDL="${CURDIR}/ddl/create-ssb-tables.sql" +SSB_DDL="${CURDIR}/../ddl/create-ssb-tables.sql" +SSB_FLAT_DDL="${CURDIR}/../ddl/create-ssb-flat-table.sql" usage() { echo " This script is used to create SSB tables, -will use mysql client to connect Doris server which is specified in doris-cluster.conf file. +will use mysql client to connect Doris server which is specified in conf/doris-cluster.conf file. 
Usage: $0 " exit 1 } OPTS=$(getopt \ - -n $0 \ + -n "$0" \ -o '' \ -o 'h' \ -- "$@") @@ -86,7 +87,8 @@ check_prerequest() { check_prerequest "mysql --version" "mysql" -source $CURDIR/doris-cluster.conf +# shellcheck source=/dev/null +source "$CURDIR/../conf/doris-cluster.conf" export MYSQL_PWD=$PASSWORD echo "FE_HOST: $FE_HOST" @@ -95,7 +97,10 @@ echo "USER: $USER" echo "PASSWORD: $PASSWORD" echo "DB: $DB" -mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -e "CREATE DATABASE IF NOT EXISTS $DB" +mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -e "CREATE DATABASE IF NOT EXISTS $DB" -echo "Run DDL from ${DDL}" -mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB <${DDL} +echo "Run DDL from $SSB_DDL" +mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" <"$SSB_DDL" + +echo "Run DDL from $SSB_FLAT_DDL" +mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" <"$SSB_FLAT_DDL" diff --git a/tools/ssb-tools/gen-ssb-data.sh b/tools/ssb-tools/bin/gen-ssb-data.sh similarity index 87% rename from tools/ssb-tools/gen-ssb-data.sh rename to tools/ssb-tools/bin/gen-ssb-data.sh index 929a9e1eea..afc9e8bf68 100755 --- a/tools/ssb-tools/gen-ssb-data.sh +++ b/tools/ssb-tools/bin/gen-ssb-data.sh @@ -48,7 +48,7 @@ Usage: $0 } OPTS=$(getopt \ - -n $0 \ + -n "$0" \ -o '' \ -o 'hs:c:' \ -- "$@") @@ -107,24 +107,24 @@ if [[ -d $SSB_DATA_DIR/ ]]; then exit 1 fi -mkdir $SSB_DATA_DIR/ +mkdir "$SSB_DATA_DIR/" # gen data -cd $SSB_DBGEN_DIR +cd "$SSB_DBGEN_DIR" echo "Begin to generate data for table: customer" -$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T c +"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T c echo "Begin to generate data for table: part" -$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T p +"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T p echo "Begin to generate data for table: supplier" -$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T s +"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T s echo "Begin to generate data for table: date" -$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T d +"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T d echo "Begin to generate data for table: lineorder" -$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T l -C $PARALLEL +"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T l -C "$PARALLEL" cd - # move data to $SSB_DATA_DIR -mv $SSB_DBGEN_DIR/*.tbl* $SSB_DATA_DIR/ +mv "$SSB_DBGEN_DIR"/*.tbl* "$SSB_DATA_DIR/" # check data -du -sh $SSB_DATA_DIR/*.tbl* +du -sh "$SSB_DATA_DIR"/*.tbl* diff --git a/tools/ssb-tools/load-ssb-flat-data.sh b/tools/ssb-tools/bin/load-ssb-data.sh similarity index 57% rename from tools/ssb-tools/load-ssb-flat-data.sh rename to tools/ssb-tools/bin/load-ssb-data.sh index 813400adbe..f6be6bbe72 100755 --- a/tools/ssb-tools/load-ssb-flat-data.sh +++ b/tools/ssb-tools/bin/load-ssb-data.sh @@ -30,30 +30,46 @@ ROOT=$( ) CURDIR=${ROOT} +SSB_DATA_DIR="$CURDIR/ssb-data/" usage() { echo " -The ssb flat data actually comes from ssb tables, and will load by 'INSERT INTO ... SELECT ...' Usage: $0 + Optional options: + -c parallelism to load data of lineorder table, default is 5. + + Eg. + $0 load data using default value. + $0 -c 10 load lineorder table data using parallelism 10. " exit 1 } OPTS=$(getopt \ - -n $0 \ + -n "$0" \ -o '' \ - -o 'h' \ + -o 'hc:' \ -- "$@") eval set -- "$OPTS" +PARALLEL=5 HELP=0 + +if [ $# == 0 ]; then + usage +fi + while true; do case "$1" in -h) HELP=1 shift ;; + -c) + PARALLEL=$2 + shift 2 + ;; --) shift break @@ -70,6 +86,14 @@ if [[ ${HELP} -eq 1 ]]; then exit fi +echo "Parallelism: $PARALLEL" + +# check if ssb-data exists +if [[ ! 
-d $SSB_DATA_DIR/ ]]; then + echo "$SSB_DATA_DIR does not exist. Run sh gen-ssb-data.sh first." + exit 1 +fi + check_prerequest() { local CMD=$1 local NAME=$2 @@ -80,9 +104,9 @@ check_prerequest() { } run_sql() { - sql="$@" - echo $sql - mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB -e "$@" + sql="$*" + echo "$sql" + mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" -e "$@" } load_lineitem_flat() { @@ -165,7 +189,9 @@ ON (p.p_partkey = l.lo_partkey); check_prerequest "curl --version" "curl" # load lineorder -source $CURDIR/doris-cluster.conf +# shellcheck source=/dev/null +source "$CURDIR/../conf/doris-cluster.conf" +export MYSQL_PWD=$PASSWORD echo "FE_HOST: $FE_HOST" echo "FE_HTTP_PORT: $FE_HTTP_PORT" @@ -173,25 +199,78 @@ echo "USER: $USER" echo "PASSWORD: $PASSWORD" echo "DB: $DB" -echo 'Loading data for table: lineorder_flat' +date +echo "==========Start to load data into ssb tables==========" +echo 'Loading data for table: part' +curl --location-trusted -u "$USER":"$PASSWORD" \ + -H "column_separator:|" \ + -H "columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy" \ + -T "$SSB_DATA_DIR"/part.tbl http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/part/_stream_load -echo '============================================' +echo 'Loading data for table: date' +curl --location-trusted -u "$USER":"$PASSWORD" \ + -H "column_separator:|" \ + -H "columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy" \ + -T "$SSB_DATA_DIR"/date.tbl http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/dates/_stream_load + +echo 'Loading data for table: supplier' +curl --location-trusted -u "$USER":"$PASSWORD" \ + -H "column_separator:|" \ + -H "columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \ + -T "$SSB_DATA_DIR"/supplier.tbl http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/supplier/_stream_load + +echo 'Loading data for table: customer' +curl --location-trusted -u "$USER":"$PASSWORD" \ + -H "column_separator:|" \ + -H "columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use" \ + -T "$SSB_DATA_DIR"/customer.tbl http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/customer/_stream_load + +echo "Loading data for table: lineorder, with $PARALLEL parallel" +function load() { + echo "$@" + curl --location-trusted -u "$USER":"$PASSWORD" \ + -H "column_separator:|" \ + -H "columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy" \ + -T "$@" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/lineorder/_stream_load +} + +# set parallelism +[ -e /tmp/fd1 ] || mkfifo /tmp/fd1 +exec 3<>/tmp/fd1 +rm -rf /tmp/fd1 + +for ((i = 1; i <= PARALLEL; i++)); do + echo >&3 +done + +date +for file in "$SSB_DATA_DIR"/lineorder.tbl.*; do + read -r -u3 + { + load "$file" + echo >&3 + } & +done + +# wait for child thread finished +wait +date + +echo "==========Start to insert data into ssb flat table==========" echo "change some session variables before load, and then restore after load." 
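# Why the 'sed -n 3p' just below works: run_sql echoes the SQL text first, then mysql
# prints a header row followed by the value row, so line 3 of the combined output is the
# current setting. The two values captured here are used at the end of this script to
# restore query_timeout and parallel_fragment_exec_instance_num after the flat-table load.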
origin_query_timeout=$(run_sql 'select @@query_timeout;' | sed -n '3p') origin_parallel=$(run_sql 'select @@parallel_fragment_exec_instance_num;' | sed -n '3p') # set parallel_fragment_exec_instance_num=1, loading maybe slow but stable. run_sql "set global query_timeout=7200;" run_sql "set global parallel_fragment_exec_instance_num=1;" - echo '============================================' -echo $(date) +date load_lineitem_flat - +date echo '============================================' echo "restore session variables" run_sql "set global query_timeout=${origin_query_timeout};" run_sql "set global parallel_fragment_exec_instance_num=${origin_parallel};" - echo '============================================' -echo $(date) + echo "DONE." diff --git a/tools/ssb-tools/run-ssb-flat-queries.sh b/tools/ssb-tools/bin/run-ssb-flat-queries.sh similarity index 80% rename from tools/ssb-tools/run-ssb-flat-queries.sh rename to tools/ssb-tools/bin/run-ssb-flat-queries.sh index e623aed770..6db5ba3303 100755 --- a/tools/ssb-tools/run-ssb-flat-queries.sh +++ b/tools/ssb-tools/bin/run-ssb-flat-queries.sh @@ -28,8 +28,8 @@ ROOT=$( pwd ) -CURDIR=${ROOT} -QUERIES_DIR=$CURDIR/ssb-flat-queries +CURDIR="${ROOT}" +QUERIES_DIR="$CURDIR/../ssb-flat-queries" usage() { echo " @@ -41,7 +41,7 @@ Usage: $0 } OPTS=$(getopt \ - -n $0 \ + -n "$0" \ -o '' \ -o 'h' \ -- "$@") @@ -86,7 +86,8 @@ check_prerequest() { check_prerequest "mysqlslap --version" "mysqlslap" -source $CURDIR/doris-cluster.conf +# shellcheck source=/dev/null +source "$CURDIR/../conf/doris-cluster.conf" export MYSQL_PWD=$PASSWORD echo "FE_HOST: $FE_HOST" @@ -96,8 +97,8 @@ echo "PASSWORD: $PASSWORD" echo "DB: $DB" pre_set() { - echo $@ - mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB -e "$@" + echo "$@" + mysql -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" -D"$DB" -e "$@" } pre_set "set global enable_vectorized_engine=1;" @@ -105,14 +106,15 @@ pre_set "set global parallel_fragment_exec_instance_num=8;" pre_set "set global exec_mem_limit=8G;" pre_set "set global batch_size=4096;" echo '============================================' -pre_set "show variables" +pre_set "show variables;" +echo '============================================' +pre_set "show table status;" echo '============================================' for i in '1.1' '1.2' '1.3' '2.1' '2.2' '2.3' '3.1' '3.2' '3.3' '3.4' '4.1' '4.2' '4.3'; do # First run to prevent the affect of cold start - mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D $DB <$QUERIES_DIR/q${i}.sql >/dev/null 2>&1 + mysql -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" -D "$DB" <"$QUERIES_DIR"/q${i}.sql >/dev/null 2>&1 # Then run 3 times and takes the average time - res=$(mysqlslap -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT --create-schema=$DB --query=$QUERIES_DIR/q${i}.sql -F '\r' -i 3 | sed -n '2p' | cut -d ' ' -f 9,10) + res=$(mysqlslap -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" --create-schema="$DB" --query="$QUERIES_DIR"/q${i}.sql -F '\r' -i 3 | sed -n '2p' | cut -d ' ' -f 9,10) echo "q$i: $res" - sleep 1 done diff --git a/tools/ssb-tools/run-ssb-queries.sh b/tools/ssb-tools/bin/run-ssb-queries.sh similarity index 84% rename from tools/ssb-tools/run-ssb-queries.sh rename to tools/ssb-tools/bin/run-ssb-queries.sh index 106c6e35e8..5a1db9a57e 100755 --- a/tools/ssb-tools/run-ssb-queries.sh +++ b/tools/ssb-tools/bin/run-ssb-queries.sh @@ -29,7 +29,7 @@ ROOT=$( ) CURDIR=${ROOT} -QUERIES_DIR=$CURDIR/ssb-queries +QUERIES_DIR=$CURDIR/../ssb-queries usage() { echo " @@ -41,7 +41,7 @@ Usage: 
$0 } OPTS=$(getopt \ - -n $0 \ + -n "$0" \ -o '' \ -o 'h' \ -- "$@") @@ -86,7 +86,8 @@ check_prerequest() { check_prerequest "mysqlslap --version" "mysql slap" -source $CURDIR/doris-cluster.conf +# shellcheck source=/dev/null +source "$CURDIR/../conf/doris-cluster.conf" export MYSQL_PWD=$PASSWORD echo "FE_HOST: $FE_HOST" @@ -96,8 +97,8 @@ echo "PASSWORD: $PASSWORD" echo "DB: $DB" pre_set() { - echo $@ - mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB -e "$@" + echo "$@" + mysql -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" -D"$DB" -e "$@" } pre_set "set global enable_vectorized_engine=1;" @@ -108,11 +109,13 @@ pre_set "set global enable_projection=true;" pre_set "set global runtime_filter_mode=global;" # pre_set "set global enable_cost_based_join_reorder=1" echo '============================================' -pre_set "show variables" +pre_set "show variables;" +echo '============================================' +pre_set "show table status;" echo '============================================' for i in '1.1' '1.2' '1.3' '2.1' '2.2' '2.3' '3.1' '3.2' '3.3' '3.4' '4.1' '4.2' '4.3'; do # Each query is executed 3 times and takes the average time - res=$(mysqlslap -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT --create-schema=$DB --query=$QUERIES_DIR/q${i}.sql -F '\r' -i 3 | sed -n '2p' | cut -d ' ' -f 9,10) + res=$(mysqlslap -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" --create-schema="$DB" --query="$QUERIES_DIR"/q${i}.sql -F '\r' -i 3 | sed -n '2p' | cut -d ' ' -f 9,10) echo "q$i: $res" done diff --git a/tools/ssb-tools/doris-cluster.conf b/tools/ssb-tools/conf/doris-cluster.conf similarity index 100% rename from tools/ssb-tools/doris-cluster.conf rename to tools/ssb-tools/conf/doris-cluster.conf diff --git a/tools/ssb-tools/create-ssb-flat-table.sh b/tools/ssb-tools/create-ssb-flat-table.sh deleted file mode 100755 index 32778a906d..0000000000 --- a/tools/ssb-tools/create-ssb-flat-table.sh +++ /dev/null @@ -1,101 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -############################################################## -# This script is used to create ssb flat table -############################################################## - -set -eo pipefail - -ROOT=$(dirname "$0") -ROOT=$( - cd "$ROOT" - pwd -) - -CURDIR=${ROOT} -DDL="${CURDIR}/ddl/create-ssb-flat-table.sql" - -usage() { - echo " -This script is used to create ssb flat table, -will use mysql client to connect Doris server which is specified in doris-cluster.conf file. 
-Usage: $0 - " - exit 1 -} - -OPTS=$(getopt \ - -n $0 \ - -o '' \ - -o 'h' \ - -- "$@") - -eval set -- "$OPTS" -HELP=0 - -if [ $# == 0 ]; then - usage -fi - -while true; do - case "$1" in - -h) - HELP=1 - shift - ;; - --) - shift - break - ;; - *) - echo "Internal error" - exit 1 - ;; - esac -done - -if [[ ${HELP} -eq 1 ]]; then - usage - exit -fi - -check_prerequest() { - local CMD=$1 - local NAME=$2 - if ! $CMD; then - echo "$NAME is missing. This script depends on mysql to create tables in Doris." - exit 1 - fi -} - -check_prerequest "mysql --version" "mysql" - -source $CURDIR/doris-cluster.conf -export MYSQL_PWD=$PASSWORD - -echo "FE_HOST: $FE_HOST" -echo "FE_QUERY_PORT: $FE_QUERY_PORT" -echo "USER: $USER" -echo "PASSWORD: $PASSWORD" -echo "DB: $DB" - -mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -e "CREATE DATABASE IF NOT EXISTS $DB" - -echo "Run DDL from ${DDL}" -mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB <${DDL} diff --git a/tools/ssb-tools/ddl/create-ssb-flat-table.sql b/tools/ssb-tools/ddl/create-ssb-flat-table.sql index b0a4adf817..b1e1681a94 100644 --- a/tools/ssb-tools/ddl/create-ssb-flat-table.sql +++ b/tools/ssb-tools/ddl/create-ssb-flat-table.sql @@ -15,7 +15,7 @@ -- specific language governing permissions and limitations -- under the License. -CREATE TABLE `lineorder_flat` ( +CREATE TABLE IF NOT EXISTS `lineorder_flat` ( `LO_ORDERDATE` int(11) NOT NULL COMMENT "", `LO_ORDERKEY` int(11) NOT NULL COMMENT "", `LO_LINENUMBER` tinyint(4) NOT NULL COMMENT "", diff --git a/tools/ssb-tools/load-ssb-dimension-data.sh b/tools/ssb-tools/load-ssb-dimension-data.sh deleted file mode 100755 index 81d3f900ed..0000000000 --- a/tools/ssb-tools/load-ssb-dimension-data.sh +++ /dev/null @@ -1,86 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -############################################################## -# This script is used to load generated ssb data set to Doris -# Only for 4 dimension tables: customer, part, supplier and date. -# Usage: -# sh load-dimension-data.sh -############################################################## - -set -eo pipefail - -ROOT=$(dirname "$0") -ROOT=$( - cd "$ROOT" - pwd -) - -CURDIR=${ROOT} -SSB_DATA_DIR=$CURDIR/ssb-data/ - -# check if ssb-data exists -if [[ ! -d $SSB_DATA_DIR/ ]]; then - echo "$SSB_DATA_DIR does not exist. Run sh gen-ssb-data.sh first." - exit 1 -fi - -check_prerequest() { - local CMD=$1 - local NAME=$2 - if ! $CMD; then - echo "$NAME is missing. This script depends on cURL to load data to Doris." 
- exit 1 - fi -} - -check_prerequest "curl --version" "curl" - -# load 4 small dimension tables - -source $CURDIR/doris-cluster.conf - -echo "FE_HOST: $FE_HOST" -echo "FE_HTTP_PORT: $FE_HTTP_PORT" -echo "USER: $USER" -echo "PASSWORD: $PASSWORD" -echo "DB: $DB" - -echo 'Loading data for table: part' -curl --location-trusted -u $USER:$PASSWORD \ - -H "column_separator:|" \ - -H "columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy" \ - -T $SSB_DATA_DIR/part.tbl http://$FE_HOST:$FE_HTTP_PORT/api/$DB/part/_stream_load - -echo 'Loading data for table: date' -curl --location-trusted -u $USER:$PASSWORD \ - -H "column_separator:|" \ - -H "columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy" \ - -T $SSB_DATA_DIR/date.tbl http://$FE_HOST:$FE_HTTP_PORT/api/$DB/dates/_stream_load - -echo 'Loading data for table: supplier' -curl --location-trusted -u $USER:$PASSWORD \ - -H "column_separator:|" \ - -H "columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \ - -T $SSB_DATA_DIR/supplier.tbl http://$FE_HOST:$FE_HTTP_PORT/api/$DB/supplier/_stream_load - -echo 'Loading data for table: customer' -curl --location-trusted -u $USER:$PASSWORD \ - -H "column_separator:|" \ - -H "columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use" \ - -T $SSB_DATA_DIR/customer.tbl http://$FE_HOST:$FE_HTTP_PORT/api/$DB/customer/_stream_load diff --git a/tools/ssb-tools/load-ssb-fact-data.sh b/tools/ssb-tools/load-ssb-fact-data.sh deleted file mode 100755 index e6cd55e2aa..0000000000 --- a/tools/ssb-tools/load-ssb-fact-data.sh +++ /dev/null @@ -1,147 +0,0 @@ -#!/usr/bin/env bash -# Licensed to the Apache Software Foundation (ASF) under one -# or more contributor license agreements. See the NOTICE file -# distributed with this work for additional information -# regarding copyright ownership. The ASF licenses this file -# to you under the Apache License, Version 2.0 (the -# "License"); you may not use this file except in compliance -# with the License. You may obtain a copy of the License at -# -# http://www.apache.org/licenses/LICENSE-2.0 -# -# Unless required by applicable law or agreed to in writing, -# software distributed under the License is distributed on an -# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY -# KIND, either express or implied. See the License for the -# specific language governing permissions and limitations -# under the License. - -############################################################## -# This script is used to load generated ssb data set to Doris -# Only for 1 fact table: lineorder -############################################################## - -set -eo pipefail - -ROOT=$(dirname "$0") -ROOT=$( - cd "$ROOT" - pwd -) - -CURDIR=${ROOT} -SSB_DATA_DIR=$CURDIR/ssb-data/ - -usage() { - echo " -Usage: $0 - Optional options: - -c parallelism to load data of lineorder table, default is 5. - - Eg. - $0 load data using default value. - $0 -c 10 load lineorder table data using parallelism 10. 
- " - exit 1 -} - -OPTS=$(getopt \ - -n $0 \ - -o '' \ - -o 'hc:' \ - -- "$@") - -eval set -- "$OPTS" - -PARALLEL=3 -HELP=0 - -if [ $# == 0 ]; then - usage -fi - -while true; do - case "$1" in - -h) - HELP=1 - shift - ;; - -c) - PARALLEL=$2 - shift 2 - ;; - --) - shift - break - ;; - *) - echo "Internal error" - exit 1 - ;; - esac -done - -if [[ ${HELP} -eq 1 ]]; then - usage - exit -fi - -echo "Parallelism: $PARALLEL" - -# check if ssb-data exists -if [[ ! -d $SSB_DATA_DIR/ ]]; then - echo "$SSB_DATA_DIR does not exist. Run sh gen-ssb-data.sh first." - exit 1 -fi - -check_prerequest() { - local CMD=$1 - local NAME=$2 - if ! $CMD; then - echo "$NAME is missing. This script depends on cURL to load data to Doris." - exit 1 - fi -} - -check_prerequest "curl --version" "curl" - -# load lineorder -source $CURDIR/doris-cluster.conf - -echo "FE_HOST: $FE_HOST" -echo "FE_HTTP_PORT: $FE_HTTP_PORT" -echo "USER: $USER" -echo "PASSWORD: $PASSWORD" -echo "DB: $DB" - -function load() { - echo $@ - curl --location-trusted -u $USER:$PASSWORD \ - -H "column_separator:|" \ - -H "columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy" \ - -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/lineorder/_stream_load -} - -# set parallelism -[ -e /tmp/fd1 ] || mkfifo /tmp/fd1 -exec 3<>/tmp/fd1 -rm -rf /tmp/fd1 - -for ((i = 1; i <= $PARALLEL; i++)); do - echo >&3 -done - -echo $(date) -for file in $(ls $SSB_DATA_DIR/lineorder.tbl.*); do - read -u3 - { - load $file - echo >&3 - } & -done - -# wait for child thread finished -wait - -echo $(date) -echo "DONE." diff --git a/tools/tpch-tools/README.md b/tools/tpch-tools/README.md index dbef056fa7..bd9e483eec 100644 --- a/tools/tpch-tools/README.md +++ b/tools/tpch-tools/README.md @@ -24,23 +24,23 @@ follow the steps below: ### 1. build tpc-h dbgen tool. - ./build-tpch-dbgen.sh + ./bin/build-tpch-dbgen.sh ### 2. generate tpc-h data. use -h for more infomations. - ./gen-tpch-data.sh -s 1 + ./bin/gen-tpch-data.sh -s 1 -### 3. create tpc-h tables. modify `doris-cluster.conf` to specify doris info, then run script below. +### 3. create tpc-h tables. modify `conf/doris-cluster.conf` to specify doris info, then run script below. - ./create-tpch-tables.sh + ./bin/create-tpch-tables.sh ### 4. load tpc-h data. use -h for help. - ./load-tpch-data.sh + ./bin/load-tpch-data.sh ### 5. run tpc-h queries. - ./run-tpch-queries.sh + ./bin/run-tpch-queries.sh NOTICE: At present, Doris's query optimizer and statistical information functions are not complete, so we rewrite some queries in TPC-H to adapt to Doris' execution framework, but it does not affect the correctness of the results. The rewritten SQL is marked with "Modified" in the corresponding .sql file. diff --git a/tools/tpch-tools/build-tpch-dbgen.sh b/tools/tpch-tools/bin/build-tpch-dbgen.sh similarity index 93% rename from tools/tpch-tools/build-tpch-dbgen.sh rename to tools/tpch-tools/bin/build-tpch-dbgen.sh index 76da1ba890..b0cce4b1da 100755 --- a/tools/tpch-tools/build-tpch-dbgen.sh +++ b/tools/tpch-tools/bin/build-tpch-dbgen.sh @@ -51,13 +51,12 @@ if [[ -d $TPCH_DBGEN_DIR ]]; then echo "If you want to download TPC-H_Tools_v3.0.0 again, please delete this dir first." 
else wget "https://tools-chengdu.oss-cn-chengdu.aliyuncs.com/TPC-H_Tools_v3.0.0.zip" - - unzip TPC-H_Tools_v3.0.0.zip -d $CURDIR/ + unzip TPC-H_Tools_v3.0.0.zip -d "$CURDIR"/ fi # modify tpcd.h -cd $TPCH_DBGEN_DIR/ -echo ' +cd "$TPCH_DBGEN_DIR"/ +printf '%s' ' #ifdef MYSQL #define GEN_QUERY_PLAN "" #define START_TRAN "START TRANSACTION" @@ -81,7 +80,11 @@ cd - # check if [[ -f $TPCH_DBGEN_DIR/dbgen ]]; then - echo "Build succeed! Run $TPCH_DBGEN_DIR/dbgen -h" + echo " +################ +Build succeed! +################ +Run $TPCH_DBGEN_DIR/dbgen -h" exit 0 else echo "Build failed!" diff --git a/tools/tpch-tools/create-tpch-tables.sh b/tools/tpch-tools/bin/create-tpch-tables.sh similarity index 87% rename from tools/tpch-tools/create-tpch-tables.sh rename to tools/tpch-tools/bin/create-tpch-tables.sh index 79961dbba6..01ca7a1345 100755 --- a/tools/tpch-tools/create-tpch-tables.sh +++ b/tools/tpch-tools/bin/create-tpch-tables.sh @@ -40,7 +40,7 @@ Usage: $0 } OPTS=$(getopt \ - -n $0 \ + -n "$0" \ -o '' \ -- "$@") @@ -84,7 +84,8 @@ check_prerequest() { check_prerequest "mysql --version" "mysql" -source $CURDIR/doris-cluster.conf +# shellcheck source=/dev/null +source "$CURDIR/../conf/doris-cluster.conf" export MYSQL_PWD=$PASSWORD echo "FE_HOST: $FE_HOST" @@ -93,7 +94,7 @@ echo "USER: $USER" echo "PASSWORD: $PASSWORD" echo "DB: $DB" -mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -e "CREATE DATABASE IF NOT EXISTS $DB" +mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -e "CREATE DATABASE IF NOT EXISTS $DB" echo "Run SQLs from $CURDIR/create-tpch-tables.sql" -mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB <$CURDIR/create-tpch-tables.sql +mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" <"$CURDIR"/../ddl/create-tpch-tables.sql diff --git a/tools/tpch-tools/gen-tpch-data.sh b/tools/tpch-tools/bin/gen-tpch-data.sh similarity index 80% rename from tools/tpch-tools/gen-tpch-data.sh rename to tools/tpch-tools/bin/gen-tpch-data.sh index 0e7359d601..4202e3c58d 100755 --- a/tools/tpch-tools/gen-tpch-data.sh +++ b/tools/tpch-tools/bin/gen-tpch-data.sh @@ -48,7 +48,7 @@ Usage: $0 } OPTS=$(getopt \ - -n $0 \ + -n "$0" \ -o '' \ -o 'hs:c:' \ -- "$@") @@ -107,40 +107,40 @@ if [[ -d $TPCH_DATA_DIR/ ]]; then exit 1 fi -mkdir $TPCH_DATA_DIR/ +mkdir "$TPCH_DATA_DIR"/ # gen data -cd $TPCH_DBGEN_DIR +cd "$TPCH_DBGEN_DIR" echo "Begin to generate data for table: region" -$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T r +"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T r echo "Begin to generate data for table: nation" -$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T n +"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T n echo "Begin to generate data for table: supplier" -$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T s +"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T s echo "Begin to generate data for table: part" -$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T P +"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T P echo "Begin to generate data for table: customer" -$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T c +"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T c echo "Begin to generate data for table: partsupp" -for i in $(seq 1 $PARALLEL); do +for i in $(seq 1 "$PARALLEL"); do { - $TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T S -C $PARALLEL -S ${i} + "$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T S -C "$PARALLEL" -S "$i" } & done wait echo "Begin to generate data for table: orders" -for i in $(seq 1 $PARALLEL); do +for i in $(seq 1 "$PARALLEL"); do { - $TPCH_DBGEN_DIR/dbgen -f -s 
$SCALE_FACTOR -T O -C $PARALLEL -S ${i} + "$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T O -C "$PARALLEL" -S "$i" } & done wait echo "Begin to generate data for table: lineitem" -for i in $(seq 1 $PARALLEL); do +for i in $(seq 1 "$PARALLEL"); do { - $TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T L -C $PARALLEL -S ${i} + "$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T L -C "$PARALLEL" -S "$i" } & done wait @@ -148,7 +148,7 @@ wait cd - # move data to $TPCH_DATA_DIR -mv $TPCH_DBGEN_DIR/*.tbl* $TPCH_DATA_DIR/ +mv "$TPCH_DBGEN_DIR"/*.tbl* "$TPCH_DATA_DIR"/ # check data -du -sh $TPCH_DATA_DIR/*.tbl* +du -sh "$TPCH_DATA_DIR"/*.tbl* diff --git a/tools/tpch-tools/load-tpch-data.sh b/tools/tpch-tools/bin/load-tpch-data.sh similarity index 69% rename from tools/tpch-tools/load-tpch-data.sh rename to tools/tpch-tools/bin/load-tpch-data.sh index 572bfb859c..7a250ef2f4 100755 --- a/tools/tpch-tools/load-tpch-data.sh +++ b/tools/tpch-tools/bin/load-tpch-data.sh @@ -46,7 +46,7 @@ Usage: $0 } OPTS=$(getopt \ - -n $0 \ + -n "$0" \ -o '' \ -o 'hc:' \ -- "$@") @@ -89,7 +89,7 @@ fi echo "Parallelism: $PARALLEL" # check if tpch-data exists -if [[ ! -d $TPCH_DATA_DIR/ ]]; then +if [[ ! -d "$TPCH_DATA_DIR"/ ]]; then echo "$TPCH_DATA_DIR does not exist. Run sh gen-tpch-data.sh first." exit 1 fi @@ -106,7 +106,9 @@ check_prerequest() { check_prerequest "curl --version" "curl" # load tables -source $CURDIR/doris-cluster.conf +# shellcheck source=/dev/null +source "$CURDIR/../conf/doris-cluster.conf" +export MYSQL_PWD=$PASSWORD echo "FE_HOST: $FE_HOST" echo "FE_HTTP_PORT: $FE_HTTP_PORT" @@ -115,61 +117,62 @@ echo "PASSWORD: $PASSWORD" echo "DB: $DB" function load_region() { - echo $@ - curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + echo "$*" + curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \ -H "columns: r_regionkey, r_name, r_comment, temp" \ - -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/region/_stream_load + -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/region/_stream_load } function load_nation() { - echo $@ - curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + echo "$*" + curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \ -H "columns: n_nationkey, n_name, n_regionkey, n_comment, temp" \ - -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/nation/_stream_load + -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/nation/_stream_load } function load_supplier() { - echo $@ - curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + echo "$*" + curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \ -H "columns: s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, temp" \ - -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/supplier/_stream_load + -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/supplier/_stream_load } function load_customer() { - echo $@ - curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + echo "$*" + curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \ -H "columns: c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment, temp" \ - -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/customer/_stream_load + -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/customer/_stream_load } function load_part() { - echo $@ - curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + echo "$*" + curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \ -H "columns: p_partkey, p_name, 
p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, temp" \ - -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/part/_stream_load + -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/part/_stream_load } function load_partsupp() { - echo $@ - curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + echo "$*" + curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \ -H "columns: ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, temp" \ - -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/partsupp/_stream_load + -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/partsupp/_stream_load } function load_orders() { - echo $@ - curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + echo "$*" + curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \ -H "columns: o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, temp" \ - -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/orders/_stream_load + -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/orders/_stream_load } function load_lineitem() { - echo $@ - curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \ + echo "$*" + curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \ -H "columns: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag,l_linestatus, l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment,temp" \ - -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/lineitem/_stream_load + -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/lineitem/_stream_load } # start load -load_region $TPCH_DATA_DIR/region.tbl -load_nation $TPCH_DATA_DIR/nation.tbl -load_supplier $TPCH_DATA_DIR/supplier.tbl -load_customer $TPCH_DATA_DIR/customer.tbl -load_part $TPCH_DATA_DIR/part.tbl - +date +load_region "$TPCH_DATA_DIR"/region.tbl +load_nation "$TPCH_DATA_DIR"/nation.tbl +load_supplier "$TPCH_DATA_DIR"/supplier.tbl +load_customer "$TPCH_DATA_DIR"/customer.tbl +load_part "$TPCH_DATA_DIR"/part.tbl +date # set parallelism # 以PID为名, 防止创建命名管道时与已有文件重名,从而失败 @@ -182,20 +185,21 @@ exec 3<>${fifo} rm -rf ${fifo} # 在fd3中放置$PARALLEL个空行作为令牌 -for ((i = 1; i <= $PARALLEL; i++)); do +for ((i = 1; i <= PARALLEL; i++)); do echo >&3 done -for file in $(ls $TPCH_DATA_DIR/lineitem.tbl*); do +date +for file in "$TPCH_DATA_DIR"/lineitem.tbl*; do # 领取令牌, 即从fd3中读取行, 每次一行 # 对管道,读一行便少一行,每次只能读取一行 # 所有行读取完毕, 执行挂起, 直到管道再次有可读行 # 因此实现了进程数量控制 - read -u3 + read -r -u3 # 要批量执行的命令放在大括号内, 后台运行 { - load_lineitem $file + load_lineitem "$file" echo "----loaded $file" sleep 2 # 归还令牌, 即进程结束后,再写入一行,使挂起的循环继续执行 @@ -203,20 +207,22 @@ for file in $(ls $TPCH_DATA_DIR/lineitem.tbl*); do } & done -for file in $(ls $TPCH_DATA_DIR/orders.tbl*); do - read -u3 +date +for file in "$TPCH_DATA_DIR"/orders.tbl*; do + read -r -u3 { - load_orders $file + load_orders "$file" echo "----loaded $file" sleep 2 echo >&3 } & done -for file in $(ls $TPCH_DATA_DIR/partsupp.tbl*); do - read -u3 +date +for file in "$TPCH_DATA_DIR"/partsupp.tbl*; do + read -r -u3 { - load_partsupp $file + load_partsupp "$file" echo "----loaded $file" sleep 2 echo >&3 @@ -227,3 +233,6 @@ done wait # 删除文件标识符 exec 3>&- +date + +echo "DONE." 
\ No newline at end of file diff --git a/tools/tpch-tools/run-tpch-queries.sh b/tools/tpch-tools/bin/run-tpch-queries.sh similarity index 79% rename from tools/tpch-tools/run-tpch-queries.sh rename to tools/tpch-tools/bin/run-tpch-queries.sh index 9d7a3f1e18..14d00e17bb 100755 --- a/tools/tpch-tools/run-tpch-queries.sh +++ b/tools/tpch-tools/bin/run-tpch-queries.sh @@ -29,7 +29,7 @@ ROOT=$( ) CURDIR=${ROOT} -QUERIES_DIR=$CURDIR/queries +QUERIES_DIR=$CURDIR/../queries usage() { echo " @@ -41,7 +41,7 @@ Usage: $0 } OPTS=$(getopt \ - -n $0 \ + -n "$0" \ -o '' \ -- "$@") @@ -85,7 +85,9 @@ check_prerequest() { check_prerequest "mysql --version" "mysql" -source $CURDIR/doris-cluster.conf +# shellcheck source=/dev/null +source "$CURDIR/../conf/doris-cluster.conf" +export MYSQL_PWD=$PASSWORD echo "FE_HOST: $FE_HOST" echo "FE_QUERY_PORT: $FE_QUERY_PORT" @@ -95,10 +97,16 @@ echo "DB: $DB" echo "Time Unit: ms" pre_set() { - echo $@ - mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB -e "$@" + echo "$*" + mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" -e "$*" } +echo '============================================' +pre_set "show variables;" +echo '============================================' +pre_set "show table status;" +echo '============================================' + sum=0 for i in $(seq 1 22); do total=0 @@ -106,12 +114,12 @@ for i in $(seq 1 22); do # Each query is executed ${run} times and takes the average time for j in $(seq 1 ${run}); do start=$(date +%s%3N) - mysql -h$FE_HOST -u $USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB --comments <$QUERIES_DIR/q$i.sql >/dev/null + mysql -h"$FE_HOST" -u "$USER" -P"$FE_QUERY_PORT" -D"$DB" --comments <"$QUERIES_DIR"/q"$i".sql >/dev/null end=$(date +%s%3N) total=$((total + end - start)) done - cost=$((total / ${run})) + cost=$((total / run)) echo "q$i: ${cost}" - sum=$((sum + $cost)) + sum=$((sum + cost)) done echo "Total cost: $sum" diff --git a/tools/tpch-tools/doris-cluster.conf b/tools/tpch-tools/conf/doris-cluster.conf similarity index 98% rename from tools/tpch-tools/doris-cluster.conf rename to tools/tpch-tools/conf/doris-cluster.conf index 7367b144b2..9417bcb9e0 100644 --- a/tools/tpch-tools/doris-cluster.conf +++ b/tools/tpch-tools/conf/doris-cluster.conf @@ -26,4 +26,4 @@ export USER='root' # Doris password export PASSWORD='' # The database where TPC-H tables located -export DB='tpch1' +export DB='tpch' diff --git a/tools/tpch-tools/create-tpch-tables.sql b/tools/tpch-tools/ddl/create-tpch-tables.sql similarity index 100% rename from tools/tpch-tools/create-tpch-tables.sql rename to tools/tpch-tools/ddl/create-tpch-tables.sql
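
Note on conf/doris-cluster.conf: after this patch both tool sets source the config from conf/ instead of the script directory. The SSB copy is a pure rename (100% similarity, contents not shown in the diff); the TPC-H copy only changes DB from 'tpch1' to 'tpch'. As a rough sketch only, the file is expected to export the variables the bin/ scripts echo and use (FE_HOST, FE_HTTP_PORT, FE_QUERY_PORT, USER, PASSWORD, DB). The host, port, and SSB database values below are placeholder assumptions, not values taken from this patch:

# Doris FE host (placeholder)
export FE_HOST='127.0.0.1'
# Doris FE HTTP port, used by curl stream load (assumed Doris default)
export FE_HTTP_PORT='8030'
# Doris FE MySQL-protocol query port, used by mysql/mysqlslap (assumed Doris default)
export FE_QUERY_PORT='9030'
# Doris user and password
export USER='root'
export PASSWORD=''
# Database where the SSB (or TPC-H) tables are created; 'ssb' here is an assumed name
export DB='ssb'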