[tools](ssb and tpch) optimize tools (#11975)
@@ -20,27 +20,21 @@ under the License.

# Usage

These scripts are used to run the ssb and ssb flat tests.
The ssb flat data comes from ssb tables by way of 'INSERT INTO ... SELECT ...',
which means ssb test steps 1 to 4 should have been done before loading ssb flat data.
The ssb flat data comes from ssb tables by way of 'INSERT INTO ... SELECT ...'.

## ssb test, follow the steps below:

### 1. build ssb dbgen tool.

    ./build-ssb-dbgen.sh
    ./bin/build-ssb-dbgen.sh

### 2. generate ssb data. use -h for more information.

    ./gen-ssb-data.sh -s 1

### 3. create ssb tables. modify `doris-cluster.conf` to specify Doris info, then run the script below.

    ./create-ssb-tables.sh
    ./bin/gen-ssb-data.sh -s 1

### 3. create ssb tables. modify `conf/doris-cluster.conf` to specify Doris cluster info, then run the script below.

    ./bin/create-ssb-tables.sh

### 4. load ssb data. use -h for help.

    ./load-ssb-dimension-data.sh
    ./load-ssb-fact-data.sh
    ./bin/load-ssb-data.sh

### 5. run ssb queries.

    ./run-ssb-queries.sh
    ./bin/run-ssb-queries.sh

## ssb flat test, follow the steps below:

### 1. prepare ssb data, which means ssb test steps 1 to 4 have been done.
### 2. create ssb flat table in the same database as the ssb tables.

    ./create-ssb-flat-table.sh

### 3. load ssb flat data.

    ./load-ssb-flat-data.sh

### 4. run ssb flat queries.

    ./run-ssb-flat-queries.sh

### 2. run ssb flat queries.

    ./bin/run-ssb-flat-queries.sh
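Steps 3 and 4 above read the Doris connection settings from `conf/doris-cluster.conf`. As a reference, here is a minimal sketch of the variables that file is expected to export, based on what the scripts in this change source; the host and port values are placeholder assumptions, not taken from this commit:

    # conf/doris-cluster.conf -- sketch only; adjust to your cluster.
    export FE_HOST='127.0.0.1'      # Doris FE host (placeholder)
    export FE_HTTP_PORT='8030'      # FE HTTP port, used by stream load via curl (placeholder)
    export FE_QUERY_PORT='9030'     # FE MySQL-protocol port, used by mysql/mysqlslap (placeholder)
    export USER='root'              # Doris user
    export PASSWORD=''              # Doris password
    export DB='ssb'                 # database the scripts create and load into (assumed name)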
@@ -19,14 +19,17 @@
##############################################################
# This script is used to build ssb-dbgen
# ssb-dbgen's source code is from https://github.com/electrum/ssb-dbgen.git
# Usage:
#    sh build-ssb-dbgen.sh
##############################################################

set -eo pipefail

ROOT=`dirname "$0"`
ROOT=`cd "$ROOT"; pwd`
ROOT=$(dirname "$0")
ROOT=$(
    cd "$ROOT"
    pwd
)

CURDIR=${ROOT}
SSB_DBGEN_DIR=$CURDIR/ssb-dbgen/

@@ -35,17 +38,23 @@ SSB_DBGEN_DIR=$CURDIR/ssb-dbgen/
if [[ -d $SSB_DBGEN_DIR ]]; then
    echo "Dir $CURDIR/ssb-dbgen/ already exists. No need to download."
    echo "If you want to download ssb-dbgen again, please delete this dir first."
    exit 1
else
    curl https://palo-cloud-repo-bd.bd.bcebos.com/baidu-doris-release/ssb-dbgen-linux.tar.gz | tar xz -C $CURDIR/
    cd "$CURDIR"
    wget https://palo-cloud-repo-bd.bd.bcebos.com/baidu-doris-release/ssb-dbgen-linux.tar.gz && tar -xzvf ssb-dbgen-linux.tar.gz -C $CURDIR/
fi

# compile ssb-dbgen
cd $SSB_DBGEN_DIR/ && make
cd "$SSB_DBGEN_DIR/" && make
cd -

# check
if [[ -f $CURDIR/ssb-dbgen/dbgen ]]; then
    echo "Build succeed! Run $CURDIR/ssb-dbgen/dbgen -h"
    echo -e "
################
Build succeed!
################
Run $CURDIR/ssb-dbgen/dbgen -h"
    exit 0
else
    echo "Build failed!"
@@ -29,19 +29,20 @@ ROOT=$(
)

CURDIR=${ROOT}
DDL="${CURDIR}/ddl/create-ssb-tables.sql"
SSB_DDL="${CURDIR}/../ddl/create-ssb-tables.sql"
SSB_FLAT_DDL="${CURDIR}/../ddl/create-ssb-flat-table.sql"

usage() {
    echo "
This script is used to create SSB tables,
will use mysql client to connect Doris server which is specified in doris-cluster.conf file.
will use mysql client to connect Doris server which is specified in conf/doris-cluster.conf file.
Usage: $0
    "
    exit 1
}

OPTS=$(getopt \
    -n $0 \
    -n "$0" \
    -o '' \
    -o 'h' \
    -- "$@")

@@ -86,7 +87,8 @@ check_prerequest() {

check_prerequest "mysql --version" "mysql"

source $CURDIR/doris-cluster.conf
# shellcheck source=/dev/null
source "$CURDIR/../conf/doris-cluster.conf"
export MYSQL_PWD=$PASSWORD

echo "FE_HOST: $FE_HOST"

@@ -95,7 +97,10 @@ echo "USER: $USER"
echo "PASSWORD: $PASSWORD"
echo "DB: $DB"

mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -e "CREATE DATABASE IF NOT EXISTS $DB"
mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -e "CREATE DATABASE IF NOT EXISTS $DB"

echo "Run DDL from ${DDL}"
mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB <${DDL}
echo "Run DDL from $SSB_DDL"
mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" <"$SSB_DDL"

echo "Run DDL from $SSB_FLAT_DDL"
mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" <"$SSB_FLAT_DDL"
@@ -48,7 +48,7 @@ Usage: $0 <options>
}

OPTS=$(getopt \
    -n $0 \
    -n "$0" \
    -o '' \
    -o 'hs:c:' \
    -- "$@")

@@ -107,24 +107,24 @@ if [[ -d $SSB_DATA_DIR/ ]]; then
    exit 1
fi

mkdir $SSB_DATA_DIR/
mkdir "$SSB_DATA_DIR/"

# gen data
cd $SSB_DBGEN_DIR
cd "$SSB_DBGEN_DIR"
echo "Begin to generate data for table: customer"
$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T c
"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T c
echo "Begin to generate data for table: part"
$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T p
"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T p
echo "Begin to generate data for table: supplier"
$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T s
"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T s
echo "Begin to generate data for table: date"
$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T d
"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T d
echo "Begin to generate data for table: lineorder"
$SSB_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T l -C $PARALLEL
"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T l -C "$PARALLEL"
cd -

# move data to $SSB_DATA_DIR
mv $SSB_DBGEN_DIR/*.tbl* $SSB_DATA_DIR/
mv "$SSB_DBGEN_DIR"/*.tbl* "$SSB_DATA_DIR/"

# check data
du -sh $SSB_DATA_DIR/*.tbl*
du -sh "$SSB_DATA_DIR"/*.tbl*
@@ -30,30 +30,46 @@ ROOT=$(
)

CURDIR=${ROOT}
SSB_DATA_DIR="$CURDIR/ssb-data/"

usage() {
    echo "
The ssb flat data actually comes from the ssb tables, and will be loaded by 'INSERT INTO ... SELECT ...'
Usage: $0 <options>
  Optional options:
    -c    parallelism to load data of lineorder table, default is 5.

  Eg.
    $0              load data using default value.
    $0 -c 10        load lineorder table data using parallelism 10.
    "
    exit 1
}

OPTS=$(getopt \
    -n $0 \
    -n "$0" \
    -o '' \
    -o 'h' \
    -o 'hc:' \
    -- "$@")

eval set -- "$OPTS"

PARALLEL=5
HELP=0

if [ $# == 0 ]; then
    usage
fi

while true; do
    case "$1" in
    -h)
        HELP=1
        shift
        ;;
    -c)
        PARALLEL=$2
        shift 2
        ;;
    --)
        shift
        break
@@ -70,6 +86,14 @@ if [[ ${HELP} -eq 1 ]]; then
    exit
fi

echo "Parallelism: $PARALLEL"

# check if ssb-data exists
if [[ ! -d $SSB_DATA_DIR/ ]]; then
    echo "$SSB_DATA_DIR does not exist. Run sh gen-ssb-data.sh first."
    exit 1
fi

check_prerequest() {
    local CMD=$1
    local NAME=$2

@@ -80,9 +104,9 @@ check_prerequest() {
}

run_sql() {
    sql="$@"
    echo $sql
    mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB -e "$@"
    sql="$*"
    echo "$sql"
    mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" -e "$@"
}

load_lineitem_flat() {
@@ -165,7 +189,9 @@ ON (p.p_partkey = l.lo_partkey);
check_prerequest "curl --version" "curl"

# load lineorder
source $CURDIR/doris-cluster.conf
# shellcheck source=/dev/null
source "$CURDIR/../conf/doris-cluster.conf"
export MYSQL_PWD=$PASSWORD

echo "FE_HOST: $FE_HOST"
echo "FE_HTTP_PORT: $FE_HTTP_PORT"

@@ -173,25 +199,78 @@ echo "USER: $USER"
echo "PASSWORD: $PASSWORD"
echo "DB: $DB"

echo 'Loading data for table: lineorder_flat'
date
echo "==========Start to load data into ssb tables=========="
echo 'Loading data for table: part'
curl --location-trusted -u "$USER":"$PASSWORD" \
    -H "column_separator:|" \
    -H "columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy" \
    -T "$SSB_DATA_DIR"/part.tbl http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/part/_stream_load

echo '============================================'
echo 'Loading data for table: date'
curl --location-trusted -u "$USER":"$PASSWORD" \
    -H "column_separator:|" \
    -H "columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy" \
    -T "$SSB_DATA_DIR"/date.tbl http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/dates/_stream_load

echo 'Loading data for table: supplier'
curl --location-trusted -u "$USER":"$PASSWORD" \
    -H "column_separator:|" \
    -H "columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \
    -T "$SSB_DATA_DIR"/supplier.tbl http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/supplier/_stream_load

echo 'Loading data for table: customer'
curl --location-trusted -u "$USER":"$PASSWORD" \
    -H "column_separator:|" \
    -H "columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use" \
    -T "$SSB_DATA_DIR"/customer.tbl http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/customer/_stream_load

echo "Loading data for table: lineorder, with $PARALLEL parallel"
function load() {
    echo "$@"
    curl --location-trusted -u "$USER":"$PASSWORD" \
        -H "column_separator:|" \
        -H "columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy" \
        -T "$@" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/lineorder/_stream_load
}

# set parallelism
[ -e /tmp/fd1 ] || mkfifo /tmp/fd1
exec 3<>/tmp/fd1
rm -rf /tmp/fd1

for ((i = 1; i <= PARALLEL; i++)); do
    echo >&3
done

date
for file in "$SSB_DATA_DIR"/lineorder.tbl.*; do
    read -r -u3
    {
        load "$file"
        echo >&3
    } &
done

# wait for child threads to finish
wait
date

echo "==========Start to insert data into ssb flat table=========="
echo "change some session variables before load, and then restore after load."
origin_query_timeout=$(run_sql 'select @@query_timeout;' | sed -n '3p')
origin_parallel=$(run_sql 'select @@parallel_fragment_exec_instance_num;' | sed -n '3p')
# set parallel_fragment_exec_instance_num=1; loading may be slow but stable.
run_sql "set global query_timeout=7200;"
run_sql "set global parallel_fragment_exec_instance_num=1;"

echo '============================================'
echo $(date)
date
load_lineitem_flat

date
echo '============================================'
echo "restore session variables"
run_sql "set global query_timeout=${origin_query_timeout};"
run_sql "set global parallel_fragment_exec_instance_num=${origin_parallel};"

echo '============================================'
echo $(date)

echo "DONE."
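The load_lineitem_flat step driven above is what realizes the 'INSERT INTO ... SELECT ...' described in the README: it joins lineorder with the dimension tables (the `p.p_partkey = l.lo_partkey` join appears in the hunk context above) and writes the result into lineorder_flat. A small follow-up check one might run with the script's run_sql helper to sanity-check the result; this is an illustrative addition, not part of load-ssb-data.sh:

    # Hypothetical post-load check (not in the script): compare row counts
    # between the fact table and the flattened table.
    run_sql "select count(*) from lineorder;"
    run_sql "select count(*) from lineorder_flat;"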
@@ -28,8 +28,8 @@ ROOT=$(
    pwd
)

CURDIR=${ROOT}
QUERIES_DIR=$CURDIR/ssb-flat-queries
CURDIR="${ROOT}"
QUERIES_DIR="$CURDIR/../ssb-flat-queries"

usage() {
    echo "

@@ -41,7 +41,7 @@ Usage: $0
}

OPTS=$(getopt \
    -n $0 \
    -n "$0" \
    -o '' \
    -o 'h' \
    -- "$@")

@@ -86,7 +86,8 @@ check_prerequest() {

check_prerequest "mysqlslap --version" "mysqlslap"

source $CURDIR/doris-cluster.conf
# shellcheck source=/dev/null
source "$CURDIR/../conf/doris-cluster.conf"
export MYSQL_PWD=$PASSWORD

echo "FE_HOST: $FE_HOST"

@@ -96,8 +97,8 @@ echo "PASSWORD: $PASSWORD"
echo "DB: $DB"

pre_set() {
    echo $@
    mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB -e "$@"
    echo "$@"
    mysql -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" -D"$DB" -e "$@"
}

pre_set "set global enable_vectorized_engine=1;"

@@ -105,14 +106,15 @@ pre_set "set global parallel_fragment_exec_instance_num=8;"
pre_set "set global exec_mem_limit=8G;"
pre_set "set global batch_size=4096;"
echo '============================================'
pre_set "show variables"
pre_set "show variables;"
echo '============================================'
pre_set "show table status;"
echo '============================================'

for i in '1.1' '1.2' '1.3' '2.1' '2.2' '2.3' '3.1' '3.2' '3.3' '3.4' '4.1' '4.2' '4.3'; do
    # First run to prevent the effect of a cold start
    mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D $DB <$QUERIES_DIR/q${i}.sql >/dev/null 2>&1
    mysql -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" -D "$DB" <"$QUERIES_DIR"/q${i}.sql >/dev/null 2>&1
    # Then run 3 times and take the average time
    res=$(mysqlslap -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT --create-schema=$DB --query=$QUERIES_DIR/q${i}.sql -F '\r' -i 3 | sed -n '2p' | cut -d ' ' -f 9,10)
    res=$(mysqlslap -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" --create-schema="$DB" --query="$QUERIES_DIR"/q${i}.sql -F '\r' -i 3 | sed -n '2p' | cut -d ' ' -f 9,10)
    echo "q$i: $res"
    sleep 1
done
@@ -29,7 +29,7 @@ ROOT=$(
)

CURDIR=${ROOT}
QUERIES_DIR=$CURDIR/ssb-queries
QUERIES_DIR=$CURDIR/../ssb-queries

usage() {
    echo "

@@ -41,7 +41,7 @@ Usage: $0
}

OPTS=$(getopt \
    -n $0 \
    -n "$0" \
    -o '' \
    -o 'h' \
    -- "$@")

@@ -86,7 +86,8 @@ check_prerequest() {

check_prerequest "mysqlslap --version" "mysql slap"

source $CURDIR/doris-cluster.conf
# shellcheck source=/dev/null
source "$CURDIR/../conf/doris-cluster.conf"
export MYSQL_PWD=$PASSWORD

echo "FE_HOST: $FE_HOST"

@@ -96,8 +97,8 @@ echo "PASSWORD: $PASSWORD"
echo "DB: $DB"

pre_set() {
    echo $@
    mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB -e "$@"
    echo "$@"
    mysql -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" -D"$DB" -e "$@"
}

pre_set "set global enable_vectorized_engine=1;"

@@ -108,11 +109,13 @@ pre_set "set global enable_projection=true;"
pre_set "set global runtime_filter_mode=global;"
# pre_set "set global enable_cost_based_join_reorder=1"
echo '============================================'
pre_set "show variables"
pre_set "show variables;"
echo '============================================'
pre_set "show table status;"
echo '============================================'

for i in '1.1' '1.2' '1.3' '2.1' '2.2' '2.3' '3.1' '3.2' '3.3' '3.4' '4.1' '4.2' '4.3'; do
    # Each query is executed 3 times and the average time is taken
    res=$(mysqlslap -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT --create-schema=$DB --query=$QUERIES_DIR/q${i}.sql -F '\r' -i 3 | sed -n '2p' | cut -d ' ' -f 9,10)
    res=$(mysqlslap -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" --create-schema="$DB" --query="$QUERIES_DIR"/q${i}.sql -F '\r' -i 3 | sed -n '2p' | cut -d ' ' -f 9,10)
    echo "q$i: $res"
done
@@ -1,101 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

##############################################################
# This script is used to create ssb flat table
##############################################################

set -eo pipefail

ROOT=$(dirname "$0")
ROOT=$(
    cd "$ROOT"
    pwd
)

CURDIR=${ROOT}
DDL="${CURDIR}/ddl/create-ssb-flat-table.sql"

usage() {
    echo "
This script is used to create ssb flat table,
will use mysql client to connect Doris server which is specified in doris-cluster.conf file.
Usage: $0
    "
    exit 1
}

OPTS=$(getopt \
    -n $0 \
    -o '' \
    -o 'h' \
    -- "$@")

eval set -- "$OPTS"
HELP=0

if [ $# == 0 ]; then
    usage
fi

while true; do
    case "$1" in
    -h)
        HELP=1
        shift
        ;;
    --)
        shift
        break
        ;;
    *)
        echo "Internal error"
        exit 1
        ;;
    esac
done

if [[ ${HELP} -eq 1 ]]; then
    usage
    exit
fi

check_prerequest() {
    local CMD=$1
    local NAME=$2
    if ! $CMD; then
        echo "$NAME is missing. This script depends on mysql to create tables in Doris."
        exit 1
    fi
}

check_prerequest "mysql --version" "mysql"

source $CURDIR/doris-cluster.conf
export MYSQL_PWD=$PASSWORD

echo "FE_HOST: $FE_HOST"
echo "FE_QUERY_PORT: $FE_QUERY_PORT"
echo "USER: $USER"
echo "PASSWORD: $PASSWORD"
echo "DB: $DB"

mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -e "CREATE DATABASE IF NOT EXISTS $DB"

echo "Run DDL from ${DDL}"
mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB <${DDL}
@@ -15,7 +15,7 @@
-- specific language governing permissions and limitations
-- under the License.

CREATE TABLE `lineorder_flat` (
CREATE TABLE IF NOT EXISTS `lineorder_flat` (
    `LO_ORDERDATE` int(11) NOT NULL COMMENT "",
    `LO_ORDERKEY` int(11) NOT NULL COMMENT "",
    `LO_LINENUMBER` tinyint(4) NOT NULL COMMENT "",
@@ -1,86 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

##############################################################
# This script is used to load generated ssb data set to Doris
# Only for 4 dimension tables: customer, part, supplier and date.
# Usage:
#    sh load-dimension-data.sh
##############################################################

set -eo pipefail

ROOT=$(dirname "$0")
ROOT=$(
    cd "$ROOT"
    pwd
)

CURDIR=${ROOT}
SSB_DATA_DIR=$CURDIR/ssb-data/

# check if ssb-data exists
if [[ ! -d $SSB_DATA_DIR/ ]]; then
    echo "$SSB_DATA_DIR does not exist. Run sh gen-ssb-data.sh first."
    exit 1
fi

check_prerequest() {
    local CMD=$1
    local NAME=$2
    if ! $CMD; then
        echo "$NAME is missing. This script depends on cURL to load data to Doris."
        exit 1
    fi
}

check_prerequest "curl --version" "curl"

# load 4 small dimension tables

source $CURDIR/doris-cluster.conf

echo "FE_HOST: $FE_HOST"
echo "FE_HTTP_PORT: $FE_HTTP_PORT"
echo "USER: $USER"
echo "PASSWORD: $PASSWORD"
echo "DB: $DB"

echo 'Loading data for table: part'
curl --location-trusted -u $USER:$PASSWORD \
    -H "column_separator:|" \
    -H "columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy" \
    -T $SSB_DATA_DIR/part.tbl http://$FE_HOST:$FE_HTTP_PORT/api/$DB/part/_stream_load

echo 'Loading data for table: date'
curl --location-trusted -u $USER:$PASSWORD \
    -H "column_separator:|" \
    -H "columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy" \
    -T $SSB_DATA_DIR/date.tbl http://$FE_HOST:$FE_HTTP_PORT/api/$DB/dates/_stream_load

echo 'Loading data for table: supplier'
curl --location-trusted -u $USER:$PASSWORD \
    -H "column_separator:|" \
    -H "columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \
    -T $SSB_DATA_DIR/supplier.tbl http://$FE_HOST:$FE_HTTP_PORT/api/$DB/supplier/_stream_load

echo 'Loading data for table: customer'
curl --location-trusted -u $USER:$PASSWORD \
    -H "column_separator:|" \
    -H "columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use" \
    -T $SSB_DATA_DIR/customer.tbl http://$FE_HOST:$FE_HTTP_PORT/api/$DB/customer/_stream_load
@@ -1,147 +0,0 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.

##############################################################
# This script is used to load generated ssb data set to Doris
# Only for 1 fact table: lineorder
##############################################################

set -eo pipefail

ROOT=$(dirname "$0")
ROOT=$(
    cd "$ROOT"
    pwd
)

CURDIR=${ROOT}
SSB_DATA_DIR=$CURDIR/ssb-data/

usage() {
    echo "
Usage: $0 <options>
  Optional options:
    -c    parallelism to load data of lineorder table, default is 5.

  Eg.
    $0              load data using default value.
    $0 -c 10        load lineorder table data using parallelism 10.
    "
    exit 1
}

OPTS=$(getopt \
    -n $0 \
    -o '' \
    -o 'hc:' \
    -- "$@")

eval set -- "$OPTS"

PARALLEL=3
HELP=0

if [ $# == 0 ]; then
    usage
fi

while true; do
    case "$1" in
    -h)
        HELP=1
        shift
        ;;
    -c)
        PARALLEL=$2
        shift 2
        ;;
    --)
        shift
        break
        ;;
    *)
        echo "Internal error"
        exit 1
        ;;
    esac
done

if [[ ${HELP} -eq 1 ]]; then
    usage
    exit
fi

echo "Parallelism: $PARALLEL"

# check if ssb-data exists
if [[ ! -d $SSB_DATA_DIR/ ]]; then
    echo "$SSB_DATA_DIR does not exist. Run sh gen-ssb-data.sh first."
    exit 1
fi

check_prerequest() {
    local CMD=$1
    local NAME=$2
    if ! $CMD; then
        echo "$NAME is missing. This script depends on cURL to load data to Doris."
        exit 1
    fi
}

check_prerequest "curl --version" "curl"

# load lineorder
source $CURDIR/doris-cluster.conf

echo "FE_HOST: $FE_HOST"
echo "FE_HTTP_PORT: $FE_HTTP_PORT"
echo "USER: $USER"
echo "PASSWORD: $PASSWORD"
echo "DB: $DB"

function load() {
    echo $@
    curl --location-trusted -u $USER:$PASSWORD \
        -H "column_separator:|" \
        -H "columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy" \
        -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/lineorder/_stream_load
}

# set parallelism
[ -e /tmp/fd1 ] || mkfifo /tmp/fd1
exec 3<>/tmp/fd1
rm -rf /tmp/fd1

for ((i = 1; i <= $PARALLEL; i++)); do
    echo >&3
done

echo $(date)
for file in $(ls $SSB_DATA_DIR/lineorder.tbl.*); do
    read -u3
    {
        load $file
        echo >&3
    } &
done

# wait for child threads to finish
wait

echo $(date)
echo "DONE."
@@ -24,23 +24,23 @@ follow the steps below:

### 1. build tpc-h dbgen tool.

    ./build-tpch-dbgen.sh
    ./bin/build-tpch-dbgen.sh

### 2. generate tpc-h data. use -h for more information.

    ./gen-tpch-data.sh -s 1
    ./bin/gen-tpch-data.sh -s 1

### 3. create tpc-h tables. modify `doris-cluster.conf` to specify Doris info, then run the script below.
### 3. create tpc-h tables. modify `conf/doris-cluster.conf` to specify Doris info, then run the script below.

    ./create-tpch-tables.sh
    ./bin/create-tpch-tables.sh

### 4. load tpc-h data. use -h for help.

    ./load-tpch-data.sh
    ./bin/load-tpch-data.sh

### 5. run tpc-h queries.

    ./run-tpch-queries.sh
    ./bin/run-tpch-queries.sh

NOTICE: At present, Doris's query optimizer and statistics functions are not complete, so we rewrote some TPC-H queries to adapt them to Doris's execution framework, but this does not affect the correctness of the results. The rewritten SQL is marked with "Modified" in the corresponding .sql file.
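Putting the five steps together, a typical end-to-end run looks roughly like the sketch below (scale factor 1; assumes the working directory is the tpch tools directory containing `bin/` and `conf/`):

    ./bin/build-tpch-dbgen.sh        # step 1: build the dbgen tool
    ./bin/gen-tpch-data.sh -s 1      # step 2: generate data at scale factor 1
    # step 3: edit conf/doris-cluster.conf to point at your Doris cluster, then:
    ./bin/create-tpch-tables.sh
    ./bin/load-tpch-data.sh          # step 4: stream-load the generated data
    ./bin/run-tpch-queries.sh        # step 5: run the 22 queries and print timings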
@@ -51,13 +51,12 @@ if [[ -d $TPCH_DBGEN_DIR ]]; then
    echo "If you want to download TPC-H_Tools_v3.0.0 again, please delete this dir first."
else
    wget "https://tools-chengdu.oss-cn-chengdu.aliyuncs.com/TPC-H_Tools_v3.0.0.zip"

    unzip TPC-H_Tools_v3.0.0.zip -d $CURDIR/
    unzip TPC-H_Tools_v3.0.0.zip -d "$CURDIR"/
fi

# modify tpcd.h
cd $TPCH_DBGEN_DIR/
echo '
cd "$TPCH_DBGEN_DIR"/
printf '%s' '
#ifdef MYSQL
#define GEN_QUERY_PLAN ""
#define START_TRAN "START TRANSACTION"

@@ -81,7 +80,11 @@ cd -

# check
if [[ -f $TPCH_DBGEN_DIR/dbgen ]]; then
    echo "Build succeed! Run $TPCH_DBGEN_DIR/dbgen -h"
    echo "
################
Build succeed!
################
Run $TPCH_DBGEN_DIR/dbgen -h"
    exit 0
else
    echo "Build failed!"
@@ -40,7 +40,7 @@ Usage: $0
}

OPTS=$(getopt \
    -n $0 \
    -n "$0" \
    -o '' \
    -- "$@")

@@ -84,7 +84,8 @@ check_prerequest() {

check_prerequest "mysql --version" "mysql"

source $CURDIR/doris-cluster.conf
# shellcheck source=/dev/null
source "$CURDIR/../conf/doris-cluster.conf"
export MYSQL_PWD=$PASSWORD

echo "FE_HOST: $FE_HOST"

@@ -93,7 +94,7 @@ echo "USER: $USER"
echo "PASSWORD: $PASSWORD"
echo "DB: $DB"

mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -e "CREATE DATABASE IF NOT EXISTS $DB"
mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -e "CREATE DATABASE IF NOT EXISTS $DB"

echo "Run SQLs from $CURDIR/create-tpch-tables.sql"
mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB <$CURDIR/create-tpch-tables.sql
mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" <"$CURDIR"/../ddl/create-tpch-tables.sql
@@ -48,7 +48,7 @@ Usage: $0 <options>
}

OPTS=$(getopt \
    -n $0 \
    -n "$0" \
    -o '' \
    -o 'hs:c:' \
    -- "$@")

@@ -107,40 +107,40 @@ if [[ -d $TPCH_DATA_DIR/ ]]; then
    exit 1
fi

mkdir $TPCH_DATA_DIR/
mkdir "$TPCH_DATA_DIR"/

# gen data
cd $TPCH_DBGEN_DIR
cd "$TPCH_DBGEN_DIR"
echo "Begin to generate data for table: region"
$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T r
"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T r
echo "Begin to generate data for table: nation"
$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T n
"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T n
echo "Begin to generate data for table: supplier"
$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T s
"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T s
echo "Begin to generate data for table: part"
$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T P
"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T P
echo "Begin to generate data for table: customer"
$TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T c
"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T c
echo "Begin to generate data for table: partsupp"
for i in $(seq 1 $PARALLEL); do
for i in $(seq 1 "$PARALLEL"); do
    {
        $TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T S -C $PARALLEL -S ${i}
        "$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T S -C "$PARALLEL" -S "$i"
    } &
done
wait

echo "Begin to generate data for table: orders"
for i in $(seq 1 $PARALLEL); do
for i in $(seq 1 "$PARALLEL"); do
    {
        $TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T O -C $PARALLEL -S ${i}
        "$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T O -C "$PARALLEL" -S "$i"
    } &
done
wait

echo "Begin to generate data for table: lineitem"
for i in $(seq 1 $PARALLEL); do
for i in $(seq 1 "$PARALLEL"); do
    {
        $TPCH_DBGEN_DIR/dbgen -f -s $SCALE_FACTOR -T L -C $PARALLEL -S ${i}
        "$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T L -C "$PARALLEL" -S "$i"
    } &
done
wait

@@ -148,7 +148,7 @@ wait
cd -

# move data to $TPCH_DATA_DIR
mv $TPCH_DBGEN_DIR/*.tbl* $TPCH_DATA_DIR/
mv "$TPCH_DBGEN_DIR"/*.tbl* "$TPCH_DATA_DIR"/

# check data
du -sh $TPCH_DATA_DIR/*.tbl*
du -sh "$TPCH_DATA_DIR"/*.tbl*
@@ -46,7 +46,7 @@ Usage: $0 <options>
}

OPTS=$(getopt \
    -n $0 \
    -n "$0" \
    -o '' \
    -o 'hc:' \
    -- "$@")

@@ -89,7 +89,7 @@ fi
echo "Parallelism: $PARALLEL"

# check if tpch-data exists
if [[ ! -d $TPCH_DATA_DIR/ ]]; then
if [[ ! -d "$TPCH_DATA_DIR"/ ]]; then
    echo "$TPCH_DATA_DIR does not exist. Run sh gen-tpch-data.sh first."
    exit 1
fi

@@ -106,7 +106,9 @@ check_prerequest() {
check_prerequest "curl --version" "curl"

# load tables
source $CURDIR/doris-cluster.conf
# shellcheck source=/dev/null
source "$CURDIR/../conf/doris-cluster.conf"
export MYSQL_PWD=$PASSWORD

echo "FE_HOST: $FE_HOST"
echo "FE_HTTP_PORT: $FE_HTTP_PORT"

@@ -115,61 +117,62 @@ echo "PASSWORD: $PASSWORD"
echo "DB: $DB"

function load_region() {
    echo $@
    curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
    echo "$*"
    curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
        -H "columns: r_regionkey, r_name, r_comment, temp" \
        -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/region/_stream_load
        -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/region/_stream_load
}
function load_nation() {
    echo $@
    curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
    echo "$*"
    curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
        -H "columns: n_nationkey, n_name, n_regionkey, n_comment, temp" \
        -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/nation/_stream_load
        -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/nation/_stream_load
}
function load_supplier() {
    echo $@
    curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
    echo "$*"
    curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
        -H "columns: s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, temp" \
        -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/supplier/_stream_load
        -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/supplier/_stream_load
}
function load_customer() {
    echo $@
    curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
    echo "$*"
    curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
        -H "columns: c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment, temp" \
        -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/customer/_stream_load
        -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/customer/_stream_load
}
function load_part() {
    echo $@
    curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
    echo "$*"
    curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
        -H "columns: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, temp" \
        -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/part/_stream_load
        -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/part/_stream_load
}
function load_partsupp() {
    echo $@
    curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
    echo "$*"
    curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
        -H "columns: ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, temp" \
        -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/partsupp/_stream_load
        -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/partsupp/_stream_load
}
function load_orders() {
    echo $@
    curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
    echo "$*"
    curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
        -H "columns: o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, temp" \
        -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/orders/_stream_load
        -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/orders/_stream_load
}
function load_lineitem() {
    echo $@
    curl --location-trusted -u $USER:$PASSWORD -H "column_separator:|" \
    echo "$*"
    curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
        -H "columns: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag,l_linestatus, l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment,temp" \
        -T $@ http://$FE_HOST:$FE_HTTP_PORT/api/$DB/lineitem/_stream_load
        -T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/lineitem/_stream_load
}
# start load
|
||||
load_region $TPCH_DATA_DIR/region.tbl
|
||||
load_nation $TPCH_DATA_DIR/nation.tbl
|
||||
load_supplier $TPCH_DATA_DIR/supplier.tbl
|
||||
load_customer $TPCH_DATA_DIR/customer.tbl
|
||||
load_part $TPCH_DATA_DIR/part.tbl
|
||||
|
||||
date
|
||||
load_region "$TPCH_DATA_DIR"/region.tbl
|
||||
load_nation "$TPCH_DATA_DIR"/nation.tbl
|
||||
load_supplier "$TPCH_DATA_DIR"/supplier.tbl
|
||||
load_customer "$TPCH_DATA_DIR"/customer.tbl
|
||||
load_part "$TPCH_DATA_DIR"/part.tbl
|
||||
date
|
||||
# set parallelism
|
||||
|
||||
# 以PID为名, 防止创建命名管道时与已有文件重名,从而失败
|
||||
@ -182,20 +185,21 @@ exec 3<>${fifo}
|
||||
rm -rf ${fifo}
|
||||
|
||||
# 在fd3中放置$PARALLEL个空行作为令牌
|
||||
for ((i = 1; i <= $PARALLEL; i++)); do
|
||||
for ((i = 1; i <= PARALLEL; i++)); do
|
||||
echo >&3
|
||||
done
|
||||
|
||||
for file in $(ls $TPCH_DATA_DIR/lineitem.tbl*); do
|
||||
date
|
||||
for file in "$TPCH_DATA_DIR"/lineitem.tbl*; do
|
||||
# 领取令牌, 即从fd3中读取行, 每次一行
|
||||
# 对管道,读一行便少一行,每次只能读取一行
|
||||
# 所有行读取完毕, 执行挂起, 直到管道再次有可读行
|
||||
# 因此实现了进程数量控制
|
||||
read -u3
|
||||
read -r -u3
|
||||
|
||||
# 要批量执行的命令放在大括号内, 后台运行
|
||||
{
|
||||
load_lineitem $file
|
||||
load_lineitem "$file"
|
||||
echo "----loaded $file"
|
||||
sleep 2
|
||||
# 归还令牌, 即进程结束后,再写入一行,使挂起的循环继续执行
|
||||
@ -203,20 +207,22 @@ for file in $(ls $TPCH_DATA_DIR/lineitem.tbl*); do
|
||||
} &
|
||||
done
|
||||
|
||||
for file in $(ls $TPCH_DATA_DIR/orders.tbl*); do
|
||||
read -u3
|
||||
date
|
||||
for file in "$TPCH_DATA_DIR"/orders.tbl*; do
|
||||
read -r -u3
|
||||
{
|
||||
load_orders $file
|
||||
load_orders "$file"
|
||||
echo "----loaded $file"
|
||||
sleep 2
|
||||
echo >&3
|
||||
} &
|
||||
done
|
||||
|
||||
for file in $(ls $TPCH_DATA_DIR/partsupp.tbl*); do
|
||||
read -u3
|
||||
date
|
||||
for file in "$TPCH_DATA_DIR"/partsupp.tbl*; do
|
||||
read -r -u3
|
||||
{
|
||||
load_partsupp $file
|
||||
load_partsupp "$file"
|
||||
echo "----loaded $file"
|
||||
sleep 2
|
||||
echo >&3
|
||||
@ -227,3 +233,6 @@ done
|
||||
wait
|
||||
# 删除文件标识符
|
||||
exec 3>&-
|
||||
date
|
||||
|
||||
echo "DONE."
|
||||
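The comments above (translated from Chinese in this cleanup) describe the fifo-based token scheme that both load scripts use to cap how many stream loads run at once. Stripped of the curl details, the pattern is roughly the sketch below; the file glob and the body of each job are illustrative, not taken from the scripts:

    #!/usr/bin/env bash
    # Minimal sketch of the token-based concurrency cap (illustrative only).
    PARALLEL=5
    fifo="/tmp/fifo_demo_$$"               # PID in the name avoids clashing with an existing file
    mkfifo "$fifo"
    exec 3<>"$fifo"                        # open the pipe read-write on fd 3
    rm -rf "$fifo"                         # the path can go; fd 3 keeps the pipe alive
    for ((i = 1; i <= PARALLEL; i++)); do  # seed PARALLEL tokens
        echo >&3
    done
    for file in ./*.chunk; do              # illustrative glob
        read -r -u3                        # take a token; blocks when none are left
        {
            echo "processing $file"        # the real scripts run a curl stream load here
            sleep 1
            echo >&3                       # return the token
        } &
    done
    wait                                   # wait for all background jobs to finish
    exec 3>&-                              # close the file descriptor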
@@ -29,7 +29,7 @@ ROOT=$(
)

CURDIR=${ROOT}
QUERIES_DIR=$CURDIR/queries
QUERIES_DIR=$CURDIR/../queries

usage() {
    echo "

@@ -41,7 +41,7 @@ Usage: $0
}

OPTS=$(getopt \
    -n $0 \
    -n "$0" \
    -o '' \
    -- "$@")

@@ -85,7 +85,9 @@ check_prerequest() {

check_prerequest "mysql --version" "mysql"

source $CURDIR/doris-cluster.conf
# shellcheck source=/dev/null
source "$CURDIR/../conf/doris-cluster.conf"
export MYSQL_PWD=$PASSWORD

echo "FE_HOST: $FE_HOST"
echo "FE_QUERY_PORT: $FE_QUERY_PORT"

@@ -95,10 +97,16 @@ echo "DB: $DB"
echo "Time Unit: ms"

pre_set() {
    echo $@
    mysql -h$FE_HOST -u$USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB -e "$@"
    echo "$*"
    mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" -e "$*"
}

echo '============================================'
pre_set "show variables;"
echo '============================================'
pre_set "show table status;"
echo '============================================'

sum=0
for i in $(seq 1 22); do
    total=0

@@ -106,12 +114,12 @@ for i in $(seq 1 22); do
    # Each query is executed ${run} times and the average time is taken
    for j in $(seq 1 ${run}); do
        start=$(date +%s%3N)
        mysql -h$FE_HOST -u $USER --password=$PASSWORD -P$FE_QUERY_PORT -D$DB --comments <$QUERIES_DIR/q$i.sql >/dev/null
        mysql -h"$FE_HOST" -u "$USER" -P"$FE_QUERY_PORT" -D"$DB" --comments <"$QUERIES_DIR"/q"$i".sql >/dev/null
        end=$(date +%s%3N)
        total=$((total + end - start))
    done
    cost=$((total / ${run}))
    cost=$((total / run))
    echo "q$i: ${cost}"
    sum=$((sum + $cost))
    sum=$((sum + cost))
done
echo "Total cost: $sum"
@@ -26,4 +26,4 @@ export USER='root'
# Doris password
export PASSWORD=''
# The database where the TPC-H tables are located
export DB='tpch1'
export DB='tpch'