[ci](shellcheck)shellcheck include ssb-tools and tpch-tools (#12039)
This commit is contained in:
2
.github/workflows/shellcheck.yml
vendored
2
.github/workflows/shellcheck.yml
vendored
@ -35,4 +35,4 @@ jobs:
|
||||
GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
|
||||
with:
|
||||
sh_checker_comment: true
|
||||
sh_checker_exclude: .git .github ^thirdparty/src ^thirdparty/installed ^ui ^docs/node_modules ^tools/ssb-tools ^tools/tpch-tools ^tools/clickbench-tools ^extension ^output ^fs_brokers/apache_hdfs_broker/output
|
||||
sh_checker_exclude: .git .github ^thirdparty/src ^thirdparty/installed ^ui ^docs/node_modules ^tools/clickbench-tools ^extension ^output ^fs_brokers/apache_hdfs_broker/output
|
||||
|
||||
@ -27,34 +27,36 @@ set -eo pipefail
|
||||
|
||||
ROOT=$(dirname "$0")
|
||||
ROOT=$(
|
||||
cd "$ROOT"
|
||||
cd "${ROOT}"
|
||||
pwd
|
||||
)
|
||||
|
||||
CURDIR=${ROOT}
|
||||
SSB_DBGEN_DIR=$CURDIR/ssb-dbgen/
|
||||
CURDIR="${ROOT}"
|
||||
SSB_DBGEN_DIR="${CURDIR}/ssb-dbgen/"
|
||||
|
||||
# download ssb-dbgen first
|
||||
if [[ -d $SSB_DBGEN_DIR ]]; then
|
||||
echo "Dir $CURDIR/ssb-dbgen/ already exists. No need to download."
|
||||
if [[ -d ${SSB_DBGEN_DIR} ]]; then
|
||||
echo "Dir ${CURDIR}/ssb-dbgen/ already exists. No need to download."
|
||||
echo "If you want to download ssb-dbgen again, please delete this dir first."
|
||||
exit 1
|
||||
else
|
||||
cd "$CURDIR"
|
||||
wget https://palo-cloud-repo-bd.bd.bcebos.com/baidu-doris-release/ssb-dbgen-linux.tar.gz && tar -xzvf ssb-dbgen-linux.tar.gz -C $CURDIR/
|
||||
cd "${CURDIR}"
|
||||
wget https://palo-cloud-repo-bd.bd.bcebos.com/baidu-doris-release/ssb-dbgen-linux.tar.gz
|
||||
tar -xzvf ssb-dbgen-linux.tar.gz -C "${CURDIR}"/
|
||||
fi
|
||||
|
||||
# compile ssb-dbgen
|
||||
cd "$SSB_DBGEN_DIR/" && make
|
||||
cd "${SSB_DBGEN_DIR}/"
|
||||
make
|
||||
cd -
|
||||
|
||||
# check
|
||||
if [[ -f $CURDIR/ssb-dbgen/dbgen ]]; then
|
||||
if [[ -f ${CURDIR}/ssb-dbgen/dbgen ]]; then
|
||||
echo -e "
|
||||
################
|
||||
Build succeed!
|
||||
################
|
||||
Run $CURDIR/ssb-dbgen/dbgen -h"
|
||||
Run ${CURDIR}/ssb-dbgen/dbgen -h"
|
||||
exit 0
|
||||
else
|
||||
echo "Build failed!"
|
||||
|
||||
@ -24,83 +24,82 @@ set -eo pipefail
|
||||
|
||||
ROOT=$(dirname "$0")
|
||||
ROOT=$(
|
||||
cd "$ROOT"
|
||||
pwd
|
||||
cd "${ROOT}"
|
||||
pwd
|
||||
)
|
||||
|
||||
CURDIR=${ROOT}
|
||||
CURDIR="${ROOT}"
|
||||
SSB_DDL="${CURDIR}/../ddl/create-ssb-tables.sql"
|
||||
SSB_FLAT_DDL="${CURDIR}/../ddl/create-ssb-flat-table.sql"
|
||||
|
||||
usage() {
|
||||
echo "
|
||||
echo "
|
||||
This script is used to create SSB tables,
|
||||
will use mysql client to connect Doris server which is specified in conf/doris-cluster.conf file.
|
||||
Usage: $0
|
||||
"
|
||||
exit 1
|
||||
exit 1
|
||||
}
|
||||
|
||||
OPTS=$(getopt \
|
||||
-n "$0" \
|
||||
-o '' \
|
||||
-o 'h' \
|
||||
-- "$@")
|
||||
-n "$0" \
|
||||
-o '' \
|
||||
-o 'h' \
|
||||
-- "$@")
|
||||
|
||||
eval set -- "$OPTS"
|
||||
eval set -- "${OPTS}"
|
||||
HELP=0
|
||||
|
||||
if [ $# == 0 ]; then
|
||||
usage
|
||||
if [[ $# == 0 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
while true; do
|
||||
case "$1" in
|
||||
-h)
|
||||
HELP=1
|
||||
shift
|
||||
;;
|
||||
--)
|
||||
shift
|
||||
break
|
||||
;;
|
||||
*)
|
||||
echo "Internal error"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
case "$1" in
|
||||
-h)
|
||||
HELP=1
|
||||
shift
|
||||
;;
|
||||
--)
|
||||
shift
|
||||
break
|
||||
;;
|
||||
*)
|
||||
echo "Internal error"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ ${HELP} -eq 1 ]]; then
|
||||
usage
|
||||
exit
|
||||
usage
|
||||
exit
|
||||
fi
|
||||
|
||||
check_prerequest() {
|
||||
local CMD=$1
|
||||
local NAME=$2
|
||||
if ! $CMD; then
|
||||
echo "$NAME is missing. This script depends on mysql to create tables in Doris."
|
||||
exit 1
|
||||
fi
|
||||
local CMD=$1
|
||||
local NAME=$2
|
||||
if ! ${CMD}; then
|
||||
echo "${NAME} is missing. This script depends on mysql to create tables in Doris."
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
check_prerequest "mysql --version" "mysql"
|
||||
|
||||
# shellcheck source=/dev/null
|
||||
source "$CURDIR/../conf/doris-cluster.conf"
|
||||
export MYSQL_PWD=$PASSWORD
|
||||
source "${CURDIR}/../conf/doris-cluster.conf"
|
||||
export MYSQL_PWD="${PASSWORD}"
|
||||
|
||||
echo "FE_HOST: $FE_HOST"
|
||||
echo "FE_QUERY_PORT: $FE_QUERY_PORT"
|
||||
echo "USER: $USER"
|
||||
echo "PASSWORD: $PASSWORD"
|
||||
echo "DB: $DB"
|
||||
echo "FE_HOST: ${FE_HOST}"
|
||||
echo "FE_QUERY_PORT: ${FE_QUERY_PORT}"
|
||||
echo "USER: ${USER}"
|
||||
echo "PASSWORD: ${PASSWORD}"
|
||||
echo "DB: ${DB}"
|
||||
|
||||
mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -e "CREATE DATABASE IF NOT EXISTS $DB"
|
||||
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -e "CREATE DATABASE IF NOT EXISTS ${DB}"
|
||||
|
||||
echo "Run DDL from $SSB_DDL"
|
||||
mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" <"$SSB_DDL"
|
||||
echo "Run DDL from ${SSB_DDL}"
|
||||
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" <"${SSB_DDL}"
|
||||
|
||||
echo "Run DDL from $SSB_FLAT_DDL"
|
||||
mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" <"$SSB_FLAT_DDL"
|
||||
echo "Run DDL from ${SSB_FLAT_DDL}"
|
||||
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" <"${SSB_FLAT_DDL}"
|
||||
|
||||
@ -24,16 +24,16 @@ set -eo pipefail
|
||||
|
||||
ROOT=$(dirname "$0")
|
||||
ROOT=$(
|
||||
cd "$ROOT"
|
||||
pwd
|
||||
cd "${ROOT}"
|
||||
pwd
|
||||
)
|
||||
|
||||
CURDIR=${ROOT}
|
||||
SSB_DBGEN_DIR=$CURDIR/ssb-dbgen/
|
||||
SSB_DATA_DIR=$CURDIR/ssb-data/
|
||||
SSB_DBGEN_DIR=${CURDIR}/ssb-dbgen/
|
||||
SSB_DATA_DIR=${CURDIR}/ssb-data/
|
||||
|
||||
usage() {
|
||||
echo "
|
||||
echo "
|
||||
Usage: $0 <options>
|
||||
Optional options:
|
||||
-s scale factor, default is 100
|
||||
@ -44,87 +44,87 @@ Usage: $0 <options>
|
||||
$0 -s 10 generate data with scale factor 10.
|
||||
$0 -s 10 -c 5 generate data with scale factor 10. And using 5 threads to generate data concurrently.
|
||||
"
|
||||
exit 1
|
||||
exit 1
|
||||
}
|
||||
|
||||
OPTS=$(getopt \
|
||||
-n "$0" \
|
||||
-o '' \
|
||||
-o 'hs:c:' \
|
||||
-- "$@")
|
||||
-n "$0" \
|
||||
-o '' \
|
||||
-o 'hs:c:' \
|
||||
-- "$@")
|
||||
|
||||
eval set -- "$OPTS"
|
||||
eval set -- "${OPTS}"
|
||||
|
||||
SCALE_FACTOR=100
|
||||
PARALLEL=10
|
||||
HELP=0
|
||||
|
||||
if [ $# == 0 ]; then
|
||||
usage
|
||||
if [[ $# == 0 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
while true; do
|
||||
case "$1" in
|
||||
-h)
|
||||
HELP=1
|
||||
shift
|
||||
;;
|
||||
-s)
|
||||
SCALE_FACTOR=$2
|
||||
shift 2
|
||||
;;
|
||||
-c)
|
||||
PARALLEL=$2
|
||||
shift 2
|
||||
;;
|
||||
--)
|
||||
shift
|
||||
break
|
||||
;;
|
||||
*)
|
||||
echo "Internal error"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
case "$1" in
|
||||
-h)
|
||||
HELP=1
|
||||
shift
|
||||
;;
|
||||
-s)
|
||||
SCALE_FACTOR=$2
|
||||
shift 2
|
||||
;;
|
||||
-c)
|
||||
PARALLEL=$2
|
||||
shift 2
|
||||
;;
|
||||
--)
|
||||
shift
|
||||
break
|
||||
;;
|
||||
*)
|
||||
echo "Internal error"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ ${HELP} -eq 1 ]]; then
|
||||
usage
|
||||
exit
|
||||
usage
|
||||
exit
|
||||
fi
|
||||
|
||||
echo "Scale Factor: $SCALE_FACTOR"
|
||||
echo "Parallelism: $PARALLEL"
|
||||
echo "Scale Factor: ${SCALE_FACTOR}"
|
||||
echo "Parallelism: ${PARALLEL}"
|
||||
|
||||
# check if dbgen exists
|
||||
if [[ ! -f $SSB_DBGEN_DIR/dbgen ]]; then
|
||||
echo "$SSB_DBGEN_DIR/dbgen does not exist. Run build-ssb-dbgen.sh first to build it first."
|
||||
exit 1
|
||||
if [[ ! -f ${SSB_DBGEN_DIR}/dbgen ]]; then
|
||||
echo "${SSB_DBGEN_DIR}/dbgen does not exist. Run build-ssb-dbgen.sh first to build it first."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ -d $SSB_DATA_DIR/ ]]; then
|
||||
echo "$SSB_DATA_DIR exists. Remove it before generating data"
|
||||
exit 1
|
||||
if [[ -d ${SSB_DATA_DIR}/ ]]; then
|
||||
echo "${SSB_DATA_DIR} exists. Remove it before generating data"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
mkdir "$SSB_DATA_DIR/"
|
||||
mkdir "${SSB_DATA_DIR}/"
|
||||
|
||||
# gen data
|
||||
cd "$SSB_DBGEN_DIR"
|
||||
cd "${SSB_DBGEN_DIR}"
|
||||
echo "Begin to generate data for table: customer"
|
||||
"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T c
|
||||
"${SSB_DBGEN_DIR}/dbgen" -f -s "${SCALE_FACTOR}" -T c
|
||||
echo "Begin to generate data for table: part"
|
||||
"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T p
|
||||
"${SSB_DBGEN_DIR}/dbgen" -f -s "${SCALE_FACTOR}" -T p
|
||||
echo "Begin to generate data for table: supplier"
|
||||
"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T s
|
||||
"${SSB_DBGEN_DIR}/dbgen" -f -s "${SCALE_FACTOR}" -T s
|
||||
echo "Begin to generate data for table: date"
|
||||
"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T d
|
||||
"${SSB_DBGEN_DIR}/dbgen" -f -s "${SCALE_FACTOR}" -T d
|
||||
echo "Begin to generate data for table: lineorder"
|
||||
"$SSB_DBGEN_DIR/dbgen" -f -s "$SCALE_FACTOR" -T l -C "$PARALLEL"
|
||||
"${SSB_DBGEN_DIR}/dbgen" -f -s "${SCALE_FACTOR}" -T l -C "${PARALLEL}"
|
||||
cd -
|
||||
|
||||
# move data to $SSB_DATA_DIR
|
||||
mv "$SSB_DBGEN_DIR"/*.tbl* "$SSB_DATA_DIR/"
|
||||
mv "${SSB_DBGEN_DIR}"/*.tbl* "${SSB_DATA_DIR}/"
|
||||
|
||||
# check data
|
||||
du -sh "$SSB_DATA_DIR"/*.tbl*
|
||||
du -sh "${SSB_DATA_DIR}"/*.tbl*
|
||||
|
||||
@ -25,12 +25,12 @@ set -eo pipefail
|
||||
|
||||
ROOT=$(dirname "$0")
|
||||
ROOT=$(
|
||||
cd "$ROOT"
|
||||
cd "${ROOT}"
|
||||
pwd
|
||||
)
|
||||
|
||||
CURDIR=${ROOT}
|
||||
SSB_DATA_DIR="$CURDIR/ssb-data/"
|
||||
CURDIR="${ROOT}"
|
||||
SSB_DATA_DIR="${CURDIR}/ssb-data/"
|
||||
|
||||
usage() {
|
||||
echo "
|
||||
@ -51,12 +51,12 @@ OPTS=$(getopt \
|
||||
-o 'hc:' \
|
||||
-- "$@")
|
||||
|
||||
eval set -- "$OPTS"
|
||||
eval set -- "${OPTS}"
|
||||
|
||||
PARALLEL=5
|
||||
HELP=0
|
||||
|
||||
if [ $# == 0 ]; then
|
||||
if [[ $# == 0 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
@ -86,33 +86,33 @@ if [[ ${HELP} -eq 1 ]]; then
|
||||
exit
|
||||
fi
|
||||
|
||||
echo "Parallelism: $PARALLEL"
|
||||
echo "Parallelism: ${PARALLEL}"
|
||||
|
||||
# check if ssb-data exists
|
||||
if [[ ! -d $SSB_DATA_DIR/ ]]; then
|
||||
echo "$SSB_DATA_DIR does not exist. Run sh gen-ssb-data.sh first."
|
||||
if [[ ! -d ${SSB_DATA_DIR}/ ]]; then
|
||||
echo "${SSB_DATA_DIR} does not exist. Run sh gen-ssb-data.sh first."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
check_prerequest() {
|
||||
local CMD=$1
|
||||
local NAME=$2
|
||||
if ! $CMD; then
|
||||
echo "$NAME is missing. This script depends on cURL to load data to Doris."
|
||||
if ! ${CMD}; then
|
||||
echo "${NAME} is missing. This script depends on cURL to load data to Doris."
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
run_sql() {
|
||||
sql="$*"
|
||||
echo "$sql"
|
||||
mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" -e "$@"
|
||||
echo "${sql}"
|
||||
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e "$@"
|
||||
}
|
||||
|
||||
load_lineitem_flat() {
|
||||
# Loading data in batches by year.
|
||||
for con in 'lo_orderdate<19930101' 'lo_orderdate>=19930101 and lo_orderdate<19940101' 'lo_orderdate>=19940101 and lo_orderdate<19950101' 'lo_orderdate>=19950101 and lo_orderdate<19960101' 'lo_orderdate>=19960101 and lo_orderdate<19970101' 'lo_orderdate>=19970101 and lo_orderdate<19980101' 'lo_orderdate>=19980101'; do
|
||||
echo -e "\n$con"
|
||||
echo -e "\n${con}"
|
||||
run_sql "
|
||||
INSERT INTO lineorder_flat
|
||||
SELECT
|
||||
@ -189,53 +189,52 @@ ON (p.p_partkey = l.lo_partkey);
|
||||
check_prerequest "curl --version" "curl"
|
||||
|
||||
# load lineorder
|
||||
# shellcheck source=/dev/null
|
||||
source "$CURDIR/../conf/doris-cluster.conf"
|
||||
export MYSQL_PWD=$PASSWORD
|
||||
source "${CURDIR}/../conf/doris-cluster.conf"
|
||||
export MYSQL_PWD=${PASSWORD}
|
||||
|
||||
echo "FE_HOST: $FE_HOST"
|
||||
echo "FE_HTTP_PORT: $FE_HTTP_PORT"
|
||||
echo "USER: $USER"
|
||||
echo "PASSWORD: $PASSWORD"
|
||||
echo "DB: $DB"
|
||||
echo "FE_HOST: ${FE_HOST}"
|
||||
echo "FE_HTTP_PORT: ${FE_HTTP_PORT}"
|
||||
echo "USER: ${USER}"
|
||||
echo "PASSWORD: ${PASSWORD}"
|
||||
echo "DB: ${DB}"
|
||||
|
||||
date
|
||||
echo "==========Start to load data into ssb tables=========="
|
||||
echo 'Loading data for table: part'
|
||||
curl --location-trusted -u "$USER":"$PASSWORD" \
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" \
|
||||
-H "column_separator:|" \
|
||||
-H "columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy" \
|
||||
-T "$SSB_DATA_DIR"/part.tbl http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/part/_stream_load
|
||||
-T "${SSB_DATA_DIR}"/part.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load
|
||||
|
||||
echo 'Loading data for table: date'
|
||||
curl --location-trusted -u "$USER":"$PASSWORD" \
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" \
|
||||
-H "column_separator:|" \
|
||||
-H "columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy" \
|
||||
-T "$SSB_DATA_DIR"/date.tbl http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/dates/_stream_load
|
||||
-T "${SSB_DATA_DIR}"/date.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/dates/_stream_load
|
||||
|
||||
echo 'Loading data for table: supplier'
|
||||
curl --location-trusted -u "$USER":"$PASSWORD" \
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" \
|
||||
-H "column_separator:|" \
|
||||
-H "columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \
|
||||
-T "$SSB_DATA_DIR"/supplier.tbl http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/supplier/_stream_load
|
||||
-T "${SSB_DATA_DIR}"/supplier.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load
|
||||
|
||||
echo 'Loading data for table: customer'
|
||||
curl --location-trusted -u "$USER":"$PASSWORD" \
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" \
|
||||
-H "column_separator:|" \
|
||||
-H "columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use" \
|
||||
-T "$SSB_DATA_DIR"/customer.tbl http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/customer/_stream_load
|
||||
-T "${SSB_DATA_DIR}"/customer.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load
|
||||
|
||||
echo "Loading data for table: lineorder, with $PARALLEL parallel"
|
||||
echo "Loading data for table: lineorder, with ${PARALLEL} parallel"
|
||||
function load() {
|
||||
echo "$@"
|
||||
curl --location-trusted -u "$USER":"$PASSWORD" \
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" \
|
||||
-H "column_separator:|" \
|
||||
-H "columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy" \
|
||||
-T "$@" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/lineorder/_stream_load
|
||||
-T "$@" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineorder/_stream_load
|
||||
}
|
||||
|
||||
# set parallelism
|
||||
[ -e /tmp/fd1 ] || mkfifo /tmp/fd1
|
||||
[[ -e /tmp/fd1 ]] || mkfifo /tmp/fd1
|
||||
exec 3<>/tmp/fd1
|
||||
rm -rf /tmp/fd1
|
||||
|
||||
@ -244,10 +243,10 @@ for ((i = 1; i <= PARALLEL; i++)); do
|
||||
done
|
||||
|
||||
date
|
||||
for file in "$SSB_DATA_DIR"/lineorder.tbl.*; do
|
||||
for file in "${SSB_DATA_DIR}"/lineorder.tbl.*; do
|
||||
read -r -u3
|
||||
{
|
||||
load "$file"
|
||||
load "${file}"
|
||||
echo >&3
|
||||
} &
|
||||
done
|
||||
@ -258,8 +257,14 @@ date
|
||||
|
||||
echo "==========Start to insert data into ssb flat table=========="
|
||||
echo "change some session variables before load, and then restore after load."
|
||||
origin_query_timeout=$(run_sql 'select @@query_timeout;' | sed -n '3p')
|
||||
origin_parallel=$(run_sql 'select @@parallel_fragment_exec_instance_num;' | sed -n '3p')
|
||||
origin_query_timeout=$(
|
||||
set -e
|
||||
run_sql 'select @@query_timeout;' | sed -n '3p'
|
||||
)
|
||||
origin_parallel=$(
|
||||
set -e
|
||||
run_sql 'select @@parallel_fragment_exec_instance_num;' | sed -n '3p'
|
||||
)
|
||||
# set parallel_fragment_exec_instance_num=1; loading may be slow but stable.
|
||||
run_sql "set global query_timeout=7200;"
|
||||
run_sql "set global parallel_fragment_exec_instance_num=1;"
|
||||
|
||||
@ -24,12 +24,12 @@ set -eo pipefail
|
||||
|
||||
ROOT=$(dirname "$0")
|
||||
ROOT=$(
|
||||
cd "$ROOT"
|
||||
cd "${ROOT}"
|
||||
pwd
|
||||
)
|
||||
|
||||
CURDIR="${ROOT}"
|
||||
QUERIES_DIR="$CURDIR/../ssb-flat-queries"
|
||||
QUERIES_DIR="${CURDIR}/../ssb-flat-queries"
|
||||
|
||||
usage() {
|
||||
echo "
|
||||
@ -46,10 +46,10 @@ OPTS=$(getopt \
|
||||
-o 'h' \
|
||||
-- "$@")
|
||||
|
||||
eval set -- "$OPTS"
|
||||
eval set -- "${OPTS}"
|
||||
HELP=0
|
||||
|
||||
if [ $# == 0 ]; then
|
||||
if [[ $# == 0 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
@ -78,27 +78,26 @@ fi
|
||||
check_prerequest() {
|
||||
local CMD=$1
|
||||
local NAME=$2
|
||||
if ! $CMD; then
|
||||
echo "$NAME is missing. This script depends on mysql to create tables in Doris."
|
||||
if ! ${CMD}; then
|
||||
echo "${NAME} is missing. This script depends on mysql to create tables in Doris."
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
check_prerequest "mysqlslap --version" "mysqlslap"
|
||||
|
||||
# shellcheck source=/dev/null
|
||||
source "$CURDIR/../conf/doris-cluster.conf"
|
||||
export MYSQL_PWD=$PASSWORD
|
||||
source "${CURDIR}/../conf/doris-cluster.conf"
|
||||
export MYSQL_PWD=${PASSWORD}
|
||||
|
||||
echo "FE_HOST: $FE_HOST"
|
||||
echo "FE_QUERY_PORT: $FE_QUERY_PORT"
|
||||
echo "USER: $USER"
|
||||
echo "PASSWORD: $PASSWORD"
|
||||
echo "DB: $DB"
|
||||
echo "FE_HOST: ${FE_HOST}"
|
||||
echo "FE_QUERY_PORT: ${FE_QUERY_PORT}"
|
||||
echo "USER: ${USER}"
|
||||
echo "PASSWORD: ${PASSWORD}"
|
||||
echo "DB: ${DB}"
|
||||
|
||||
pre_set() {
|
||||
echo "$@"
|
||||
mysql -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" -D"$DB" -e "$@"
|
||||
mysql -h"${FE_HOST}" -P"${FE_QUERY_PORT}" -u"${USER}" -D"${DB}" -e "$@"
|
||||
}
|
||||
|
||||
pre_set "set global enable_vectorized_engine=1;"
|
||||
@ -113,8 +112,8 @@ echo '============================================'
|
||||
|
||||
for i in '1.1' '1.2' '1.3' '2.1' '2.2' '2.3' '3.1' '3.2' '3.3' '3.4' '4.1' '4.2' '4.3'; do
|
||||
# First run to prevent the effect of cold start
|
||||
mysql -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" -D "$DB" <"$QUERIES_DIR"/q${i}.sql >/dev/null 2>&1
|
||||
mysql -h"${FE_HOST}" -P"${FE_QUERY_PORT}" -u"${USER}" -D "${DB}" <"${QUERIES_DIR}/q${i}.sql" >/dev/null 2>&1
|
||||
# Then run 3 times and take the average time
|
||||
res=$(mysqlslap -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" --create-schema="$DB" --query="$QUERIES_DIR"/q${i}.sql -F '\r' -i 3 | sed -n '2p' | cut -d ' ' -f 9,10)
|
||||
echo "q$i: $res"
|
||||
res=$(mysqlslap -h"${FE_HOST}" -P"${FE_QUERY_PORT}" -u"${USER}" --create-schema="${DB}" --query="${QUERIES_DIR}/q${i}.sql" -F '\r' -i 3 | sed -n '2p' | cut -d ' ' -f 9,10)
|
||||
echo "q${i}: ${res}"
|
||||
done
|
||||
|
||||
@ -24,12 +24,12 @@ set -eo pipefail
|
||||
|
||||
ROOT=$(dirname "$0")
|
||||
ROOT=$(
|
||||
cd "$ROOT"
|
||||
cd "${ROOT}"
|
||||
pwd
|
||||
)
|
||||
|
||||
CURDIR=${ROOT}
|
||||
QUERIES_DIR=$CURDIR/../ssb-queries
|
||||
QUERIES_DIR=${CURDIR}/../ssb-queries
|
||||
|
||||
usage() {
|
||||
echo "
|
||||
@ -46,10 +46,10 @@ OPTS=$(getopt \
|
||||
-o 'h' \
|
||||
-- "$@")
|
||||
|
||||
eval set -- "$OPTS"
|
||||
eval set -- "${OPTS}"
|
||||
HELP=0
|
||||
|
||||
if [ $# == 0 ]; then
|
||||
if [[ $# == 0 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
@ -78,27 +78,26 @@ fi
|
||||
check_prerequest() {
|
||||
local CMD=$1
|
||||
local NAME=$2
|
||||
if ! $CMD; then
|
||||
echo "$NAME is missing. This script depends on mysql to create tables in Doris."
|
||||
if ! ${CMD}; then
|
||||
echo "${NAME} is missing. This script depends on mysql to create tables in Doris."
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
check_prerequest "mysqlslap --version" "mysql slap"
|
||||
|
||||
# shellcheck source=/dev/null
|
||||
source "$CURDIR/../conf/doris-cluster.conf"
|
||||
export MYSQL_PWD=$PASSWORD
|
||||
source "${CURDIR}/../conf/doris-cluster.conf"
|
||||
export MYSQL_PWD=${PASSWORD}
|
||||
|
||||
echo "FE_HOST: $FE_HOST"
|
||||
echo "FE_QUERY_PORT: $FE_QUERY_PORT"
|
||||
echo "USER: $USER"
|
||||
echo "PASSWORD: $PASSWORD"
|
||||
echo "DB: $DB"
|
||||
echo "FE_HOST: ${FE_HOST}"
|
||||
echo "FE_QUERY_PORT: ${FE_QUERY_PORT}"
|
||||
echo "USER: ${USER}"
|
||||
echo "PASSWORD: ${PASSWORD}"
|
||||
echo "DB: ${DB}"
|
||||
|
||||
pre_set() {
|
||||
echo "$@"
|
||||
mysql -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" -D"$DB" -e "$@"
|
||||
mysql -h"${FE_HOST}" -P"${FE_QUERY_PORT}" -u"${USER}" -D"${DB}" -e "$@"
|
||||
}
|
||||
|
||||
pre_set "set global enable_vectorized_engine=1;"
|
||||
@ -116,6 +115,6 @@ echo '============================================'
|
||||
|
||||
for i in '1.1' '1.2' '1.3' '2.1' '2.2' '2.3' '3.1' '3.2' '3.3' '3.4' '4.1' '4.2' '4.3'; do
|
||||
# Each query is executed 3 times and the average time is taken
|
||||
res=$(mysqlslap -h"$FE_HOST" -P"$FE_QUERY_PORT" -u"$USER" --create-schema="$DB" --query="$QUERIES_DIR"/q${i}.sql -F '\r' -i 3 | sed -n '2p' | cut -d ' ' -f 9,10)
|
||||
echo "q$i: $res"
|
||||
res=$(mysqlslap -h"${FE_HOST}" -P"${FE_QUERY_PORT}" -u"${USER}" --create-schema="${DB}" --query="${QUERIES_DIR}/q${i}.sql" -F '\r' -i 3 | sed -n '2p' | cut -d ' ' -f 9,10)
|
||||
echo "q${i}: ${res}"
|
||||
done
|
||||
|
||||
@ -27,18 +27,18 @@ set -eo pipefail
|
||||
|
||||
ROOT=$(dirname "$0")
|
||||
ROOT=$(
|
||||
cd "$ROOT"
|
||||
cd "${ROOT}"
|
||||
pwd
|
||||
)
|
||||
|
||||
CURDIR=${ROOT}
|
||||
TPCH_DBGEN_DIR=$CURDIR/TPC-H_Tools_v3.0.0/dbgen
|
||||
CURDIR="${ROOT}"
|
||||
TPCH_DBGEN_DIR="${CURDIR}/TPC-H_Tools_v3.0.0/dbgen"
|
||||
|
||||
check_prerequest() {
|
||||
local CMD=$1
|
||||
local NAME=$2
|
||||
if ! $CMD; then
|
||||
echo "$NAME is missing. This script depends on unzip to extract files from TPC-H_Tools_v3.0.0.zip"
|
||||
if ! ${CMD}; then
|
||||
echo "${NAME} is missing. This script depends on unzip to extract files from TPC-H_Tools_v3.0.0.zip"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
@ -46,16 +46,16 @@ check_prerequest() {
|
||||
check_prerequest "unzip -h" "unzip"
|
||||
|
||||
# download tpch tools package first
|
||||
if [[ -d $TPCH_DBGEN_DIR ]]; then
|
||||
echo "Dir $TPCH_DBGEN_DIR already exists. No need to download."
|
||||
if [[ -d ${TPCH_DBGEN_DIR} ]]; then
|
||||
echo "Dir ${TPCH_DBGEN_DIR} already exists. No need to download."
|
||||
echo "If you want to download TPC-H_Tools_v3.0.0 again, please delete this dir first."
|
||||
else
|
||||
wget "https://tools-chengdu.oss-cn-chengdu.aliyuncs.com/TPC-H_Tools_v3.0.0.zip"
|
||||
unzip TPC-H_Tools_v3.0.0.zip -d "$CURDIR"/
|
||||
unzip TPC-H_Tools_v3.0.0.zip -d "${CURDIR}/"
|
||||
fi
|
||||
|
||||
# modify tpcd.h
|
||||
cd "$TPCH_DBGEN_DIR"/
|
||||
cd "${TPCH_DBGEN_DIR}/"
|
||||
printf '%s' '
|
||||
#ifdef MYSQL
|
||||
#define GEN_QUERY_PLAN ""
|
||||
@ -79,12 +79,12 @@ make >/dev/null
|
||||
cd -
|
||||
|
||||
# check
|
||||
if [[ -f $TPCH_DBGEN_DIR/dbgen ]]; then
|
||||
if [[ -f ${TPCH_DBGEN_DIR}/dbgen ]]; then
|
||||
echo "
|
||||
################
|
||||
Build succeed!
|
||||
################
|
||||
Run $TPCH_DBGEN_DIR/dbgen -h"
|
||||
Run ${TPCH_DBGEN_DIR}/dbgen -h"
|
||||
exit 0
|
||||
else
|
||||
echo "Build failed!"
|
||||
|
||||
@ -24,77 +24,76 @@ set -eo pipefail
|
||||
|
||||
ROOT=$(dirname "$0")
|
||||
ROOT=$(
|
||||
cd "$ROOT"
|
||||
pwd
|
||||
cd "${ROOT}"
|
||||
pwd
|
||||
)
|
||||
|
||||
CURDIR=${ROOT}
|
||||
|
||||
usage() {
|
||||
echo "
|
||||
echo "
|
||||
This script is used to create TPC-H tables,
|
||||
will use mysql client to connect Doris server which is specified in doris-cluster.conf file.
|
||||
Usage: $0
|
||||
"
|
||||
exit 1
|
||||
exit 1
|
||||
}
|
||||
|
||||
OPTS=$(getopt \
|
||||
-n "$0" \
|
||||
-o '' \
|
||||
-- "$@")
|
||||
-n "$0" \
|
||||
-o '' \
|
||||
-- "$@")
|
||||
|
||||
eval set -- "$OPTS"
|
||||
eval set -- "${OPTS}"
|
||||
HELP=0
|
||||
|
||||
if [ $# == 0 ]; then
|
||||
usage
|
||||
if [[ $# == 0 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
while true; do
|
||||
case "$1" in
|
||||
-h)
|
||||
HELP=1
|
||||
shift
|
||||
;;
|
||||
--)
|
||||
shift
|
||||
break
|
||||
;;
|
||||
*)
|
||||
echo "Internal error"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
case "$1" in
|
||||
-h)
|
||||
HELP=1
|
||||
shift
|
||||
;;
|
||||
--)
|
||||
shift
|
||||
break
|
||||
;;
|
||||
*)
|
||||
echo "Internal error"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ ${HELP} -eq 1 ]]; then
|
||||
usage
|
||||
exit
|
||||
usage
|
||||
exit
|
||||
fi
|
||||
|
||||
check_prerequest() {
|
||||
local CMD=$1
|
||||
local NAME=$2
|
||||
if ! $CMD; then
|
||||
echo "$NAME is missing. This script depends on mysql to create tables in Doris."
|
||||
exit 1
|
||||
fi
|
||||
local CMD=$1
|
||||
local NAME=$2
|
||||
if ! ${CMD}; then
|
||||
echo "${NAME} is missing. This script depends on mysql to create tables in Doris."
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
check_prerequest "mysql --version" "mysql"
|
||||
|
||||
# shellcheck source=/dev/null
|
||||
source "$CURDIR/../conf/doris-cluster.conf"
|
||||
export MYSQL_PWD=$PASSWORD
|
||||
source "${CURDIR}/../conf/doris-cluster.conf"
|
||||
export MYSQL_PWD=${PASSWORD}
|
||||
|
||||
echo "FE_HOST: $FE_HOST"
|
||||
echo "FE_QUERY_PORT: $FE_QUERY_PORT"
|
||||
echo "USER: $USER"
|
||||
echo "PASSWORD: $PASSWORD"
|
||||
echo "DB: $DB"
|
||||
echo "FE_HOST: ${FE_HOST}"
|
||||
echo "FE_QUERY_PORT: ${FE_QUERY_PORT}"
|
||||
echo "USER: ${USER}"
|
||||
echo "PASSWORD: ${PASSWORD}"
|
||||
echo "DB: ${DB}"
|
||||
|
||||
mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -e "CREATE DATABASE IF NOT EXISTS $DB"
|
||||
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -e "CREATE DATABASE IF NOT EXISTS ${DB}"
|
||||
|
||||
echo "Run SQLs from $CURDIR/create-tpch-tables.sql"
|
||||
mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" <"$CURDIR"/../ddl/create-tpch-tables.sql
|
||||
echo "Run SQLs from ${CURDIR}/create-tpch-tables.sql"
|
||||
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" <"${CURDIR}"/../ddl/create-tpch-tables.sql
|
||||
|
||||
@ -24,16 +24,16 @@ set -eo pipefail
|
||||
|
||||
ROOT=$(dirname "$0")
|
||||
ROOT=$(
|
||||
cd "$ROOT"
|
||||
pwd
|
||||
cd "${ROOT}"
|
||||
pwd
|
||||
)
|
||||
|
||||
CURDIR=${ROOT}
|
||||
TPCH_DBGEN_DIR=$CURDIR/TPC-H_Tools_v3.0.0/dbgen/
|
||||
TPCH_DATA_DIR=$CURDIR/tpch-data/
|
||||
CURDIR="${ROOT}"
|
||||
TPCH_DBGEN_DIR="${CURDIR}/TPC-H_Tools_v3.0.0/dbgen/"
|
||||
TPCH_DATA_DIR="${CURDIR}/tpch-data/"
|
||||
|
||||
usage() {
|
||||
echo "
|
||||
echo "
|
||||
Usage: $0 <options>
|
||||
Optional options:
|
||||
-s scale factor, default is 100
|
||||
@ -44,111 +44,111 @@ Usage: $0 <options>
|
||||
$0 -s 10 generate data with scale factor 10.
|
||||
$0 -s 10 -c 5 generate data with scale factor 10. And using 5 threads to generate data concurrently.
|
||||
"
|
||||
exit 1
|
||||
exit 1
|
||||
}
|
||||
|
||||
OPTS=$(getopt \
|
||||
-n "$0" \
|
||||
-o '' \
|
||||
-o 'hs:c:' \
|
||||
-- "$@")
|
||||
-n "$0" \
|
||||
-o '' \
|
||||
-o 'hs:c:' \
|
||||
-- "$@")
|
||||
|
||||
eval set -- "$OPTS"
|
||||
eval set -- "${OPTS}"
|
||||
|
||||
SCALE_FACTOR=100
|
||||
PARALLEL=10
|
||||
HELP=0
|
||||
|
||||
if [ $# == 0 ]; then
|
||||
usage
|
||||
if [[ $# == 0 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
while true; do
|
||||
case "$1" in
|
||||
-h)
|
||||
HELP=1
|
||||
shift
|
||||
;;
|
||||
-s)
|
||||
SCALE_FACTOR=$2
|
||||
shift 2
|
||||
;;
|
||||
-c)
|
||||
PARALLEL=$2
|
||||
shift 2
|
||||
;;
|
||||
--)
|
||||
shift
|
||||
break
|
||||
;;
|
||||
*)
|
||||
echo "Internal error"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
case "$1" in
|
||||
-h)
|
||||
HELP=1
|
||||
shift
|
||||
;;
|
||||
-s)
|
||||
SCALE_FACTOR=$2
|
||||
shift 2
|
||||
;;
|
||||
-c)
|
||||
PARALLEL=$2
|
||||
shift 2
|
||||
;;
|
||||
--)
|
||||
shift
|
||||
break
|
||||
;;
|
||||
*)
|
||||
echo "Internal error"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ ${HELP} -eq 1 ]]; then
|
||||
usage
|
||||
exit
|
||||
usage
|
||||
exit
|
||||
fi
|
||||
|
||||
echo "Scale Factor: $SCALE_FACTOR"
|
||||
echo "Parallelism: $PARALLEL"
|
||||
echo "Scale Factor: ${SCALE_FACTOR}"
|
||||
echo "Parallelism: ${PARALLEL}"
|
||||
|
||||
# check if dbgen exists
|
||||
if [[ ! -f $TPCH_DBGEN_DIR/dbgen ]]; then
|
||||
echo "$TPCH_DBGEN_DIR/dbgen does not exist. Run build-tpch-dbgen.sh first to build it first."
|
||||
exit 1
|
||||
if [[ ! -f ${TPCH_DBGEN_DIR}/dbgen ]]; then
|
||||
echo "${TPCH_DBGEN_DIR}/dbgen does not exist. Run build-tpch-dbgen.sh first to build it first."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ -d $TPCH_DATA_DIR/ ]]; then
|
||||
echo "$TPCH_DATA_DIR exists. Remove it before generating data"
|
||||
exit 1
|
||||
if [[ -d ${TPCH_DATA_DIR}/ ]]; then
|
||||
echo "${TPCH_DATA_DIR} exists. Remove it before generating data"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
mkdir "$TPCH_DATA_DIR"/
|
||||
mkdir "${TPCH_DATA_DIR}"/
|
||||
|
||||
# gen data
|
||||
cd "$TPCH_DBGEN_DIR"
|
||||
cd "${TPCH_DBGEN_DIR}"
|
||||
echo "Begin to generate data for table: region"
|
||||
"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T r
|
||||
"${TPCH_DBGEN_DIR}"/dbgen -f -s "${SCALE_FACTOR}" -T r
|
||||
echo "Begin to generate data for table: nation"
|
||||
"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T n
|
||||
"${TPCH_DBGEN_DIR}"/dbgen -f -s "${SCALE_FACTOR}" -T n
|
||||
echo "Begin to generate data for table: supplier"
|
||||
"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T s
|
||||
"${TPCH_DBGEN_DIR}"/dbgen -f -s "${SCALE_FACTOR}" -T s
|
||||
echo "Begin to generate data for table: part"
|
||||
"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T P
|
||||
"${TPCH_DBGEN_DIR}"/dbgen -f -s "${SCALE_FACTOR}" -T P
|
||||
echo "Begin to generate data for table: customer"
|
||||
"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T c
|
||||
"${TPCH_DBGEN_DIR}"/dbgen -f -s "${SCALE_FACTOR}" -T c
|
||||
echo "Begin to generate data for table: partsupp"
|
||||
for i in $(seq 1 "$PARALLEL"); do
|
||||
{
|
||||
"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T S -C "$PARALLEL" -S "$i"
|
||||
} &
|
||||
for i in $(seq 1 "${PARALLEL}"); do
|
||||
{
|
||||
"${TPCH_DBGEN_DIR}"/dbgen -f -s "${SCALE_FACTOR}" -T S -C "${PARALLEL}" -S "${i}"
|
||||
} &
|
||||
done
|
||||
wait
|
||||
|
||||
echo "Begin to generate data for table: orders"
|
||||
for i in $(seq 1 "$PARALLEL"); do
|
||||
{
|
||||
"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T O -C "$PARALLEL" -S "$i"
|
||||
} &
|
||||
for i in $(seq 1 "${PARALLEL}"); do
|
||||
{
|
||||
"${TPCH_DBGEN_DIR}"/dbgen -f -s "${SCALE_FACTOR}" -T O -C "${PARALLEL}" -S "${i}"
|
||||
} &
|
||||
done
|
||||
wait
|
||||
|
||||
echo "Begin to generate data for table: lineitem"
|
||||
for i in $(seq 1 "$PARALLEL"); do
|
||||
{
|
||||
"$TPCH_DBGEN_DIR"/dbgen -f -s "$SCALE_FACTOR" -T L -C "$PARALLEL" -S "$i"
|
||||
} &
|
||||
for i in $(seq 1 "${PARALLEL}"); do
|
||||
{
|
||||
"${TPCH_DBGEN_DIR}"/dbgen -f -s "${SCALE_FACTOR}" -T L -C "${PARALLEL}" -S "${i}"
|
||||
} &
|
||||
done
|
||||
wait
|
||||
|
||||
cd -
|
||||
|
||||
# move data to $TPCH_DATA_DIR
|
||||
mv "$TPCH_DBGEN_DIR"/*.tbl* "$TPCH_DATA_DIR"/
|
||||
# move data to ${TPCH_DATA_DIR}
|
||||
mv "${TPCH_DBGEN_DIR}"/*.tbl* "${TPCH_DATA_DIR}"/
|
||||
|
||||
# check data
|
||||
du -sh "$TPCH_DATA_DIR"/*.tbl*
|
||||
du -sh "${TPCH_DATA_DIR}"/*.tbl*
|
||||
|
||||
@ -25,12 +25,12 @@ set -eo pipefail
|
||||
|
||||
ROOT=$(dirname "$0")
|
||||
ROOT=$(
|
||||
cd "$ROOT"
|
||||
cd "${ROOT}"
|
||||
pwd
|
||||
)
|
||||
|
||||
CURDIR=${ROOT}
|
||||
TPCH_DATA_DIR=$CURDIR/tpch-data
|
||||
CURDIR="${ROOT}"
|
||||
TPCH_DATA_DIR="${CURDIR}/tpch-data"
|
||||
|
||||
usage() {
|
||||
echo "
|
||||
@ -51,12 +51,12 @@ OPTS=$(getopt \
|
||||
-o 'hc:' \
|
||||
-- "$@")
|
||||
|
||||
eval set -- "$OPTS"
|
||||
eval set -- "${OPTS}"
|
||||
|
||||
PARALLEL=5
|
||||
HELP=0
|
||||
|
||||
if [ $# == 0 ]; then
|
||||
if [[ $# == 0 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
@ -86,19 +86,19 @@ if [[ ${HELP} -eq 1 ]]; then
|
||||
exit
|
||||
fi
|
||||
|
||||
echo "Parallelism: $PARALLEL"
|
||||
echo "Parallelism: ${PARALLEL}"
|
||||
|
||||
# check if tpch-data exists
|
||||
if [[ ! -d "$TPCH_DATA_DIR"/ ]]; then
|
||||
echo "$TPCH_DATA_DIR does not exist. Run sh gen-tpch-data.sh first."
|
||||
if [[ ! -d "${TPCH_DATA_DIR}"/ ]]; then
|
||||
echo "${TPCH_DATA_DIR} does not exist. Run sh gen-tpch-data.sh first."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
check_prerequest() {
|
||||
local CMD=$1
|
||||
local NAME=$2
|
||||
if ! $CMD; then
|
||||
echo "$NAME is missing. This script depends on cURL to load data to Doris."
|
||||
if ! ${CMD}; then
|
||||
echo "${NAME} is missing. This script depends on cURL to load data to Doris."
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
@ -106,83 +106,82 @@ check_prerequest() {
|
||||
check_prerequest "curl --version" "curl"
|
||||
|
||||
# load tables
|
||||
# shellcheck source=/dev/null
|
||||
source "$CURDIR/../conf/doris-cluster.conf"
|
||||
export MYSQL_PWD=$PASSWORD
|
||||
source "${CURDIR}/../conf/doris-cluster.conf"
|
||||
export MYSQL_PWD=${PASSWORD}
|
||||
|
||||
echo "FE_HOST: $FE_HOST"
|
||||
echo "FE_HTTP_PORT: $FE_HTTP_PORT"
|
||||
echo "USER: $USER"
|
||||
echo "PASSWORD: $PASSWORD"
|
||||
echo "DB: $DB"
|
||||
echo "FE_HOST: ${FE_HOST}"
|
||||
echo "FE_HTTP_PORT: ${FE_HTTP_PORT}"
|
||||
echo "USER: ${USER}"
|
||||
echo "PASSWORD: ${PASSWORD}"
|
||||
echo "DB: ${DB}"
|
||||
|
||||
function load_region() {
|
||||
echo "$*"
|
||||
curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: r_regionkey, r_name, r_comment, temp" \
|
||||
-T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/region/_stream_load
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/region/_stream_load
|
||||
}
|
||||
function load_nation() {
|
||||
echo "$*"
|
||||
curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: n_nationkey, n_name, n_regionkey, n_comment, temp" \
|
||||
-T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/nation/_stream_load
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/nation/_stream_load
|
||||
}
|
||||
function load_supplier() {
|
||||
echo "$*"
|
||||
curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, temp" \
|
||||
-T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/supplier/_stream_load
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load
|
||||
}
|
||||
function load_customer() {
|
||||
echo "$*"
|
||||
curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment, temp" \
|
||||
-T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/customer/_stream_load
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load
|
||||
}
|
||||
function load_part() {
|
||||
echo "$*"
|
||||
curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, temp" \
|
||||
-T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/part/_stream_load
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load
|
||||
}
|
||||
function load_partsupp() {
|
||||
echo "$*"
|
||||
curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, temp" \
|
||||
-T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/partsupp/_stream_load
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/partsupp/_stream_load
|
||||
}
|
||||
function load_orders() {
|
||||
echo "$*"
|
||||
curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, temp" \
|
||||
-T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/orders/_stream_load
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/orders/_stream_load
|
||||
}
|
||||
function load_lineitem() {
|
||||
echo "$*"
|
||||
curl --location-trusted -u "$USER":"$PASSWORD" -H "column_separator:|" \
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag,l_linestatus, l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment,temp" \
|
||||
-T "$*" http://"$FE_HOST":"$FE_HTTP_PORT"/api/"$DB"/lineitem/_stream_load
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineitem/_stream_load
|
||||
}
|
||||
|
||||
# start load
|
||||
date
|
||||
load_region "$TPCH_DATA_DIR"/region.tbl
|
||||
load_nation "$TPCH_DATA_DIR"/nation.tbl
|
||||
load_supplier "$TPCH_DATA_DIR"/supplier.tbl
|
||||
load_customer "$TPCH_DATA_DIR"/customer.tbl
|
||||
load_part "$TPCH_DATA_DIR"/part.tbl
|
||||
load_region "${TPCH_DATA_DIR}"/region.tbl
|
||||
load_nation "${TPCH_DATA_DIR}"/nation.tbl
|
||||
load_supplier "${TPCH_DATA_DIR}"/supplier.tbl
|
||||
load_customer "${TPCH_DATA_DIR}"/customer.tbl
|
||||
load_part "${TPCH_DATA_DIR}"/part.tbl
|
||||
date
|
||||
# set parallelism
|
||||
|
||||
# 以PID为名, 防止创建命名管道时与已有文件重名,从而失败
|
||||
fifo="/tmp/$$.fifo"
|
||||
# 创建命名管道
|
||||
mkfifo ${fifo}
|
||||
mkfifo "${fifo}"
|
||||
# 以读写方式打开命名管道,文件标识符fd为3,fd可取除0,1,2,5外0-9中的任意数字
|
||||
exec 3<>${fifo}
|
||||
exec 3<>"${fifo}"
|
||||
# 删除文件, 也可不删除, 不影响后面操作
|
||||
rm -rf ${fifo}
|
||||
rm -rf "${fifo}"
|
||||
|
||||
# 在fd3中放置$PARALLEL个空行作为令牌
|
||||
for ((i = 1; i <= PARALLEL; i++)); do
|
||||
@ -190,7 +189,7 @@ for ((i = 1; i <= PARALLEL; i++)); do
|
||||
done
|
||||
|
||||
date
|
||||
for file in "$TPCH_DATA_DIR"/lineitem.tbl*; do
|
||||
for file in "${TPCH_DATA_DIR}"/lineitem.tbl*; do
|
||||
# 领取令牌, 即从fd3中读取行, 每次一行
|
||||
# 对管道,读一行便少一行,每次只能读取一行
|
||||
# 所有行读取完毕, 执行挂起, 直到管道再次有可读行
|
||||
@ -199,8 +198,8 @@ for file in "$TPCH_DATA_DIR"/lineitem.tbl*; do
|
||||
|
||||
# 要批量执行的命令放在大括号内, 后台运行
|
||||
{
|
||||
load_lineitem "$file"
|
||||
echo "----loaded $file"
|
||||
load_lineitem "${file}"
|
||||
echo "----loaded ${file}"
|
||||
sleep 2
|
||||
# 归还令牌, 即进程结束后,再写入一行,使挂起的循环继续执行
|
||||
echo >&3
|
||||
@ -208,22 +207,22 @@ for file in "$TPCH_DATA_DIR"/lineitem.tbl*; do
|
||||
done
|
||||
|
||||
date
|
||||
for file in "$TPCH_DATA_DIR"/orders.tbl*; do
|
||||
for file in "${TPCH_DATA_DIR}"/orders.tbl*; do
|
||||
read -r -u3
|
||||
{
|
||||
load_orders "$file"
|
||||
echo "----loaded $file"
|
||||
load_orders "${file}"
|
||||
echo "----loaded ${file}"
|
||||
sleep 2
|
||||
echo >&3
|
||||
} &
|
||||
done
|
||||
|
||||
date
|
||||
for file in "$TPCH_DATA_DIR"/partsupp.tbl*; do
|
||||
for file in "${TPCH_DATA_DIR}"/partsupp.tbl*; do
|
||||
read -r -u3
|
||||
{
|
||||
load_partsupp "$file"
|
||||
echo "----loaded $file"
|
||||
load_partsupp "${file}"
|
||||
echo "----loaded ${file}"
|
||||
sleep 2
|
||||
echo >&3
|
||||
} &
|
||||
@ -235,4 +234,4 @@ wait
|
||||
exec 3>&-
|
||||
date
|
||||
|
||||
echo "DONE."
|
||||
echo "DONE."
|
||||
|
||||
@ -24,12 +24,12 @@ set -eo pipefail
|
||||
|
||||
ROOT=$(dirname "$0")
|
||||
ROOT=$(
|
||||
cd "$ROOT"
|
||||
cd "${ROOT}"
|
||||
pwd
|
||||
)
|
||||
|
||||
CURDIR=${ROOT}
|
||||
QUERIES_DIR=$CURDIR/../queries
|
||||
CURDIR="${ROOT}"
|
||||
QUERIES_DIR="${CURDIR}/../queries"
|
||||
|
||||
usage() {
|
||||
echo "
|
||||
@ -45,10 +45,10 @@ OPTS=$(getopt \
|
||||
-o '' \
|
||||
-- "$@")
|
||||
|
||||
eval set -- "$OPTS"
|
||||
eval set -- "${OPTS}"
|
||||
HELP=0
|
||||
|
||||
if [ $# == 0 ]; then
|
||||
if [[ $# == 0 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
@ -77,28 +77,27 @@ fi
|
||||
check_prerequest() {
|
||||
local CMD=$1
|
||||
local NAME=$2
|
||||
if ! $CMD; then
|
||||
echo "$NAME is missing. This script depends on mysql to create tables in Doris."
|
||||
if ! ${CMD}; then
|
||||
echo "${NAME} is missing. This script depends on mysql to create tables in Doris."
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
check_prerequest "mysql --version" "mysql"
|
||||
|
||||
# shellcheck source=/dev/null
|
||||
source "$CURDIR/../conf/doris-cluster.conf"
|
||||
export MYSQL_PWD=$PASSWORD
|
||||
source "${CURDIR}/../conf/doris-cluster.conf"
|
||||
export MYSQL_PWD=${PASSWORD}
|
||||
|
||||
echo "FE_HOST: $FE_HOST"
|
||||
echo "FE_QUERY_PORT: $FE_QUERY_PORT"
|
||||
echo "USER: $USER"
|
||||
echo "PASSWORD: $PASSWORD"
|
||||
echo "DB: $DB"
|
||||
echo "FE_HOST: ${FE_HOST}"
|
||||
echo "FE_QUERY_PORT: ${FE_QUERY_PORT}"
|
||||
echo "USER: ${USER}"
|
||||
echo "PASSWORD: ${PASSWORD}"
|
||||
echo "DB: ${DB}"
|
||||
echo "Time Unit: ms"
|
||||
|
||||
pre_set() {
|
||||
echo "$*"
|
||||
mysql -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" -e "$*"
|
||||
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e "$*"
|
||||
}
|
||||
|
||||
echo '============================================'
|
||||
@ -112,14 +111,14 @@ for i in $(seq 1 22); do
|
||||
total=0
|
||||
run=3
|
||||
# Each query is executed ${run} times and takes the average time
|
||||
for j in $(seq 1 ${run}); do
|
||||
for ((j = 0; j < run; j++)); do
|
||||
start=$(date +%s%3N)
|
||||
mysql -h"$FE_HOST" -u "$USER" -P"$FE_QUERY_PORT" -D"$DB" --comments <"$QUERIES_DIR"/q"$i".sql >/dev/null
|
||||
mysql -h"${FE_HOST}" -u "${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" --comments <"${QUERIES_DIR}/q${i}.sql" >/dev/null
|
||||
end=$(date +%s%3N)
|
||||
total=$((total + end - start))
|
||||
done
|
||||
cost=$((total / run))
|
||||
echo "q$i: ${cost}"
|
||||
echo "q${i}: ${cost}"
|
||||
sum=$((sum + cost))
|
||||
done
|
||||
echo "Total cost: $sum"
|
||||
echo "Total cost: ${sum}"
|
||||
|
||||
Reference in New Issue
Block a user