[tools](tpcds) fix bug of generating and loading data (#17835)
--------- Co-authored-by: stephen <hello_stephen@qq.com>
This commit is contained in:
@@ -36,7 +36,7 @@ usage() {
|
||||
Usage: $0 <options>
|
||||
Optional options:
|
||||
-s scale factor, default is 1
|
||||
-c parallelism to generate data of (lineitem, orders, partsupp) table, default is 10
|
||||
-c parallelism to generate data, default is 10, max is 100
|
||||
|
||||
Eg.
|
||||
$0 generate data using default value.
|
||||
@@ -114,12 +114,24 @@ date
|
||||
cd "${TPCDS_DBGEN_DIR}"
|
||||
if [[ ${PARALLEL} -eq 1 ]] && "${TPCDS_DBGEN_DIR}"/dsdgen -SCALE "${SCALE_FACTOR}" -TERMINATE N -DIR "${TPCDS_DATA_DIR}"; then
|
||||
echo "data genarated."
|
||||
elif [[ ${PARALLEL} -gt 1 ]] && "${TPCDS_DBGEN_DIR}"/dsdgen -SCALE "${SCALE_FACTOR}" -PARALLEL "${PARALLEL}" -TERMINATE N -DIR "${TPCDS_DATA_DIR}"; then
|
||||
elif [[ ${PARALLEL} -gt 1 ]] && [[ ${PARALLEL} -le 100 ]]; then
|
||||
for c in $(seq 1 "${PARALLEL}"); do
|
||||
"${TPCDS_DBGEN_DIR}"/dsdgen -SCALE "${SCALE_FACTOR}" -PARALLEL "${PARALLEL}" -CHILD "${c}" -TERMINATE N -DIR "${TPCDS_DATA_DIR}" &
|
||||
done
|
||||
wait
|
||||
echo "data genarated."
|
||||
else
|
||||
echo "ERROR occured." && exit 1
|
||||
echo "ERROR: bad parallelism ${PARALLEL}" && exit 1
|
||||
fi
|
||||
cd -
|
||||
cd "${TPCDS_DATA_DIR}"
|
||||
echo "Convert encoding of customer table files from one iso-8859-1 to utf-8."
|
||||
for i in $(seq 1 "${PARALLEL}"); do
|
||||
if ! [[ -f "customer_${i}_${PARALLEL}.dat" ]]; then continue; fi
|
||||
mv "customer_${i}_${PARALLEL}.dat" "customer_${i}_${PARALLEL}.dat.bak"
|
||||
iconv -f iso-8859-1 -t utf-8 "customer_${i}_${PARALLEL}.dat.bak" -o "customer_${i}_${PARALLEL}.dat"
|
||||
rm "customer_${i}_${PARALLEL}.dat.bak"
|
||||
done
|
||||
date
|
||||
|
||||
# check data
|
||||
du -sh "${TPCDS_DATA_DIR}"/*.dat*
|
||||
|
||||
@@ -168,7 +168,8 @@ for table_name in ${!table_columns[*]}; do
|
||||
|
||||
# 要批量执行的命令放在大括号内, 后台运行
|
||||
{
|
||||
for file in "${TPCDS_DATA_DIR}/${table_name}"*.dat; do
|
||||
for file in "${TPCDS_DATA_DIR}/${table_name}"_{1..100}_*.dat; do
|
||||
if ! [[ -f "${file}" ]]; then continue; fi
|
||||
ret=$(curl \
|
||||
--location-trusted \
|
||||
-u "${USER}":"${PASSWORD:=}" \
|
||||
@@ -179,10 +180,9 @@ for table_name in ${!table_columns[*]}; do
|
||||
if [[ $(echo "${ret}" | jq ".Status") == '"Success"' ]]; then
|
||||
echo "----loaded ${file}"
|
||||
else
|
||||
echo -e "\033[31m----load ${file} FAIL...\033[0m"
|
||||
echo -e "\033[31m----load ${file} FAIL...\n${ret}\033[0m"
|
||||
fi
|
||||
done
|
||||
sleep 2
|
||||
# 归还令牌, 即进程结束后,再写入一行,使挂起的循环继续执行
|
||||
echo >&3
|
||||
} &
|
||||
|
||||
@@ -17,7 +17,7 @@
|
||||
# under the License.
|
||||
|
||||
##############################################################
|
||||
# This script is used to run TPC-DS 99 queries
|
||||
# This script is used to run TPC-DS 103 queries
|
||||
##############################################################
|
||||
|
||||
set -eo pipefail
|
||||
@@ -33,7 +33,7 @@ TPCDS_QUERIES_DIR="${CURDIR}/../queries"
|
||||
|
||||
usage() {
|
||||
echo "
|
||||
This script is used to run TPC-DS 99 queries,
|
||||
This script is used to run TPC-DS 103 queries,
|
||||
will use mysql client to connect Doris server which parameter is specified in doris-cluster.conf file.
|
||||
Usage: $0
|
||||
"
|
||||
|
||||
Reference in New Issue
Block a user