[Enhancement](tools) Support transaction for benchmarks loading (#31126)
This commit is contained in:
@ -30,13 +30,18 @@ ROOT=$(
|
||||
|
||||
CURDIR=${ROOT}
|
||||
DATA_DIR=$CURDIR/
|
||||
# DATA_DIR=/mnt/disk1/stephen/data/clickbench
|
||||
|
||||
usage() {
|
||||
echo "
|
||||
This script is used to load ClickBench data,
|
||||
will use mysql client to connect Doris server which is specified in conf/doris-cluster.conf file.
|
||||
Usage: $0
|
||||
Usage: $0 <options>
|
||||
Optional options:
|
||||
-x use transaction id. multi times of loading with the same id won't load duplicate data.
|
||||
|
||||
Eg.
|
||||
$0 load data using default value.
|
||||
$0 -x blabla use transaction id \"blabla\".
|
||||
"
|
||||
exit 1
|
||||
}
|
||||
@ -44,11 +49,13 @@ Usage: $0
|
||||
OPTS=$(getopt \
|
||||
-n $0 \
|
||||
-o '' \
|
||||
-o 'h' \
|
||||
-o 'hx:' \
|
||||
-- "$@")
|
||||
eval set -- "$OPTS"
|
||||
|
||||
HELP=0
|
||||
TXN_ID=""
|
||||
|
||||
while true; do
|
||||
case "$1" in
|
||||
-h)
|
||||
@ -59,6 +66,10 @@ while true; do
|
||||
shift
|
||||
break
|
||||
;;
|
||||
-x)
|
||||
TXN_ID=$2
|
||||
shift 2
|
||||
;;
|
||||
*)
|
||||
echo "Internal error"
|
||||
exit 1
|
||||
@ -113,7 +124,6 @@ function load() {
|
||||
if [ ! -f "$DATA_DIR/hits_split${i}" ]; then
|
||||
echo "will download hits_split${i} to $DATA_DIR"
|
||||
wget --continue "https://doris-test-data.oss-cn-hongkong.aliyuncs.com/ClickBench/hits_split${i}" &
|
||||
# wget --continue "https://doris-test-data.oss-cn-hongkong-internal.aliyuncs.com/ClickBench/hits_split${i}" &
|
||||
PID=$!
|
||||
wget_pids[${#wget_pids[@]}]=$PID
|
||||
fi
|
||||
@ -127,11 +137,20 @@ function load() {
|
||||
for i in $(seq 0 9); do
|
||||
echo -e "
|
||||
start loading hits_split${i}"
|
||||
curl --location-trusted \
|
||||
-u $USER:$PASSWORD \
|
||||
-T "$DATA_DIR/hits_split${i}" \
|
||||
-H "columns:WatchID,JavaEnable,Title,GoodEvent,EventTime,EventDate,CounterID,ClientIP,RegionID,UserID,CounterClass,OS,UserAgent,URL,Referer,IsRefresh,RefererCategoryID,RefererRegionID,URLCategoryID,URLRegionID,ResolutionWidth,ResolutionHeight,ResolutionDepth,FlashMajor,FlashMinor,FlashMinor2,NetMajor,NetMinor,UserAgentMajor,UserAgentMinor,CookieEnable,JavascriptEnable,IsMobile,MobilePhone,MobilePhoneModel,Params,IPNetworkID,TraficSourceID,SearchEngineID,SearchPhrase,AdvEngineID,IsArtifical,WindowClientWidth,WindowClientHeight,ClientTimeZone,ClientEventTime,SilverlightVersion1,SilverlightVersion2,SilverlightVersion3,SilverlightVersion4,PageCharset,CodeVersion,IsLink,IsDownload,IsNotBounce,FUniqID,OriginalURL,HID,IsOldCounter,IsEvent,IsParameter,DontCountHits,WithHash,HitColor,LocalEventTime,Age,Sex,Income,Interests,Robotness,RemoteIP,WindowName,OpenerName,HistoryLength,BrowserLanguage,BrowserCountry,SocialNetwork,SocialAction,HTTPError,SendTiming,DNSTiming,ConnectTiming,ResponseStartTiming,ResponseEndTiming,FetchTiming,SocialSourceNetworkID,SocialSourcePage,ParamPrice,ParamOrderID,ParamCurrency,ParamCurrencyID,OpenstatServiceName,OpenstatCampaignID,OpenstatAdID,OpenstatSourceID,UTMSource,UTMMedium,UTMCampaign,UTMContent,UTMTerm,FromTag,HasGCLID,RefererHash,URLHash,CLID" \
|
||||
http://$FE_HOST:$FE_HTTP_PORT/api/$DB/hits/_stream_load
|
||||
if [[ -z ${TXN_ID} ]]; then
|
||||
curl --location-trusted \
|
||||
-u $USER:$PASSWORD \
|
||||
-T "$DATA_DIR/hits_split${i}" \
|
||||
-H "columns:WatchID,JavaEnable,Title,GoodEvent,EventTime,EventDate,CounterID,ClientIP,RegionID,UserID,CounterClass,OS,UserAgent,URL,Referer,IsRefresh,RefererCategoryID,RefererRegionID,URLCategoryID,URLRegionID,ResolutionWidth,ResolutionHeight,ResolutionDepth,FlashMajor,FlashMinor,FlashMinor2,NetMajor,NetMinor,UserAgentMajor,UserAgentMinor,CookieEnable,JavascriptEnable,IsMobile,MobilePhone,MobilePhoneModel,Params,IPNetworkID,TraficSourceID,SearchEngineID,SearchPhrase,AdvEngineID,IsArtifical,WindowClientWidth,WindowClientHeight,ClientTimeZone,ClientEventTime,SilverlightVersion1,SilverlightVersion2,SilverlightVersion3,SilverlightVersion4,PageCharset,CodeVersion,IsLink,IsDownload,IsNotBounce,FUniqID,OriginalURL,HID,IsOldCounter,IsEvent,IsParameter,DontCountHits,WithHash,HitColor,LocalEventTime,Age,Sex,Income,Interests,Robotness,RemoteIP,WindowName,OpenerName,HistoryLength,BrowserLanguage,BrowserCountry,SocialNetwork,SocialAction,HTTPError,SendTiming,DNSTiming,ConnectTiming,ResponseStartTiming,ResponseEndTiming,FetchTiming,SocialSourceNetworkID,SocialSourcePage,ParamPrice,ParamOrderID,ParamCurrency,ParamCurrencyID,OpenstatServiceName,OpenstatCampaignID,OpenstatAdID,OpenstatSourceID,UTMSource,UTMMedium,UTMCampaign,UTMContent,UTMTerm,FromTag,HasGCLID,RefererHash,URLHash,CLID" \
|
||||
http://$FE_HOST:$FE_HTTP_PORT/api/$DB/hits/_stream_load
|
||||
else
|
||||
curl --location-trusted \
|
||||
-u $USER:$PASSWORD \
|
||||
-T "$DATA_DIR/hits_split${i}" \
|
||||
-H "label:${TXN_ID}_${i}" \
|
||||
-H "columns:WatchID,JavaEnable,Title,GoodEvent,EventTime,EventDate,CounterID,ClientIP,RegionID,UserID,CounterClass,OS,UserAgent,URL,Referer,IsRefresh,RefererCategoryID,RefererRegionID,URLCategoryID,URLRegionID,ResolutionWidth,ResolutionHeight,ResolutionDepth,FlashMajor,FlashMinor,FlashMinor2,NetMajor,NetMinor,UserAgentMajor,UserAgentMinor,CookieEnable,JavascriptEnable,IsMobile,MobilePhone,MobilePhoneModel,Params,IPNetworkID,TraficSourceID,SearchEngineID,SearchPhrase,AdvEngineID,IsArtifical,WindowClientWidth,WindowClientHeight,ClientTimeZone,ClientEventTime,SilverlightVersion1,SilverlightVersion2,SilverlightVersion3,SilverlightVersion4,PageCharset,CodeVersion,IsLink,IsDownload,IsNotBounce,FUniqID,OriginalURL,HID,IsOldCounter,IsEvent,IsParameter,DontCountHits,WithHash,HitColor,LocalEventTime,Age,Sex,Income,Interests,Robotness,RemoteIP,WindowName,OpenerName,HistoryLength,BrowserLanguage,BrowserCountry,SocialNetwork,SocialAction,HTTPError,SendTiming,DNSTiming,ConnectTiming,ResponseStartTiming,ResponseEndTiming,FetchTiming,SocialSourceNetworkID,SocialSourcePage,ParamPrice,ParamOrderID,ParamCurrency,ParamCurrencyID,OpenstatServiceName,OpenstatCampaignID,OpenstatAdID,OpenstatSourceID,UTMSource,UTMMedium,UTMCampaign,UTMContent,UTMTerm,FromTag,HasGCLID,RefererHash,URLHash,CLID" \
|
||||
http://$FE_HOST:$FE_HTTP_PORT/api/$DB/hits/_stream_load
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
|
||||
@ -36,11 +36,13 @@ usage() {
|
||||
echo "
|
||||
Usage: $0 <options>
|
||||
Optional options:
|
||||
-c parallelism to load data of lineorder table, default is 5.
|
||||
-c parallelism to load data of lineorder table, default is 5.
|
||||
-x use transaction id. multi times of loading with the same id won't load duplicate data.
|
||||
|
||||
Eg.
|
||||
$0 load data using default value.
|
||||
$0 -c 10 load lineorder table data using parallelism 10.
|
||||
$0 -c 10 load lineorder table data using parallelism 10.
|
||||
$0 -x blabla use transaction id \"blabla\".
|
||||
"
|
||||
exit 1
|
||||
}
|
||||
@ -48,13 +50,14 @@ Usage: $0 <options>
|
||||
OPTS=$(getopt \
|
||||
-n "$0" \
|
||||
-o '' \
|
||||
-o 'hc:' \
|
||||
-o 'hc:x:' \
|
||||
-- "$@")
|
||||
|
||||
eval set -- "${OPTS}"
|
||||
|
||||
PARALLEL=5
|
||||
HELP=0
|
||||
TXN_ID=""
|
||||
|
||||
if [[ $# == 0 ]]; then
|
||||
usage
|
||||
@ -70,6 +73,10 @@ while true; do
|
||||
PARALLEL=$2
|
||||
shift 2
|
||||
;;
|
||||
-x)
|
||||
TXN_ID=$2
|
||||
shift 2
|
||||
;;
|
||||
--)
|
||||
shift
|
||||
break
|
||||
@ -110,9 +117,13 @@ run_sql() {
|
||||
|
||||
load_lineitem_flat() {
|
||||
# Loading data in batches by year.
|
||||
local flat_con_idx=0
|
||||
for con in 'lo_orderdate<19930101' 'lo_orderdate>=19930101 and lo_orderdate<19940101' 'lo_orderdate>=19940101 and lo_orderdate<19950101' 'lo_orderdate>=19950101 and lo_orderdate<19960101' 'lo_orderdate>=19960101 and lo_orderdate<19970101' 'lo_orderdate>=19970101 and lo_orderdate<19980101' 'lo_orderdate>=19980101'; do
|
||||
echo -e "\n${con}"
|
||||
run_sql "
|
||||
flat_con_idx=$((flat_con_idx + 1))
|
||||
|
||||
if [[ -z ${TXN_ID} ]]; then
|
||||
run_sql "
|
||||
INSERT INTO lineorder_flat
|
||||
SELECT
|
||||
LO_ORDERDATE,
|
||||
@ -182,6 +193,79 @@ ON (s.s_suppkey = l.lo_suppkey)
|
||||
INNER JOIN part p
|
||||
ON (p.p_partkey = l.lo_partkey);
|
||||
"
|
||||
else
|
||||
run_sql "
|
||||
INSERT INTO lineorder_flat
|
||||
WITH LABEL \`${TXN_ID}_flat_${flat_con_idx}\`
|
||||
SELECT
|
||||
LO_ORDERDATE,
|
||||
LO_ORDERKEY,
|
||||
LO_LINENUMBER,
|
||||
LO_CUSTKEY,
|
||||
LO_PARTKEY,
|
||||
LO_SUPPKEY,
|
||||
LO_ORDERPRIORITY,
|
||||
LO_SHIPPRIORITY,
|
||||
LO_QUANTITY,
|
||||
LO_EXTENDEDPRICE,
|
||||
LO_ORDTOTALPRICE,
|
||||
LO_DISCOUNT,
|
||||
LO_REVENUE,
|
||||
LO_SUPPLYCOST,
|
||||
LO_TAX,
|
||||
LO_COMMITDATE,
|
||||
LO_SHIPMODE,
|
||||
C_NAME,
|
||||
C_ADDRESS,
|
||||
C_CITY,
|
||||
C_NATION,
|
||||
C_REGION,
|
||||
C_PHONE,
|
||||
C_MKTSEGMENT,
|
||||
S_NAME,
|
||||
S_ADDRESS,
|
||||
S_CITY,
|
||||
S_NATION,
|
||||
S_REGION,
|
||||
S_PHONE,
|
||||
P_NAME,
|
||||
P_MFGR,
|
||||
P_CATEGORY,
|
||||
P_BRAND,
|
||||
P_COLOR,
|
||||
P_TYPE,
|
||||
P_SIZE,
|
||||
P_CONTAINER
|
||||
FROM (
|
||||
SELECT
|
||||
lo_orderkey,
|
||||
lo_linenumber,
|
||||
lo_custkey,
|
||||
lo_partkey,
|
||||
lo_suppkey,
|
||||
lo_orderdate,
|
||||
lo_orderpriority,
|
||||
lo_shippriority,
|
||||
lo_quantity,
|
||||
lo_extendedprice,
|
||||
lo_ordtotalprice,
|
||||
lo_discount,
|
||||
lo_revenue,
|
||||
lo_supplycost,
|
||||
lo_tax,
|
||||
lo_commitdate,
|
||||
lo_shipmode
|
||||
FROM lineorder
|
||||
WHERE ${con}
|
||||
) l
|
||||
INNER JOIN customer c
|
||||
ON (c.c_custkey = l.lo_custkey)
|
||||
INNER JOIN supplier s
|
||||
ON (s.s_suppkey = l.lo_suppkey)
|
||||
INNER JOIN part p
|
||||
ON (p.p_partkey = l.lo_partkey);
|
||||
"
|
||||
fi
|
||||
done
|
||||
}
|
||||
|
||||
@ -200,36 +284,73 @@ echo "Start time: $(date)"
|
||||
echo "==========Start to load data into ssb tables=========="
|
||||
|
||||
echo 'Loading data for table: part'
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" \
|
||||
-H "column_separator:|" \
|
||||
-H "columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy" \
|
||||
-T "${SSB_DATA_DIR}"/part.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load
|
||||
if [[ -z ${TXN_ID} ]]; then
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" \
|
||||
-H "column_separator:|" \
|
||||
-H "columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy" \
|
||||
-T "${SSB_DATA_DIR}"/part.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load
|
||||
else
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" \
|
||||
-H "label:${TXN_ID}_part" -H "column_separator:|" \
|
||||
-H "columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy" \
|
||||
-T "${SSB_DATA_DIR}"/part.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load
|
||||
fi
|
||||
|
||||
echo 'Loading data for table: date'
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" \
|
||||
-H "column_separator:|" \
|
||||
-H "columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy" \
|
||||
-T "${SSB_DATA_DIR}"/date.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/dates/_stream_load
|
||||
# Load the dates table via stream load.
# The label header is sent only when a transaction id was supplied with -x,
# matching the part/supplier/customer loads: per the usage text, repeating a
# load with the same id must not load duplicate data. Without -x no label is
# sent (previously the branches were swapped, so the no-id case sent the
# fixed label "_date" and the -x case sent no label at all).
if [[ -z ${TXN_ID} ]]; then
    curl --location-trusted -u "${USER}":"${PASSWORD}" \
        -H "column_separator:|" \
        -H "columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy" \
        -T "${SSB_DATA_DIR}"/date.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/dates/_stream_load
else
    curl --location-trusted -u "${USER}":"${PASSWORD}" \
        -H "label:${TXN_ID}_date" -H "column_separator:|" \
        -H "columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy" \
        -T "${SSB_DATA_DIR}"/date.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/dates/_stream_load
fi
|
||||
|
||||
echo 'Loading data for table: supplier'
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" \
|
||||
-H "column_separator:|" \
|
||||
-H "columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \
|
||||
-T "${SSB_DATA_DIR}"/supplier.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load
|
||||
if [[ -z ${TXN_ID} ]]; then
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" \
|
||||
-H "column_separator:|" \
|
||||
-H "columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \
|
||||
-T "${SSB_DATA_DIR}"/supplier.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load
|
||||
else
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" \
|
||||
-H "label:${TXN_ID}_supplier" -H "column_separator:|" \
|
||||
-H "columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \
|
||||
-T "${SSB_DATA_DIR}"/supplier.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load
|
||||
fi
|
||||
|
||||
echo 'Loading data for table: customer'
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" \
|
||||
-H "column_separator:|" \
|
||||
-H "columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use" \
|
||||
-T "${SSB_DATA_DIR}"/customer.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load
|
||||
if [[ -z ${TXN_ID} ]]; then
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" \
|
||||
-H "column_separator:|" \
|
||||
-H "columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use" \
|
||||
-T "${SSB_DATA_DIR}"/customer.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load
|
||||
else
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" \
|
||||
-H "label:${TXN_ID}_customer" -H "column_separator:|" \
|
||||
-H "columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use" \
|
||||
-T "${SSB_DATA_DIR}"/customer.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load
|
||||
fi
|
||||
|
||||
echo "Loading data for table: lineorder, with ${PARALLEL} parallel"
|
||||
function load() {
|
||||
echo "$@"
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" \
|
||||
-H "column_separator:|" \
|
||||
-H "columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy" \
|
||||
-T "$@" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineorder/_stream_load
|
||||
# shellcheck disable=SC2016,SC2124
|
||||
local FILE_ID="${@//*./}"
|
||||
if [[ -z ${TXN_ID} ]]; then
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" \
|
||||
-H "column_separator:|" \
|
||||
-H "columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy" \
|
||||
-T "$@" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineorder/_stream_load
|
||||
else
|
||||
curl --location-trusted -u "${USER}":"${PASSWORD}" \
|
||||
-H "label:${TXN_ID}_lineorder_${FILE_ID}" -H "column_separator:|" \
|
||||
-H "columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy" \
|
||||
-T "$@" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineorder/_stream_load
|
||||
fi
|
||||
}
|
||||
|
||||
# set parallelism
|
||||
|
||||
@ -35,11 +35,13 @@ usage() {
|
||||
echo "
|
||||
Usage: $0 <options>
|
||||
Optional options:
|
||||
-c parallelism to load data of lineitem, orders, partsupp, default is 5.
|
||||
-c parallelism to load data of lineitem, orders, partsupp, default is 5.
|
||||
-x use transaction id. multi times of loading with the same id won't load duplicate data.
|
||||
|
||||
Eg.
|
||||
$0 load data using default value.
|
||||
$0 -c 10 load lineitem, orders, partsupp table data using parallelism 10.
|
||||
$0 -c 10 load lineitem, orders, partsupp table data using parallelism 10.
|
||||
$0 -x blabla use transaction id \"blabla\".
|
||||
"
|
||||
exit 1
|
||||
}
|
||||
@ -47,13 +49,14 @@ Usage: $0 <options>
|
||||
OPTS=$(getopt \
|
||||
-n "$0" \
|
||||
-o '' \
|
||||
-o 'hc:' \
|
||||
-o 'hc:x:' \
|
||||
-- "$@")
|
||||
|
||||
eval set -- "${OPTS}"
|
||||
|
||||
PARALLEL=5
|
||||
HELP=0
|
||||
TXN_ID=""
|
||||
|
||||
if [[ $# == 0 ]]; then
|
||||
usage
|
||||
@ -69,6 +72,10 @@ while true; do
|
||||
PARALLEL=$2
|
||||
shift 2
|
||||
;;
|
||||
-x)
|
||||
TXN_ID=$2
|
||||
shift 2
|
||||
;;
|
||||
--)
|
||||
shift
|
||||
break
|
||||
@ -170,13 +177,26 @@ for table_name in ${!table_columns[*]}; do
|
||||
{
|
||||
for file in "${TPCDS_DATA_DIR}/${table_name}"_{1..100}_*.dat; do
|
||||
if ! [[ -f "${file}" ]]; then continue; fi
|
||||
ret=$(curl \
|
||||
--location-trusted \
|
||||
-u "${USER}":"${PASSWORD:=}" \
|
||||
-H "column_separator:|" \
|
||||
-H "columns: ${table_columns[${table_name}]}" \
|
||||
-T "${file}" \
|
||||
http://"${FE_HOST}":"${FE_HTTP_PORT:=8030}"/api/"${DB}"/"${table_name}"/_stream_load 2>/dev/null)
|
||||
FILE_ID=$(echo "${file}" | awk -F'/' '{print $(NF)}' | awk -F'.' '{print $(1)}')
|
||||
if [[ -z ${TXN_ID} ]]; then
|
||||
ret=$(curl \
|
||||
--location-trusted \
|
||||
-u "${USER}":"${PASSWORD:-}" \
|
||||
-H "column_separator:|" \
|
||||
-H "columns: ${table_columns[${table_name}]}" \
|
||||
-T "${file}" \
|
||||
http://"${FE_HOST}":"${FE_HTTP_PORT:-8030}"/api/"${DB}"/"${table_name}"/_stream_load 2>/dev/null)
|
||||
else
|
||||
ret=$(curl \
|
||||
--location-trusted \
|
||||
-u "${USER}":"${PASSWORD:-}" \
|
||||
-H "label:${TXN_ID}_${FILE_ID}" \
|
||||
-H "column_separator:|" \
|
||||
-H "columns: ${table_columns[${table_name}]}" \
|
||||
-T "${file}" \
|
||||
http://"${FE_HOST}":"${FE_HTTP_PORT:-8030}"/api/"${DB}"/"${table_name}"/_stream_load 2>/dev/null)
|
||||
fi
|
||||
|
||||
if [[ $(echo "${ret}" | jq ".Status") == '"Success"' ]]; then
|
||||
echo "----loaded ${file}"
|
||||
else
|
||||
|
||||
@ -36,11 +36,13 @@ usage() {
|
||||
echo "
|
||||
Usage: $0 <options>
|
||||
Optional options:
|
||||
-c parallelism to load data of lineitem, orders, partsupp, default is 5.
|
||||
-c parallelism to load data of lineitem, orders, partsupp, default is 5.
|
||||
-x use transaction id. multi times of loading with the same id won't load duplicate data.
|
||||
|
||||
Eg.
|
||||
$0 load data using default value.
|
||||
$0 -c 10 load lineitem, orders, partsupp table data using parallelism 10.
|
||||
$0 -c 10 load lineitem, orders, partsupp table data using parallelism 10.
|
||||
$0 -x blabla use transaction id \"blabla\".
|
||||
"
|
||||
exit 1
|
||||
}
|
||||
@ -48,13 +50,14 @@ Usage: $0 <options>
|
||||
OPTS=$(getopt \
|
||||
-n "$0" \
|
||||
-o '' \
|
||||
-o 'hc:' \
|
||||
-o 'hc:x:' \
|
||||
-- "$@")
|
||||
|
||||
eval set -- "${OPTS}"
|
||||
|
||||
PARALLEL=5
|
||||
HELP=0
|
||||
TXN_ID=""
|
||||
|
||||
if [[ $# == 0 ]]; then
|
||||
usage
|
||||
@ -70,6 +73,10 @@ while true; do
|
||||
PARALLEL=$2
|
||||
shift 2
|
||||
;;
|
||||
-x)
|
||||
TXN_ID=$2
|
||||
shift 2
|
||||
;;
|
||||
--)
|
||||
shift
|
||||
break
|
||||
@ -116,51 +123,113 @@ echo "DB: ${DB}"
|
||||
|
||||
function load_region() {
|
||||
echo "$*"
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: r_regionkey, r_name, r_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/region/_stream_load
|
||||
if [[ -z ${TXN_ID} ]]; then
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: r_regionkey, r_name, r_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/region/_stream_load
|
||||
else
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "label:${TXN_ID}_region" \
|
||||
-H "columns: r_regionkey, r_name, r_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/region/_stream_load
|
||||
fi
|
||||
}
|
||||
function load_nation() {
|
||||
echo "$*"
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: n_nationkey, n_name, n_regionkey, n_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/nation/_stream_load
|
||||
if [[ -z ${TXN_ID} ]]; then
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: n_nationkey, n_name, n_regionkey, n_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/nation/_stream_load
|
||||
else
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "label:${TXN_ID}_nation" \
|
||||
-H "columns: n_nationkey, n_name, n_regionkey, n_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/nation/_stream_load
|
||||
fi
|
||||
}
|
||||
function load_supplier() {
|
||||
echo "$*"
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load
|
||||
if [[ -z ${TXN_ID} ]]; then
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load
|
||||
else
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "label:${TXN_ID}_supplier" \
|
||||
-H "columns: s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load
|
||||
fi
|
||||
}
|
||||
function load_customer() {
|
||||
echo "$*"
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load
|
||||
if [[ -z ${TXN_ID} ]]; then
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load
|
||||
else
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "label:${TXN_ID}_customer" \
|
||||
-H "columns: c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load
|
||||
fi
|
||||
}
|
||||
function load_part() {
|
||||
echo "$*"
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load
|
||||
if [[ -z ${TXN_ID} ]]; then
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load
|
||||
else
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "label:${TXN_ID}_part" \
|
||||
-H "columns: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load
|
||||
fi
|
||||
}
|
||||
function load_partsupp() {
|
||||
echo "$*"
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/partsupp/_stream_load
|
||||
# shellcheck disable=SC2016,SC2124
|
||||
local FILE_ID="${@//*./}"
|
||||
if [[ -z ${TXN_ID} ]]; then
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/partsupp/_stream_load
|
||||
else
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "label:${TXN_ID}_partsupp_${FILE_ID}" \
|
||||
-H "columns: ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/partsupp/_stream_load
|
||||
fi
|
||||
}
|
||||
function load_orders() {
|
||||
echo "$*"
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/orders/_stream_load
|
||||
# shellcheck disable=SC2016,SC2124
|
||||
local FILE_ID="${@//*./}"
|
||||
if [[ -z ${TXN_ID} ]]; then
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/orders/_stream_load
|
||||
else
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "label:${TXN_ID}_orders_${FILE_ID}" \
|
||||
-H "columns: o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/orders/_stream_load
|
||||
fi
|
||||
}
|
||||
function load_lineitem() {
|
||||
echo "$*"
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag,l_linestatus, l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment,temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineitem/_stream_load
|
||||
# shellcheck disable=SC2016,SC2124
|
||||
local FILE_ID="${@//*./}"
|
||||
if [[ -z ${TXN_ID} ]]; then
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "columns: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag,l_linestatus, l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment,temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineitem/_stream_load
|
||||
else
|
||||
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
|
||||
-H "label:${TXN_ID}_lineitem_${FILE_ID}" \
|
||||
-H "columns: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag,l_linestatus, l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment,temp" \
|
||||
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineitem/_stream_load
|
||||
fi
|
||||
}
|
||||
|
||||
# start load
|
||||
|
||||
Reference in New Issue
Block a user