[Enhancement](tools) Support transaction for benchmarks loading (#31126)

This commit is contained in:
zclllyybb
2024-02-20 14:29:34 +08:00
committed by yiguolei
parent 4aaab6fb44
commit 95a1e1c0bf
4 changed files with 299 additions and 70 deletions

View File

@ -30,13 +30,18 @@ ROOT=$(
CURDIR=${ROOT}
DATA_DIR=$CURDIR/
# DATA_DIR=/mnt/disk1/stephen/data/clickbench
usage() {
echo "
This script is used to load ClickBench data,
will use mysql client to connect Doris server which is specified in conf/doris-cluster.conf file.
Usage: $0
Usage: $0 <options>
Optional options:
-x use transaction id. Multiple loads with the same id won't load duplicate data.
Eg.
$0 load data using default value.
$0 -x blabla use transaction id \"blabla\".
"
exit 1
}
@ -44,11 +49,13 @@ Usage: $0
OPTS=$(getopt \
-n $0 \
-o '' \
-o 'h' \
-o 'hx:' \
-- "$@")
eval set -- "$OPTS"
HELP=0
TXN_ID=""
while true; do
case "$1" in
-h)
@ -59,6 +66,10 @@ while true; do
shift
break
;;
-x)
TXN_ID=$2
shift 2
;;
*)
echo "Internal error"
exit 1
@ -113,7 +124,6 @@ function load() {
if [ ! -f "$DATA_DIR/hits_split${i}" ]; then
echo "will download hits_split${i} to $DATA_DIR"
wget --continue "https://doris-test-data.oss-cn-hongkong.aliyuncs.com/ClickBench/hits_split${i}" &
# wget --continue "https://doris-test-data.oss-cn-hongkong-internal.aliyuncs.com/ClickBench/hits_split${i}" &
PID=$!
wget_pids[${#wget_pids[@]}]=$PID
fi
@ -127,11 +137,20 @@ function load() {
for i in $(seq 0 9); do
echo -e "
start loading hits_split${i}"
curl --location-trusted \
-u $USER:$PASSWORD \
-T "$DATA_DIR/hits_split${i}" \
-H "columns:WatchID,JavaEnable,Title,GoodEvent,EventTime,EventDate,CounterID,ClientIP,RegionID,UserID,CounterClass,OS,UserAgent,URL,Referer,IsRefresh,RefererCategoryID,RefererRegionID,URLCategoryID,URLRegionID,ResolutionWidth,ResolutionHeight,ResolutionDepth,FlashMajor,FlashMinor,FlashMinor2,NetMajor,NetMinor,UserAgentMajor,UserAgentMinor,CookieEnable,JavascriptEnable,IsMobile,MobilePhone,MobilePhoneModel,Params,IPNetworkID,TraficSourceID,SearchEngineID,SearchPhrase,AdvEngineID,IsArtifical,WindowClientWidth,WindowClientHeight,ClientTimeZone,ClientEventTime,SilverlightVersion1,SilverlightVersion2,SilverlightVersion3,SilverlightVersion4,PageCharset,CodeVersion,IsLink,IsDownload,IsNotBounce,FUniqID,OriginalURL,HID,IsOldCounter,IsEvent,IsParameter,DontCountHits,WithHash,HitColor,LocalEventTime,Age,Sex,Income,Interests,Robotness,RemoteIP,WindowName,OpenerName,HistoryLength,BrowserLanguage,BrowserCountry,SocialNetwork,SocialAction,HTTPError,SendTiming,DNSTiming,ConnectTiming,ResponseStartTiming,ResponseEndTiming,FetchTiming,SocialSourceNetworkID,SocialSourcePage,ParamPrice,ParamOrderID,ParamCurrency,ParamCurrencyID,OpenstatServiceName,OpenstatCampaignID,OpenstatAdID,OpenstatSourceID,UTMSource,UTMMedium,UTMCampaign,UTMContent,UTMTerm,FromTag,HasGCLID,RefererHash,URLHash,CLID" \
http://$FE_HOST:$FE_HTTP_PORT/api/$DB/hits/_stream_load
if [[ -z ${TXN_ID} ]]; then
curl --location-trusted \
-u $USER:$PASSWORD \
-T "$DATA_DIR/hits_split${i}" \
-H "columns:WatchID,JavaEnable,Title,GoodEvent,EventTime,EventDate,CounterID,ClientIP,RegionID,UserID,CounterClass,OS,UserAgent,URL,Referer,IsRefresh,RefererCategoryID,RefererRegionID,URLCategoryID,URLRegionID,ResolutionWidth,ResolutionHeight,ResolutionDepth,FlashMajor,FlashMinor,FlashMinor2,NetMajor,NetMinor,UserAgentMajor,UserAgentMinor,CookieEnable,JavascriptEnable,IsMobile,MobilePhone,MobilePhoneModel,Params,IPNetworkID,TraficSourceID,SearchEngineID,SearchPhrase,AdvEngineID,IsArtifical,WindowClientWidth,WindowClientHeight,ClientTimeZone,ClientEventTime,SilverlightVersion1,SilverlightVersion2,SilverlightVersion3,SilverlightVersion4,PageCharset,CodeVersion,IsLink,IsDownload,IsNotBounce,FUniqID,OriginalURL,HID,IsOldCounter,IsEvent,IsParameter,DontCountHits,WithHash,HitColor,LocalEventTime,Age,Sex,Income,Interests,Robotness,RemoteIP,WindowName,OpenerName,HistoryLength,BrowserLanguage,BrowserCountry,SocialNetwork,SocialAction,HTTPError,SendTiming,DNSTiming,ConnectTiming,ResponseStartTiming,ResponseEndTiming,FetchTiming,SocialSourceNetworkID,SocialSourcePage,ParamPrice,ParamOrderID,ParamCurrency,ParamCurrencyID,OpenstatServiceName,OpenstatCampaignID,OpenstatAdID,OpenstatSourceID,UTMSource,UTMMedium,UTMCampaign,UTMContent,UTMTerm,FromTag,HasGCLID,RefererHash,URLHash,CLID" \
http://$FE_HOST:$FE_HTTP_PORT/api/$DB/hits/_stream_load
else
curl --location-trusted \
-u $USER:$PASSWORD \
-T "$DATA_DIR/hits_split${i}" \
-H "label:${TXN_ID}_${i}" \
-H "columns:WatchID,JavaEnable,Title,GoodEvent,EventTime,EventDate,CounterID,ClientIP,RegionID,UserID,CounterClass,OS,UserAgent,URL,Referer,IsRefresh,RefererCategoryID,RefererRegionID,URLCategoryID,URLRegionID,ResolutionWidth,ResolutionHeight,ResolutionDepth,FlashMajor,FlashMinor,FlashMinor2,NetMajor,NetMinor,UserAgentMajor,UserAgentMinor,CookieEnable,JavascriptEnable,IsMobile,MobilePhone,MobilePhoneModel,Params,IPNetworkID,TraficSourceID,SearchEngineID,SearchPhrase,AdvEngineID,IsArtifical,WindowClientWidth,WindowClientHeight,ClientTimeZone,ClientEventTime,SilverlightVersion1,SilverlightVersion2,SilverlightVersion3,SilverlightVersion4,PageCharset,CodeVersion,IsLink,IsDownload,IsNotBounce,FUniqID,OriginalURL,HID,IsOldCounter,IsEvent,IsParameter,DontCountHits,WithHash,HitColor,LocalEventTime,Age,Sex,Income,Interests,Robotness,RemoteIP,WindowName,OpenerName,HistoryLength,BrowserLanguage,BrowserCountry,SocialNetwork,SocialAction,HTTPError,SendTiming,DNSTiming,ConnectTiming,ResponseStartTiming,ResponseEndTiming,FetchTiming,SocialSourceNetworkID,SocialSourcePage,ParamPrice,ParamOrderID,ParamCurrency,ParamCurrencyID,OpenstatServiceName,OpenstatCampaignID,OpenstatAdID,OpenstatSourceID,UTMSource,UTMMedium,UTMCampaign,UTMContent,UTMTerm,FromTag,HasGCLID,RefererHash,URLHash,CLID" \
http://$FE_HOST:$FE_HTTP_PORT/api/$DB/hits/_stream_load
fi
done
}

View File

@ -36,11 +36,13 @@ usage() {
echo "
Usage: $0 <options>
Optional options:
-c parallelism to load data of lineorder table, default is 5.
-c parallelism to load data of lineorder table, default is 5.
-x use transaction id. Multiple loads with the same id won't load duplicate data.
Eg.
$0 load data using default value.
$0 -c 10 load lineorder table data using parallelism 10.
$0 -c 10 load lineorder table data using parallelism 10.
$0 -x blabla use transaction id \"blabla\".
"
exit 1
}
@ -48,13 +50,14 @@ Usage: $0 <options>
OPTS=$(getopt \
-n "$0" \
-o '' \
-o 'hc:' \
-o 'hc:x:' \
-- "$@")
eval set -- "${OPTS}"
PARALLEL=5
HELP=0
TXN_ID=""
if [[ $# == 0 ]]; then
usage
@ -70,6 +73,10 @@ while true; do
PARALLEL=$2
shift 2
;;
-x)
TXN_ID=$2
shift 2
;;
--)
shift
break
@ -110,9 +117,13 @@ run_sql() {
load_lineitem_flat() {
# Loading data in batches by year.
local flat_con_idx=0
for con in 'lo_orderdate<19930101' 'lo_orderdate>=19930101 and lo_orderdate<19940101' 'lo_orderdate>=19940101 and lo_orderdate<19950101' 'lo_orderdate>=19950101 and lo_orderdate<19960101' 'lo_orderdate>=19960101 and lo_orderdate<19970101' 'lo_orderdate>=19970101 and lo_orderdate<19980101' 'lo_orderdate>=19980101'; do
echo -e "\n${con}"
run_sql "
flat_con_idx=$((flat_con_idx + 1))
if [[ -z ${TXN_ID} ]]; then
run_sql "
INSERT INTO lineorder_flat
SELECT
LO_ORDERDATE,
@ -182,6 +193,79 @@ ON (s.s_suppkey = l.lo_suppkey)
INNER JOIN part p
ON (p.p_partkey = l.lo_partkey);
"
else
run_sql "
INSERT INTO lineorder_flat
WITH LABEL \`${TXN_ID}_flat_${flat_con_idx}\`
SELECT
LO_ORDERDATE,
LO_ORDERKEY,
LO_LINENUMBER,
LO_CUSTKEY,
LO_PARTKEY,
LO_SUPPKEY,
LO_ORDERPRIORITY,
LO_SHIPPRIORITY,
LO_QUANTITY,
LO_EXTENDEDPRICE,
LO_ORDTOTALPRICE,
LO_DISCOUNT,
LO_REVENUE,
LO_SUPPLYCOST,
LO_TAX,
LO_COMMITDATE,
LO_SHIPMODE,
C_NAME,
C_ADDRESS,
C_CITY,
C_NATION,
C_REGION,
C_PHONE,
C_MKTSEGMENT,
S_NAME,
S_ADDRESS,
S_CITY,
S_NATION,
S_REGION,
S_PHONE,
P_NAME,
P_MFGR,
P_CATEGORY,
P_BRAND,
P_COLOR,
P_TYPE,
P_SIZE,
P_CONTAINER
FROM (
SELECT
lo_orderkey,
lo_linenumber,
lo_custkey,
lo_partkey,
lo_suppkey,
lo_orderdate,
lo_orderpriority,
lo_shippriority,
lo_quantity,
lo_extendedprice,
lo_ordtotalprice,
lo_discount,
lo_revenue,
lo_supplycost,
lo_tax,
lo_commitdate,
lo_shipmode
FROM lineorder
WHERE ${con}
) l
INNER JOIN customer c
ON (c.c_custkey = l.lo_custkey)
INNER JOIN supplier s
ON (s.s_suppkey = l.lo_suppkey)
INNER JOIN part p
ON (p.p_partkey = l.lo_partkey);
"
fi
done
}
@ -200,36 +284,73 @@ echo "Start time: $(date)"
echo "==========Start to load data into ssb tables=========="
echo 'Loading data for table: part'
curl --location-trusted -u "${USER}":"${PASSWORD}" \
-H "column_separator:|" \
-H "columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy" \
-T "${SSB_DATA_DIR}"/part.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load
if [[ -z ${TXN_ID} ]]; then
curl --location-trusted -u "${USER}":"${PASSWORD}" \
-H "column_separator:|" \
-H "columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy" \
-T "${SSB_DATA_DIR}"/part.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load
else
curl --location-trusted -u "${USER}":"${PASSWORD}" \
-H "label:${TXN_ID}_part" -H "column_separator:|" \
-H "columns:p_partkey,p_name,p_mfgr,p_category,p_brand,p_color,p_type,p_size,p_container,p_dummy" \
-T "${SSB_DATA_DIR}"/part.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load
fi
echo 'Loading data for table: date'
curl --location-trusted -u "${USER}":"${PASSWORD}" \
-H "column_separator:|" \
-H "columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy" \
-T "${SSB_DATA_DIR}"/date.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/dates/_stream_load
if [[ -z ${TXN_ID} ]]; then
curl --location-trusted -u "${USER}":"${PASSWORD}" \
-H "column_separator:|" \
-H "columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy" \
-T "${SSB_DATA_DIR}"/date.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/dates/_stream_load
else
curl --location-trusted -u "${USER}":"${PASSWORD}" \
-H "label:${TXN_ID}_date" -H "column_separator:|" \
-H "columns:d_datekey,d_date,d_dayofweek,d_month,d_year,d_yearmonthnum,d_yearmonth,d_daynuminweek,d_daynuminmonth,d_daynuminyear,d_monthnuminyear,d_weeknuminyear,d_sellingseason,d_lastdayinweekfl,d_lastdayinmonthfl,d_holidayfl,d_weekdayfl,d_dummy" \
-T "${SSB_DATA_DIR}"/date.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/dates/_stream_load
fi
echo 'Loading data for table: supplier'
curl --location-trusted -u "${USER}":"${PASSWORD}" \
-H "column_separator:|" \
-H "columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \
-T "${SSB_DATA_DIR}"/supplier.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load
if [[ -z ${TXN_ID} ]]; then
curl --location-trusted -u "${USER}":"${PASSWORD}" \
-H "column_separator:|" \
-H "columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \
-T "${SSB_DATA_DIR}"/supplier.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load
else
curl --location-trusted -u "${USER}":"${PASSWORD}" \
-H "label:${TXN_ID}_supplier" -H "column_separator:|" \
-H "columns:s_suppkey,s_name,s_address,s_city,s_nation,s_region,s_phone,s_dummy" \
-T "${SSB_DATA_DIR}"/supplier.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load
fi
echo 'Loading data for table: customer'
curl --location-trusted -u "${USER}":"${PASSWORD}" \
-H "column_separator:|" \
-H "columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use" \
-T "${SSB_DATA_DIR}"/customer.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load
if [[ -z ${TXN_ID} ]]; then
curl --location-trusted -u "${USER}":"${PASSWORD}" \
-H "column_separator:|" \
-H "columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use" \
-T "${SSB_DATA_DIR}"/customer.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load
else
curl --location-trusted -u "${USER}":"${PASSWORD}" \
-H "label:${TXN_ID}_customer" -H "column_separator:|" \
-H "columns:c_custkey,c_name,c_address,c_city,c_nation,c_region,c_phone,c_mktsegment,no_use" \
-T "${SSB_DATA_DIR}"/customer.tbl http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load
fi
echo "Loading data for table: lineorder, with ${PARALLEL} parallel"
function load() {
echo "$@"
curl --location-trusted -u "${USER}":"${PASSWORD}" \
-H "column_separator:|" \
-H "columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy" \
-T "$@" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineorder/_stream_load
# shellcheck disable=SC2016,SC2124
local FILE_ID="${@//*./}"
if [[ -z ${TXN_ID} ]]; then
curl --location-trusted -u "${USER}":"${PASSWORD}" \
-H "column_separator:|" \
-H "columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy" \
-T "$@" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineorder/_stream_load
else
curl --location-trusted -u "${USER}":"${PASSWORD}" \
-H "label:${TXN_ID}_lineorder_${FILE_ID}" -H "column_separator:|" \
-H "columns:lo_orderkey,lo_linenumber,lo_custkey,lo_partkey,lo_suppkey,lo_orderdate,lo_orderpriority,lo_shippriority,lo_quantity,lo_extendedprice,lo_ordtotalprice,lo_discount,lo_revenue,lo_supplycost,lo_tax,lo_commitdate,lo_shipmode,lo_dummy" \
-T "$@" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineorder/_stream_load
fi
}
# set parallelism

View File

@ -35,11 +35,13 @@ usage() {
echo "
Usage: $0 <options>
Optional options:
-c parallelism to load data of lineitem, orders, partsupp, default is 5.
-c parallelism to load data of lineitem, orders, partsupp, default is 5.
-x use transaction id. Multiple loads with the same id won't load duplicate data.
Eg.
$0 load data using default value.
$0 -c 10 load lineitem, orders, partsupp table data using parallelism 10.
$0 -c 10 load lineitem, orders, partsupp table data using parallelism 10.
$0 -x blabla use transaction id \"blabla\".
"
exit 1
}
@ -47,13 +49,14 @@ Usage: $0 <options>
OPTS=$(getopt \
-n "$0" \
-o '' \
-o 'hc:' \
-o 'hc:x:' \
-- "$@")
eval set -- "${OPTS}"
PARALLEL=5
HELP=0
TXN_ID=""
if [[ $# == 0 ]]; then
usage
@ -69,6 +72,10 @@ while true; do
PARALLEL=$2
shift 2
;;
-x)
TXN_ID=$2
shift 2
;;
--)
shift
break
@ -170,13 +177,26 @@ for table_name in ${!table_columns[*]}; do
{
for file in "${TPCDS_DATA_DIR}/${table_name}"_{1..100}_*.dat; do
if ! [[ -f "${file}" ]]; then continue; fi
ret=$(curl \
--location-trusted \
-u "${USER}":"${PASSWORD:=}" \
-H "column_separator:|" \
-H "columns: ${table_columns[${table_name}]}" \
-T "${file}" \
http://"${FE_HOST}":"${FE_HTTP_PORT:=8030}"/api/"${DB}"/"${table_name}"/_stream_load 2>/dev/null)
FILE_ID=$(echo "${file}" | awk -F'/' '{print $(NF)}' | awk -F'.' '{print $(1)}')
if [[ -z ${TXN_ID} ]]; then
ret=$(curl \
--location-trusted \
-u "${USER}":"${PASSWORD:-}" \
-H "column_separator:|" \
-H "columns: ${table_columns[${table_name}]}" \
-T "${file}" \
http://"${FE_HOST}":"${FE_HTTP_PORT:-8030}"/api/"${DB}"/"${table_name}"/_stream_load 2>/dev/null)
else
ret=$(curl \
--location-trusted \
-u "${USER}":"${PASSWORD:-}" \
-H "label:${TXN_ID}_${FILE_ID}" \
-H "column_separator:|" \
-H "columns: ${table_columns[${table_name}]}" \
-T "${file}" \
http://"${FE_HOST}":"${FE_HTTP_PORT:-8030}"/api/"${DB}"/"${table_name}"/_stream_load 2>/dev/null)
fi
if [[ $(echo "${ret}" | jq ".Status") == '"Success"' ]]; then
echo "----loaded ${file}"
else

View File

@ -36,11 +36,13 @@ usage() {
echo "
Usage: $0 <options>
Optional options:
-c parallelism to load data of lineitem, orders, partsupp, default is 5.
-c parallelism to load data of lineitem, orders, partsupp, default is 5.
-x use transaction id. Multiple loads with the same id won't load duplicate data.
Eg.
$0 load data using default value.
$0 -c 10 load lineitem, orders, partsupp table data using parallelism 10.
$0 -c 10 load lineitem, orders, partsupp table data using parallelism 10.
$0 -x blabla use transaction id \"blabla\".
"
exit 1
}
@ -48,13 +50,14 @@ Usage: $0 <options>
OPTS=$(getopt \
-n "$0" \
-o '' \
-o 'hc:' \
-o 'hc:x:' \
-- "$@")
eval set -- "${OPTS}"
PARALLEL=5
HELP=0
TXN_ID=""
if [[ $# == 0 ]]; then
usage
@ -70,6 +73,10 @@ while true; do
PARALLEL=$2
shift 2
;;
-x)
TXN_ID=$2
shift 2
;;
--)
shift
break
@ -116,51 +123,113 @@ echo "DB: ${DB}"
function load_region() {
echo "$*"
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "columns: r_regionkey, r_name, r_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/region/_stream_load
if [[ -z ${TXN_ID} ]]; then
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "columns: r_regionkey, r_name, r_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/region/_stream_load
else
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "label:${TXN_ID}_region" \
-H "columns: r_regionkey, r_name, r_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/region/_stream_load
fi
}
function load_nation() {
echo "$*"
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "columns: n_nationkey, n_name, n_regionkey, n_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/nation/_stream_load
if [[ -z ${TXN_ID} ]]; then
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "columns: n_nationkey, n_name, n_regionkey, n_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/nation/_stream_load
else
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "label:${TXN_ID}_nation" \
-H "columns: n_nationkey, n_name, n_regionkey, n_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/nation/_stream_load
fi
}
function load_supplier() {
echo "$*"
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "columns: s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load
if [[ -z ${TXN_ID} ]]; then
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "columns: s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load
else
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "label:${TXN_ID}_supplier" \
-H "columns: s_suppkey, s_name, s_address, s_nationkey, s_phone, s_acctbal, s_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/supplier/_stream_load
fi
}
function load_customer() {
echo "$*"
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "columns: c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load
if [[ -z ${TXN_ID} ]]; then
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "columns: c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load
else
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "label:${TXN_ID}_customer" \
-H "columns: c_custkey, c_name, c_address, c_nationkey, c_phone, c_acctbal, c_mktsegment, c_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/customer/_stream_load
fi
}
function load_part() {
echo "$*"
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "columns: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load
if [[ -z ${TXN_ID} ]]; then
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "columns: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load
else
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "label:${TXN_ID}_part" \
-H "columns: p_partkey, p_name, p_mfgr, p_brand, p_type, p_size, p_container, p_retailprice, p_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/part/_stream_load
fi
}
function load_partsupp() {
echo "$*"
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "columns: ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/partsupp/_stream_load
# shellcheck disable=SC2016,SC2124
local FILE_ID="${@//*./}"
if [[ -z ${TXN_ID} ]]; then
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "columns: ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/partsupp/_stream_load
else
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "label:${TXN_ID}_partsupp_${FILE_ID}" \
-H "columns: ps_partkey, ps_suppkey, ps_availqty, ps_supplycost, ps_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/partsupp/_stream_load
fi
}
function load_orders() {
echo "$*"
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "columns: o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/orders/_stream_load
# shellcheck disable=SC2016,SC2124
local FILE_ID="${@//*./}"
if [[ -z ${TXN_ID} ]]; then
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "columns: o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/orders/_stream_load
else
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "label:${TXN_ID}_orders_${FILE_ID}" \
-H "columns: o_orderkey, o_custkey, o_orderstatus, o_totalprice, o_orderdate, o_orderpriority, o_clerk, o_shippriority, o_comment, temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/orders/_stream_load
fi
}
function load_lineitem() {
echo "$*"
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "columns: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag,l_linestatus, l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment,temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineitem/_stream_load
# shellcheck disable=SC2016,SC2124
local FILE_ID="${@//*./}"
if [[ -z ${TXN_ID} ]]; then
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "columns: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag,l_linestatus, l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment,temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineitem/_stream_load
else
curl -s --location-trusted -u "${USER}":"${PASSWORD}" -H "column_separator:|" \
-H "label:${TXN_ID}_lineitem_${FILE_ID}" \
-H "columns: l_orderkey, l_partkey, l_suppkey, l_linenumber, l_quantity, l_extendedprice, l_discount, l_tax, l_returnflag,l_linestatus, l_shipdate,l_commitdate,l_receiptdate,l_shipinstruct,l_shipmode,l_comment,temp" \
-T "$*" http://"${FE_HOST}":"${FE_HTTP_PORT}"/api/"${DB}"/lineitem/_stream_load
fi
}
# start load