[tools](tpc-ds) add script tools to run tpc-ds conveniently (#17366)

build-tpcds-tools.sh
gen-tpcds-data.sh
gen-tpcds-queries.sh
create-tpcds-tables.sh
load-tpcds-data.sh
run-tpcds-queries.sh
generate data and queries support specify SCALE,
create table may need to be edited handly to specify BUCKETS or change int to bigint if SCALE is too big.

---------

Co-authored-by: stephen <hello_stephen@@qq.com>
This commit is contained in:
Dongyang Li
2023-03-03 08:24:07 +08:00
committed by GitHub
parent ba82cd10c6
commit cd7e03575b
9 changed files with 1510 additions and 0 deletions

View File

@ -0,0 +1,47 @@
<!--
Licensed to the Apache Software Foundation (ASF) under one
or more contributor license agreements. See the NOTICE file
distributed with this work for additional information
regarding copyright ownership. The ASF licenses this file
to you under the Apache License, Version 2.0 (the
"License"); you may not use this file except in compliance
with the License. You may obtain a copy of the License at
http://www.apache.org/licenses/LICENSE-2.0
Unless required by applicable law or agreed to in writing,
software distributed under the License is distributed on an
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
KIND, either express or implied. See the License for the
specific language governing permissions and limitations
under the License.
-->
## Usage
These scripts are used to make tpc-ds test.
follow the steps below:
### 1. build tpc-ds dsdgen dsqgen tool.
./bin/build-tpcds-tools.sh
### 2. generate tpc-ds data. use -h for more infomations.
./bin/gen-tpcds-data.sh -s 1
### 3. generate tpc-ds queries. use -h for more infomations.
./bin/gen-tpcds-queries.sh -s 1
### 4. create tpc-ds tables. modify `conf/doris-cluster.conf` to specify doris info, then run script below.
./bin/create-tpcds-tables.sh
### 5. load tpc-ds data. use -h for help.
./bin/load-tpcds-data.sh
### 6. run tpc-ds queries.
./bin/run-tpcds-queries.sh

View File

@ -0,0 +1,74 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
##############################################################
# This script is used to build tpcds-dsdgen
# TPC-DS_Tools_v3.2.0.zip is from https://www.tpc.org/tpc_documents_current_versions/current_specifications5.asp
# Usage:
# sh build-tpcds-dsdgen.sh
##############################################################
set -eo pipefail
ROOT=$(dirname "$0")
ROOT=$(
cd "${ROOT}"
pwd
)
CURDIR="${ROOT}"
TPCDS_DBGEN_DIR="${CURDIR}/DSGen-software-code-3.2.0rc1/tools"
check_prerequest() {
local CMD=$1
local NAME=$2
if ! ${CMD} >/dev/null; then
echo "${NAME} is missing. This script depends on unzip to extract files from TPC-DS_Tools_v3.2.0.zip"
exit 1
fi
}
check_prerequest "unzip -h" "unzip"
# download tpcds tools package first
if [[ -d "${CURDIR}/DSGen-software-code-3.2.0rc1" ]]; then
echo "If you want to rebuild TPC-DS_Tools_v3.2.0 again, please delete ${CURDIR}/DSGen-software-code-3.2.0rc1 first." && exit 1
elif [[ -f "${CURDIR}/TPC-DS_Tools_v3.2.0.zip" ]]; then
unzip TPC-DS_Tools_v3.2.0.zip -d "${CURDIR}/"
else
wget "https://doris-build-1308700295.cos.ap-beijing.myqcloud.com/tools/TPC-DS_Tools_v3.2.0.zip"
unzip TPC-DS_Tools_v3.2.0.zip -d "${CURDIR}/"
fi
# compile tpcds-dsdgen
cd "${TPCDS_DBGEN_DIR}/"
make >/dev/null
cd -
# check
if [[ -f ${TPCDS_DBGEN_DIR}/dsdgen ]]; then
echo "
################
Build succeed!
################
Run ${TPCDS_DBGEN_DIR}/dsdgen -h"
exit 0
else
echo "Build failed!"
exit 1
fi

View File

@ -0,0 +1,100 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
##############################################################
# This script is used to create TPC-DS tables
##############################################################
set -eo pipefail
ROOT=$(dirname "$0")
ROOT=$(
cd "${ROOT}"
pwd
)
CURDIR=${ROOT}
usage() {
echo "
This script is used to create TPC-DS tables,
will use mysql client to connect Doris server which is specified in doris-cluster.conf file.
Usage: $0
"
exit 1
}
OPTS=$(getopt \
-n "$0" \
-o '' \
-- "$@")
eval set -- "${OPTS}"
HELP=0
if [[ $# == 0 ]]; then
usage
fi
while true; do
case "$1" in
-h)
HELP=1
shift
;;
--)
shift
break
;;
*)
echo "Internal error"
exit 1
;;
esac
done
if [[ ${HELP} -eq 1 ]]; then
usage
fi
check_prerequest() {
local CMD=$1
local NAME=$2
if ! ${CMD}; then
echo "${NAME} is missing. This script depends on mysql to create tables in Doris."
exit 1
fi
}
check_prerequest "mysql --version" "mysql"
source "${CURDIR}/../conf/doris-cluster.conf"
export MYSQL_PWD=${PASSWORD}
echo "FE_HOST: ${FE_HOST}"
echo "FE_QUERY_PORT: ${FE_QUERY_PORT}"
echo "USER: ${USER}"
echo "DB: ${DB}"
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -e "DROP DATABASE IF EXISTS ${DB}"
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -e "CREATE DATABASE ${DB}"
echo "Run SQLs from ${CURDIR}/../ddl/create-tpcds-tables.sql"
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" <"${CURDIR}"/../ddl/create-tpcds-tables.sql
echo "tpcds tables has been created"

View File

@ -0,0 +1,125 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
##############################################################
# This script is used to generate TPC-DS data set
##############################################################
set -eo pipefail
ROOT=$(dirname "$0")
ROOT=$(
cd "${ROOT}"
pwd
)
CURDIR="${ROOT}"
TPCDS_DBGEN_DIR="${CURDIR}/DSGen-software-code-3.2.0rc1/tools"
usage() {
echo "
Usage: $0 <options>
Optional options:
-s scale factor, default is 1
-c parallelism to generate data of (lineitem, orders, partsupp) table, default is 10
Eg.
$0 generate data using default value.
$0 -s 100 generate data with scale factor 100.
$0 -s 1000 -c 100 generate data with scale factor 1000. And using 1000 threads to generate data concurrently.
"
exit 1
}
OPTS=$(getopt \
-n "$0" \
-o '' \
-o 'hs:c:' \
-- "$@")
eval set -- "${OPTS}"
SCALE_FACTOR=1
PARALLEL=10
HELP=0
if [[ $# == 0 ]]; then
usage
fi
while true; do
case "$1" in
-h)
HELP=1
shift
;;
-s)
SCALE_FACTOR=$2
shift 2
;;
-c)
PARALLEL=$2
shift 2
;;
--)
shift
break
;;
*)
echo "Internal error"
exit 1
;;
esac
done
if [[ ${HELP} -eq 1 ]]; then
usage
fi
TPCDS_DATA_DIR="${CURDIR}/tpcds-data"
echo "Scale Factor: ${SCALE_FACTOR}"
echo "Parallelism: ${PARALLEL}"
# check if dsdgen exists
if [[ ! -f ${TPCDS_DBGEN_DIR}/dsdgen ]]; then
echo "${TPCDS_DBGEN_DIR}/dsdgen does not exist. Run build-tpcds-dsdgen.sh first to build it first."
exit 1
fi
if [[ -d ${TPCDS_DATA_DIR}/ ]]; then
echo "${TPCDS_DATA_DIR} exists. Remove it before generating data"
exit 1
fi
mkdir "${TPCDS_DATA_DIR}"/
# gen data
echo "Begin to generate data..."
date
cd "${TPCDS_DBGEN_DIR}"
if [[ ${PARALLEL} -eq 1 ]] && "${TPCDS_DBGEN_DIR}"/dsdgen -SCALE "${SCALE_FACTOR}" -TERMINATE N -DIR "${TPCDS_DATA_DIR}"; then
echo "data genarated."
elif [[ ${PARALLEL} -gt 1 ]] && "${TPCDS_DBGEN_DIR}"/dsdgen -SCALE "${SCALE_FACTOR}" -PARALLEL "${PARALLEL}" -TERMINATE N -DIR "${TPCDS_DATA_DIR}"; then
echo "data genarated."
else
echo "ERROR occured." && exit 1
fi
cd -
date
# check data
du -sh "${TPCDS_DATA_DIR}"/*.dat*

View File

@ -0,0 +1,173 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
##############################################################
# This script is used to generate TPC-DS tables
##############################################################
set -eo pipefail
ROOT=$(dirname "$0")
ROOT=$(
cd "${ROOT}"
pwd
)
CURDIR="${ROOT}"
TPCDS_DSQGEN_DIR="${CURDIR}/DSGen-software-code-3.2.0rc1/tools"
TPCDS_QUERIE_DIR="${CURDIR}/../queries"
usage() {
echo "
This script is used to generate TPC-DS 99 queries of different scale,
Usage: $0 <options>
Optional options:
-s scale factor, default is 1
Eg.
$0 -s 100 generate tpcds queries with scale factor 100.
"
exit 1
}
OPTS=$(getopt \
-n "$0" \
-o '' \
-o 'hs:' \
-- "$@")
eval set -- "${OPTS}"
HELP=0
SCALE=1
if [[ $# == 0 ]]; then
usage
fi
while true; do
case "$1" in
-h)
HELP=1
shift
;;
-s)
SCALE=$2
shift 2
;;
--)
shift
break
;;
*)
echo "Internal error"
exit 1
;;
esac
done
if [[ ${HELP} -eq 1 ]]; then
usage
fi
# check if dsqgen exists
if [[ ! -f ${TPCDS_DSQGEN_DIR}/dsqgen ]]; then
echo "${TPCDS_DSQGEN_DIR}/dsqgen does not exist. Run build-tpcds-dbgen.sh to build it first."
exit 1
fi
if [[ -d ${TPCDS_QUERIE_DIR}/ ]]; then
echo "${TPCDS_QUERIE_DIR} exists. Remove it before re-generating"
exit 1
else
mkdir "${TPCDS_QUERIE_DIR}"
fi
cd "${TPCDS_DSQGEN_DIR}"
for i in {1..99}; do
sed -i '/define _END/d' "../query_templates/query${i}.tpl"
echo 'define _END = "";' >>"../query_templates/query${i}.tpl"
done
# generate query
"${TPCDS_DSQGEN_DIR}"/dsqgen \
-DIRECTORY ../query_templates/ \
-INPUT ../query_templates/templates.lst \
-DIALECT netezza \
-QUALIFY Y \
-SCALE "${SCALE}"
if [[ -f query_0.sql ]]; then
i=1
IFS=';'
query_strs=$(cat query_0.sql)
of="${TPCDS_QUERIE_DIR}/tpcds_queries.sql"
for q in ${query_strs}; do
if [[ ${i} -eq 2 ]]; then
echo "${q/from catalog_sales))/from catalog_sales) t)};" >>"${of}"
elif [[ ${i} -eq 5 ]]; then
q="${q// 14 days/ interval 14 day}" &&
q="${q//\'store\' || s_store_id as id/concat(\'store\', s_store_id) id}" &&
q="${q//\'catalog_page\' || cp_catalog_page_id as id/concat(\'catalog_page\', cp_catalog_page_id) id}" &&
q="${q//\'web_site\' || web_site_id as id/concat(\'web_site\', web_site_id) id}" &&
echo "${q};" >>"${of}"
elif [[ ${i} -eq 12 ]] || [[ ${i} -eq 21 ]] || [[ ${i} -eq 22 ]] || [[ ${i} -eq 44 ]] || [[ ${i} -eq 81 ]] || [[ ${i} -eq 102 ]]; then
q="${q//30 days/interval 30 day}" && echo "${q};" >>"${of}"
elif [[ ${i} -eq 14 ]]; then
# q="${q//and d3.d_year between 1999 AND 1999 + 2)/and d3.d_year between 1999 AND 1999 + 2) t}" && echo "${q};" >>"${of}"
q="${q//where i_brand_id = brand_id/t where i_brand_id = brand_id}" && echo "${q};" >>"${of}"
elif [[ ${i} -eq 17 ]] || [[ ${i} -eq 40 ]] || [[ ${i} -eq 86 ]] || [[ ${i} -eq 98 ]] || [[ ${i} -eq 99 ]]; then
q="${q//60 days)/interval 60 day)}" && echo "${q};" >>"${of}"
elif [[ ${i} -eq 24 ]]; then
q="${q//c_customer_sk))/c_customer_sk) t)}" &&
q="${q//best_ss_customer))/best_ss_customer)) t2}" &&
echo "${q};" >>"${of}"
elif [[ ${i} -eq 25 ]]; then
q="${q//c_customer_sk))/c_customer_sk) t)}" &&
q="${q//c_first_name)/c_first_name) t2}" &&
echo "${q};" >>"${of}"
elif [[ ${i} -eq 35 ]] || [[ ${i} -eq 96 ]]; then
q="${q//90 days)/interval 90 day)}" && echo "${q};" >>"${of}"
elif [[ ${i} -eq 53 ]]; then
q="${q//order by 1/ t order by 1}" && echo "${q};" >>"${of}"
elif [[ ${i} -eq 70 ]]; then
v=$(echo "${q}" | grep "||" | grep -o "'.*'" | sed -n '1p')
smc1=$(echo "${v//\'/}" | awk -F"|" '{print $1}')
smc2=$(echo "${v//\'/}" | awk -F"|" '{print $5}')
origin="'${smc1% }' || ',' || '${smc2# }'"
q="${q//${origin}/concat(concat(\'${smc1}\', \',\'), \'${smc2}\')}" && echo "${q};" >>"${of}"
elif [[ ${i} -eq 84 ]]; then
q="${q//30 days/interval 30 day}" &&
q="${q//\'store\' || store_id/concat(\'store\', store_id)}" &&
q="${q//\'catalog_page\' || catalog_page_id/concat(\'catalog_page\', catalog_page_id)}" &&
q="${q//\'web_site\' || web_site_id/concat(\'web_site\', web_site_id)}" &&
echo "${q};" >>"${of}"
elif [[ ${i} -eq 88 ]]; then
q="${q//coalesce(c_last_name,\'\') || \', \' || coalesce(c_first_name,\'\')/concat(concat(coalesce(c_last_name,\'\'), \',\'), coalesce(c_first_name,\'\'))}" &&
echo "${q};" >>"${of}"
else
echo "${q};" >>"${of}"
fi
i=$((i + 1))
done
echo -e "\033[32m
tpcds queries generated in:
${TPCDS_QUERIE_DIR}/tpcds_queries.sql
\033[0m"
else
echo -e "\033[31m ERROR: tpcds queries generate failed \033[0m" && exit 1
fi
cd - >/dev/null

View File

@ -0,0 +1,199 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
##############################################################
# This script is used to load generated TPC-DS data set into Doris.
##############################################################
set -eo pipefail
ROOT=$(dirname "$0")
ROOT=$(
cd "${ROOT}"
pwd
)
CURDIR="${ROOT}"
TPCDS_DATA_DIR="${CURDIR}/tpcds-data"
usage() {
echo "
Usage: $0 <options>
Optional options:
-c parallelism to load data of lineitem, orders, partsupp, default is 5.
Eg.
$0 load data using default value.
$0 -c 10 load lineitem, orders, partsupp table data using parallelism 10.
"
exit 1
}
OPTS=$(getopt \
-n "$0" \
-o '' \
-o 'hc:' \
-- "$@")
eval set -- "${OPTS}"
PARALLEL=5
HELP=0
if [[ $# == 0 ]]; then
usage
fi
while true; do
case "$1" in
-h)
HELP=1
shift
;;
-c)
PARALLEL=$2
shift 2
;;
--)
shift
break
;;
*)
echo "Internal error"
exit 1
;;
esac
done
if [[ ${HELP} -eq 1 ]]; then
usage
fi
# check if tpcds-data exists
if [[ ! -d "${TPCDS_DATA_DIR}"/ ]]; then
echo "${TPCDS_DATA_DIR} does not exist. Run sh gen-tpcds-data.sh first."
exit 1
fi
check_prerequest() {
local CMD=$1
local NAME=$2
if ! ${CMD} &>/dev/null; then
echo "${NAME} is missing. This script depends on cURL to load data to Doris."
exit 1
fi
}
check_prerequest "curl --version" "curl"
check_prerequest "jq --version" "jq"
# load tables
source "${CURDIR}/../conf/doris-cluster.conf"
export MYSQL_PWD=${PASSWORD}
echo "Parallelism: ${PARALLEL}"
echo "FE_HOST: ${FE_HOST}"
echo "FE_HTTP_PORT: ${FE_HTTP_PORT}"
echo "USER: ${USER}"
echo "DB: ${DB}"
declare -A table_columns=(
['call_center']='cc_call_center_sk, cc_call_center_id, cc_rec_start_date, cc_rec_end_date, cc_closed_date_sk, cc_open_date_sk, cc_name, cc_class, cc_employees, cc_sq_ft, cc_hours, cc_manager, cc_mkt_id, cc_mkt_class, cc_mkt_desc, cc_market_manager, cc_division, cc_division_name, cc_company, cc_company_name, cc_street_number, cc_street_name, cc_street_type, cc_suite_number, cc_city, cc_county, cc_state, cc_zip, cc_country, cc_gmt_offset, cc_tax_percentage'
['catalog_page']='cp_catalog_page_sk, cp_catalog_page_id, cp_start_date_sk, cp_end_date_sk, cp_department, cp_catalog_number, cp_catalog_page_number, cp_description, cp_type'
['catalog_returns']='cr_returned_date_sk, cr_returned_time_sk, cr_item_sk, cr_refunded_customer_sk, cr_refunded_cdemo_sk, cr_refunded_hdemo_sk, cr_refunded_addr_sk, cr_returning_customer_sk, cr_returning_cdemo_sk, cr_returning_hdemo_sk, cr_returning_addr_sk, cr_call_center_sk, cr_catalog_page_sk, cr_ship_mode_sk, cr_warehouse_sk, cr_reason_sk, cr_order_number, cr_return_quantity, cr_return_amount, cr_return_tax, cr_return_amt_inc_tax, cr_fee, cr_return_ship_cost, cr_refunded_cash, cr_reversed_charge, cr_store_credit, cr_net_loss'
['catalog_sales']='cs_sold_date_sk, cs_sold_time_sk, cs_ship_date_sk, cs_bill_customer_sk, cs_bill_cdemo_sk, cs_bill_hdemo_sk, cs_bill_addr_sk, cs_ship_customer_sk, cs_ship_cdemo_sk, cs_ship_hdemo_sk, cs_ship_addr_sk, cs_call_center_sk, cs_catalog_page_sk, cs_ship_mode_sk, cs_warehouse_sk, cs_item_sk, cs_promo_sk, cs_order_number, cs_quantity, cs_wholesale_cost, cs_list_price, cs_sales_price, cs_ext_discount_amt, cs_ext_sales_price, cs_ext_wholesale_cost, cs_ext_list_price, cs_ext_tax, cs_coupon_amt, cs_ext_ship_cost, cs_net_paid, cs_net_paid_inc_tax, cs_net_paid_inc_ship, cs_net_paid_inc_ship_tax, cs_net_profit'
['customer_address']='ca_address_sk, ca_address_id, ca_street_number, ca_street_name, ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset, ca_location_type'
['customer_demographics']='cd_demo_sk, cd_gender, cd_marital_status, cd_education_status, cd_purchase_estimate, cd_credit_rating, cd_dep_count, cd_dep_employed_count, cd_dep_college_count'
['customer']='c_customer_sk, c_customer_id, c_current_cdemo_sk, c_current_hdemo_sk, c_current_addr_sk, c_first_shipto_date_sk, c_first_sales_date_sk, c_salutation, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_day, c_birth_month, c_birth_year, c_birth_country, c_login, c_email_address, c_last_review_date_sk'
['date_dim']='d_date_sk, d_date_id, d_date, d_month_seq, d_week_seq, d_quarter_seq, d_year, d_dow, d_moy, d_dom, d_qoy, d_fy_year, d_fy_quarter_seq, d_fy_week_seq, d_day_name, d_quarter_name, d_holiday, d_weekend, d_following_holiday, d_first_dom, d_last_dom, d_same_day_ly, d_same_day_lq, d_current_day, d_current_week, d_current_month, d_current_quarter, d_current_year'
['dbgen_version']='dv_version, dv_create_date, dv_create_time, dv_cmdline_args'
['household_demographics']='hd_demo_sk, hd_income_band_sk, hd_buy_potential, hd_dep_count, hd_vehicle_count'
['income_band']='ib_income_band_sk, ib_lower_bound, ib_upper_bound'
['inventory']='inv_date_sk, inv_item_sk, inv_warehouse_sk, inv_quantity_on_hand'
['item']='i_item_sk, i_item_id, i_rec_start_date, i_rec_end_date, i_item_desc, i_current_price, i_wholesale_cost, i_brand_id, i_brand, i_class_id, i_class, i_category_id, i_category, i_manufact_id, i_manufact, i_size, i_formulation, i_color, i_units, i_container, i_manager_id, i_product_name'
['promotion']='p_promo_sk, p_promo_id, p_start_date_sk, p_end_date_sk, p_item_sk, p_cost, p_response_targe, p_promo_name, p_channel_dmail, p_channel_email, p_channel_catalog, p_channel_tv, p_channel_radio, p_channel_press, p_channel_event, p_channel_demo, p_channel_details, p_purpose, p_discount_active'
['reason']='r_reason_sk, r_reason_id, r_reason_desc'
['ship_mode']='sm_ship_mode_sk, sm_ship_mode_id, sm_type, sm_code, sm_carrier, sm_contract'
['store']='s_store_sk, s_store_id, s_rec_start_date, s_rec_end_date, s_closed_date_sk, s_store_name, s_number_employees, s_floor_space, s_hours, s_manager, s_market_id, s_geography_class, s_market_desc, s_market_manager, s_division_id, s_division_name, s_company_id, s_company_name, s_street_number, s_street_name, s_street_type, s_suite_number, s_city, s_county, s_state, s_zip, s_country, s_gmt_offset, s_tax_precentage'
['store_returns']='sr_returned_date_sk, sr_return_time_sk, sr_item_sk, sr_customer_sk, sr_cdemo_sk, sr_hdemo_sk, sr_addr_sk, sr_store_sk, sr_reason_sk, sr_ticket_number, sr_return_quantity, sr_return_amt, sr_return_tax, sr_return_amt_inc_tax, sr_fee, sr_return_ship_cost, sr_refunded_cash, sr_reversed_charge, sr_store_credit, sr_net_loss'
['store_sales']='ss_sold_date_sk, ss_sold_time_sk, ss_item_sk, ss_customer_sk, ss_cdemo_sk, ss_hdemo_sk, ss_addr_sk, ss_store_sk, ss_promo_sk, ss_ticket_number, ss_quantity, ss_wholesale_cost, ss_list_price, ss_sales_price, ss_ext_discount_amt, ss_ext_sales_price, ss_ext_wholesale_cost, ss_ext_list_price, ss_ext_tax, ss_coupon_amt, ss_net_paid, ss_net_paid_inc_tax, ss_net_profit'
['time_dim']='t_time_sk, t_time_id, t_time, t_hour, t_minute, t_second, t_am_pm, t_shift, t_sub_shift, t_meal_time'
['warehouse']='w_warehouse_sk, w_warehouse_id, w_warehouse_name, w_warehouse_sq_ft, w_street_number, w_street_name, w_street_type, w_suite_number, w_city, w_county, w_state, w_zip, w_country, w_gmt_offset'
['web_page']='wp_web_page_sk, wp_web_page_id, wp_rec_start_date, wp_rec_end_date, wp_creation_date_sk, wp_access_date_sk, wp_autogen_flag, wp_customer_sk, wp_url, wp_type, wp_char_count, wp_link_count, wp_image_count, wp_max_ad_count'
['web_returns']='wr_returned_date_sk, wr_returned_time_sk, wr_item_sk, wr_refunded_customer_sk, wr_refunded_cdemo_sk, wr_refunded_hdemo_sk, wr_refunded_addr_sk, wr_returning_customer_sk, wr_returning_cdemo_sk, wr_returning_hdemo_sk, wr_returning_addr_sk, wr_web_page_sk, wr_reason_sk, wr_order_number, wr_return_quantity, wr_return_amt, wr_return_tax, wr_return_amt_inc_tax, wr_fee, wr_return_ship_cost, wr_refunded_cash, wr_reversed_charge, wr_account_credit, wr_net_loss'
['web_sales']='ws_sold_date_sk, ws_sold_time_sk, ws_ship_date_sk, ws_item_sk, ws_bill_customer_sk, ws_bill_cdemo_sk, ws_bill_hdemo_sk, ws_bill_addr_sk, ws_ship_customer_sk, ws_ship_cdemo_sk, ws_ship_hdemo_sk, ws_ship_addr_sk, ws_web_page_sk, ws_web_site_sk, ws_ship_mode_sk, ws_warehouse_sk, ws_promo_sk, ws_order_number, ws_quantity, ws_wholesale_cost, ws_list_price, ws_sales_price, ws_ext_discount_amt, ws_ext_sales_price, ws_ext_wholesale_cost, ws_ext_list_price, ws_ext_tax, ws_coupon_amt, ws_ext_ship_cost, ws_net_paid, ws_net_paid_inc_tax, ws_net_paid_inc_ship, ws_net_paid_inc_ship_tax, ws_net_profit'
['web_site']='web_site_sk, web_site_id, web_rec_start_date, web_rec_end_date, web_name, web_open_date_sk, web_close_date_sk, web_class, web_manager, web_mkt_id, web_mkt_class, web_mkt_desc, web_market_manager, web_company_id, web_company_name, web_street_number, web_street_name, web_street_type, web_suite_number, web_city, web_county, web_state, web_zip, web_country, web_gmt_offset, web_tax_percentage'
)
# set parallelism
# 以PID为名, 防止创建命名管道时与已有文件重名,从而失败
fifo="/tmp/$$.fifo"
# 创建命名管道
mkfifo "${fifo}"
# 以读写方式打开命名管道,文件标识符fd为3,fd可取除0,1,2,5外0-9中的任意数字
exec 3<>"${fifo}"
# 删除文件, 也可不删除, 不影响后面操作
rm -rf "${fifo}"
# 在fd3中放置$PARALLEL个空行作为令牌
for ((i = 1; i <= PARALLEL; i++)); do
echo >&3
done
# start load
start_time=$(date +%s)
echo "Start time: $(date)"
for table_name in ${!table_columns[*]}; do
# 领取令牌, 即从fd3中读取行, 每次一行
# 对管道,读一行便少一行,每次只能读取一行
# 所有行读取完毕, 执行挂起, 直到管道再次有可读行
# 因此实现了进程数量控制
read -r -u3
# 要批量执行的命令放在大括号内, 后台运行
{
for file in "${TPCDS_DATA_DIR}/${table_name}"*.dat; do
ret=$(curl \
--location-trusted \
-u "${USER}":"${PASSWORD:=}" \
-H "column_separator:|" \
-H "columns: ${table_columns[${table_name}]}" \
-T "${file}" \
http://"${FE_HOST}":"${FE_HTTP_PORT:=8030}"/api/"${DB}"/"${table_name}"/_stream_load 2>/dev/null)
if [[ $(echo "${ret}" | jq ".Status") == '"Success"' ]]; then
echo "----loaded ${file}"
else
echo -e "\033[31m----load ${file} FAIL...\033[0m"
fi
done
sleep 2
# 归还令牌, 即进程结束后,再写入一行,使挂起的循环继续执行
echo >&3
} &
done
# 等待所有的后台子进程结束
wait
# 删除文件标识符
exec 3>&-
end_time=$(date +%s)
echo "End time: $(date)"
echo "Finish load tpcds data, Time taken: $((end_time - start_time)) seconds"

View File

@ -0,0 +1,133 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
##############################################################
# This script is used to run TPC-DS 99 queries
##############################################################
set -eo pipefail
ROOT=$(dirname "$0")
ROOT=$(
cd "${ROOT}"
pwd
)
CURDIR="${ROOT}"
TPCDS_QUERIES_DIR="${CURDIR}/../queries"
usage() {
echo "
This script is used to run TPC-DS 99 queries,
will use mysql client to connect Doris server which parameter is specified in doris-cluster.conf file.
Usage: $0
"
exit 1
}
OPTS=$(getopt \
-n "$0" \
-o '' \
-- "$@")
eval set -- "${OPTS}"
HELP=0
if [[ $# == 0 ]]; then
usage
fi
while true; do
case "$1" in
-h)
HELP=1
shift
;;
--)
shift
break
;;
*)
echo "Internal error"
exit 1
;;
esac
done
if [[ ${HELP} -eq 1 ]]; then
usage
fi
check_prerequest() {
local CMD=$1
local NAME=$2
if ! ${CMD}; then
echo "${NAME} is missing. This script depends on mysql to create tables in Doris."
exit 1
fi
}
check_prerequest "mysql --version" "mysql"
#shellcheck source=/dev/null
source "${CURDIR}/../conf/doris-cluster.conf"
export MYSQL_PWD=${PASSWORD:-}
echo "FE_HOST: ${FE_HOST:='127.0.0.1'}"
echo "FE_QUERY_PORT: ${FE_QUERY_PORT:='9030'}"
echo "USER: ${USER:='root'}"
echo "DB: ${DB:='tpcds'}"
echo "Time Unit: ms"
run_sql() {
echo "$*"
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e "$*"
}
echo '============================================'
run_sql "show variables;"
echo '============================================'
run_sql "show table status;"
echo '============================================'
sum=0
IFS=';'
i=1
query_strs=$(cat "${TPCDS_QUERIES_DIR}/tpcds_queries.sql")
for query_str in ${query_strs}; do
# echo '============================================'
# echo "${query_str} "
# echo '============================================'
total=0
run=3
# Each query is executed ${run} times and takes the average time
for ((j = 0; j < run; j++)); do
# if [[ $i -lt 70 ]]; then continue; fi #########
start=$(date +%s%3N)
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" --comments -e"${query_str}" >/dev/null
end=$(date +%s%3N)
total=$((total + end - start))
done
cost=$((total / run))
echo "q${i}: ${cost} ms"
sum=$((sum + cost))
i=$((i + 1))
done <"${TPCDS_QUERIES_DIR}/tpcds_queries.sql"
echo "Total cost: ${sum} ms"
echo 'Finish tpcds queries.'

View File

@ -0,0 +1,29 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Any of FE host
export FE_HOST='127.0.0.1'
# http_port in fe.conf
export FE_HTTP_PORT=8030
# query_port in fe.conf
export FE_QUERY_PORT=9030
# Doris username
export USER='root'
# Doris password
export PASSWORD=''
# The database where TPC-DS tables located
export DB='tpcds'

View File

@ -0,0 +1,630 @@
-- Licensed to the Apache Software Foundation (ASF) under one
-- or more contributor license agreements. See the NOTICE file
-- distributed with this work for additional information
-- regarding copyright ownership. The ASF licenses this file
-- to you under the Apache License, Version 2.0 (the
-- "License"); you may not use this file except in compliance
-- with the License. You may obtain a copy of the License at
--
-- http://www.apache.org/licenses/LICENSE-2.0
--
-- Unless required by applicable law or agreed to in writing,
-- software distributed under the License is distributed on an
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
-- KIND, either express or implied. See the License for the
-- specific language governing permissions and limitations
-- under the License.
CREATE TABLE IF NOT EXISTS customer_demographics (
cd_demo_sk bigint not null,
cd_gender char(1),
cd_marital_status char(1),
cd_education_status char(20),
cd_purchase_estimate integer,
cd_credit_rating char(10),
cd_dep_count integer,
cd_dep_employed_count integer,
cd_dep_college_count integer
)
DUPLICATE KEY(cd_demo_sk)
DISTRIBUTED BY HASH(cd_gender) BUCKETS 12
PROPERTIES (
"replication_num" = "1"
);
CREATE TABLE IF NOT EXISTS reason (
r_reason_sk bigint not null,
r_reason_id char(16) not null,
r_reason_desc char(100)
)
DUPLICATE KEY(r_reason_sk)
DISTRIBUTED BY HASH(r_reason_sk) BUCKETS 1
PROPERTIES (
"replication_num" = "1"
);
CREATE TABLE IF NOT EXISTS date_dim (
d_date_sk bigint not null,
d_date_id char(16) not null,
d_date date,
d_month_seq integer,
d_week_seq integer,
d_quarter_seq integer,
d_year integer,
d_dow integer,
d_moy integer,
d_dom integer,
d_qoy integer,
d_fy_year integer,
d_fy_quarter_seq integer,
d_fy_week_seq integer,
d_day_name char(9),
d_quarter_name char(6),
d_holiday char(1),
d_weekend char(1),
d_following_holiday char(1),
d_first_dom integer,
d_last_dom integer,
d_same_day_ly integer,
d_same_day_lq integer,
d_current_day char(1),
d_current_week char(1),
d_current_month char(1),
d_current_quarter char(1),
d_current_year char(1)
)
DUPLICATE KEY(d_date_sk)
DISTRIBUTED BY HASH(d_date_sk) BUCKETS 12
PROPERTIES (
"replication_num" = "1"
);
CREATE TABLE IF NOT EXISTS warehouse (
w_warehouse_sk bigint not null,
w_warehouse_id char(16) not null,
w_warehouse_name varchar(20),
w_warehouse_sq_ft integer,
w_street_number char(10),
w_street_name varchar(60),
w_street_type char(15),
w_suite_number char(10),
w_city varchar(60),
w_county varchar(30),
w_state char(2),
w_zip char(10),
w_country varchar(20),
w_gmt_offset decimal(5,2)
)
DUPLICATE KEY(w_warehouse_sk)
DISTRIBUTED BY HASH(w_warehouse_sk) BUCKETS 1
PROPERTIES (
"replication_num" = "1"
);
CREATE TABLE IF NOT EXISTS catalog_sales (
cs_item_sk bigint not null,
cs_order_number bigint not null,
cs_sold_date_sk bigint,
cs_sold_time_sk bigint,
cs_ship_date_sk bigint,
cs_bill_customer_sk bigint,
cs_bill_cdemo_sk bigint,
cs_bill_hdemo_sk bigint,
cs_bill_addr_sk bigint,
cs_ship_customer_sk bigint,
cs_ship_cdemo_sk bigint,
cs_ship_hdemo_sk bigint,
cs_ship_addr_sk bigint,
cs_call_center_sk bigint,
cs_catalog_page_sk bigint,
cs_ship_mode_sk bigint,
cs_warehouse_sk bigint,
cs_promo_sk bigint,
cs_quantity integer,
cs_wholesale_cost decimal(7,2),
cs_list_price decimal(7,2),
cs_sales_price decimal(7,2),
cs_ext_discount_amt decimal(7,2),
cs_ext_sales_price decimal(7,2),
cs_ext_wholesale_cost decimal(7,2),
cs_ext_list_price decimal(7,2),
cs_ext_tax decimal(7,2),
cs_coupon_amt decimal(7,2),
cs_ext_ship_cost decimal(7,2),
cs_net_paid decimal(7,2),
cs_net_paid_inc_tax decimal(7,2),
cs_net_paid_inc_ship decimal(7,2),
cs_net_paid_inc_ship_tax decimal(7,2),
cs_net_profit decimal(7,2)
)
DUPLICATE KEY(cs_item_sk, cs_order_number)
DISTRIBUTED BY HASH(cs_item_sk, cs_order_number) BUCKETS 32
PROPERTIES (
"replication_num" = "1",
"colocate_with" = "catalog"
);
CREATE TABLE IF NOT EXISTS call_center (
cc_call_center_sk bigint not null,
cc_call_center_id char(16) not null,
cc_rec_start_date date,
cc_rec_end_date date,
cc_closed_date_sk integer,
cc_open_date_sk integer,
cc_name varchar(50),
cc_class varchar(50),
cc_employees integer,
cc_sq_ft integer,
cc_hours char(20),
cc_manager varchar(40),
cc_mkt_id integer,
cc_mkt_class char(50),
cc_mkt_desc varchar(100),
cc_market_manager varchar(40),
cc_division integer,
cc_division_name varchar(50),
cc_company integer,
cc_company_name char(50),
cc_street_number char(10),
cc_street_name varchar(60),
cc_street_type char(15),
cc_suite_number char(10),
cc_city varchar(60),
cc_county varchar(30),
cc_state char(2),
cc_zip char(10),
cc_country varchar(20),
cc_gmt_offset decimal(5,2),
cc_tax_percentage decimal(5,2)
)
DUPLICATE KEY(cc_call_center_sk)
DISTRIBUTED BY HASH(cc_call_center_sk) BUCKETS 1
PROPERTIES (
"replication_num" = "1"
);
CREATE TABLE IF NOT EXISTS inventory (
inv_date_sk bigint not null,
inv_item_sk bigint not null,
inv_warehouse_sk bigint,
inv_quantity_on_hand integer
)
DUPLICATE KEY(inv_date_sk, inv_item_sk, inv_warehouse_sk)
DISTRIBUTED BY HASH(inv_date_sk, inv_item_sk, inv_warehouse_sk) BUCKETS 32
PROPERTIES (
"replication_num" = "1"
);
CREATE TABLE IF NOT EXISTS catalog_returns (
cr_item_sk bigint not null,
cr_order_number bigint not null,
cr_returned_date_sk bigint,
cr_returned_time_sk bigint,
cr_refunded_customer_sk bigint,
cr_refunded_cdemo_sk bigint,
cr_refunded_hdemo_sk bigint,
cr_refunded_addr_sk bigint,
cr_returning_customer_sk bigint,
cr_returning_cdemo_sk bigint,
cr_returning_hdemo_sk bigint,
cr_returning_addr_sk bigint,
cr_call_center_sk bigint,
cr_catalog_page_sk bigint,
cr_ship_mode_sk bigint,
cr_warehouse_sk bigint,
cr_reason_sk bigint,
cr_return_quantity integer,
cr_return_amount decimal(7,2),
cr_return_tax decimal(7,2),
cr_return_amt_inc_tax decimal(7,2),
cr_fee decimal(7,2),
cr_return_ship_cost decimal(7,2),
cr_refunded_cash decimal(7,2),
cr_reversed_charge decimal(7,2),
cr_store_credit decimal(7,2),
cr_net_loss decimal(7,2)
)
DUPLICATE KEY(cr_item_sk, cr_order_number)
DISTRIBUTED BY HASH(cr_item_sk, cr_order_number) BUCKETS 32
PROPERTIES (
"replication_num" = "1",
"colocate_with" = "catalog"
);
CREATE TABLE IF NOT EXISTS household_demographics (
hd_demo_sk bigint not null,
hd_income_band_sk bigint,
hd_buy_potential char(15),
hd_dep_count integer,
hd_vehicle_count integer
)
DUPLICATE KEY(hd_demo_sk)
DISTRIBUTED BY HASH(hd_demo_sk) BUCKETS 3
PROPERTIES (
"replication_num" = "1"
);
CREATE TABLE IF NOT EXISTS customer_address (
ca_address_sk bigint not null,
ca_address_id char(16) not null,
ca_street_number char(10),
ca_street_name varchar(60),
ca_street_type char(15),
ca_suite_number char(10),
ca_city varchar(60),
ca_county varchar(30),
ca_state char(2),
ca_zip char(10),
ca_country varchar(20),
ca_gmt_offset decimal(5,2),
ca_location_type char(20)
)
DUPLICATE KEY(ca_address_sk)
DISTRIBUTED BY HASH(ca_address_sk) BUCKETS 12
PROPERTIES (
"replication_num" = "1"
);
CREATE TABLE IF NOT EXISTS income_band (
ib_income_band_sk bigint not null,
ib_lower_bound integer,
ib_upper_bound integer
)
DUPLICATE KEY(ib_income_band_sk)
DISTRIBUTED BY HASH(ib_income_band_sk) BUCKETS 1
PROPERTIES (
"replication_num" = "1"
);
CREATE TABLE IF NOT EXISTS catalog_page (
cp_catalog_page_sk bigint not null,
cp_catalog_page_id char(16) not null,
cp_start_date_sk integer,
cp_end_date_sk integer,
cp_department varchar(50),
cp_catalog_number integer,
cp_catalog_page_number integer,
cp_description varchar(100),
cp_type varchar(100)
)
DUPLICATE KEY(cp_catalog_page_sk)
DISTRIBUTED BY HASH(cp_catalog_page_sk) BUCKETS 3
PROPERTIES (
"replication_num" = "1"
);
CREATE TABLE IF NOT EXISTS item (
i_item_sk bigint not null,
i_item_id char(16) not null,
i_rec_start_date date,
i_rec_end_date date,
i_item_desc varchar(200),
i_current_price decimal(7,2),
i_wholesale_cost decimal(7,2),
i_brand_id integer,
i_brand char(50),
i_class_id integer,
i_class char(50),
i_category_id integer,
i_category char(50),
i_manufact_id integer,
i_manufact char(50),
i_size char(20),
i_formulation char(20),
i_color char(20),
i_units char(10),
i_container char(10),
i_manager_id integer,
i_product_name char(50)
)
DUPLICATE KEY(i_item_sk)
DISTRIBUTED BY HASH(i_item_sk) BUCKETS 12
PROPERTIES (
"replication_num" = "1"
);
CREATE TABLE IF NOT EXISTS web_returns (
wr_item_sk bigint not null,
wr_order_number bigint not null,
wr_returned_date_sk bigint,
wr_returned_time_sk bigint,
wr_refunded_customer_sk bigint,
wr_refunded_cdemo_sk bigint,
wr_refunded_hdemo_sk bigint,
wr_refunded_addr_sk bigint,
wr_returning_customer_sk bigint,
wr_returning_cdemo_sk bigint,
wr_returning_hdemo_sk bigint,
wr_returning_addr_sk bigint,
wr_web_page_sk bigint,
wr_reason_sk bigint,
wr_return_quantity integer,
wr_return_amt decimal(7,2),
wr_return_tax decimal(7,2),
wr_return_amt_inc_tax decimal(7,2),
wr_fee decimal(7,2),
wr_return_ship_cost decimal(7,2),
wr_refunded_cash decimal(7,2),
wr_reversed_charge decimal(7,2),
wr_account_credit decimal(7,2),
wr_net_loss decimal(7,2)
)
DUPLICATE KEY(wr_item_sk, wr_order_number)
DISTRIBUTED BY HASH(wr_item_sk, wr_order_number) BUCKETS 32
PROPERTIES (
"replication_num" = "1",
"colocate_with" = "web"
);
CREATE TABLE IF NOT EXISTS web_site (
web_site_sk bigint not null,
web_site_id char(16) not null,
web_rec_start_date date,
web_rec_end_date date,
web_name varchar(50),
web_open_date_sk bigint,
web_close_date_sk bigint,
web_class varchar(50),
web_manager varchar(40),
web_mkt_id integer,
web_mkt_class varchar(50),
web_mkt_desc varchar(100),
web_market_manager varchar(40),
web_company_id integer,
web_company_name char(50),
web_street_number char(10),
web_street_name varchar(60),
web_street_type char(15),
web_suite_number char(10),
web_city varchar(60),
web_county varchar(30),
web_state char(2),
web_zip char(10),
web_country varchar(20),
web_gmt_offset decimal(5,2),
web_tax_percentage decimal(5,2)
)
DUPLICATE KEY(web_site_sk)
DISTRIBUTED BY HASH(web_site_sk) BUCKETS 1
PROPERTIES (
"replication_num" = "1"
);
CREATE TABLE IF NOT EXISTS promotion (
p_promo_sk bigint not null,
p_promo_id char(16) not null,
p_start_date_sk bigint,
p_end_date_sk bigint,
p_item_sk bigint,
p_cost decimal(15,2),
p_response_targe integer,
p_promo_name char(50),
p_channel_dmail char(1),
p_channel_email char(1),
p_channel_catalog char(1),
p_channel_tv char(1),
p_channel_radio char(1),
p_channel_press char(1),
p_channel_event char(1),
p_channel_demo char(1),
p_channel_details varchar(100),
p_purpose char(15),
p_discount_active char(1)
)
DUPLICATE KEY(p_promo_sk)
DISTRIBUTED BY HASH(p_promo_sk) BUCKETS 1
PROPERTIES (
"replication_num" = "1"
);
CREATE TABLE IF NOT EXISTS web_sales (
ws_item_sk bigint not null,
ws_order_number bigint not null,
ws_sold_date_sk bigint,
ws_sold_time_sk bigint,
ws_ship_date_sk bigint,
ws_bill_customer_sk bigint,
ws_bill_cdemo_sk bigint,
ws_bill_hdemo_sk bigint,
ws_bill_addr_sk bigint,
ws_ship_customer_sk bigint,
ws_ship_cdemo_sk bigint,
ws_ship_hdemo_sk bigint,
ws_ship_addr_sk bigint,
ws_web_page_sk bigint,
ws_web_site_sk bigint,
ws_ship_mode_sk bigint,
ws_warehouse_sk bigint,
ws_promo_sk bigint,
ws_quantity integer,
ws_wholesale_cost decimal(7,2),
ws_list_price decimal(7,2),
ws_sales_price decimal(7,2),
ws_ext_discount_amt decimal(7,2),
ws_ext_sales_price decimal(7,2),
ws_ext_wholesale_cost decimal(7,2),
ws_ext_list_price decimal(7,2),
ws_ext_tax decimal(7,2),
ws_coupon_amt decimal(7,2),
ws_ext_ship_cost decimal(7,2),
ws_net_paid decimal(7,2),
ws_net_paid_inc_tax decimal(7,2),
ws_net_paid_inc_ship decimal(7,2),
ws_net_paid_inc_ship_tax decimal(7,2),
ws_net_profit decimal(7,2)
)
DUPLICATE KEY(ws_item_sk, ws_order_number)
DISTRIBUTED BY HASH(ws_item_sk, ws_order_number) BUCKETS 32
PROPERTIES (
"replication_num" = "1",
"colocate_with" = "web"
);
CREATE TABLE IF NOT EXISTS store (
s_store_sk bigint not null,
s_store_id char(16) not null,
s_rec_start_date date,
s_rec_end_date date,
s_closed_date_sk bigint,
s_store_name varchar(50),
s_number_employees integer,
s_floor_space integer,
s_hours char(20),
s_manager varchar(40),
s_market_id integer,
s_geography_class varchar(100),
s_market_desc varchar(100),
s_market_manager varchar(40),
s_division_id integer,
s_division_name varchar(50),
s_company_id integer,
s_company_name varchar(50),
s_street_number varchar(10),
s_street_name varchar(60),
s_street_type char(15),
s_suite_number char(10),
s_city varchar(60),
s_county varchar(30),
s_state char(2),
s_zip char(10),
s_country varchar(20),
s_gmt_offset decimal(5,2),
s_tax_precentage decimal(5,2)
)
DUPLICATE KEY(s_store_sk)
DISTRIBUTED BY HASH(s_store_sk) BUCKETS 1
PROPERTIES (
"replication_num" = "1"
);
CREATE TABLE IF NOT EXISTS time_dim (
t_time_sk bigint not null,
t_time_id char(16) not null,
t_time integer,
t_hour integer,
t_minute integer,
t_second integer,
t_am_pm char(2),
t_shift char(20),
t_sub_shift char(20),
t_meal_time char(20)
)
DUPLICATE KEY(t_time_sk)
DISTRIBUTED BY HASH(t_time_sk) BUCKETS 12
PROPERTIES (
"replication_num" = "1"
);
CREATE TABLE IF NOT EXISTS web_page (
wp_web_page_sk bigint not null,
wp_web_page_id char(16) not null,
wp_rec_start_date date,
wp_rec_end_date date,
wp_creation_date_sk bigint,
wp_access_date_sk bigint,
wp_autogen_flag char(1),
wp_customer_sk bigint,
wp_url varchar(100),
wp_type char(50),
wp_char_count integer,
wp_link_count integer,
wp_image_count integer,
wp_max_ad_count integer
)
DUPLICATE KEY(wp_web_page_sk)
DISTRIBUTED BY HASH(wp_web_page_sk) BUCKETS 1
PROPERTIES (
"replication_num" = "1"
);
CREATE TABLE IF NOT EXISTS store_returns (
sr_item_sk bigint not null,
sr_ticket_number bigint not null,
sr_returned_date_sk bigint,
sr_return_time_sk bigint,
sr_customer_sk bigint,
sr_cdemo_sk bigint,
sr_hdemo_sk bigint,
sr_addr_sk bigint,
sr_store_sk bigint,
sr_reason_sk bigint,
sr_return_quantity integer,
sr_return_amt decimal(7,2),
sr_return_tax decimal(7,2),
sr_return_amt_inc_tax decimal(7,2),
sr_fee decimal(7,2),
sr_return_ship_cost decimal(7,2),
sr_refunded_cash decimal(7,2),
sr_reversed_charge decimal(7,2),
sr_store_credit decimal(7,2),
sr_net_loss decimal(7,2)
)
duplicate key(sr_item_sk, sr_ticket_number)
distributed by hash (sr_item_sk, sr_ticket_number) buckets 32
properties (
"replication_num" = "1",
"colocate_with" = "store"
);
CREATE TABLE IF NOT EXISTS store_sales (
ss_item_sk bigint not null,
ss_ticket_number bigint not null,
ss_sold_date_sk bigint,
ss_sold_time_sk bigint,
ss_customer_sk bigint,
ss_cdemo_sk bigint,
ss_hdemo_sk bigint,
ss_addr_sk bigint,
ss_store_sk bigint,
ss_promo_sk bigint,
ss_quantity integer,
ss_wholesale_cost decimal(7,2),
ss_list_price decimal(7,2),
ss_sales_price decimal(7,2),
ss_ext_discount_amt decimal(7,2),
ss_ext_sales_price decimal(7,2),
ss_ext_wholesale_cost decimal(7,2),
ss_ext_list_price decimal(7,2),
ss_ext_tax decimal(7,2),
ss_coupon_amt decimal(7,2),
ss_net_paid decimal(7,2),
ss_net_paid_inc_tax decimal(7,2),
ss_net_profit decimal(7,2)
)
DUPLICATE KEY(ss_item_sk, ss_ticket_number)
DISTRIBUTED BY HASH(ss_item_sk, ss_ticket_number) BUCKETS 32
PROPERTIES (
"replication_num" = "1",
"colocate_with" = "store"
);
CREATE TABLE IF NOT EXISTS ship_mode (
sm_ship_mode_sk bigint not null,
sm_ship_mode_id char(16) not null,
sm_type char(30),
sm_code char(10),
sm_carrier char(20),
sm_contract char(20)
)
DUPLICATE KEY(sm_ship_mode_sk)
DISTRIBUTED BY HASH(sm_ship_mode_sk) BUCKETS 1
PROPERTIES (
"replication_num" = "1"
);
CREATE TABLE IF NOT EXISTS customer (
c_customer_sk bigint not null,
c_customer_id char(16) not null,
c_current_cdemo_sk bigint,
c_current_hdemo_sk bigint,
c_current_addr_sk bigint,
c_first_shipto_date_sk bigint,
c_first_sales_date_sk bigint,
c_salutation char(10),
c_first_name char(20),
c_last_name char(30),
c_preferred_cust_flag char(1),
c_birth_day integer,
c_birth_month integer,
c_birth_year integer,
c_birth_country varchar(20),
c_login char(13),
c_email_address char(50),
c_last_review_date_sk bigint
)
DUPLICATE KEY(c_customer_sk)
DISTRIBUTED BY HASH(c_customer_id) BUCKETS 12
PROPERTIES (
"replication_num" = "1"
);
CREATE TABLE IF NOT EXISTS dbgen_version
(
dv_version varchar(16) ,
dv_create_date date ,
dv_create_time datetime ,
dv_cmdline_args varchar(200)
)
DUPLICATE KEY(dv_version)
DISTRIBUTED BY HASH(dv_version) BUCKETS 1
PROPERTIES (
"replication_num" = "1"
);