[tools](tpc-ds) add script tools to run tpc-ds conveniently (#17366)
build-tpcds-tools.sh gen-tpcds-data.sh gen-tpcds-queries.sh create-tpcds-tables.sh load-tpcds-data.sh run-tpcds-queries.sh generate data and queries support specify SCALE, create table may need to be edited handly to specify BUCKETS or change int to bigint if SCALE is too big. --------- Co-authored-by: stephen <hello_stephen@@qq.com>
This commit is contained in:
47
tools/tpcds-tools/README.md
Normal file
47
tools/tpcds-tools/README.md
Normal file
@ -0,0 +1,47 @@
|
||||
<!--
|
||||
Licensed to the Apache Software Foundation (ASF) under one
|
||||
or more contributor license agreements. See the NOTICE file
|
||||
distributed with this work for additional information
|
||||
regarding copyright ownership. The ASF licenses this file
|
||||
to you under the Apache License, Version 2.0 (the
|
||||
"License"); you may not use this file except in compliance
|
||||
with the License. You may obtain a copy of the License at
|
||||
|
||||
http://www.apache.org/licenses/LICENSE-2.0
|
||||
|
||||
Unless required by applicable law or agreed to in writing,
|
||||
software distributed under the License is distributed on an
|
||||
"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
KIND, either express or implied. See the License for the
|
||||
specific language governing permissions and limitations
|
||||
under the License.
|
||||
-->
|
||||
|
||||
## Usage
|
||||
|
||||
These scripts are used to make tpc-ds test.
|
||||
follow the steps below:
|
||||
|
||||
### 1. build tpc-ds dsdgen dsqgen tool.
|
||||
|
||||
./bin/build-tpcds-tools.sh
|
||||
|
||||
### 2. generate tpc-ds data. use -h for more infomations.
|
||||
|
||||
./bin/gen-tpcds-data.sh -s 1
|
||||
|
||||
### 3. generate tpc-ds queries. use -h for more infomations.
|
||||
|
||||
./bin/gen-tpcds-queries.sh -s 1
|
||||
|
||||
### 4. create tpc-ds tables. modify `conf/doris-cluster.conf` to specify doris info, then run script below.
|
||||
|
||||
./bin/create-tpcds-tables.sh
|
||||
|
||||
### 5. load tpc-ds data. use -h for help.
|
||||
|
||||
./bin/load-tpcds-data.sh
|
||||
|
||||
### 6. run tpc-ds queries.
|
||||
|
||||
./bin/run-tpcds-queries.sh
|
||||
74
tools/tpcds-tools/bin/build-tpcds-tools.sh
Executable file
74
tools/tpcds-tools/bin/build-tpcds-tools.sh
Executable file
@ -0,0 +1,74 @@
|
||||
#!/usr/bin/env bash
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
##############################################################
|
||||
# This script is used to build tpcds-dsdgen
|
||||
# TPC-DS_Tools_v3.2.0.zip is from https://www.tpc.org/tpc_documents_current_versions/current_specifications5.asp
|
||||
# Usage:
|
||||
# sh build-tpcds-dsdgen.sh
|
||||
##############################################################
|
||||
|
||||
set -eo pipefail
|
||||
|
||||
ROOT=$(dirname "$0")
|
||||
ROOT=$(
|
||||
cd "${ROOT}"
|
||||
pwd
|
||||
)
|
||||
|
||||
CURDIR="${ROOT}"
|
||||
TPCDS_DBGEN_DIR="${CURDIR}/DSGen-software-code-3.2.0rc1/tools"
|
||||
|
||||
check_prerequest() {
|
||||
local CMD=$1
|
||||
local NAME=$2
|
||||
if ! ${CMD} >/dev/null; then
|
||||
echo "${NAME} is missing. This script depends on unzip to extract files from TPC-DS_Tools_v3.2.0.zip"
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
check_prerequest "unzip -h" "unzip"
|
||||
|
||||
# download tpcds tools package first
|
||||
if [[ -d "${CURDIR}/DSGen-software-code-3.2.0rc1" ]]; then
|
||||
echo "If you want to rebuild TPC-DS_Tools_v3.2.0 again, please delete ${CURDIR}/DSGen-software-code-3.2.0rc1 first." && exit 1
|
||||
elif [[ -f "${CURDIR}/TPC-DS_Tools_v3.2.0.zip" ]]; then
|
||||
unzip TPC-DS_Tools_v3.2.0.zip -d "${CURDIR}/"
|
||||
else
|
||||
wget "https://doris-build-1308700295.cos.ap-beijing.myqcloud.com/tools/TPC-DS_Tools_v3.2.0.zip"
|
||||
unzip TPC-DS_Tools_v3.2.0.zip -d "${CURDIR}/"
|
||||
fi
|
||||
|
||||
# compile tpcds-dsdgen
|
||||
cd "${TPCDS_DBGEN_DIR}/"
|
||||
make >/dev/null
|
||||
cd -
|
||||
|
||||
# check
|
||||
if [[ -f ${TPCDS_DBGEN_DIR}/dsdgen ]]; then
|
||||
echo "
|
||||
################
|
||||
Build succeed!
|
||||
################
|
||||
Run ${TPCDS_DBGEN_DIR}/dsdgen -h"
|
||||
exit 0
|
||||
else
|
||||
echo "Build failed!"
|
||||
exit 1
|
||||
fi
|
||||
100
tools/tpcds-tools/bin/create-tpcds-tables.sh
Executable file
100
tools/tpcds-tools/bin/create-tpcds-tables.sh
Executable file
@ -0,0 +1,100 @@
|
||||
#!/usr/bin/env bash
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
##############################################################
|
||||
# This script is used to create TPC-DS tables
|
||||
##############################################################
|
||||
|
||||
set -eo pipefail
|
||||
|
||||
ROOT=$(dirname "$0")
|
||||
ROOT=$(
|
||||
cd "${ROOT}"
|
||||
pwd
|
||||
)
|
||||
|
||||
CURDIR=${ROOT}
|
||||
|
||||
usage() {
|
||||
echo "
|
||||
This script is used to create TPC-DS tables,
|
||||
will use mysql client to connect Doris server which is specified in doris-cluster.conf file.
|
||||
Usage: $0
|
||||
"
|
||||
exit 1
|
||||
}
|
||||
|
||||
OPTS=$(getopt \
|
||||
-n "$0" \
|
||||
-o '' \
|
||||
-- "$@")
|
||||
|
||||
eval set -- "${OPTS}"
|
||||
HELP=0
|
||||
|
||||
if [[ $# == 0 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
while true; do
|
||||
case "$1" in
|
||||
-h)
|
||||
HELP=1
|
||||
shift
|
||||
;;
|
||||
--)
|
||||
shift
|
||||
break
|
||||
;;
|
||||
*)
|
||||
echo "Internal error"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ ${HELP} -eq 1 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
check_prerequest() {
|
||||
local CMD=$1
|
||||
local NAME=$2
|
||||
if ! ${CMD}; then
|
||||
echo "${NAME} is missing. This script depends on mysql to create tables in Doris."
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
check_prerequest "mysql --version" "mysql"
|
||||
|
||||
source "${CURDIR}/../conf/doris-cluster.conf"
|
||||
export MYSQL_PWD=${PASSWORD}
|
||||
|
||||
echo "FE_HOST: ${FE_HOST}"
|
||||
echo "FE_QUERY_PORT: ${FE_QUERY_PORT}"
|
||||
echo "USER: ${USER}"
|
||||
echo "DB: ${DB}"
|
||||
|
||||
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -e "DROP DATABASE IF EXISTS ${DB}"
|
||||
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -e "CREATE DATABASE ${DB}"
|
||||
|
||||
echo "Run SQLs from ${CURDIR}/../ddl/create-tpcds-tables.sql"
|
||||
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" <"${CURDIR}"/../ddl/create-tpcds-tables.sql
|
||||
|
||||
echo "tpcds tables has been created"
|
||||
125
tools/tpcds-tools/bin/gen-tpcds-data.sh
Executable file
125
tools/tpcds-tools/bin/gen-tpcds-data.sh
Executable file
@ -0,0 +1,125 @@
|
||||
#!/usr/bin/env bash
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
##############################################################
|
||||
# This script is used to generate TPC-DS data set
|
||||
##############################################################
|
||||
|
||||
set -eo pipefail
|
||||
|
||||
ROOT=$(dirname "$0")
|
||||
ROOT=$(
|
||||
cd "${ROOT}"
|
||||
pwd
|
||||
)
|
||||
|
||||
CURDIR="${ROOT}"
|
||||
TPCDS_DBGEN_DIR="${CURDIR}/DSGen-software-code-3.2.0rc1/tools"
|
||||
|
||||
usage() {
|
||||
echo "
|
||||
Usage: $0 <options>
|
||||
Optional options:
|
||||
-s scale factor, default is 1
|
||||
-c parallelism to generate data of (lineitem, orders, partsupp) table, default is 10
|
||||
|
||||
Eg.
|
||||
$0 generate data using default value.
|
||||
$0 -s 100 generate data with scale factor 100.
|
||||
$0 -s 1000 -c 100 generate data with scale factor 1000. And using 1000 threads to generate data concurrently.
|
||||
"
|
||||
exit 1
|
||||
}
|
||||
|
||||
OPTS=$(getopt \
|
||||
-n "$0" \
|
||||
-o '' \
|
||||
-o 'hs:c:' \
|
||||
-- "$@")
|
||||
|
||||
eval set -- "${OPTS}"
|
||||
|
||||
SCALE_FACTOR=1
|
||||
PARALLEL=10
|
||||
HELP=0
|
||||
|
||||
if [[ $# == 0 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
while true; do
|
||||
case "$1" in
|
||||
-h)
|
||||
HELP=1
|
||||
shift
|
||||
;;
|
||||
-s)
|
||||
SCALE_FACTOR=$2
|
||||
shift 2
|
||||
;;
|
||||
-c)
|
||||
PARALLEL=$2
|
||||
shift 2
|
||||
;;
|
||||
--)
|
||||
shift
|
||||
break
|
||||
;;
|
||||
*)
|
||||
echo "Internal error"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ ${HELP} -eq 1 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
TPCDS_DATA_DIR="${CURDIR}/tpcds-data"
|
||||
echo "Scale Factor: ${SCALE_FACTOR}"
|
||||
echo "Parallelism: ${PARALLEL}"
|
||||
|
||||
# check if dsdgen exists
|
||||
if [[ ! -f ${TPCDS_DBGEN_DIR}/dsdgen ]]; then
|
||||
echo "${TPCDS_DBGEN_DIR}/dsdgen does not exist. Run build-tpcds-dsdgen.sh first to build it first."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ -d ${TPCDS_DATA_DIR}/ ]]; then
|
||||
echo "${TPCDS_DATA_DIR} exists. Remove it before generating data"
|
||||
exit 1
|
||||
fi
|
||||
|
||||
mkdir "${TPCDS_DATA_DIR}"/
|
||||
|
||||
# gen data
|
||||
echo "Begin to generate data..."
|
||||
date
|
||||
cd "${TPCDS_DBGEN_DIR}"
|
||||
if [[ ${PARALLEL} -eq 1 ]] && "${TPCDS_DBGEN_DIR}"/dsdgen -SCALE "${SCALE_FACTOR}" -TERMINATE N -DIR "${TPCDS_DATA_DIR}"; then
|
||||
echo "data genarated."
|
||||
elif [[ ${PARALLEL} -gt 1 ]] && "${TPCDS_DBGEN_DIR}"/dsdgen -SCALE "${SCALE_FACTOR}" -PARALLEL "${PARALLEL}" -TERMINATE N -DIR "${TPCDS_DATA_DIR}"; then
|
||||
echo "data genarated."
|
||||
else
|
||||
echo "ERROR occured." && exit 1
|
||||
fi
|
||||
cd -
|
||||
date
|
||||
# check data
|
||||
du -sh "${TPCDS_DATA_DIR}"/*.dat*
|
||||
173
tools/tpcds-tools/bin/gen-tpcds-queries.sh
Executable file
173
tools/tpcds-tools/bin/gen-tpcds-queries.sh
Executable file
@ -0,0 +1,173 @@
|
||||
#!/usr/bin/env bash
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
##############################################################
|
||||
# This script is used to generate TPC-DS tables
|
||||
##############################################################
|
||||
|
||||
set -eo pipefail
|
||||
|
||||
ROOT=$(dirname "$0")
|
||||
ROOT=$(
|
||||
cd "${ROOT}"
|
||||
pwd
|
||||
)
|
||||
|
||||
CURDIR="${ROOT}"
|
||||
TPCDS_DSQGEN_DIR="${CURDIR}/DSGen-software-code-3.2.0rc1/tools"
|
||||
TPCDS_QUERIE_DIR="${CURDIR}/../queries"
|
||||
|
||||
usage() {
|
||||
echo "
|
||||
This script is used to generate TPC-DS 99 queries of different scale,
|
||||
Usage: $0 <options>
|
||||
Optional options:
|
||||
-s scale factor, default is 1
|
||||
Eg.
|
||||
$0 -s 100 generate tpcds queries with scale factor 100.
|
||||
"
|
||||
exit 1
|
||||
}
|
||||
|
||||
OPTS=$(getopt \
|
||||
-n "$0" \
|
||||
-o '' \
|
||||
-o 'hs:' \
|
||||
-- "$@")
|
||||
|
||||
eval set -- "${OPTS}"
|
||||
HELP=0
|
||||
SCALE=1
|
||||
|
||||
if [[ $# == 0 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
while true; do
|
||||
case "$1" in
|
||||
-h)
|
||||
HELP=1
|
||||
shift
|
||||
;;
|
||||
-s)
|
||||
SCALE=$2
|
||||
shift 2
|
||||
;;
|
||||
--)
|
||||
shift
|
||||
break
|
||||
;;
|
||||
*)
|
||||
echo "Internal error"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ ${HELP} -eq 1 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
# check if dsqgen exists
|
||||
if [[ ! -f ${TPCDS_DSQGEN_DIR}/dsqgen ]]; then
|
||||
echo "${TPCDS_DSQGEN_DIR}/dsqgen does not exist. Run build-tpcds-dbgen.sh to build it first."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
if [[ -d ${TPCDS_QUERIE_DIR}/ ]]; then
|
||||
echo "${TPCDS_QUERIE_DIR} exists. Remove it before re-generating"
|
||||
exit 1
|
||||
else
|
||||
mkdir "${TPCDS_QUERIE_DIR}"
|
||||
fi
|
||||
|
||||
cd "${TPCDS_DSQGEN_DIR}"
|
||||
for i in {1..99}; do
|
||||
sed -i '/define _END/d' "../query_templates/query${i}.tpl"
|
||||
echo 'define _END = "";' >>"../query_templates/query${i}.tpl"
|
||||
done
|
||||
|
||||
# generate query
|
||||
"${TPCDS_DSQGEN_DIR}"/dsqgen \
|
||||
-DIRECTORY ../query_templates/ \
|
||||
-INPUT ../query_templates/templates.lst \
|
||||
-DIALECT netezza \
|
||||
-QUALIFY Y \
|
||||
-SCALE "${SCALE}"
|
||||
|
||||
if [[ -f query_0.sql ]]; then
|
||||
i=1
|
||||
IFS=';'
|
||||
query_strs=$(cat query_0.sql)
|
||||
of="${TPCDS_QUERIE_DIR}/tpcds_queries.sql"
|
||||
for q in ${query_strs}; do
|
||||
if [[ ${i} -eq 2 ]]; then
|
||||
echo "${q/from catalog_sales))/from catalog_sales) t)};" >>"${of}"
|
||||
elif [[ ${i} -eq 5 ]]; then
|
||||
q="${q// 14 days/ interval 14 day}" &&
|
||||
q="${q//\'store\' || s_store_id as id/concat(\'store\', s_store_id) id}" &&
|
||||
q="${q//\'catalog_page\' || cp_catalog_page_id as id/concat(\'catalog_page\', cp_catalog_page_id) id}" &&
|
||||
q="${q//\'web_site\' || web_site_id as id/concat(\'web_site\', web_site_id) id}" &&
|
||||
echo "${q};" >>"${of}"
|
||||
elif [[ ${i} -eq 12 ]] || [[ ${i} -eq 21 ]] || [[ ${i} -eq 22 ]] || [[ ${i} -eq 44 ]] || [[ ${i} -eq 81 ]] || [[ ${i} -eq 102 ]]; then
|
||||
q="${q//30 days/interval 30 day}" && echo "${q};" >>"${of}"
|
||||
elif [[ ${i} -eq 14 ]]; then
|
||||
# q="${q//and d3.d_year between 1999 AND 1999 + 2)/and d3.d_year between 1999 AND 1999 + 2) t}" && echo "${q};" >>"${of}"
|
||||
q="${q//where i_brand_id = brand_id/t where i_brand_id = brand_id}" && echo "${q};" >>"${of}"
|
||||
elif [[ ${i} -eq 17 ]] || [[ ${i} -eq 40 ]] || [[ ${i} -eq 86 ]] || [[ ${i} -eq 98 ]] || [[ ${i} -eq 99 ]]; then
|
||||
q="${q//60 days)/interval 60 day)}" && echo "${q};" >>"${of}"
|
||||
elif [[ ${i} -eq 24 ]]; then
|
||||
q="${q//c_customer_sk))/c_customer_sk) t)}" &&
|
||||
q="${q//best_ss_customer))/best_ss_customer)) t2}" &&
|
||||
echo "${q};" >>"${of}"
|
||||
elif [[ ${i} -eq 25 ]]; then
|
||||
q="${q//c_customer_sk))/c_customer_sk) t)}" &&
|
||||
q="${q//c_first_name)/c_first_name) t2}" &&
|
||||
echo "${q};" >>"${of}"
|
||||
elif [[ ${i} -eq 35 ]] || [[ ${i} -eq 96 ]]; then
|
||||
q="${q//90 days)/interval 90 day)}" && echo "${q};" >>"${of}"
|
||||
elif [[ ${i} -eq 53 ]]; then
|
||||
q="${q//order by 1/ t order by 1}" && echo "${q};" >>"${of}"
|
||||
elif [[ ${i} -eq 70 ]]; then
|
||||
v=$(echo "${q}" | grep "||" | grep -o "'.*'" | sed -n '1p')
|
||||
smc1=$(echo "${v//\'/}" | awk -F"|" '{print $1}')
|
||||
smc2=$(echo "${v//\'/}" | awk -F"|" '{print $5}')
|
||||
origin="'${smc1% }' || ',' || '${smc2# }'"
|
||||
q="${q//${origin}/concat(concat(\'${smc1}\', \',\'), \'${smc2}\')}" && echo "${q};" >>"${of}"
|
||||
elif [[ ${i} -eq 84 ]]; then
|
||||
q="${q//30 days/interval 30 day}" &&
|
||||
q="${q//\'store\' || store_id/concat(\'store\', store_id)}" &&
|
||||
q="${q//\'catalog_page\' || catalog_page_id/concat(\'catalog_page\', catalog_page_id)}" &&
|
||||
q="${q//\'web_site\' || web_site_id/concat(\'web_site\', web_site_id)}" &&
|
||||
echo "${q};" >>"${of}"
|
||||
elif [[ ${i} -eq 88 ]]; then
|
||||
q="${q//coalesce(c_last_name,\'\') || \', \' || coalesce(c_first_name,\'\')/concat(concat(coalesce(c_last_name,\'\'), \',\'), coalesce(c_first_name,\'\'))}" &&
|
||||
echo "${q};" >>"${of}"
|
||||
else
|
||||
echo "${q};" >>"${of}"
|
||||
fi
|
||||
i=$((i + 1))
|
||||
done
|
||||
echo -e "\033[32m
|
||||
tpcds queries generated in:
|
||||
${TPCDS_QUERIE_DIR}/tpcds_queries.sql
|
||||
\033[0m"
|
||||
else
|
||||
echo -e "\033[31m ERROR: tpcds queries generate failed \033[0m" && exit 1
|
||||
fi
|
||||
cd - >/dev/null
|
||||
199
tools/tpcds-tools/bin/load-tpcds-data.sh
Executable file
199
tools/tpcds-tools/bin/load-tpcds-data.sh
Executable file
@ -0,0 +1,199 @@
|
||||
#!/usr/bin/env bash
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
##############################################################
|
||||
# This script is used to load generated TPC-DS data set into Doris.
|
||||
##############################################################
|
||||
|
||||
set -eo pipefail
|
||||
|
||||
ROOT=$(dirname "$0")
|
||||
ROOT=$(
|
||||
cd "${ROOT}"
|
||||
pwd
|
||||
)
|
||||
|
||||
CURDIR="${ROOT}"
|
||||
TPCDS_DATA_DIR="${CURDIR}/tpcds-data"
|
||||
|
||||
usage() {
|
||||
echo "
|
||||
Usage: $0 <options>
|
||||
Optional options:
|
||||
-c parallelism to load data of lineitem, orders, partsupp, default is 5.
|
||||
|
||||
Eg.
|
||||
$0 load data using default value.
|
||||
$0 -c 10 load lineitem, orders, partsupp table data using parallelism 10.
|
||||
"
|
||||
exit 1
|
||||
}
|
||||
|
||||
OPTS=$(getopt \
|
||||
-n "$0" \
|
||||
-o '' \
|
||||
-o 'hc:' \
|
||||
-- "$@")
|
||||
|
||||
eval set -- "${OPTS}"
|
||||
|
||||
PARALLEL=5
|
||||
HELP=0
|
||||
|
||||
if [[ $# == 0 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
while true; do
|
||||
case "$1" in
|
||||
-h)
|
||||
HELP=1
|
||||
shift
|
||||
;;
|
||||
-c)
|
||||
PARALLEL=$2
|
||||
shift 2
|
||||
;;
|
||||
--)
|
||||
shift
|
||||
break
|
||||
;;
|
||||
*)
|
||||
echo "Internal error"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ ${HELP} -eq 1 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
# check if tpcds-data exists
|
||||
if [[ ! -d "${TPCDS_DATA_DIR}"/ ]]; then
|
||||
echo "${TPCDS_DATA_DIR} does not exist. Run sh gen-tpcds-data.sh first."
|
||||
exit 1
|
||||
fi
|
||||
|
||||
check_prerequest() {
|
||||
local CMD=$1
|
||||
local NAME=$2
|
||||
if ! ${CMD} &>/dev/null; then
|
||||
echo "${NAME} is missing. This script depends on cURL to load data to Doris."
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
check_prerequest "curl --version" "curl"
|
||||
check_prerequest "jq --version" "jq"
|
||||
|
||||
# load tables
|
||||
source "${CURDIR}/../conf/doris-cluster.conf"
|
||||
export MYSQL_PWD=${PASSWORD}
|
||||
|
||||
echo "Parallelism: ${PARALLEL}"
|
||||
echo "FE_HOST: ${FE_HOST}"
|
||||
echo "FE_HTTP_PORT: ${FE_HTTP_PORT}"
|
||||
echo "USER: ${USER}"
|
||||
echo "DB: ${DB}"
|
||||
|
||||
declare -A table_columns=(
|
||||
['call_center']='cc_call_center_sk, cc_call_center_id, cc_rec_start_date, cc_rec_end_date, cc_closed_date_sk, cc_open_date_sk, cc_name, cc_class, cc_employees, cc_sq_ft, cc_hours, cc_manager, cc_mkt_id, cc_mkt_class, cc_mkt_desc, cc_market_manager, cc_division, cc_division_name, cc_company, cc_company_name, cc_street_number, cc_street_name, cc_street_type, cc_suite_number, cc_city, cc_county, cc_state, cc_zip, cc_country, cc_gmt_offset, cc_tax_percentage'
|
||||
['catalog_page']='cp_catalog_page_sk, cp_catalog_page_id, cp_start_date_sk, cp_end_date_sk, cp_department, cp_catalog_number, cp_catalog_page_number, cp_description, cp_type'
|
||||
['catalog_returns']='cr_returned_date_sk, cr_returned_time_sk, cr_item_sk, cr_refunded_customer_sk, cr_refunded_cdemo_sk, cr_refunded_hdemo_sk, cr_refunded_addr_sk, cr_returning_customer_sk, cr_returning_cdemo_sk, cr_returning_hdemo_sk, cr_returning_addr_sk, cr_call_center_sk, cr_catalog_page_sk, cr_ship_mode_sk, cr_warehouse_sk, cr_reason_sk, cr_order_number, cr_return_quantity, cr_return_amount, cr_return_tax, cr_return_amt_inc_tax, cr_fee, cr_return_ship_cost, cr_refunded_cash, cr_reversed_charge, cr_store_credit, cr_net_loss'
|
||||
['catalog_sales']='cs_sold_date_sk, cs_sold_time_sk, cs_ship_date_sk, cs_bill_customer_sk, cs_bill_cdemo_sk, cs_bill_hdemo_sk, cs_bill_addr_sk, cs_ship_customer_sk, cs_ship_cdemo_sk, cs_ship_hdemo_sk, cs_ship_addr_sk, cs_call_center_sk, cs_catalog_page_sk, cs_ship_mode_sk, cs_warehouse_sk, cs_item_sk, cs_promo_sk, cs_order_number, cs_quantity, cs_wholesale_cost, cs_list_price, cs_sales_price, cs_ext_discount_amt, cs_ext_sales_price, cs_ext_wholesale_cost, cs_ext_list_price, cs_ext_tax, cs_coupon_amt, cs_ext_ship_cost, cs_net_paid, cs_net_paid_inc_tax, cs_net_paid_inc_ship, cs_net_paid_inc_ship_tax, cs_net_profit'
|
||||
['customer_address']='ca_address_sk, ca_address_id, ca_street_number, ca_street_name, ca_street_type, ca_suite_number, ca_city, ca_county, ca_state, ca_zip, ca_country, ca_gmt_offset, ca_location_type'
|
||||
['customer_demographics']='cd_demo_sk, cd_gender, cd_marital_status, cd_education_status, cd_purchase_estimate, cd_credit_rating, cd_dep_count, cd_dep_employed_count, cd_dep_college_count'
|
||||
['customer']='c_customer_sk, c_customer_id, c_current_cdemo_sk, c_current_hdemo_sk, c_current_addr_sk, c_first_shipto_date_sk, c_first_sales_date_sk, c_salutation, c_first_name, c_last_name, c_preferred_cust_flag, c_birth_day, c_birth_month, c_birth_year, c_birth_country, c_login, c_email_address, c_last_review_date_sk'
|
||||
['date_dim']='d_date_sk, d_date_id, d_date, d_month_seq, d_week_seq, d_quarter_seq, d_year, d_dow, d_moy, d_dom, d_qoy, d_fy_year, d_fy_quarter_seq, d_fy_week_seq, d_day_name, d_quarter_name, d_holiday, d_weekend, d_following_holiday, d_first_dom, d_last_dom, d_same_day_ly, d_same_day_lq, d_current_day, d_current_week, d_current_month, d_current_quarter, d_current_year'
|
||||
['dbgen_version']='dv_version, dv_create_date, dv_create_time, dv_cmdline_args'
|
||||
['household_demographics']='hd_demo_sk, hd_income_band_sk, hd_buy_potential, hd_dep_count, hd_vehicle_count'
|
||||
['income_band']='ib_income_band_sk, ib_lower_bound, ib_upper_bound'
|
||||
['inventory']='inv_date_sk, inv_item_sk, inv_warehouse_sk, inv_quantity_on_hand'
|
||||
['item']='i_item_sk, i_item_id, i_rec_start_date, i_rec_end_date, i_item_desc, i_current_price, i_wholesale_cost, i_brand_id, i_brand, i_class_id, i_class, i_category_id, i_category, i_manufact_id, i_manufact, i_size, i_formulation, i_color, i_units, i_container, i_manager_id, i_product_name'
|
||||
['promotion']='p_promo_sk, p_promo_id, p_start_date_sk, p_end_date_sk, p_item_sk, p_cost, p_response_targe, p_promo_name, p_channel_dmail, p_channel_email, p_channel_catalog, p_channel_tv, p_channel_radio, p_channel_press, p_channel_event, p_channel_demo, p_channel_details, p_purpose, p_discount_active'
|
||||
['reason']='r_reason_sk, r_reason_id, r_reason_desc'
|
||||
['ship_mode']='sm_ship_mode_sk, sm_ship_mode_id, sm_type, sm_code, sm_carrier, sm_contract'
|
||||
['store']='s_store_sk, s_store_id, s_rec_start_date, s_rec_end_date, s_closed_date_sk, s_store_name, s_number_employees, s_floor_space, s_hours, s_manager, s_market_id, s_geography_class, s_market_desc, s_market_manager, s_division_id, s_division_name, s_company_id, s_company_name, s_street_number, s_street_name, s_street_type, s_suite_number, s_city, s_county, s_state, s_zip, s_country, s_gmt_offset, s_tax_precentage'
|
||||
['store_returns']='sr_returned_date_sk, sr_return_time_sk, sr_item_sk, sr_customer_sk, sr_cdemo_sk, sr_hdemo_sk, sr_addr_sk, sr_store_sk, sr_reason_sk, sr_ticket_number, sr_return_quantity, sr_return_amt, sr_return_tax, sr_return_amt_inc_tax, sr_fee, sr_return_ship_cost, sr_refunded_cash, sr_reversed_charge, sr_store_credit, sr_net_loss'
|
||||
['store_sales']='ss_sold_date_sk, ss_sold_time_sk, ss_item_sk, ss_customer_sk, ss_cdemo_sk, ss_hdemo_sk, ss_addr_sk, ss_store_sk, ss_promo_sk, ss_ticket_number, ss_quantity, ss_wholesale_cost, ss_list_price, ss_sales_price, ss_ext_discount_amt, ss_ext_sales_price, ss_ext_wholesale_cost, ss_ext_list_price, ss_ext_tax, ss_coupon_amt, ss_net_paid, ss_net_paid_inc_tax, ss_net_profit'
|
||||
['time_dim']='t_time_sk, t_time_id, t_time, t_hour, t_minute, t_second, t_am_pm, t_shift, t_sub_shift, t_meal_time'
|
||||
['warehouse']='w_warehouse_sk, w_warehouse_id, w_warehouse_name, w_warehouse_sq_ft, w_street_number, w_street_name, w_street_type, w_suite_number, w_city, w_county, w_state, w_zip, w_country, w_gmt_offset'
|
||||
['web_page']='wp_web_page_sk, wp_web_page_id, wp_rec_start_date, wp_rec_end_date, wp_creation_date_sk, wp_access_date_sk, wp_autogen_flag, wp_customer_sk, wp_url, wp_type, wp_char_count, wp_link_count, wp_image_count, wp_max_ad_count'
|
||||
['web_returns']='wr_returned_date_sk, wr_returned_time_sk, wr_item_sk, wr_refunded_customer_sk, wr_refunded_cdemo_sk, wr_refunded_hdemo_sk, wr_refunded_addr_sk, wr_returning_customer_sk, wr_returning_cdemo_sk, wr_returning_hdemo_sk, wr_returning_addr_sk, wr_web_page_sk, wr_reason_sk, wr_order_number, wr_return_quantity, wr_return_amt, wr_return_tax, wr_return_amt_inc_tax, wr_fee, wr_return_ship_cost, wr_refunded_cash, wr_reversed_charge, wr_account_credit, wr_net_loss'
|
||||
['web_sales']='ws_sold_date_sk, ws_sold_time_sk, ws_ship_date_sk, ws_item_sk, ws_bill_customer_sk, ws_bill_cdemo_sk, ws_bill_hdemo_sk, ws_bill_addr_sk, ws_ship_customer_sk, ws_ship_cdemo_sk, ws_ship_hdemo_sk, ws_ship_addr_sk, ws_web_page_sk, ws_web_site_sk, ws_ship_mode_sk, ws_warehouse_sk, ws_promo_sk, ws_order_number, ws_quantity, ws_wholesale_cost, ws_list_price, ws_sales_price, ws_ext_discount_amt, ws_ext_sales_price, ws_ext_wholesale_cost, ws_ext_list_price, ws_ext_tax, ws_coupon_amt, ws_ext_ship_cost, ws_net_paid, ws_net_paid_inc_tax, ws_net_paid_inc_ship, ws_net_paid_inc_ship_tax, ws_net_profit'
|
||||
['web_site']='web_site_sk, web_site_id, web_rec_start_date, web_rec_end_date, web_name, web_open_date_sk, web_close_date_sk, web_class, web_manager, web_mkt_id, web_mkt_class, web_mkt_desc, web_market_manager, web_company_id, web_company_name, web_street_number, web_street_name, web_street_type, web_suite_number, web_city, web_county, web_state, web_zip, web_country, web_gmt_offset, web_tax_percentage'
|
||||
)
|
||||
|
||||
# set parallelism
|
||||
|
||||
# 以PID为名, 防止创建命名管道时与已有文件重名,从而失败
|
||||
fifo="/tmp/$$.fifo"
|
||||
# 创建命名管道
|
||||
mkfifo "${fifo}"
|
||||
# 以读写方式打开命名管道,文件标识符fd为3,fd可取除0,1,2,5外0-9中的任意数字
|
||||
exec 3<>"${fifo}"
|
||||
# 删除文件, 也可不删除, 不影响后面操作
|
||||
rm -rf "${fifo}"
|
||||
|
||||
# 在fd3中放置$PARALLEL个空行作为令牌
|
||||
for ((i = 1; i <= PARALLEL; i++)); do
|
||||
echo >&3
|
||||
done
|
||||
|
||||
# start load
|
||||
start_time=$(date +%s)
|
||||
echo "Start time: $(date)"
|
||||
for table_name in ${!table_columns[*]}; do
|
||||
# 领取令牌, 即从fd3中读取行, 每次一行
|
||||
# 对管道,读一行便少一行,每次只能读取一行
|
||||
# 所有行读取完毕, 执行挂起, 直到管道再次有可读行
|
||||
# 因此实现了进程数量控制
|
||||
read -r -u3
|
||||
|
||||
# 要批量执行的命令放在大括号内, 后台运行
|
||||
{
|
||||
for file in "${TPCDS_DATA_DIR}/${table_name}"*.dat; do
|
||||
ret=$(curl \
|
||||
--location-trusted \
|
||||
-u "${USER}":"${PASSWORD:=}" \
|
||||
-H "column_separator:|" \
|
||||
-H "columns: ${table_columns[${table_name}]}" \
|
||||
-T "${file}" \
|
||||
http://"${FE_HOST}":"${FE_HTTP_PORT:=8030}"/api/"${DB}"/"${table_name}"/_stream_load 2>/dev/null)
|
||||
if [[ $(echo "${ret}" | jq ".Status") == '"Success"' ]]; then
|
||||
echo "----loaded ${file}"
|
||||
else
|
||||
echo -e "\033[31m----load ${file} FAIL...\033[0m"
|
||||
fi
|
||||
done
|
||||
sleep 2
|
||||
# 归还令牌, 即进程结束后,再写入一行,使挂起的循环继续执行
|
||||
echo >&3
|
||||
} &
|
||||
done
|
||||
|
||||
# 等待所有的后台子进程结束
|
||||
wait
|
||||
# 删除文件标识符
|
||||
exec 3>&-
|
||||
|
||||
end_time=$(date +%s)
|
||||
echo "End time: $(date)"
|
||||
|
||||
echo "Finish load tpcds data, Time taken: $((end_time - start_time)) seconds"
|
||||
133
tools/tpcds-tools/bin/run-tpcds-queries.sh
Executable file
133
tools/tpcds-tools/bin/run-tpcds-queries.sh
Executable file
@ -0,0 +1,133 @@
|
||||
#!/usr/bin/env bash
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
##############################################################
|
||||
# This script is used to run TPC-DS 99 queries
|
||||
##############################################################
|
||||
|
||||
set -eo pipefail
|
||||
|
||||
ROOT=$(dirname "$0")
|
||||
ROOT=$(
|
||||
cd "${ROOT}"
|
||||
pwd
|
||||
)
|
||||
|
||||
CURDIR="${ROOT}"
|
||||
TPCDS_QUERIES_DIR="${CURDIR}/../queries"
|
||||
|
||||
usage() {
|
||||
echo "
|
||||
This script is used to run TPC-DS 99 queries,
|
||||
will use mysql client to connect Doris server which parameter is specified in doris-cluster.conf file.
|
||||
Usage: $0
|
||||
"
|
||||
exit 1
|
||||
}
|
||||
|
||||
OPTS=$(getopt \
|
||||
-n "$0" \
|
||||
-o '' \
|
||||
-- "$@")
|
||||
|
||||
eval set -- "${OPTS}"
|
||||
HELP=0
|
||||
|
||||
if [[ $# == 0 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
while true; do
|
||||
case "$1" in
|
||||
-h)
|
||||
HELP=1
|
||||
shift
|
||||
;;
|
||||
--)
|
||||
shift
|
||||
break
|
||||
;;
|
||||
*)
|
||||
echo "Internal error"
|
||||
exit 1
|
||||
;;
|
||||
esac
|
||||
done
|
||||
|
||||
if [[ ${HELP} -eq 1 ]]; then
|
||||
usage
|
||||
fi
|
||||
|
||||
check_prerequest() {
|
||||
local CMD=$1
|
||||
local NAME=$2
|
||||
if ! ${CMD}; then
|
||||
echo "${NAME} is missing. This script depends on mysql to create tables in Doris."
|
||||
exit 1
|
||||
fi
|
||||
}
|
||||
|
||||
check_prerequest "mysql --version" "mysql"
|
||||
|
||||
#shellcheck source=/dev/null
|
||||
source "${CURDIR}/../conf/doris-cluster.conf"
|
||||
export MYSQL_PWD=${PASSWORD:-}
|
||||
|
||||
echo "FE_HOST: ${FE_HOST:='127.0.0.1'}"
|
||||
echo "FE_QUERY_PORT: ${FE_QUERY_PORT:='9030'}"
|
||||
echo "USER: ${USER:='root'}"
|
||||
echo "DB: ${DB:='tpcds'}"
|
||||
echo "Time Unit: ms"
|
||||
|
||||
run_sql() {
|
||||
echo "$*"
|
||||
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e "$*"
|
||||
}
|
||||
|
||||
echo '============================================'
|
||||
run_sql "show variables;"
|
||||
echo '============================================'
|
||||
run_sql "show table status;"
|
||||
echo '============================================'
|
||||
|
||||
sum=0
|
||||
IFS=';'
|
||||
i=1
|
||||
query_strs=$(cat "${TPCDS_QUERIES_DIR}/tpcds_queries.sql")
|
||||
for query_str in ${query_strs}; do
|
||||
# echo '============================================'
|
||||
# echo "${query_str} "
|
||||
# echo '============================================'
|
||||
total=0
|
||||
run=3
|
||||
# Each query is executed ${run} times and takes the average time
|
||||
for ((j = 0; j < run; j++)); do
|
||||
# if [[ $i -lt 70 ]]; then continue; fi #########
|
||||
start=$(date +%s%3N)
|
||||
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" --comments -e"${query_str}" >/dev/null
|
||||
end=$(date +%s%3N)
|
||||
total=$((total + end - start))
|
||||
done
|
||||
cost=$((total / run))
|
||||
echo "q${i}: ${cost} ms"
|
||||
sum=$((sum + cost))
|
||||
i=$((i + 1))
|
||||
done <"${TPCDS_QUERIES_DIR}/tpcds_queries.sql"
|
||||
echo "Total cost: ${sum} ms"
|
||||
|
||||
echo 'Finish tpcds queries.'
|
||||
29
tools/tpcds-tools/conf/doris-cluster.conf
Normal file
29
tools/tpcds-tools/conf/doris-cluster.conf
Normal file
@ -0,0 +1,29 @@
|
||||
# Licensed to the Apache Software Foundation (ASF) under one
|
||||
# or more contributor license agreements. See the NOTICE file
|
||||
# distributed with this work for additional information
|
||||
# regarding copyright ownership. The ASF licenses this file
|
||||
# to you under the Apache License, Version 2.0 (the
|
||||
# "License"); you may not use this file except in compliance
|
||||
# with the License. You may obtain a copy of the License at
|
||||
#
|
||||
# http://www.apache.org/licenses/LICENSE-2.0
|
||||
#
|
||||
# Unless required by applicable law or agreed to in writing,
|
||||
# software distributed under the License is distributed on an
|
||||
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
# KIND, either express or implied. See the License for the
|
||||
# specific language governing permissions and limitations
|
||||
# under the License.
|
||||
|
||||
# Any of FE host
|
||||
export FE_HOST='127.0.0.1'
|
||||
# http_port in fe.conf
|
||||
export FE_HTTP_PORT=8030
|
||||
# query_port in fe.conf
|
||||
export FE_QUERY_PORT=9030
|
||||
# Doris username
|
||||
export USER='root'
|
||||
# Doris password
|
||||
export PASSWORD=''
|
||||
# The database where TPC-DS tables located
|
||||
export DB='tpcds'
|
||||
630
tools/tpcds-tools/ddl/create-tpcds-tables.sql
Normal file
630
tools/tpcds-tools/ddl/create-tpcds-tables.sql
Normal file
@ -0,0 +1,630 @@
|
||||
-- Licensed to the Apache Software Foundation (ASF) under one
|
||||
-- or more contributor license agreements. See the NOTICE file
|
||||
-- distributed with this work for additional information
|
||||
-- regarding copyright ownership. The ASF licenses this file
|
||||
-- to you under the Apache License, Version 2.0 (the
|
||||
-- "License"); you may not use this file except in compliance
|
||||
-- with the License. You may obtain a copy of the License at
|
||||
--
|
||||
-- http://www.apache.org/licenses/LICENSE-2.0
|
||||
--
|
||||
-- Unless required by applicable law or agreed to in writing,
|
||||
-- software distributed under the License is distributed on an
|
||||
-- "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
|
||||
-- KIND, either express or implied. See the License for the
|
||||
-- specific language governing permissions and limitations
|
||||
-- under the License.
|
||||
|
||||
CREATE TABLE IF NOT EXISTS customer_demographics (
|
||||
cd_demo_sk bigint not null,
|
||||
cd_gender char(1),
|
||||
cd_marital_status char(1),
|
||||
cd_education_status char(20),
|
||||
cd_purchase_estimate integer,
|
||||
cd_credit_rating char(10),
|
||||
cd_dep_count integer,
|
||||
cd_dep_employed_count integer,
|
||||
cd_dep_college_count integer
|
||||
)
|
||||
DUPLICATE KEY(cd_demo_sk)
|
||||
DISTRIBUTED BY HASH(cd_gender) BUCKETS 12
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS reason (
|
||||
r_reason_sk bigint not null,
|
||||
r_reason_id char(16) not null,
|
||||
r_reason_desc char(100)
|
||||
)
|
||||
DUPLICATE KEY(r_reason_sk)
|
||||
DISTRIBUTED BY HASH(r_reason_sk) BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS date_dim (
|
||||
d_date_sk bigint not null,
|
||||
d_date_id char(16) not null,
|
||||
d_date date,
|
||||
d_month_seq integer,
|
||||
d_week_seq integer,
|
||||
d_quarter_seq integer,
|
||||
d_year integer,
|
||||
d_dow integer,
|
||||
d_moy integer,
|
||||
d_dom integer,
|
||||
d_qoy integer,
|
||||
d_fy_year integer,
|
||||
d_fy_quarter_seq integer,
|
||||
d_fy_week_seq integer,
|
||||
d_day_name char(9),
|
||||
d_quarter_name char(6),
|
||||
d_holiday char(1),
|
||||
d_weekend char(1),
|
||||
d_following_holiday char(1),
|
||||
d_first_dom integer,
|
||||
d_last_dom integer,
|
||||
d_same_day_ly integer,
|
||||
d_same_day_lq integer,
|
||||
d_current_day char(1),
|
||||
d_current_week char(1),
|
||||
d_current_month char(1),
|
||||
d_current_quarter char(1),
|
||||
d_current_year char(1)
|
||||
)
|
||||
DUPLICATE KEY(d_date_sk)
|
||||
DISTRIBUTED BY HASH(d_date_sk) BUCKETS 12
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS warehouse (
|
||||
w_warehouse_sk bigint not null,
|
||||
w_warehouse_id char(16) not null,
|
||||
w_warehouse_name varchar(20),
|
||||
w_warehouse_sq_ft integer,
|
||||
w_street_number char(10),
|
||||
w_street_name varchar(60),
|
||||
w_street_type char(15),
|
||||
w_suite_number char(10),
|
||||
w_city varchar(60),
|
||||
w_county varchar(30),
|
||||
w_state char(2),
|
||||
w_zip char(10),
|
||||
w_country varchar(20),
|
||||
w_gmt_offset decimal(5,2)
|
||||
)
|
||||
DUPLICATE KEY(w_warehouse_sk)
|
||||
DISTRIBUTED BY HASH(w_warehouse_sk) BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS catalog_sales (
|
||||
cs_item_sk bigint not null,
|
||||
cs_order_number bigint not null,
|
||||
cs_sold_date_sk bigint,
|
||||
cs_sold_time_sk bigint,
|
||||
cs_ship_date_sk bigint,
|
||||
cs_bill_customer_sk bigint,
|
||||
cs_bill_cdemo_sk bigint,
|
||||
cs_bill_hdemo_sk bigint,
|
||||
cs_bill_addr_sk bigint,
|
||||
cs_ship_customer_sk bigint,
|
||||
cs_ship_cdemo_sk bigint,
|
||||
cs_ship_hdemo_sk bigint,
|
||||
cs_ship_addr_sk bigint,
|
||||
cs_call_center_sk bigint,
|
||||
cs_catalog_page_sk bigint,
|
||||
cs_ship_mode_sk bigint,
|
||||
cs_warehouse_sk bigint,
|
||||
cs_promo_sk bigint,
|
||||
cs_quantity integer,
|
||||
cs_wholesale_cost decimal(7,2),
|
||||
cs_list_price decimal(7,2),
|
||||
cs_sales_price decimal(7,2),
|
||||
cs_ext_discount_amt decimal(7,2),
|
||||
cs_ext_sales_price decimal(7,2),
|
||||
cs_ext_wholesale_cost decimal(7,2),
|
||||
cs_ext_list_price decimal(7,2),
|
||||
cs_ext_tax decimal(7,2),
|
||||
cs_coupon_amt decimal(7,2),
|
||||
cs_ext_ship_cost decimal(7,2),
|
||||
cs_net_paid decimal(7,2),
|
||||
cs_net_paid_inc_tax decimal(7,2),
|
||||
cs_net_paid_inc_ship decimal(7,2),
|
||||
cs_net_paid_inc_ship_tax decimal(7,2),
|
||||
cs_net_profit decimal(7,2)
|
||||
)
|
||||
DUPLICATE KEY(cs_item_sk, cs_order_number)
|
||||
DISTRIBUTED BY HASH(cs_item_sk, cs_order_number) BUCKETS 32
|
||||
PROPERTIES (
|
||||
"replication_num" = "1",
|
||||
"colocate_with" = "catalog"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS call_center (
|
||||
cc_call_center_sk bigint not null,
|
||||
cc_call_center_id char(16) not null,
|
||||
cc_rec_start_date date,
|
||||
cc_rec_end_date date,
|
||||
cc_closed_date_sk integer,
|
||||
cc_open_date_sk integer,
|
||||
cc_name varchar(50),
|
||||
cc_class varchar(50),
|
||||
cc_employees integer,
|
||||
cc_sq_ft integer,
|
||||
cc_hours char(20),
|
||||
cc_manager varchar(40),
|
||||
cc_mkt_id integer,
|
||||
cc_mkt_class char(50),
|
||||
cc_mkt_desc varchar(100),
|
||||
cc_market_manager varchar(40),
|
||||
cc_division integer,
|
||||
cc_division_name varchar(50),
|
||||
cc_company integer,
|
||||
cc_company_name char(50),
|
||||
cc_street_number char(10),
|
||||
cc_street_name varchar(60),
|
||||
cc_street_type char(15),
|
||||
cc_suite_number char(10),
|
||||
cc_city varchar(60),
|
||||
cc_county varchar(30),
|
||||
cc_state char(2),
|
||||
cc_zip char(10),
|
||||
cc_country varchar(20),
|
||||
cc_gmt_offset decimal(5,2),
|
||||
cc_tax_percentage decimal(5,2)
|
||||
)
|
||||
DUPLICATE KEY(cc_call_center_sk)
|
||||
DISTRIBUTED BY HASH(cc_call_center_sk) BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS inventory (
|
||||
inv_date_sk bigint not null,
|
||||
inv_item_sk bigint not null,
|
||||
inv_warehouse_sk bigint,
|
||||
inv_quantity_on_hand integer
|
||||
)
|
||||
DUPLICATE KEY(inv_date_sk, inv_item_sk, inv_warehouse_sk)
|
||||
DISTRIBUTED BY HASH(inv_date_sk, inv_item_sk, inv_warehouse_sk) BUCKETS 32
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS catalog_returns (
|
||||
cr_item_sk bigint not null,
|
||||
cr_order_number bigint not null,
|
||||
cr_returned_date_sk bigint,
|
||||
cr_returned_time_sk bigint,
|
||||
cr_refunded_customer_sk bigint,
|
||||
cr_refunded_cdemo_sk bigint,
|
||||
cr_refunded_hdemo_sk bigint,
|
||||
cr_refunded_addr_sk bigint,
|
||||
cr_returning_customer_sk bigint,
|
||||
cr_returning_cdemo_sk bigint,
|
||||
cr_returning_hdemo_sk bigint,
|
||||
cr_returning_addr_sk bigint,
|
||||
cr_call_center_sk bigint,
|
||||
cr_catalog_page_sk bigint,
|
||||
cr_ship_mode_sk bigint,
|
||||
cr_warehouse_sk bigint,
|
||||
cr_reason_sk bigint,
|
||||
cr_return_quantity integer,
|
||||
cr_return_amount decimal(7,2),
|
||||
cr_return_tax decimal(7,2),
|
||||
cr_return_amt_inc_tax decimal(7,2),
|
||||
cr_fee decimal(7,2),
|
||||
cr_return_ship_cost decimal(7,2),
|
||||
cr_refunded_cash decimal(7,2),
|
||||
cr_reversed_charge decimal(7,2),
|
||||
cr_store_credit decimal(7,2),
|
||||
cr_net_loss decimal(7,2)
|
||||
)
|
||||
DUPLICATE KEY(cr_item_sk, cr_order_number)
|
||||
DISTRIBUTED BY HASH(cr_item_sk, cr_order_number) BUCKETS 32
|
||||
PROPERTIES (
|
||||
"replication_num" = "1",
|
||||
"colocate_with" = "catalog"
|
||||
);
|
||||
|
||||
CREATE TABLE IF NOT EXISTS household_demographics (
|
||||
hd_demo_sk bigint not null,
|
||||
hd_income_band_sk bigint,
|
||||
hd_buy_potential char(15),
|
||||
hd_dep_count integer,
|
||||
hd_vehicle_count integer
|
||||
)
|
||||
DUPLICATE KEY(hd_demo_sk)
|
||||
DISTRIBUTED BY HASH(hd_demo_sk) BUCKETS 3
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS customer_address (
|
||||
ca_address_sk bigint not null,
|
||||
ca_address_id char(16) not null,
|
||||
ca_street_number char(10),
|
||||
ca_street_name varchar(60),
|
||||
ca_street_type char(15),
|
||||
ca_suite_number char(10),
|
||||
ca_city varchar(60),
|
||||
ca_county varchar(30),
|
||||
ca_state char(2),
|
||||
ca_zip char(10),
|
||||
ca_country varchar(20),
|
||||
ca_gmt_offset decimal(5,2),
|
||||
ca_location_type char(20)
|
||||
)
|
||||
DUPLICATE KEY(ca_address_sk)
|
||||
DISTRIBUTED BY HASH(ca_address_sk) BUCKETS 12
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS income_band (
|
||||
ib_income_band_sk bigint not null,
|
||||
ib_lower_bound integer,
|
||||
ib_upper_bound integer
|
||||
)
|
||||
DUPLICATE KEY(ib_income_band_sk)
|
||||
DISTRIBUTED BY HASH(ib_income_band_sk) BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS catalog_page (
|
||||
cp_catalog_page_sk bigint not null,
|
||||
cp_catalog_page_id char(16) not null,
|
||||
cp_start_date_sk integer,
|
||||
cp_end_date_sk integer,
|
||||
cp_department varchar(50),
|
||||
cp_catalog_number integer,
|
||||
cp_catalog_page_number integer,
|
||||
cp_description varchar(100),
|
||||
cp_type varchar(100)
|
||||
)
|
||||
DUPLICATE KEY(cp_catalog_page_sk)
|
||||
DISTRIBUTED BY HASH(cp_catalog_page_sk) BUCKETS 3
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS item (
|
||||
i_item_sk bigint not null,
|
||||
i_item_id char(16) not null,
|
||||
i_rec_start_date date,
|
||||
i_rec_end_date date,
|
||||
i_item_desc varchar(200),
|
||||
i_current_price decimal(7,2),
|
||||
i_wholesale_cost decimal(7,2),
|
||||
i_brand_id integer,
|
||||
i_brand char(50),
|
||||
i_class_id integer,
|
||||
i_class char(50),
|
||||
i_category_id integer,
|
||||
i_category char(50),
|
||||
i_manufact_id integer,
|
||||
i_manufact char(50),
|
||||
i_size char(20),
|
||||
i_formulation char(20),
|
||||
i_color char(20),
|
||||
i_units char(10),
|
||||
i_container char(10),
|
||||
i_manager_id integer,
|
||||
i_product_name char(50)
|
||||
)
|
||||
DUPLICATE KEY(i_item_sk)
|
||||
DISTRIBUTED BY HASH(i_item_sk) BUCKETS 12
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS web_returns (
|
||||
wr_item_sk bigint not null,
|
||||
wr_order_number bigint not null,
|
||||
wr_returned_date_sk bigint,
|
||||
wr_returned_time_sk bigint,
|
||||
wr_refunded_customer_sk bigint,
|
||||
wr_refunded_cdemo_sk bigint,
|
||||
wr_refunded_hdemo_sk bigint,
|
||||
wr_refunded_addr_sk bigint,
|
||||
wr_returning_customer_sk bigint,
|
||||
wr_returning_cdemo_sk bigint,
|
||||
wr_returning_hdemo_sk bigint,
|
||||
wr_returning_addr_sk bigint,
|
||||
wr_web_page_sk bigint,
|
||||
wr_reason_sk bigint,
|
||||
wr_return_quantity integer,
|
||||
wr_return_amt decimal(7,2),
|
||||
wr_return_tax decimal(7,2),
|
||||
wr_return_amt_inc_tax decimal(7,2),
|
||||
wr_fee decimal(7,2),
|
||||
wr_return_ship_cost decimal(7,2),
|
||||
wr_refunded_cash decimal(7,2),
|
||||
wr_reversed_charge decimal(7,2),
|
||||
wr_account_credit decimal(7,2),
|
||||
wr_net_loss decimal(7,2)
|
||||
)
|
||||
DUPLICATE KEY(wr_item_sk, wr_order_number)
|
||||
DISTRIBUTED BY HASH(wr_item_sk, wr_order_number) BUCKETS 32
|
||||
PROPERTIES (
|
||||
"replication_num" = "1",
|
||||
"colocate_with" = "web"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS web_site (
|
||||
web_site_sk bigint not null,
|
||||
web_site_id char(16) not null,
|
||||
web_rec_start_date date,
|
||||
web_rec_end_date date,
|
||||
web_name varchar(50),
|
||||
web_open_date_sk bigint,
|
||||
web_close_date_sk bigint,
|
||||
web_class varchar(50),
|
||||
web_manager varchar(40),
|
||||
web_mkt_id integer,
|
||||
web_mkt_class varchar(50),
|
||||
web_mkt_desc varchar(100),
|
||||
web_market_manager varchar(40),
|
||||
web_company_id integer,
|
||||
web_company_name char(50),
|
||||
web_street_number char(10),
|
||||
web_street_name varchar(60),
|
||||
web_street_type char(15),
|
||||
web_suite_number char(10),
|
||||
web_city varchar(60),
|
||||
web_county varchar(30),
|
||||
web_state char(2),
|
||||
web_zip char(10),
|
||||
web_country varchar(20),
|
||||
web_gmt_offset decimal(5,2),
|
||||
web_tax_percentage decimal(5,2)
|
||||
)
|
||||
DUPLICATE KEY(web_site_sk)
|
||||
DISTRIBUTED BY HASH(web_site_sk) BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS promotion (
|
||||
p_promo_sk bigint not null,
|
||||
p_promo_id char(16) not null,
|
||||
p_start_date_sk bigint,
|
||||
p_end_date_sk bigint,
|
||||
p_item_sk bigint,
|
||||
p_cost decimal(15,2),
|
||||
p_response_targe integer,
|
||||
p_promo_name char(50),
|
||||
p_channel_dmail char(1),
|
||||
p_channel_email char(1),
|
||||
p_channel_catalog char(1),
|
||||
p_channel_tv char(1),
|
||||
p_channel_radio char(1),
|
||||
p_channel_press char(1),
|
||||
p_channel_event char(1),
|
||||
p_channel_demo char(1),
|
||||
p_channel_details varchar(100),
|
||||
p_purpose char(15),
|
||||
p_discount_active char(1)
|
||||
)
|
||||
DUPLICATE KEY(p_promo_sk)
|
||||
DISTRIBUTED BY HASH(p_promo_sk) BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS web_sales (
|
||||
ws_item_sk bigint not null,
|
||||
ws_order_number bigint not null,
|
||||
ws_sold_date_sk bigint,
|
||||
ws_sold_time_sk bigint,
|
||||
ws_ship_date_sk bigint,
|
||||
ws_bill_customer_sk bigint,
|
||||
ws_bill_cdemo_sk bigint,
|
||||
ws_bill_hdemo_sk bigint,
|
||||
ws_bill_addr_sk bigint,
|
||||
ws_ship_customer_sk bigint,
|
||||
ws_ship_cdemo_sk bigint,
|
||||
ws_ship_hdemo_sk bigint,
|
||||
ws_ship_addr_sk bigint,
|
||||
ws_web_page_sk bigint,
|
||||
ws_web_site_sk bigint,
|
||||
ws_ship_mode_sk bigint,
|
||||
ws_warehouse_sk bigint,
|
||||
ws_promo_sk bigint,
|
||||
ws_quantity integer,
|
||||
ws_wholesale_cost decimal(7,2),
|
||||
ws_list_price decimal(7,2),
|
||||
ws_sales_price decimal(7,2),
|
||||
ws_ext_discount_amt decimal(7,2),
|
||||
ws_ext_sales_price decimal(7,2),
|
||||
ws_ext_wholesale_cost decimal(7,2),
|
||||
ws_ext_list_price decimal(7,2),
|
||||
ws_ext_tax decimal(7,2),
|
||||
ws_coupon_amt decimal(7,2),
|
||||
ws_ext_ship_cost decimal(7,2),
|
||||
ws_net_paid decimal(7,2),
|
||||
ws_net_paid_inc_tax decimal(7,2),
|
||||
ws_net_paid_inc_ship decimal(7,2),
|
||||
ws_net_paid_inc_ship_tax decimal(7,2),
|
||||
ws_net_profit decimal(7,2)
|
||||
)
|
||||
DUPLICATE KEY(ws_item_sk, ws_order_number)
|
||||
DISTRIBUTED BY HASH(ws_item_sk, ws_order_number) BUCKETS 32
|
||||
PROPERTIES (
|
||||
"replication_num" = "1",
|
||||
"colocate_with" = "web"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS store (
|
||||
s_store_sk bigint not null,
|
||||
s_store_id char(16) not null,
|
||||
s_rec_start_date date,
|
||||
s_rec_end_date date,
|
||||
s_closed_date_sk bigint,
|
||||
s_store_name varchar(50),
|
||||
s_number_employees integer,
|
||||
s_floor_space integer,
|
||||
s_hours char(20),
|
||||
s_manager varchar(40),
|
||||
s_market_id integer,
|
||||
s_geography_class varchar(100),
|
||||
s_market_desc varchar(100),
|
||||
s_market_manager varchar(40),
|
||||
s_division_id integer,
|
||||
s_division_name varchar(50),
|
||||
s_company_id integer,
|
||||
s_company_name varchar(50),
|
||||
s_street_number varchar(10),
|
||||
s_street_name varchar(60),
|
||||
s_street_type char(15),
|
||||
s_suite_number char(10),
|
||||
s_city varchar(60),
|
||||
s_county varchar(30),
|
||||
s_state char(2),
|
||||
s_zip char(10),
|
||||
s_country varchar(20),
|
||||
s_gmt_offset decimal(5,2),
|
||||
s_tax_precentage decimal(5,2)
|
||||
)
|
||||
DUPLICATE KEY(s_store_sk)
|
||||
DISTRIBUTED BY HASH(s_store_sk) BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS time_dim (
|
||||
t_time_sk bigint not null,
|
||||
t_time_id char(16) not null,
|
||||
t_time integer,
|
||||
t_hour integer,
|
||||
t_minute integer,
|
||||
t_second integer,
|
||||
t_am_pm char(2),
|
||||
t_shift char(20),
|
||||
t_sub_shift char(20),
|
||||
t_meal_time char(20)
|
||||
)
|
||||
DUPLICATE KEY(t_time_sk)
|
||||
DISTRIBUTED BY HASH(t_time_sk) BUCKETS 12
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS web_page (
|
||||
wp_web_page_sk bigint not null,
|
||||
wp_web_page_id char(16) not null,
|
||||
wp_rec_start_date date,
|
||||
wp_rec_end_date date,
|
||||
wp_creation_date_sk bigint,
|
||||
wp_access_date_sk bigint,
|
||||
wp_autogen_flag char(1),
|
||||
wp_customer_sk bigint,
|
||||
wp_url varchar(100),
|
||||
wp_type char(50),
|
||||
wp_char_count integer,
|
||||
wp_link_count integer,
|
||||
wp_image_count integer,
|
||||
wp_max_ad_count integer
|
||||
)
|
||||
DUPLICATE KEY(wp_web_page_sk)
|
||||
DISTRIBUTED BY HASH(wp_web_page_sk) BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS store_returns (
|
||||
sr_item_sk bigint not null,
|
||||
sr_ticket_number bigint not null,
|
||||
sr_returned_date_sk bigint,
|
||||
sr_return_time_sk bigint,
|
||||
sr_customer_sk bigint,
|
||||
sr_cdemo_sk bigint,
|
||||
sr_hdemo_sk bigint,
|
||||
sr_addr_sk bigint,
|
||||
sr_store_sk bigint,
|
||||
sr_reason_sk bigint,
|
||||
sr_return_quantity integer,
|
||||
sr_return_amt decimal(7,2),
|
||||
sr_return_tax decimal(7,2),
|
||||
sr_return_amt_inc_tax decimal(7,2),
|
||||
sr_fee decimal(7,2),
|
||||
sr_return_ship_cost decimal(7,2),
|
||||
sr_refunded_cash decimal(7,2),
|
||||
sr_reversed_charge decimal(7,2),
|
||||
sr_store_credit decimal(7,2),
|
||||
sr_net_loss decimal(7,2)
|
||||
)
|
||||
duplicate key(sr_item_sk, sr_ticket_number)
|
||||
distributed by hash (sr_item_sk, sr_ticket_number) buckets 32
|
||||
properties (
|
||||
"replication_num" = "1",
|
||||
"colocate_with" = "store"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS store_sales (
|
||||
ss_item_sk bigint not null,
|
||||
ss_ticket_number bigint not null,
|
||||
ss_sold_date_sk bigint,
|
||||
ss_sold_time_sk bigint,
|
||||
ss_customer_sk bigint,
|
||||
ss_cdemo_sk bigint,
|
||||
ss_hdemo_sk bigint,
|
||||
ss_addr_sk bigint,
|
||||
ss_store_sk bigint,
|
||||
ss_promo_sk bigint,
|
||||
ss_quantity integer,
|
||||
ss_wholesale_cost decimal(7,2),
|
||||
ss_list_price decimal(7,2),
|
||||
ss_sales_price decimal(7,2),
|
||||
ss_ext_discount_amt decimal(7,2),
|
||||
ss_ext_sales_price decimal(7,2),
|
||||
ss_ext_wholesale_cost decimal(7,2),
|
||||
ss_ext_list_price decimal(7,2),
|
||||
ss_ext_tax decimal(7,2),
|
||||
ss_coupon_amt decimal(7,2),
|
||||
ss_net_paid decimal(7,2),
|
||||
ss_net_paid_inc_tax decimal(7,2),
|
||||
ss_net_profit decimal(7,2)
|
||||
)
|
||||
DUPLICATE KEY(ss_item_sk, ss_ticket_number)
|
||||
DISTRIBUTED BY HASH(ss_item_sk, ss_ticket_number) BUCKETS 32
|
||||
PROPERTIES (
|
||||
"replication_num" = "1",
|
||||
"colocate_with" = "store"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS ship_mode (
|
||||
sm_ship_mode_sk bigint not null,
|
||||
sm_ship_mode_id char(16) not null,
|
||||
sm_type char(30),
|
||||
sm_code char(10),
|
||||
sm_carrier char(20),
|
||||
sm_contract char(20)
|
||||
)
|
||||
DUPLICATE KEY(sm_ship_mode_sk)
|
||||
DISTRIBUTED BY HASH(sm_ship_mode_sk) BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS customer (
|
||||
c_customer_sk bigint not null,
|
||||
c_customer_id char(16) not null,
|
||||
c_current_cdemo_sk bigint,
|
||||
c_current_hdemo_sk bigint,
|
||||
c_current_addr_sk bigint,
|
||||
c_first_shipto_date_sk bigint,
|
||||
c_first_sales_date_sk bigint,
|
||||
c_salutation char(10),
|
||||
c_first_name char(20),
|
||||
c_last_name char(30),
|
||||
c_preferred_cust_flag char(1),
|
||||
c_birth_day integer,
|
||||
c_birth_month integer,
|
||||
c_birth_year integer,
|
||||
c_birth_country varchar(20),
|
||||
c_login char(13),
|
||||
c_email_address char(50),
|
||||
c_last_review_date_sk bigint
|
||||
)
|
||||
DUPLICATE KEY(c_customer_sk)
|
||||
DISTRIBUTED BY HASH(c_customer_id) BUCKETS 12
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
CREATE TABLE IF NOT EXISTS dbgen_version
|
||||
(
|
||||
dv_version varchar(16) ,
|
||||
dv_create_date date ,
|
||||
dv_create_time datetime ,
|
||||
dv_cmdline_args varchar(200)
|
||||
)
|
||||
DUPLICATE KEY(dv_version)
|
||||
DISTRIBUTED BY HASH(dv_version) BUCKETS 1
|
||||
PROPERTIES (
|
||||
"replication_num" = "1"
|
||||
);
|
||||
Reference in New Issue
Block a user