[ci](perf) 1. add perf check of tpcds, 2. adjust clickbench and tpch check (#28431)

Dongyang Li
2023-12-29 09:26:15 +08:00
committed by GitHub
parent 269c1b189d
commit 61677d1d4b
20 changed files with 2103 additions and 22 deletions

View File

@@ -88,6 +88,7 @@ header:
- "conf/mysql_ssl_default_certificate/client_certificate/client-cert.pem"
- "conf/mysql_ssl_default_certificate/client_certificate/client-key.pem"
- "regression-test/ssl_default_certificate/*"
- "regression-test/pipeline/performance/conf/session_variables"
- "extension/beats/go.mod"
- "extension/beats/go.sum"
- "pytest/hdfs"

View File

@@ -53,12 +53,15 @@ function set_doris_conf_value() {
function start_doris_fe() {
if [[ ! -d "${DORIS_HOME:-}" ]]; then return 1; fi
if ! java -version >/dev/null; then sudo apt install openjdk-8-jdk -y >/dev/null; fi
if ! java -version >/dev/null ||
[[ -z "$(find /usr/lib/jvm -maxdepth 1 -type d -name 'java-8-*')" ]]; then
sudo apt update && sudo apt install openjdk-8-jdk -y >/dev/null
fi
JAVA_HOME="$(find /usr/lib/jvm -maxdepth 1 -type d -name 'java-8-*' | sed -n '1p')"
export JAVA_HOME
"${DORIS_HOME}"/fe/bin/start_fe.sh --daemon
if ! mysql --version >/dev/null; then sudo apt install -y mysql-client; fi
if ! mysql --version >/dev/null; then sudo apt update && sudo apt install -y mysql-client; fi
query_port=$(get_doris_conf_value "${DORIS_HOME}"/fe/conf/fe.conf query_port)
cl="mysql -h127.0.0.1 -P${query_port} -uroot "
local i=1
@@ -71,12 +74,14 @@ function start_doris_fe() {
fi
done
if [[ ${i} -ge 60 ]]; then echo "ERROR: Start Doris Frontend Failed after 2 mins wait..." && return 1; fi
}
function start_doris_be() {
if [[ ! -d "${DORIS_HOME:-}" ]]; then return 1; fi
if ! java -version >/dev/null; then sudo apt install openjdk-8-jdk -y >/dev/null; fi
if ! java -version >/dev/null ||
[[ -z "$(find /usr/lib/jvm -maxdepth 1 -type d -name 'java-8-*')" ]]; then
sudo apt update && sudo apt install openjdk-8-jdk -y >/dev/null
fi
JAVA_HOME="$(find /usr/lib/jvm -maxdepth 1 -type d -name 'java-8-*' | sed -n '1p')"
export JAVA_HOME
sysctl -w vm.max_map_count=2000000 &&
@@ -101,7 +106,7 @@ function start_doris_be() {
function add_doris_be_to_fe() {
if [[ ! -d "${DORIS_HOME:-}" ]]; then return 1; fi
if ! mysql --version >/dev/null; then sudo apt install -y mysql-client; fi
if ! mysql --version >/dev/null; then sudo apt update && sudo apt install -y mysql-client; fi
query_port=$(get_doris_conf_value "${DORIS_HOME}"/fe/conf/fe.conf query_port)
heartbeat_service_port=$(get_doris_conf_value "${DORIS_HOME}"/be/conf/be.conf heartbeat_service_port)
cl="mysql -h127.0.0.1 -P${query_port} -uroot "
@@ -116,7 +121,7 @@ function add_doris_be_to_fe() {
echo 'Wait for Backends ready, sleep 2 seconds ...' && sleep 2
fi
done
if [[ ${i} -eq 60 ]]; then echo "ERROR: Add Doris Backend Failed after 2 mins wait..." && return 1; fi
if [[ ${i} -ge 60 ]]; then echo "ERROR: Add Doris Backend Failed after 2 mins wait..." && return 1; fi
}
function stop_doris() {
@@ -129,6 +134,15 @@ function stop_doris() {
fi
}
function restart_doris() {
if stop_doris; then echo; fi
if ! start_doris_fe; then return 1; fi
if ! start_doris_be; then return 1; fi
# wait 10s for doris to start up fully; otherwise we may encounter the error below:
# ERROR 1105 (HY000) at line 102: errCode = 2, detailMessage = Failed to find enough backend, please check the replication num,replication tag and storage medium.
sleep 10s
}
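# A hedged alternative to the fixed sleep in restart_doris (a sketch only, not
# what this pipeline runs): poll backend aliveness the way add_doris_be_to_fe
# does, and return as soon as the backend reports alive, e.g.
#   i=0
#   while [[ $((i++)) -lt 10 ]]; do
#       if ${cl} -e'show backends\G' | grep -q 'Alive: true'; then break; fi
#       sleep 2
#   done
# where ${cl} is the mysql command line built elsewhere in this file.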
function check_tpch_table_rows() {
if [[ ! -d "${DORIS_HOME:-}" ]]; then return 1; fi
db_name="$1"
@@ -138,20 +152,133 @@ function check_tpch_table_rows() {
query_port=$(get_doris_conf_value "${DORIS_HOME}"/fe/conf/fe.conf query_port)
cl="mysql -h127.0.0.1 -P${query_port} -uroot "
declare -A table_rows
if [[ "${scale_factor}" == "100" ]]; then
if [[ "${scale_factor}" == "1" ]]; then
table_rows=(['region']=5 ['nation']=25 ['supplier']=10000 ['customer']=150000 ['part']=200000 ['partsupp']=800000 ['orders']=1500000 ['lineitem']=6001215)
elif [[ "${scale_factor}" == "100" ]]; then
table_rows=(['region']=5 ['nation']=25 ['supplier']=1000000 ['customer']=15000000 ['part']=20000000 ['partsupp']=80000000 ['orders']=150000000 ['lineitem']=600037902)
else
table_rows=(['region']=5 ['nation']=25 ['supplier']=10000 ['customer']=150000 ['part']=200000 ['partsupp']=800000 ['orders']=1500000 ['lineitem']=6001215)
echo "ERROR: unsupported scale_factor ${scale_factor} for tpch" && return 1
fi
for table in ${!table_rows[*]}; do
rows_actual=$(${cl} -D"${db_name}" -e"SELECT count(*) FROM ${table}" | sed -n '2p')
rows_expect=${table_rows[${table}]}
if [[ ${rows_actual} -ne ${rows_expect} ]]; then
echo "WARNING: ${table} actual rows: ${rows_actual}, expect rows: ${rows_expect}" && return 1
echo "ERROR: ${table} actual rows: ${rows_actual}, expect rows: ${rows_expect}" && return 1
fi
done
}
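# Example invocation (the database name here is hypothetical):
#   check_tpch_table_rows tpch_sf100 100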
function check_tpcds_table_rows() {
if [[ ! -d "${DORIS_HOME:-}" ]]; then return 1; fi
db_name="$1"
scale_factor="$2"
if [[ -z "${scale_factor}" ]]; then return 1; fi
query_port=$(get_doris_conf_value "${DORIS_HOME}"/fe/conf/fe.conf query_port)
cl="mysql -h127.0.0.1 -P${query_port} -uroot "
declare -A table_rows
if [[ "${scale_factor}" == "1" ]]; then
table_rows=(['income_band']=20 ['ship_mode']=20 ['warehouse']=5 ['reason']=35 ['web_site']=30 ['call_center']=6 ['store']=12 ['promotion']=300 ['household_demographics']=7200 ['web_page']=60 ['catalog_page']=11718 ['time_dim']=86400 ['date_dim']=73049 ['item']=18000 ['customer_demographics']=1920800 ['customer_address']=50000 ['customer']=100000 ['web_returns']=71763 ['catalog_returns']=144067 ['store_returns']=287514 ['inventory']=11745000 ['web_sales']=719384 ['catalog_sales']=1441548 ['store_sales']=2880404)
elif [[ "${scale_factor}" == "100" ]]; then
table_rows=(['income_band']=20 ['ship_mode']=20 ['warehouse']=15 ['reason']=55 ['web_site']=24 ['call_center']=30 ['store']=402 ['promotion']=1000 ['household_demographics']=7200 ['web_page']=2040 ['catalog_page']=20400 ['time_dim']=86400 ['date_dim']=73049 ['item']=204000 ['customer_demographics']=1920800 ['customer_address']=1000000 ['customer']=2000000 ['web_returns']=7197670 ['catalog_returns']=14404374 ['store_returns']=28795080 ['inventory']=399330000 ['web_sales']=72001237 ['catalog_sales']=143997065 ['store_sales']=287997024)
elif [[ "${scale_factor}" == "1000" ]]; then
table_rows=(['income_band']=20 ['ship_mode']=20 ['warehouse']=20 ['reason']=65 ['web_site']=54 ['call_center']=42 ['store']=1002 ['promotion']=1500 ['household_demographics']=7200 ['web_page']=3000 ['catalog_page']=30000 ['time_dim']=86400 ['date_dim']=73049 ['item']=300000 ['customer_demographics']=1920800 ['customer_address']=6000000 ['customer']=12000000 ['web_returns']=71997522 ['catalog_returns']=143996756 ['store_returns']=287999764 ['inventory']=783000000 ['web_sales']=720000376 ['catalog_sales']=1439980416 ['store_sales']=2879987999)
elif [[ "${scale_factor}" == "3000" ]]; then
table_rows=(['income_band']=20 ['ship_mode']=20 ['warehouse']=22 ['reason']=67 ['web_site']=66 ['call_center']=48 ['store']=1350 ['promotion']=1800 ['household_demographics']=7200 ['web_page']=3600 ['catalog_page']=36000 ['time_dim']=86400 ['date_dim']=73049 ['item']=360000 ['customer_demographics']=1920800 ['customer_address']=15000000 ['customer']=30000000 ['web_returns']=216003761 ['catalog_returns']=432018033 ['store_returns']=863989652 ['inventory']=1033560000 ['web_sales']=2159968881 ['catalog_sales']=4320078880 ['store_sales']=8639936081)
else
echo "ERROR: unsupported scale_factor ${scale_factor} for tpcds" && return 1
fi
for table in ${!table_rows[*]}; do
rows_actual=$(${cl} -D"${db_name}" -e"SELECT count(*) FROM ${table}" | sed -n '2p')
rows_expect=${table_rows[${table}]}
if [[ ${rows_actual} -ne ${rows_expect} ]]; then
echo "ERROR: ${table} actual rows: ${rows_actual}, expect rows: ${rows_expect}" && return 1
fi
done
}
function check_clickbench_table_rows() {
if [[ ! -d "${DORIS_HOME:-}" ]]; then return 1; fi
db_name="$1"
if [[ -z "${db_name}" ]]; then return 1; fi
query_port=$(get_doris_conf_value "${DORIS_HOME}"/fe/conf/fe.conf query_port)
cl="mysql -h127.0.0.1 -P${query_port} -uroot "
declare -A table_rows
table_rows=(['hits']=99997497)
if ${DEBUG:-false}; then table_rows=(['hits']=10000); fi
for table in ${!table_rows[*]}; do
rows_actual=$(${cl} -D"${db_name}" -e"SELECT count(*) FROM ${table}" | sed -n '2p')
rows_expect=${table_rows[${table}]}
if [[ ${rows_actual} -ne ${rows_expect} ]]; then
echo "ERROR: ${table} actual rows: ${rows_actual}, expect rows: ${rows_expect}" && return 1
fi
done
}
function check_tpch_result() {
log_file="$1"
if [[ -z "${log_file}" ]]; then return 1; fi
if ! grep '^Total cold run time' "${log_file}" || ! grep '^Total hot run time' "${log_file}"; then
echo "ERROR: can not find 'Total hot run time' in '${log_file}'"
return 1
else
cold_run_time=$(grep '^Total cold run time' "${log_file}" | awk '{print $5}')
hot_run_time=$(grep '^Total hot run time' "${log_file}" | awk '{print $5}')
fi
# unit: milliseconds
cold_run_time_threshold=${cold_run_time_threshold:-50000}
hot_run_time_threshold=${hot_run_time_threshold:-42000}
if [[ ${cold_run_time} -gt ${cold_run_time_threshold} || ${hot_run_time} -gt ${hot_run_time_threshold} ]]; then
echo "ERROR:
cold_run_time ${cold_run_time} is greater than the threshold ${cold_run_time_threshold},
or, hot_run_time ${hot_run_time} is greater than the threshold ${hot_run_time_threshold}"
return 1
else
echo "INFO:
cold_run_time ${cold_run_time} is less than the threshold ${cold_run_time_threshold},
hot_run_time ${hot_run_time} is less than the threshold ${hot_run_time_threshold}"
fi
}
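# check_tpch_result assumes the run-*-queries.sh scripts print summary lines
# whose 5th whitespace-separated field is the time in milliseconds, e.g.
# (illustrative values):
#   Total cold run time: 45123 ms
#   Total hot run time: 39876 ms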
function check_tpcds_result() {
check_tpch_result "$1"
}
function check_clickbench_query_result() {
echo "TODO"
}
function check_clickbench_performance_result() {
result_file="$1"
if [[ -z "${result_file}" ]]; then return 1; fi
empty_query_time="$(awk -F ',' '{if( ($2=="") || ($3=="") || ($4=="") ){print $1}}' "${result_file}")"
if [[ -n ${empty_query_time} ]]; then
echo -e "ERROR: find empty query time of:\n${empty_query_time}" && return 1
fi
# unit: seconds
cold_run_time_threshold=${cold_run_time_threshold:-200}
hot_run_time_threshold=${hot_run_time_threshold:-55}
cold_run_sum=$(awk -F ',' '{sum+=$2} END {print sum}' "${result_file}")
hot_run_time=$(awk -F ',' '{if($3<$4){sum+=$3}else{sum+=$4}} END {print sum}' "${result_file}")
if [[ $(echo "${hot_run_time} > ${hot_run_time_threshold}" | bc) -eq 1 ]] ||
[[ $(echo "${cold_run_sum} > ${cold_run_time_threshold}" | bc) -eq 1 ]]; then
echo "ERROR:
cold_run_sum ${cold_run_sum} is greater than the threshold ${cold_run_time_threshold},
or, hot_run_time ${hot_run_time} is greater than the threshold ${hot_run_time_threshold}"
return 1
else
echo "INFO:
cold_run_sum ${cold_run_sum} is less than the threshold ${cold_run_time_threshold},
hot_run_time ${hot_run_time} is less than the threshold ${hot_run_time_threshold}"
fi
}
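# check_clickbench_performance_result assumes result.csv holds one line per
# query in the form query_name,cold_time,hot_run_1,hot_run_2 with times in
# seconds, e.g. (illustrative values):
#   q1,0.52,0.11,0.10
# Column 2 feeds the cold-run sum; the smaller of columns 3 and 4 feeds the
# hot-run sum.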
function check_load_performance() {
echo "TODO"
}
get_session_variable() {
if [[ ! -d "${DORIS_HOME:-}" ]]; then return 1; fi
usage="
@@ -228,12 +355,11 @@ archive_doris_logs() {
if [[ -z ${archive_name} ]]; then echo "ERROR: archive file name required" && return 1; fi
if tar -I pigz \
--directory "${DORIS_HOME}" \
--absolute-names \
-cf "${DORIS_HOME}/${archive_name}" \
"${DORIS_HOME}"/fe/conf \
"${DORIS_HOME}"/fe/log \
"${DORIS_HOME}"/be/conf \
"${DORIS_HOME}"/be/log; then
fe/conf \
fe/log \
be/conf \
be/log; then
echo "${DORIS_HOME}/${archive_name}"
else
return 1

View File

@@ -0,0 +1,38 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
tmp_env_file_path="${PWD}/.my_tmp_env"
usage() {
echo -e "
Usage:
$0 'get'
$0 'set' \"export skip_pipeline='true'\"
note: 'get' returns the env file path; 'set' appends your new item to the env file"
exit 1
}
if [[ $1 == 'get' ]]; then
if [[ ! -f "${tmp_env_file_path}" ]]; then touch "${tmp_env_file_path}"; fi
echo "${tmp_env_file_path}"
elif [[ $1 == 'set' ]]; then
if [[ -z $2 ]]; then usage; fi
echo "$2" >>"${tmp_env_file_path}"
else
usage
fi
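# Typical usage from the other pipeline scripts in this PR:
#   source "$(bash regression-test/pipeline/common/get-or-set-tmp-env.sh 'get')"
#   bash regression-test/pipeline/common/get-or-set-tmp-env.sh 'set' "export skip_pipeline=true"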

View File

@@ -53,7 +53,33 @@ function create_an_issue_comment_tpch() {
local COMMENT_BODY="$2"
local machine='aliyun_ecs.c7a.8xlarge_32C64G'
COMMENT_BODY="
TPC-H test result on machine: '${machine}'
TPC-H test result on machine: '${machine}', run with scripts in https://github.com/apache/doris/tree/master/tools/tpch-tools
\`\`\`
${COMMENT_BODY}
\`\`\`
"
create_an_issue_comment "${ISSUE_NUMBER}" "${COMMENT_BODY}"
}
function create_an_issue_comment_tpcds() {
local ISSUE_NUMBER="$1"
local COMMENT_BODY="$2"
local machine='aliyun_ecs.c7a.8xlarge_32C64G'
COMMENT_BODY="
TPC-DS test result on machine: '${machine}', run with scripts in https://github.com/apache/doris/tree/master/tools/tpcds-tools
\`\`\`
${COMMENT_BODY}
\`\`\`
"
create_an_issue_comment "${ISSUE_NUMBER}" "${COMMENT_BODY}"
}
function create_an_issue_comment_clickbench() {
local ISSUE_NUMBER="$1"
local COMMENT_BODY="$2"
local machine='aliyun_ecs.c7a.8xlarge_32C64G'
COMMENT_BODY="
ClickBench test result on machine: '${machine}', run with scripts in https://github.com/apache/doris/tree/master/tools/clickbench-tools
\`\`\`
${COMMENT_BODY}
\`\`\`
@@ -271,3 +297,30 @@ file_changed_ckb() {
done
echo "return no need" && return 1
}
file_changed_perf() {
local all_files
all_files=$(cat all_files)
if _only_modified_regression_conf; then echo "return no need" && return 1; fi
if [[ -z ${all_files} ]]; then echo "return need" && return 0; fi
for af in ${all_files}; do
if [[ "${af}" == 'be'* ]] ||
[[ "${af}" == 'bin'* ]] ||
[[ "${af}" == 'conf'* ]] ||
[[ "${af}" == 'fe'* ]] ||
[[ "${af}" == 'gensrc'* ]] ||
[[ "${af}" == 'thirdparty'* ]] ||
[[ "${af}" == 'build.sh' ]] ||
[[ "${af}" == 'env.sh' ]] ||
[[ "${af}" == 'regression-test/pipeline/common/github-utils.sh' ]] ||
[[ "${af}" == 'regression-test/pipeline/common/doris-utils.sh' ]] ||
[[ "${af}" == 'regression-test/pipeline/common/oss-utils.sh' ]] ||
[[ "${af}" == 'regression-test/pipeline/performance/'* ]] ||
[[ "${af}" == 'tools/tpch-tools/bin/run-tpch-queries.sh' ]] ||
[[ "${af}" == 'tools/tpcds-tools/bin/run-tpcds-queries.sh' ]] ||
[[ "${af}" == 'regression-test/pipeline/tpch/tpch-sf100/'* ]]; then
echo "performance related file changed, return need" && return 0
fi
done
echo "return no need" && return 1
}
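# Typical call site (see performance/prepare.sh in this PR): collect the changed
# files first, then decide whether to skip the whole pipeline:
#   if _get_pr_changed_files "${pull_request_num}"; then
#       if ! file_changed_perf; then
#           bash "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/get-or-set-tmp-env.sh 'set' "export skip_pipeline=true"
#       fi
#   fi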

View File

@@ -0,0 +1,47 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Build Step: Command Line
: <<EOF
#!/bin/bash
# Execute step even if some of the previous steps failed
if [[ -f "${teamcity_build_checkoutDir:-}"/regression-test/pipeline/performance/clean.sh ]]; then
cd "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance
bash -x clean.sh
else
echo "Build Step file missing: regression-test/pipeline/performance/clean.sh" && exit 1
fi
EOF
#####################################################################################
## clean.sh content ##
# shellcheck source=/dev/null
source "$(bash "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/get-or-set-tmp-env.sh 'get')"
if ${skip_pipeline:=false}; then echo "INFO: skip build pipeline" && exit 0; else echo "INFO: no skip"; fi
# shellcheck source=/dev/null
# stop_doris
source "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/doris-utils.sh
DORIS_HOME="${teamcity_build_checkoutDir}/output"
export DORIS_HOME
stop_doris

View File

@@ -0,0 +1,143 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Build Step: Command Line
: <<EOF
#!/bin/bash
export DEBUG=true
if [[ -f "${teamcity_build_checkoutDir:-}"/regression-test/pipeline/performance/compile.sh ]]; then
cd "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/
bash -x compile.sh
else
echo "Build Step file missing: regression-test/pipeline/performance/compile.sh" && exit 1
fi
EOF
#####################################################################################
## compile.sh content ##
if ${DEBUG:-false}; then
pull_request_num="28431"
commit_id="b052225cd0a180b4576319b5bd6331218dd0d3fe"
target_branch="master"
fi
if [[ -z "${teamcity_build_checkoutDir}" ]]; then echo "ERROR: env teamcity_build_checkoutDir not set" && exit 2; fi
if [[ -z "${pull_request_num}" ]]; then echo "ERROR: env pull_request_num not set" && exit 2; fi
if [[ -z "${commit_id}" ]]; then echo "ERROR: env commit_id not set" && exit 2; fi
if [[ -z "${target_branch}" ]]; then echo "ERROR: env target_branch not set" && exit 2; fi
# shellcheck source=/dev/null
source "$(bash "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/get-or-set-tmp-env.sh 'get')"
if ${skip_pipeline:=false}; then echo "INFO: skip build pipeline" && exit 0; else echo "INFO: no skip"; fi
merge_pr_to_target_branch_latest() {
local pull_request_num="$1"
local target_branch="$2"
echo "INFO: merge pull request into ${target_branch}"
if [[ -z "${teamcity_build_checkoutDir}" ]]; then
echo "ERROR: env teamcity_build_checkoutDir not set" && return 1
fi
cd "${teamcity_build_checkoutDir}" || return 1
git reset --hard
git fetch origin "${target_branch}"
git checkout "${target_branch}"
git reset --hard origin/"${target_branch}"
git pull origin "${target_branch}"
git submodule update --init be/src/clucene
git submodule update --init be/src/apache-orc
local target_branch_commit_id
target_branch_commit_id=$(git rev-parse HEAD)
git config user.email "ci@selectdb.com"
git config user.name "ci"
echo "git fetch origin refs/pull/${pull_request_num}/head"
git fetch origin "refs/pull/${pull_request_num}/head"
git merge --no-edit --allow-unrelated-histories FETCH_HEAD
echo "INFO: merge refs/pull/${pull_request_num}/head into master: ${target_branch_commit_id}"
CONFLICTS=$(git ls-files -u | wc -l)
if [[ "${CONFLICTS}" -gt 0 ]]; then
echo "ERROR: merge refs/pull/${pull_request_num}/head into master failed. Aborting"
git merge --abort
return 1
fi
}
if [[ "${target_branch}" == "master" ]]; then
REMOTE_CCACHE='/mnt/remote_ccache_master'
docker_image="apache/doris:build-env-ldb-toolchain-0.19-latest"
elif [[ "${target_branch}" == "branch-2.0" ]]; then
docker_image="apache/doris:build-env-for-2.0"
REMOTE_CCACHE='/mnt/remote_ccache_branch_2'
elif [[ "${target_branch}" == "branch-1.2-lts" ]]; then
REMOTE_CCACHE='/mnt/remote_ccache_master'
docker_image="apache/doris:build-env-for-1.2"
else
REMOTE_CCACHE='/mnt/remote_ccache_master'
docker_image="apache/doris:build-env-ldb-toolchain-latest"
fi
if ${merge_target_branch_latest:-true}; then
if ! merge_pr_to_target_branch_latest "${pull_request_num}" "${target_branch}"; then
exit 1
fi
else
echo "INFO: skip merge_pr_to_target_branch_latest"
fi
mount_swapfile=""
if [[ -f /root/swapfile ]]; then mount_swapfile="-v /root/swapfile:/swapfile --memory-swap -1"; fi
git_storage_path=$(grep storage "${teamcity_build_checkoutDir}"/.git/config | rev | cut -d ' ' -f 1 | rev | awk -F '/lfs' '{print $1}')
sudo docker container prune -f
sudo docker image prune -f
sudo docker pull "${docker_image}"
docker_name=doris-compile-"${commit_id}"
if sudo docker ps -a --no-trunc | grep "${docker_name}"; then
sudo docker stop "${docker_name}"
sudo docker rm "${docker_name}"
fi
rm -f custom_env.sh
cp "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/conf/custom_env.sh .
rm -rf "${teamcity_build_checkoutDir}"/output
set -x
# shellcheck disable=SC2086
sudo docker run -i --rm \
--name "${docker_name}" \
-e TZ=Asia/Shanghai \
${mount_swapfile} \
-v /etc/localtime:/etc/localtime:ro \
-v "${HOME}"/.m2:/root/.m2 \
-v "${HOME}"/.npm:/root/.npm \
-v /mnt/ccache/.ccache:/root/.ccache \
-v "${REMOTE_CCACHE}":/root/ccache \
-v "${git_storage_path}":/root/git \
-v "${teamcity_build_checkoutDir}":/root/doris \
"${docker_image}" \
/bin/bash -c "mkdir -p ${git_storage_path} \
&& cp -r /root/git/* ${git_storage_path}/ \
&& cd /root/doris \
&& export CCACHE_LOGFILE=/tmp/cache.debug \
&& export CCACHE_REMOTE_STORAGE=file:///root/ccache \
&& export EXTRA_CXX_FLAGS=-O3 \
&& export USE_JEMALLOC='ON' \
&& export ENABLE_PCH=OFF \
&& export CUSTOM_NPM_REGISTRY=https://registry.npmjs.org \
&& bash build.sh --fe --be --clean 2>&1 | tee build.log"
set +x
succ_symbol="BUILD SUCCESS"
if [[ ! -d "${teamcity_build_checkoutDir}"/output ]] || ! grep "${succ_symbol}" "${teamcity_build_checkoutDir}"/build.log; then
echo -e "ERROR: BUILD FAILED"
exit 1
fi

View File

@@ -0,0 +1,5 @@
## Introduction
The conf files here are used by the community tpcds pipeline.
They are based on the default conf files of the master branch
(see https://github.com/apache/doris/tree/master/conf),
with fe_custom.conf, be_custom.conf, and session_variables added to carry the pipeline's custom settings.
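As an illustration only (the committed session_variables file is not shown here, so its exact format is an assumption), such a file could carry statements like the ones run-clickbench.sh applies as session variables:

set global parallel_pipeline_task_num=16;
set global enable_function_pushdown=true;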

View File

@@ -0,0 +1,21 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
priority_networks=127.0.0.1/24
storage_root_path=/data/doris-storage
streaming_load_max_mb=102400

View File

@@ -0,0 +1,20 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
export DORIS_TOOLCHAIN=gcc
export BUILD_TYPE=release

View File

@@ -0,0 +1,28 @@
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
#####################################################################
## The uppercase properties are read and exported by bin/start_fe.sh.
## To see all Frontend configurations,
## see fe/src/org/apache/doris/common/Config.java
#####################################################################
priority_networks=127.0.0.1/24
meta_dir=/data/doris-meta
stream_load_default_timeout_second=3600
ignore_unknown_metadata_module=true

View File

@@ -0,0 +1,115 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Build Step: Command Line
: <<EOF
#!/bin/bash
export DEBUG=true
if [[ -f "${teamcity_build_checkoutDir:-}"/regression-test/pipeline/performance/deploy.sh ]]; then
cd "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance
bash -x deploy.sh
else
echo "Build Step file missing: regression-test/pipeline/performance/deploy.sh" && exit 1
fi
EOF
## deploy.sh content ##
# shellcheck source=/dev/null
source "$(bash "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/get-or-set-tmp-env.sh 'get')"
if ${skip_pipeline:=false}; then echo "INFO: skip build pipeline" && exit 0; else echo "INFO: no skip"; fi
# shellcheck source=/dev/null
# upload_doris_log_to_oss
source "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/oss-utils.sh
# shellcheck source=/dev/null
# start_doris_fe, get_doris_conf_value, start_doris_be, stop_doris,
# print_doris_fe_log, print_doris_be_log, archive_doris_logs
source "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/doris-utils.sh
if ${DEBUG:-false}; then
pull_request_num="28431"
commit_id="b052225cd0a180b4576319b5bd6331218dd0d3fe"
fi
echo "#### Check env"
if [[ -z "${teamcity_build_checkoutDir}" ||
-z "${pull_request_num}" ||
-z "${commit_id}" ]]; then
echo "ERROR: env teamcity_build_checkoutDir or pull_request_num or commit_id not set"
exit 1
fi
echo "#### Deploy Doris ####"
DORIS_HOME="${teamcity_build_checkoutDir}/output"
export DORIS_HOME
exit_flag=0
need_backup_doris_logs=false
echo "#### 1. try to kill old doris process"
stop_doris
echo "#### 2. copy conf from regression-test/pipeline/performance/conf/"
rm -f "${DORIS_HOME}"/fe/conf/fe_custom.conf "${DORIS_HOME}"/be/conf/be_custom.conf
if [[ -f "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/conf/fe_custom.conf &&
-f "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/conf/be_custom.conf ]]; then
cp -f "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/conf/fe_custom.conf "${DORIS_HOME}"/fe/conf/
cp -f "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/conf/be_custom.conf "${DORIS_HOME}"/be/conf/
else
echo "ERROR: doris conf file missing in ${teamcity_build_checkoutDir}/regression-test/pipeline/performance/conf/"
exit 1
fi
echo "#### 3. start Doris"
meta_dir=$(get_doris_conf_value "${DORIS_HOME}"/fe/conf/fe_custom.conf meta_dir)
storage_root_path=$(get_doris_conf_value "${DORIS_HOME}"/be/conf/be_custom.conf storage_root_path)
mkdir -p "${meta_dir}"
mkdir -p "${storage_root_path}"
if ! start_doris_fe; then
echo "ERROR: Start doris fe failed."
print_doris_fe_log
need_backup_doris_logs=true
exit_flag=1
fi
if ! start_doris_be; then
echo "ERROR: Start doris be failed."
print_doris_be_log
need_backup_doris_logs=true
exit_flag=1
fi
if ! add_doris_be_to_fe; then
need_backup_doris_logs=true
exit_flag=1
fi
# wait 10s for doris to start up fully; otherwise we may encounter the error below:
# ERROR 1105 (HY000) at line 102: errCode = 2, detailMessage = Failed to find enough backend, please check the replication num,replication tag and storage medium.
sleep 10s
echo "#### 4. set session variables"
echo "TODO"
echo "#### 5. check if need backup doris logs"
if ${need_backup_doris_logs}; then
print_doris_fe_log
print_doris_be_log
if file_name=$(archive_doris_logs "${pull_request_num}_${commit_id}_doris_logs.tar.gz"); then
upload_doris_log_to_oss "${file_name}"
fi
fi
exit "${exit_flag}"

View File

@@ -0,0 +1,96 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Build Step: Command Line
: <<EOF
#!/bin/bash
set -x
pwd
rm -rf ../.old/*
set +x
if [[ -f "${teamcity_build_checkoutDir:-}"/regression-test/pipeline/performance/prepare.sh ]]; then
cd "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/
bash prepare.sh
else
echo "Build Step file missing: regression-test/pipeline/performance/prepare.sh" && exit 1
fi
EOF
#####################################################################################
## run.sh content ##
if ${DEBUG:-false}; then
pull_request_num="28431"
commit_id_from_trigger="5f5c4c80564c76ff4267fc4ce6a5408498ed1ab5"
commit_id="5f5c4c80564c76ff4267fc4ce6a5408498ed1ab5"
fi
echo "#### Check env"
if [[ -z "${teamcity_build_checkoutDir}" ||
-z "${commit_id_from_trigger}" ||
-z ${commit_id:-} ||
-z ${pull_request_num:-} ]]; then
echo "ERROR: env teamcity_build_checkoutDir or commit_id_from_trigger
or commit_id or pull_request_num not set" && exit 1
fi
commit_id_from_checkout=${commit_id}
echo "#### 1. check if need run"
if [[ "${commit_id_from_trigger}" != "${commit_id_from_checkout}" ]]; then
echo -e "从触发流水线 -> 流水线开始跑,这个时间段中如果有新commit,
这时候流水线 checkout 出来的 commit 就不是触发时的传过来的 commit了,
这种情况不需要跑,预期pr owner会重新触发。"
echo -e "ERROR: PR(${pull_request_num}),
the lastest commit id
${commit_id_from_checkout}
not equail to the commit_id_from_trigger
${commit_id_from_trigger}
commit_id_from_trigger is outdate"
exit 1
fi
# shellcheck source=/dev/null
source "$(bash "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/get-or-set-tmp-env.sh 'get')"
if ${skip_pipeline:=false}; then echo "INFO: skip build pipeline" && exit 0; else echo "INFO: no skip"; fi
# shellcheck source=/dev/null
# _get_pr_changed_files file_changed_perf
source "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/github-utils.sh
if _get_pr_changed_files "${pull_request_num}"; then
if ! file_changed_perf; then
bash "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/get-or-set-tmp-env.sh 'set' "export skip_pipeline=true"
exit 0
fi
fi
echo "#### 2. check if tpch depending files exist"
if ! [[ -f "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/oss-utils.sh &&
-f "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/doris-utils.sh &&
-f "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/github-utils.sh &&
-f "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/get-or-set-tmp-env.sh &&
-f "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/conf/be_custom.conf &&
-f "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/conf/custom_env.sh &&
-f "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/conf/fe_custom.conf &&
-f "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/prepare.sh &&
-f "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/compile.sh &&
-f "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/deploy.sh &&
-f "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/run-tpch.sh &&
-f "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/run-tpcds.sh &&
-f "${teamcity_build_checkoutDir}"/tools/tpch-tools/bin/run-tpch-queries.sh &&
-f "${teamcity_build_checkoutDir}"/tools/tpcds-tools/bin/run-tpcds-queries.sh ]]; then
echo "ERROR: depending files missing" && exit 1
fi

View File

@@ -0,0 +1,323 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Build Step: Command Line
: <<EOF
#!/bin/bash
export DEBUG=true
if [[ -f "${teamcity_build_checkoutDir:-}"/regression-test/pipeline/performance/run-clickbench.sh ]]; then
cd "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/
bash -x run-clickbench.sh
else
echo "Build Step file missing: regression-test/pipeline/performance/run-clickbench.sh" && exit 1
fi
EOF
#####################################################################################
## run-clickbench.sh content ##
# shellcheck source=/dev/null
# check_clickbench_table_rows, stop_doris, set_session_variable, check_clickbench_result
source "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/doris-utils.sh
# shellcheck source=/dev/null
# create_an_issue_comment
source "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/github-utils.sh
# shellcheck source=/dev/null
# upload_doris_log_to_oss
source "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/oss-utils.sh
if ${DEBUG:-false}; then
pull_request_num="28431"
commit_id="5f5c4c80564c76ff4267fc4ce6a5408498ed1ab5"
fi
echo "#### Check env"
if [[ -z "${teamcity_build_checkoutDir}" ||
-z "${pull_request_num}" ||
-z "${commit_id}" ]]; then
echo "ERROR: env teamcity_build_checkoutDir or pull_request_num or commit_id not set"
exit 1
fi
# shellcheck source=/dev/null
source "$(bash "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/get-or-set-tmp-env.sh 'get')"
if ${skip_pipeline:=false}; then echo "INFO: skip build pipeline" && exit 0; else echo "INFO: no skip"; fi
echo "#### Run clickbench test on Doris ####"
DORIS_HOME="${teamcity_build_checkoutDir}/output"
export DORIS_HOME
cold_run_time_threshold=${cold_run_time_threshold:-666} # unit: seconds
hot_run_time_threshold=${hot_run_time_threshold:-555} # unit: seconds
exit_flag=0
(
set -e
shopt -s inherit_errexit
host="127.0.0.1"
query_port=$(get_doris_conf_value "${DORIS_HOME}"/fe/conf/fe.conf query_port)
backup_session_variables_file="${teamcity_build_checkoutDir}/regression-test/pipeline/performance/backup_session_variables_file.sql"
opt_session_variables_file="${teamcity_build_checkoutDir}/regression-test/pipeline/performance/opt_session_variables_file.sql"
echo "####optimize doris config"
echo "
priority_networks=127.0.0.1/24
meta_dir=/data/doris-meta
stream_load_default_timeout_second=3600
ignore_unknown_metadata_module=true
enable_full_auto_analyze=false
" | tee "${DORIS_HOME}"/fe/conf/fe_custom.conf
echo "
priority_networks=127.0.0.1/24
storage_root_path=/data/doris-storage
load_channel_memory_refresh_sleep_time_ms=1000
soft_mem_limit_frac=1
track_new_delete=false
streaming_load_max_mb=102400
doris_scanner_thread_pool_thread_num=8
tc_enable_aggressive_memory_decommit=false
enable_new_scan_node=false
#mem_limit=100%
mem_limit=90%
#write_buffer_size=1609715200
write_buffer_size=1209715200
load_process_max_memory_limit_percent=100
#load_process_soft_mem_limit_percent=80
disable_auto_compaction=true
disable_storage_page_cache=false
disable_chunk_allocator=false
enable_simdjson_reader = true
" | tee "${DORIS_HOME}"/be/conf/be_custom.conf
opt_session_variables="
set global exec_mem_limit=34359738368;
set global parallel_fragment_exec_instance_num=16;
set global parallel_pipeline_task_num=16;
set global enable_single_distinct_column_opt=true;
set global enable_function_pushdown=true;
set global forbid_unknown_col_stats=false;
set global runtime_filter_mode=global;
"
echo -e "${opt_session_variables}" | tee "${opt_session_variables_file}"
backup_session_variables() {
_IFS="${IFS}"
IFS=$'\n'
for line in ${opt_session_variables}; do
k="${line/set global /}"
k="${k%=*}"
v=$(mysql -h"${host}" -P"${query_port}" -uroot -e"show variables like '${k}'\G" | grep " Value: ")
v="${v/*Value: /}"
echo "set global ${k}=${v};" >>"${backup_session_variables_file}"
done
IFS="${_IFS}"
}
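# backup_session_variables relies on the standard MySQL \G output layout, e.g.:
#   *************************** 1. row ***************************
#   Variable_name: parallel_pipeline_task_num
#           Value: 1
# grep " Value: " picks the value line, and "${v/*Value: /}" strips everything
# up to and including "Value: ", leaving just the value.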
backup_session_variables
mysql -h"${host}" -P"${query_port}" -uroot -e"source ${opt_session_variables_file};"
echo "#### 1. Restart doris"
if ! restart_doris; then echo "ERROR: Restart doris failed" && exit 1; fi
echo "#### 2. check if need to load data"
data_home="/data/clickbench" # no / at the end
db_name="clickbench"
if ! check_clickbench_table_rows "${db_name}"; then
echo "INFO: need to load clickbench data"
if ${force_load_data:-false}; then echo "INFO: force_load_data is true"; else echo "ERROR: force_load_data is false" && exit 1; fi
# prepare data
mkdir -p "${data_home}"
# create table and load data
mysql -h"${host}" -P"${query_port}" -uroot -e "DROP DATABASE IF EXISTS ${db_name}"
mysql -h"${host}" -P"${query_port}" -uroot -e "CREATE DATABASE IF NOT EXISTS ${db_name}" && sleep 10
mysql -h"${host}" -P"${query_port}" -uroot "${db_name}" -e"
CREATE TABLE IF NOT EXISTS hits (
CounterID INT NOT NULL,
EventDate DateV2 NOT NULL,
UserID BIGINT NOT NULL,
EventTime DateTimeV2 NOT NULL,
WatchID BIGINT NOT NULL,
JavaEnable SMALLINT NOT NULL,
Title STRING NOT NULL,
GoodEvent SMALLINT NOT NULL,
ClientIP INT NOT NULL,
RegionID INT NOT NULL,
CounterClass SMALLINT NOT NULL,
OS SMALLINT NOT NULL,
UserAgent SMALLINT NOT NULL,
URL STRING NOT NULL,
Referer STRING NOT NULL,
IsRefresh SMALLINT NOT NULL,
RefererCategoryID SMALLINT NOT NULL,
RefererRegionID INT NOT NULL,
URLCategoryID SMALLINT NOT NULL,
URLRegionID INT NOT NULL,
ResolutionWidth SMALLINT NOT NULL,
ResolutionHeight SMALLINT NOT NULL,
ResolutionDepth SMALLINT NOT NULL,
FlashMajor SMALLINT NOT NULL,
FlashMinor SMALLINT NOT NULL,
FlashMinor2 STRING NOT NULL,
NetMajor SMALLINT NOT NULL,
NetMinor SMALLINT NOT NULL,
UserAgentMajor SMALLINT NOT NULL,
UserAgentMinor VARCHAR(255) NOT NULL,
CookieEnable SMALLINT NOT NULL,
JavascriptEnable SMALLINT NOT NULL,
IsMobile SMALLINT NOT NULL,
MobilePhone SMALLINT NOT NULL,
MobilePhoneModel STRING NOT NULL,
Params STRING NOT NULL,
IPNetworkID INT NOT NULL,
TraficSourceID SMALLINT NOT NULL,
SearchEngineID SMALLINT NOT NULL,
SearchPhrase STRING NOT NULL,
AdvEngineID SMALLINT NOT NULL,
IsArtifical SMALLINT NOT NULL,
WindowClientWidth SMALLINT NOT NULL,
WindowClientHeight SMALLINT NOT NULL,
ClientTimeZone SMALLINT NOT NULL,
ClientEventTime DateTimeV2 NOT NULL,
SilverlightVersion1 SMALLINT NOT NULL,
SilverlightVersion2 SMALLINT NOT NULL,
SilverlightVersion3 INT NOT NULL,
SilverlightVersion4 SMALLINT NOT NULL,
PageCharset STRING NOT NULL,
CodeVersion INT NOT NULL,
IsLink SMALLINT NOT NULL,
IsDownload SMALLINT NOT NULL,
IsNotBounce SMALLINT NOT NULL,
FUniqID BIGINT NOT NULL,
OriginalURL STRING NOT NULL,
HID INT NOT NULL,
IsOldCounter SMALLINT NOT NULL,
IsEvent SMALLINT NOT NULL,
IsParameter SMALLINT NOT NULL,
DontCountHits SMALLINT NOT NULL,
WithHash SMALLINT NOT NULL,
HitColor CHAR NOT NULL,
LocalEventTime DateTimeV2 NOT NULL,
Age SMALLINT NOT NULL,
Sex SMALLINT NOT NULL,
Income SMALLINT NOT NULL,
Interests SMALLINT NOT NULL,
Robotness SMALLINT NOT NULL,
RemoteIP INT NOT NULL,
WindowName INT NOT NULL,
OpenerName INT NOT NULL,
HistoryLength SMALLINT NOT NULL,
BrowserLanguage STRING NOT NULL,
BrowserCountry STRING NOT NULL,
SocialNetwork STRING NOT NULL,
SocialAction STRING NOT NULL,
HTTPError SMALLINT NOT NULL,
SendTiming INT NOT NULL,
DNSTiming INT NOT NULL,
ConnectTiming INT NOT NULL,
ResponseStartTiming INT NOT NULL,
ResponseEndTiming INT NOT NULL,
FetchTiming INT NOT NULL,
SocialSourceNetworkID SMALLINT NOT NULL,
SocialSourcePage STRING NOT NULL,
ParamPrice BIGINT NOT NULL,
ParamOrderID STRING NOT NULL,
ParamCurrency STRING NOT NULL,
ParamCurrencyID SMALLINT NOT NULL,
OpenstatServiceName STRING NOT NULL,
OpenstatCampaignID STRING NOT NULL,
OpenstatAdID STRING NOT NULL,
OpenstatSourceID STRING NOT NULL,
UTMSource STRING NOT NULL,
UTMMedium STRING NOT NULL,
UTMCampaign STRING NOT NULL,
UTMContent STRING NOT NULL,
UTMTerm STRING NOT NULL,
FromTag STRING NOT NULL,
HasGCLID SMALLINT NOT NULL,
RefererHash BIGINT NOT NULL,
URLHash BIGINT NOT NULL,
CLID INT NOT NULL
)
DUPLICATE KEY (CounterID, EventDate, UserID, EventTime, WatchID)
DISTRIBUTED BY HASH(UserID) BUCKETS 16
PROPERTIES ( \"replication_num\"=\"1\");
"
echo "####load data"
if [[ ! -f "${data_home}"/hits.tsv ]] || [[ $(wc -c "${data_home}"/hits.tsv | awk '{print $1}') != '74807831229' ]]; then
cd "${data_home}"
wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz'
gzip -d hits.tsv.gz
if ${DEBUG:-false}; then head -n 10000 hits.tsv >hits.tsv.10000; fi
cd -
fi
data_file_name="${data_home}/hits.tsv"
if ${DEBUG:-false}; then data_file_name="${data_home}/hits.tsv.10000"; fi
echo "start loading ..."
START=$(date +%s)
curl --location-trusted \
-u root: \
-T "${data_file_name}" \
-H "label:hits_${START}" \
-H "columns: WatchID,JavaEnable,Title,GoodEvent,EventTime,EventDate,CounterID,ClientIP,RegionID,UserID,CounterClass,OS,UserAgent,URL,Referer,IsRefresh,RefererCategoryID,RefererRegionID,URLCategoryID,URLRegionID,ResolutionWidth,ResolutionHeight,ResolutionDepth,FlashMajor,FlashMinor,FlashMinor2,NetMajor,NetMinor,UserAgentMajor,UserAgentMinor,CookieEnable,JavascriptEnable,IsMobile,MobilePhone,MobilePhoneModel,Params,IPNetworkID,TraficSourceID,SearchEngineID,SearchPhrase,AdvEngineID,IsArtifical,WindowClientWidth,WindowClientHeight,ClientTimeZone,ClientEventTime,SilverlightVersion1,SilverlightVersion2,SilverlightVersion3,SilverlightVersion4,PageCharset,CodeVersion,IsLink,IsDownload,IsNotBounce,FUniqID,OriginalURL,HID,IsOldCounter,IsEvent,IsParameter,DontCountHits,WithHash,HitColor,LocalEventTime,Age,Sex,Income,Interests,Robotness,RemoteIP,WindowName,OpenerName,HistoryLength,BrowserLanguage,BrowserCountry,SocialNetwork,SocialAction,HTTPError,SendTiming,DNSTiming,ConnectTiming,ResponseStartTiming,ResponseEndTiming,FetchTiming,SocialSourceNetworkID,SocialSourcePage,ParamPrice,ParamOrderID,ParamCurrency,ParamCurrencyID,OpenstatServiceName,OpenstatCampaignID,OpenstatAdID,OpenstatSourceID,UTMSource,UTMMedium,UTMCampaign,UTMContent,UTMTerm,FromTag,HasGCLID,RefererHash,URLHash,CLID" \
"http://localhost:8030/api/${db_name}/hits/_stream_load"
END=$(date +%s)
LOADTIME=$(echo "${END} - ${START}" | bc)
echo "INFO: ClickBench Load data costs ${LOADTIME} seconds"
echo "${LOADTIME}" >clickbench_loadtime
if ! check_clickbench_table_rows "${db_name}"; then
exit 1
fi
data_reload="true"
fi
echo "#### 3. run clickbench query"
bash "${teamcity_build_checkoutDir}"/tools/clickbench-tools/run-clickbench-queries.sh
# result.csv is produced by run-clickbench-queries.sh
if ! check_clickbench_performance_result result.csv; then exit 1; fi
if ! check_clickbench_query_result; then exit 1; fi
cold_run_sum=$(awk -F ',' '{sum+=$2} END {print sum}' result.csv)
best_hot_run_sum=$(awk -F ',' '{if($3<$4){sum+=$3}else{sum+=$4}} END {print sum}' result.csv)
comment_body="ClickBench test result on commit ${commit_id:-}, data reload: ${data_reload:-"false"}
$(sed 's|,|\t|g' result.csv)
Total cold run time: ${cold_run_sum} s
Total hot run time: ${best_hot_run_sum} s"
echo "#### 4. comment result on clickbench"
comment_body=$(echo "${comment_body}" | sed -e ':a;N;$!ba;s/\t/\\t/g;s/\n/\\n/g') # escape every tab as \t and every newline as \n
create_an_issue_comment_clickbench "${pull_request_num:-}" "${comment_body}"
rm -f result.csv
echo "INFO: Restore session variables"
mysql -h"${host}" -P"${query_port}" -uroot -e "source ${backup_session_variables_file};"
rm -f "${backup_session_variables_file}"
)
exit_flag="$?"
echo "#### 5. check if need backup doris logs"
if [[ ${exit_flag} != "0" ]]; then
stop_doris
print_doris_fe_log
print_doris_be_log
if file_name=$(archive_doris_logs "${pull_request_num}_${commit_id}_doris_logs.tar.gz"); then
upload_doris_log_to_oss "${file_name}"
fi
fi
exit "${exit_flag}"

View File

@@ -0,0 +1,688 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Build Step: Command Line
: <<EOF
#!/bin/bash
export DEBUG=true
if [[ -f "${teamcity_build_checkoutDir:-}"/regression-test/pipeline/performance/run-load.sh ]]; then
cd "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/
bash -x run-load.sh
else
echo "Build Step file missing: regression-test/pipeline/performance/run-load.sh" && exit 1
fi
EOF
#####################################################################################
## run-load.sh content ##
# shellcheck source=/dev/null
# restart_doris, set_session_variable
source "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/doris-utils.sh
# shellcheck source=/dev/null
# create_an_issue_comment
source "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/github-utils.sh
# shellcheck source=/dev/null
# upload_doris_log_to_oss
source "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/oss-utils.sh
if ${DEBUG:-false}; then
pull_request_num="28431"
commit_id="5f5c4c80564c76ff4267fc4ce6a5408498ed1ab5"
fi
echo "#### Check env"
if [[ -z "${teamcity_build_checkoutDir}" ||
-z "${pull_request_num}" ||
-z "${commit_id}" ]]; then
echo "ERROR: env teamcity_build_checkoutDir or pull_request_num or commit_id not set"
exit 1
fi
# shellcheck source=/dev/null
source "$(bash "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/get-or-set-tmp-env.sh 'get')"
if ${skip_pipeline:=false}; then echo "INFO: skip build pipeline" && exit 0; else echo "INFO: no skip"; fi
echo "#### Run tpch test on Doris ####"
DORIS_HOME="${teamcity_build_checkoutDir}/output"
export DORIS_HOME
data_home="/data/clickbench/"
query_port=$(get_doris_conf_value "${DORIS_HOME}"/fe/conf/fe.conf query_port)
http_port=$(get_doris_conf_value "${DORIS_HOME}"/fe/conf/fe.conf http_port)
clt="mysql -h127.0.0.1 -P${query_port} -uroot "
DB="load_test_db"
stream_load_json_speed_threshold=${stream_load_json_speed_threshold:-100} # unit: MB/s
stream_load_orc_speed_threshold=${stream_load_orc_speed_threshold:-10} # unit: MB/s
stream_load_parquet_speed_threshold=${stream_load_parquet_speed_threshold:-10} # unit: MB/s
insert_into_select_speed_threshold=${insert_into_select_speed_threshold:-310} # unit: Krows/s
exit_flag=0
(
set -e
shopt -s inherit_errexit
stream_load_json() {
echo "#### create table"
ddl="
CREATE TABLE IF NOT EXISTS hits_json (
CounterID INT NOT NULL,
EventDate DateV2 NOT NULL,
UserID BIGINT NOT NULL,
EventTime DateTimeV2 NOT NULL,
WatchID BIGINT NOT NULL,
JavaEnable SMALLINT NOT NULL,
Title STRING NOT NULL,
GoodEvent SMALLINT NOT NULL,
ClientIP INT NOT NULL,
RegionID INT NOT NULL,
CounterClass SMALLINT NOT NULL,
OS SMALLINT NOT NULL,
UserAgent SMALLINT NOT NULL,
URL STRING NOT NULL,
Referer STRING NOT NULL,
IsRefresh SMALLINT NOT NULL,
RefererCategoryID SMALLINT NOT NULL,
RefererRegionID INT NOT NULL,
URLCategoryID SMALLINT NOT NULL,
URLRegionID INT NOT NULL,
ResolutionWidth SMALLINT NOT NULL,
ResolutionHeight SMALLINT NOT NULL,
ResolutionDepth SMALLINT NOT NULL,
FlashMajor SMALLINT NOT NULL,
FlashMinor SMALLINT NOT NULL,
FlashMinor2 STRING NOT NULL,
NetMajor SMALLINT NOT NULL,
NetMinor SMALLINT NOT NULL,
UserAgentMajor SMALLINT NOT NULL,
UserAgentMinor VARCHAR(255) NOT NULL,
CookieEnable SMALLINT NOT NULL,
JavascriptEnable SMALLINT NOT NULL,
IsMobile SMALLINT NOT NULL,
MobilePhone SMALLINT NOT NULL,
MobilePhoneModel STRING NOT NULL,
Params STRING NOT NULL,
IPNetworkID INT NOT NULL,
TraficSourceID SMALLINT NOT NULL,
SearchEngineID SMALLINT NOT NULL,
SearchPhrase STRING NOT NULL,
AdvEngineID SMALLINT NOT NULL,
IsArtifical SMALLINT NOT NULL,
WindowClientWidth SMALLINT NOT NULL,
WindowClientHeight SMALLINT NOT NULL,
ClientTimeZone SMALLINT NOT NULL,
ClientEventTime DateTimeV2 NOT NULL,
SilverlightVersion1 SMALLINT NOT NULL,
SilverlightVersion2 SMALLINT NOT NULL,
SilverlightVersion3 INT NOT NULL,
SilverlightVersion4 SMALLINT NOT NULL,
PageCharset STRING NOT NULL,
CodeVersion INT NOT NULL,
IsLink SMALLINT NOT NULL,
IsDownload SMALLINT NOT NULL,
IsNotBounce SMALLINT NOT NULL,
FUniqID BIGINT NOT NULL,
OriginalURL STRING NOT NULL,
HID INT NOT NULL,
IsOldCounter SMALLINT NOT NULL,
IsEvent SMALLINT NOT NULL,
IsParameter SMALLINT NOT NULL,
DontCountHits SMALLINT NOT NULL,
WithHash SMALLINT NOT NULL,
HitColor CHAR NOT NULL,
LocalEventTime DateTimeV2 NOT NULL,
Age SMALLINT NOT NULL,
Sex SMALLINT NOT NULL,
Income SMALLINT NOT NULL,
Interests SMALLINT NOT NULL,
Robotness SMALLINT NOT NULL,
RemoteIP INT NOT NULL,
WindowName INT NOT NULL,
OpenerName INT NOT NULL,
HistoryLength SMALLINT NOT NULL,
BrowserLanguage STRING NOT NULL,
BrowserCountry STRING NOT NULL,
SocialNetwork STRING NOT NULL,
SocialAction STRING NOT NULL,
HTTPError SMALLINT NOT NULL,
SendTiming INT NOT NULL,
DNSTiming INT NOT NULL,
ConnectTiming INT NOT NULL,
ResponseStartTiming INT NOT NULL,
ResponseEndTiming INT NOT NULL,
FetchTiming INT NOT NULL,
SocialSourceNetworkID SMALLINT NOT NULL,
SocialSourcePage STRING NOT NULL,
ParamPrice BIGINT NOT NULL,
ParamOrderID STRING NOT NULL,
ParamCurrency STRING NOT NULL,
ParamCurrencyID SMALLINT NOT NULL,
OpenstatServiceName STRING NOT NULL,
OpenstatCampaignID STRING NOT NULL,
OpenstatAdID STRING NOT NULL,
OpenstatSourceID STRING NOT NULL,
UTMSource STRING NOT NULL,
UTMMedium STRING NOT NULL,
UTMCampaign STRING NOT NULL,
UTMContent STRING NOT NULL,
UTMTerm STRING NOT NULL,
FromTag STRING NOT NULL,
HasGCLID SMALLINT NOT NULL,
RefererHash BIGINT NOT NULL,
URLHash BIGINT NOT NULL,
CLID INT NOT NULL
)
DUPLICATE KEY (CounterID, EventDate, UserID, EventTime, WatchID)
DISTRIBUTED BY HASH(UserID) BUCKETS 16
PROPERTIES (\"replication_num\"=\"1\");
"
${clt} -D"${DB}" -e"${ddl}"
echo "#### load data"
if [[ ! -d "${data_home}" ]]; then mkdir -p "${data_home}"; fi
if [[ ! -f "${data_home}"/hits.json.1000000 ]] || [[ $(wc -c "${data_home}"/hits.json.1000000 | awk '{print $1}') != '2358488459' ]]; then
cd "${data_home}"
wget --continue 'https://doris-build-1308700295.cos.ap-beijing.myqcloud.com/ClickBench/hits.json.1000000'
cd -
fi
ret=$(
curl --location-trusted \
-u root: \
-T "${data_home}/hits.json.1000000" \
-H "format:json" \
-H "label:hits_json" \
-H "read_json_by_line:true" \
-H 'jsonpaths:["$.WatchID","$.JavaEnable","$.Title","$.GoodEvent","$.EventTime","$.EventDate","$.CounterID","$.ClientIP","$.RegionID","$.UserID","$.CounterClass","$.OS","$.UserAgent","$.URL","$.Referer","$.IsRefresh","$.RefererCategoryID","$.RefererRegionID","$.URLCategoryID","$.URLRegionID","$.ResolutionWidth","$.ResolutionHeight","$.ResolutionDepth","$.FlashMajor","$.FlashMinor","$.FlashMinor2","$.NetMajor","$.NetMinor","$.UserAgentMajor","$.UserAgentMinor","$.CookieEnable","$.JavascriptEnable","$.IsMobile","$.MobilePhone","$.MobilePhoneModel","$.Params","$.IPNetworkID","$.TraficSourceID","$.SearchEngineID","$.SearchPhrase","$.AdvEngineID","$.IsArtifical","$.WindowClientWidth","$.WindowClientHeight","$.ClientTimeZone","$.ClientEventTime","$.SilverlightVersion1","$.SilverlightVersion2","$.SilverlightVersion3","$.SilverlightVersion4","$.PageCharset","$.CodeVersion","$.IsLink","$.IsDownload","$.IsNotBounce","$.FUniqID","$.OriginalURL","$.HID","$.IsOldCounter","$.IsEvent","$.IsParameter","$.DontCountHits","$.WithHash","$.HitColor","$.LocalEventTime","$.Age","$.Sex","$.Income","$.Interests","$.Robotness","$.RemoteIP","$.WindowName","$.OpenerName","$.HistoryLength","$.BrowserLanguage","$.BrowserCountry","$.SocialNetwork","$.SocialAction","$.HTTPError","$.SendTiming","$.DNSTiming","$.ConnectTiming","$.ResponseStartTiming","$.ResponseEndTiming","$.FetchTiming","$.SocialSourceNetworkID","$.SocialSourcePage","$.ParamPrice","$.ParamOrderID","$.ParamCurrency","$.ParamCurrencyID","$.OpenstatServiceName","$.OpenstatCampaignID","$.OpenstatAdID","$.OpenstatSourceID","$.UTMSource","$.UTMMedium","$.UTMCampaign","$.UTMContent","$.UTMTerm","$.FromTag","$.HasGCLID","$.RefererHash","$.URLHash","$.CLID"]' \
-H "columns: WatchID,JavaEnable,Title,GoodEvent,EventTime,EventDate,CounterID,ClientIP,RegionID,UserID,CounterClass,OS,UserAgent,URL,Referer,IsRefresh,RefererCategoryID,RefererRegionID,URLCategoryID,URLRegionID,ResolutionWidth,ResolutionHeight,ResolutionDepth,FlashMajor,FlashMinor,FlashMinor2,NetMajor,NetMinor,UserAgentMajor,UserAgentMinor,CookieEnable,JavascriptEnable,IsMobile,MobilePhone,MobilePhoneModel,Params,IPNetworkID,TraficSourceID,SearchEngineID,SearchPhrase,AdvEngineID,IsArtifical,WindowClientWidth,WindowClientHeight,ClientTimeZone,ClientEventTime,SilverlightVersion1,SilverlightVersion2,SilverlightVersion3,SilverlightVersion4,PageCharset,CodeVersion,IsLink,IsDownload,IsNotBounce,FUniqID,OriginalURL,HID,IsOldCounter,IsEvent,IsParameter,DontCountHits,WithHash,HitColor,LocalEventTime,Age,Sex,Income,Interests,Robotness,RemoteIP,WindowName,OpenerName,HistoryLength,BrowserLanguage,BrowserCountry,SocialNetwork,SocialAction,HTTPError,SendTiming,DNSTiming,ConnectTiming,ResponseStartTiming,ResponseEndTiming,FetchTiming,SocialSourceNetworkID,SocialSourcePage,ParamPrice,ParamOrderID,ParamCurrency,ParamCurrencyID,OpenstatServiceName,OpenstatCampaignID,OpenstatAdID,OpenstatSourceID,UTMSource,UTMMedium,UTMCampaign,UTMContent,UTMTerm,FromTag,HasGCLID,RefererHash,URLHash,CLID" \
"http://${FE_HOST:-127.0.0.1}:${http_port}/api/${DB}/hits_json/_stream_load"
)
sleep 5
if [[ $(${clt} -D"${DB}" -e"select count(*) from hits_json" | sed -n '2p') != 1000000 ]]; then echo "ERROR: check load failed" && return 1; fi
echo "#### record load test result"
stream_load_json_size=$(echo "${ret}" | jq '.LoadBytes')
stream_load_json_time=$(printf "%.0f" "$(echo "scale=1;$(echo "${ret}" | jq '.LoadTimeMs')/1000" | bc)")
stream_load_json_speed=$(echo "${stream_load_json_size} / 1024 / 1024 / ${stream_load_json_time}" | bc)
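# Worked example with illustrative numbers: LoadBytes=2358488459 (~2249 MB)
# loaded in 20 s gives 2358488459 / 1024 / 1024 / 20 ≈ 112 MB/s. Note that bc
# performs integer division here, so the computed speed is truncated.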
export stream_load_json_size
export stream_load_json_time
export stream_load_json_speed
}
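# --- illustrative sketch (editor's note, not part of this commit) ---
# The stream_load_* functions above validate the load via a row-count query only;
# the stream load response itself could also be checked, since Doris reports the
# outcome in the JSON body captured in ${ret}. check_stream_load_ret is a
# hypothetical helper name:
check_stream_load_ret() {
    # expects JSON like {"Status":"Success","LoadBytes":...,"LoadTimeMs":...}
    local ret="$1"
    if [[ "$(echo "${ret}" | jq -r '.Status')" != "Success" ]]; then
        echo "ERROR: stream load failed: ${ret}" && return 1
    fi
}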
stream_load_orc() {
echo "#### create table"
ddl="
CREATE TABLE IF NOT EXISTS hits_orc (
CounterID INT NOT NULL,
EventDate INT NOT NULL,
UserID BIGINT NOT NULL,
EventTime INT NOT NULL,
WatchID BIGINT NOT NULL,
JavaEnable SMALLINT NOT NULL,
Title STRING NOT NULL,
GoodEvent SMALLINT NOT NULL,
ClientIP INT NOT NULL,
RegionID INT NOT NULL,
CounterClass SMALLINT NOT NULL,
OS SMALLINT NOT NULL,
UserAgent SMALLINT NOT NULL,
URL STRING NOT NULL,
Referer STRING NOT NULL,
IsRefresh SMALLINT NOT NULL,
RefererCategoryID SMALLINT NOT NULL,
RefererRegionID INT NOT NULL,
URLCategoryID SMALLINT NOT NULL,
URLRegionID INT NOT NULL,
ResolutionWidth SMALLINT NOT NULL,
ResolutionHeight SMALLINT NOT NULL,
ResolutionDepth SMALLINT NOT NULL,
FlashMajor SMALLINT NOT NULL,
FlashMinor SMALLINT NOT NULL,
FlashMinor2 STRING NOT NULL,
NetMajor SMALLINT NOT NULL,
NetMinor SMALLINT NOT NULL,
UserAgentMajor SMALLINT NOT NULL,
UserAgentMinor VARCHAR(255) NOT NULL,
CookieEnable SMALLINT NOT NULL,
JavascriptEnable SMALLINT NOT NULL,
IsMobile SMALLINT NOT NULL,
MobilePhone SMALLINT NOT NULL,
MobilePhoneModel STRING NOT NULL,
Params STRING NOT NULL,
IPNetworkID INT NOT NULL,
TraficSourceID SMALLINT NOT NULL,
SearchEngineID SMALLINT NOT NULL,
SearchPhrase STRING NOT NULL,
AdvEngineID SMALLINT NOT NULL,
IsArtifical SMALLINT NOT NULL,
WindowClientWidth SMALLINT NOT NULL,
WindowClientHeight SMALLINT NOT NULL,
ClientTimeZone SMALLINT NOT NULL,
ClientEventTime INT NOT NULL,
SilverlightVersion1 SMALLINT NOT NULL,
SilverlightVersion2 SMALLINT NOT NULL,
SilverlightVersion3 INT NOT NULL,
SilverlightVersion4 SMALLINT NOT NULL,
PageCharset STRING NOT NULL,
CodeVersion INT NOT NULL,
IsLink SMALLINT NOT NULL,
IsDownload SMALLINT NOT NULL,
IsNotBounce SMALLINT NOT NULL,
FUniqID BIGINT NOT NULL,
OriginalURL STRING NOT NULL,
HID INT NOT NULL,
IsOldCounter SMALLINT NOT NULL,
IsEvent SMALLINT NOT NULL,
IsParameter SMALLINT NOT NULL,
DontCountHits SMALLINT NOT NULL,
WithHash SMALLINT NOT NULL,
HitColor CHAR NOT NULL,
LocalEventTime INT NOT NULL,
Age SMALLINT NOT NULL,
Sex SMALLINT NOT NULL,
Income SMALLINT NOT NULL,
Interests SMALLINT NOT NULL,
Robotness SMALLINT NOT NULL,
RemoteIP INT NOT NULL,
WindowName INT NOT NULL,
OpenerName INT NOT NULL,
HistoryLength SMALLINT NOT NULL,
BrowserLanguage STRING NOT NULL,
BrowserCountry STRING NOT NULL,
SocialNetwork STRING NOT NULL,
SocialAction STRING NOT NULL,
HTTPError SMALLINT NOT NULL,
SendTiming INT NOT NULL,
DNSTiming INT NOT NULL,
ConnectTiming INT NOT NULL,
ResponseStartTiming INT NOT NULL,
ResponseEndTiming INT NOT NULL,
FetchTiming INT NOT NULL,
SocialSourceNetworkID SMALLINT NOT NULL,
SocialSourcePage STRING NOT NULL,
ParamPrice BIGINT NOT NULL,
ParamOrderID STRING NOT NULL,
ParamCurrency STRING NOT NULL,
ParamCurrencyID SMALLINT NOT NULL,
OpenstatServiceName STRING NOT NULL,
OpenstatCampaignID STRING NOT NULL,
OpenstatAdID STRING NOT NULL,
OpenstatSourceID STRING NOT NULL,
UTMSource STRING NOT NULL,
UTMMedium STRING NOT NULL,
UTMCampaign STRING NOT NULL,
UTMContent STRING NOT NULL,
UTMTerm STRING NOT NULL,
FromTag STRING NOT NULL,
HasGCLID SMALLINT NOT NULL,
RefererHash BIGINT NOT NULL,
URLHash BIGINT NOT NULL,
CLID INT NOT NULL
)
DUPLICATE KEY (CounterID, EventDate, UserID, EventTime, WatchID)
DISTRIBUTED BY HASH(UserID) BUCKETS 16
PROPERTIES (\"replication_num\"=\"1\");
"
${clt} -D"${DB}" -e"${ddl}"
echo "#### load data"
if [[ ! -d "${data_home}" ]]; then mkdir -p "${data_home}"; fi
if [[ ! -f "${data_home}"/hits_0.orc ]] || [[ $(wc -c "${data_home}"/hits_0.orc | awk '{print $1}') != '1101869774' ]]; then
cd "${data_home}"
wget --continue 'https://doris-build-1308700295.cos.ap-beijing.myqcloud.com/ClickBench/hits_0.orc'
cd -
fi
ret=$(
curl --location-trusted \
-u root: \
-T "${data_home}/hits_0.orc" \
-H "format:orc" \
-H "label:hits_0_orc" \
-H "columns: watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid,isartifical,windowclientwidth,windowclientheight,clienttimezone,clienteventtime,silverlightversion1,silverlightversion2,silverlightversion3,silverlightversion4,pagecharset,codeversion,islink,isdownload,isnotbounce,funiqid,originalurl,hid,isoldcounter,isevent,isparameter,dontcounthits,withhash,hitcolor,localeventtime,age,sex,income,interests,robotness,remoteip,windowname,openername,historylength,browserlanguage,browsercountry,socialnetwork,socialaction,httperror,sendtiming,dnstiming,connecttiming,responsestarttiming,responseendtiming,fetchtiming,socialsourcenetworkid,socialsourcepage,paramprice,paramorderid,paramcurrency,paramcurrencyid,openstatservicename,openstatcampaignid,openstatadid,openstatsourceid,utmsource,utmmedium,utmcampaign,utmcontent,utmterm,fromtag,hasgclid,refererhash,urlhash,clid" \
"http://${FE_HOST:-127.0.0.1}:${http_port}/api/${DB}/hits_orc/_stream_load"
)
sleep 5
if [[ $(${clt} -D"${DB}" -e"select count(*) from hits_orc" | sed -n '2p') != 8800160 ]]; then echo "ERROR: check load failed" && return 1; fi
echo "#### record load test result"
stream_load_orc_size=$(echo "${ret}" | jq '.LoadBytes')
stream_load_orc_time=$(printf "%.0f" "$(echo "scale=1;$(echo "${ret}" | jq '.LoadTimeMs')/1000" | bc)")
stream_load_orc_speed=$(echo "${stream_load_orc_size} / 1024 / 1024 / ${stream_load_orc_time}" | bc)
export stream_load_orc_size
export stream_load_orc_time
export stream_load_orc_speed
}
stream_load_parquet() {
echo "#### create table"
ddl="
CREATE TABLE IF NOT EXISTS hits_parquet (
CounterID INT NOT NULL,
EventDate INT NOT NULL,
UserID BIGINT NOT NULL,
EventTime INT NOT NULL,
WatchID BIGINT NOT NULL,
JavaEnable SMALLINT NOT NULL,
Title STRING NOT NULL,
GoodEvent SMALLINT NOT NULL,
ClientIP INT NOT NULL,
RegionID INT NOT NULL,
CounterClass SMALLINT NOT NULL,
OS SMALLINT NOT NULL,
UserAgent SMALLINT NOT NULL,
URL STRING NOT NULL,
Referer STRING NOT NULL,
IsRefresh SMALLINT NOT NULL,
RefererCategoryID SMALLINT NOT NULL,
RefererRegionID INT NOT NULL,
URLCategoryID SMALLINT NOT NULL,
URLRegionID INT NOT NULL,
ResolutionWidth SMALLINT NOT NULL,
ResolutionHeight SMALLINT NOT NULL,
ResolutionDepth SMALLINT NOT NULL,
FlashMajor SMALLINT NOT NULL,
FlashMinor SMALLINT NOT NULL,
FlashMinor2 STRING NOT NULL,
NetMajor SMALLINT NOT NULL,
NetMinor SMALLINT NOT NULL,
UserAgentMajor SMALLINT NOT NULL,
UserAgentMinor VARCHAR(255) NOT NULL,
CookieEnable SMALLINT NOT NULL,
JavascriptEnable SMALLINT NOT NULL,
IsMobile SMALLINT NOT NULL,
MobilePhone SMALLINT NOT NULL,
MobilePhoneModel STRING NOT NULL,
Params STRING NOT NULL,
IPNetworkID INT NOT NULL,
TraficSourceID SMALLINT NOT NULL,
SearchEngineID SMALLINT NOT NULL,
SearchPhrase STRING NOT NULL,
AdvEngineID SMALLINT NOT NULL,
IsArtifical SMALLINT NOT NULL,
WindowClientWidth SMALLINT NOT NULL,
WindowClientHeight SMALLINT NOT NULL,
ClientTimeZone SMALLINT NOT NULL,
ClientEventTime INT NOT NULL,
SilverlightVersion1 SMALLINT NOT NULL,
SilverlightVersion2 SMALLINT NOT NULL,
SilverlightVersion3 INT NOT NULL,
SilverlightVersion4 SMALLINT NOT NULL,
PageCharset STRING NOT NULL,
CodeVersion INT NOT NULL,
IsLink SMALLINT NOT NULL,
IsDownload SMALLINT NOT NULL,
IsNotBounce SMALLINT NOT NULL,
FUniqID BIGINT NOT NULL,
OriginalURL STRING NOT NULL,
HID INT NOT NULL,
IsOldCounter SMALLINT NOT NULL,
IsEvent SMALLINT NOT NULL,
IsParameter SMALLINT NOT NULL,
DontCountHits SMALLINT NOT NULL,
WithHash SMALLINT NOT NULL,
HitColor CHAR NOT NULL,
LocalEventTime INT NOT NULL,
Age SMALLINT NOT NULL,
Sex SMALLINT NOT NULL,
Income SMALLINT NOT NULL,
Interests SMALLINT NOT NULL,
Robotness SMALLINT NOT NULL,
RemoteIP INT NOT NULL,
WindowName INT NOT NULL,
OpenerName INT NOT NULL,
HistoryLength SMALLINT NOT NULL,
BrowserLanguage STRING NOT NULL,
BrowserCountry STRING NOT NULL,
SocialNetwork STRING NOT NULL,
SocialAction STRING NOT NULL,
HTTPError SMALLINT NOT NULL,
SendTiming INT NOT NULL,
DNSTiming INT NOT NULL,
ConnectTiming INT NOT NULL,
ResponseStartTiming INT NOT NULL,
ResponseEndTiming INT NOT NULL,
FetchTiming INT NOT NULL,
SocialSourceNetworkID SMALLINT NOT NULL,
SocialSourcePage STRING NOT NULL,
ParamPrice BIGINT NOT NULL,
ParamOrderID STRING NOT NULL,
ParamCurrency STRING NOT NULL,
ParamCurrencyID SMALLINT NOT NULL,
OpenstatServiceName STRING NOT NULL,
OpenstatCampaignID STRING NOT NULL,
OpenstatAdID STRING NOT NULL,
OpenstatSourceID STRING NOT NULL,
UTMSource STRING NOT NULL,
UTMMedium STRING NOT NULL,
UTMCampaign STRING NOT NULL,
UTMContent STRING NOT NULL,
UTMTerm STRING NOT NULL,
FromTag STRING NOT NULL,
HasGCLID SMALLINT NOT NULL,
RefererHash BIGINT NOT NULL,
URLHash BIGINT NOT NULL,
CLID INT NOT NULL
)
DUPLICATE KEY (CounterID, EventDate, UserID, EventTime, WatchID)
DISTRIBUTED BY HASH(UserID) BUCKETS 16
PROPERTIES (\"replication_num\"=\"1\");
"
${clt} -D"${DB}" -e"${ddl}"
echo "#### load data"
stream_load_parquet_size=0
stream_load_parquet_time=0
if [[ ! -d "${data_home}" ]]; then mkdir -p "${data_home}"; fi
declare -A file_sizes=(['hits_0.parquet']=122446530 ['hits_1.parquet']=174965044 ['hits_2.parquet']=230595491 ['hits_3.parquet']=192507052 ['hits_4.parquet']=140929275)
for file_name in ${!file_sizes[*]}; do
size_expect=${file_sizes[${file_name}]}
if [[ ! -f "${data_home}/${file_name}" ]] || [[ $(wc -c "${data_home}/${file_name}" | awk '{print $1}') != "${size_expect}" ]]; then
cd "${data_home}" && rm -f "${file_name}" && wget --continue "https://doris-build-1308700295.cos.ap-beijing.myqcloud.com/ClickBench/${file_name}" && cd - || exit
fi
if ret=$(
curl --location-trusted \
-u root: \
-T "${data_home}/${file_name}" \
-H "format:parquet" \
-H "label:${file_name//./_}" \
-H "columns: watchid,javaenable,title,goodevent,eventtime,eventdate,counterid,clientip,regionid,userid,counterclass,os,useragent,url,referer,isrefresh,referercategoryid,refererregionid,urlcategoryid,urlregionid,resolutionwidth,resolutionheight,resolutiondepth,flashmajor,flashminor,flashminor2,netmajor,netminor,useragentmajor,useragentminor,cookieenable,javascriptenable,ismobile,mobilephone,mobilephonemodel,params,ipnetworkid,traficsourceid,searchengineid,searchphrase,advengineid,isartifical,windowclientwidth,windowclientheight,clienttimezone,clienteventtime,silverlightversion1,silverlightversion2,silverlightversion3,silverlightversion4,pagecharset,codeversion,islink,isdownload,isnotbounce,funiqid,originalurl,hid,isoldcounter,isevent,isparameter,dontcounthits,withhash,hitcolor,localeventtime,age,sex,income,interests,robotness,remoteip,windowname,openername,historylength,browserlanguage,browsercountry,socialnetwork,socialaction,httperror,sendtiming,dnstiming,connecttiming,responsestarttiming,responseendtiming,fetchtiming,socialsourcenetworkid,socialsourcepage,paramprice,paramorderid,paramcurrency,paramcurrencyid,openstatservicename,openstatcampaignid,openstatadid,openstatsourceid,utmsource,utmmedium,utmcampaign,utmcontent,utmterm,fromtag,hasgclid,refererhash,urlhash,clid" \
"http://${FE_HOST:-127.0.0.1}:${http_port}/api/${DB}/hits_parquet/_stream_load"
); then
_stream_load_parquet_size=$(echo "${ret}" | jq '.LoadBytes')
_stream_load_parquet_time=$(printf "%.0f" "$(echo "scale=1;$(echo "${ret}" | jq '.LoadTimeMs')/1000" | bc)")
stream_load_parquet_size=$((stream_load_parquet_size + _stream_load_parquet_size))
stream_load_parquet_time=$((stream_load_parquet_time + _stream_load_parquet_time))
fi
done
sleep 5
if [[ $(${clt} -D"${DB}" -e"select count(*) from hits_parquet" | sed -n '2p') != 5000000 ]]; then echo "ERROR: check load failed" && return 1; fi
echo "#### record load test result"
stream_load_parquet_speed=$(echo "${stream_load_parquet_size} / 1024 / 1024 / ${stream_load_parquet_time}" | bc)
export stream_load_parquet_size
export stream_load_parquet_time
export stream_load_parquet_speed
}
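# illustrative note (editor's): the MB/s division above can divide by zero in bc
# when a load finishes in under ~0.5s, because printf "%.0f" rounds the elapsed
# time down to 0; a defensive variant could clamp the denominator first, e.g.
#   if [[ "${stream_load_parquet_time}" -eq 0 ]]; then stream_load_parquet_time=1; fi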
insert_into_select() {
echo "#### create table"
ddl="
CREATE TABLE IF NOT EXISTS hits_insert_into_select (
CounterID INT NOT NULL,
EventDate INT NOT NULL,
UserID BIGINT NOT NULL,
EventTime INT NOT NULL,
WatchID BIGINT NOT NULL,
JavaEnable SMALLINT NOT NULL,
Title STRING NOT NULL,
GoodEvent SMALLINT NOT NULL,
ClientIP INT NOT NULL,
RegionID INT NOT NULL,
CounterClass SMALLINT NOT NULL,
OS SMALLINT NOT NULL,
UserAgent SMALLINT NOT NULL,
URL STRING NOT NULL,
Referer STRING NOT NULL,
IsRefresh SMALLINT NOT NULL,
RefererCategoryID SMALLINT NOT NULL,
RefererRegionID INT NOT NULL,
URLCategoryID SMALLINT NOT NULL,
URLRegionID INT NOT NULL,
ResolutionWidth SMALLINT NOT NULL,
ResolutionHeight SMALLINT NOT NULL,
ResolutionDepth SMALLINT NOT NULL,
FlashMajor SMALLINT NOT NULL,
FlashMinor SMALLINT NOT NULL,
FlashMinor2 STRING NOT NULL,
NetMajor SMALLINT NOT NULL,
NetMinor SMALLINT NOT NULL,
UserAgentMajor SMALLINT NOT NULL,
UserAgentMinor VARCHAR(255) NOT NULL,
CookieEnable SMALLINT NOT NULL,
JavascriptEnable SMALLINT NOT NULL,
IsMobile SMALLINT NOT NULL,
MobilePhone SMALLINT NOT NULL,
MobilePhoneModel STRING NOT NULL,
Params STRING NOT NULL,
IPNetworkID INT NOT NULL,
TraficSourceID SMALLINT NOT NULL,
SearchEngineID SMALLINT NOT NULL,
SearchPhrase STRING NOT NULL,
AdvEngineID SMALLINT NOT NULL,
IsArtifical SMALLINT NOT NULL,
WindowClientWidth SMALLINT NOT NULL,
WindowClientHeight SMALLINT NOT NULL,
ClientTimeZone SMALLINT NOT NULL,
ClientEventTime INT NOT NULL,
SilverlightVersion1 SMALLINT NOT NULL,
SilverlightVersion2 SMALLINT NOT NULL,
SilverlightVersion3 INT NOT NULL,
SilverlightVersion4 SMALLINT NOT NULL,
PageCharset STRING NOT NULL,
CodeVersion INT NOT NULL,
IsLink SMALLINT NOT NULL,
IsDownload SMALLINT NOT NULL,
IsNotBounce SMALLINT NOT NULL,
FUniqID BIGINT NOT NULL,
OriginalURL STRING NOT NULL,
HID INT NOT NULL,
IsOldCounter SMALLINT NOT NULL,
IsEvent SMALLINT NOT NULL,
IsParameter SMALLINT NOT NULL,
DontCountHits SMALLINT NOT NULL,
WithHash SMALLINT NOT NULL,
HitColor CHAR NOT NULL,
LocalEventTime INT NOT NULL,
Age SMALLINT NOT NULL,
Sex SMALLINT NOT NULL,
Income SMALLINT NOT NULL,
Interests SMALLINT NOT NULL,
Robotness SMALLINT NOT NULL,
RemoteIP INT NOT NULL,
WindowName INT NOT NULL,
OpenerName INT NOT NULL,
HistoryLength SMALLINT NOT NULL,
BrowserLanguage STRING NOT NULL,
BrowserCountry STRING NOT NULL,
SocialNetwork STRING NOT NULL,
SocialAction STRING NOT NULL,
HTTPError SMALLINT NOT NULL,
SendTiming INT NOT NULL,
DNSTiming INT NOT NULL,
ConnectTiming INT NOT NULL,
ResponseStartTiming INT NOT NULL,
ResponseEndTiming INT NOT NULL,
FetchTiming INT NOT NULL,
SocialSourceNetworkID SMALLINT NOT NULL,
SocialSourcePage STRING NOT NULL,
ParamPrice BIGINT NOT NULL,
ParamOrderID STRING NOT NULL,
ParamCurrency STRING NOT NULL,
ParamCurrencyID SMALLINT NOT NULL,
OpenstatServiceName STRING NOT NULL,
OpenstatCampaignID STRING NOT NULL,
OpenstatAdID STRING NOT NULL,
OpenstatSourceID STRING NOT NULL,
UTMSource STRING NOT NULL,
UTMMedium STRING NOT NULL,
UTMCampaign STRING NOT NULL,
UTMContent STRING NOT NULL,
UTMTerm STRING NOT NULL,
FromTag STRING NOT NULL,
HasGCLID SMALLINT NOT NULL,
RefererHash BIGINT NOT NULL,
URLHash BIGINT NOT NULL,
CLID INT NOT NULL
)
DUPLICATE KEY (CounterID, EventDate, UserID, EventTime, WatchID)
DISTRIBUTED BY HASH(UserID) BUCKETS 16
PROPERTIES (\"replication_num\"=\"1\");
"
${clt} -D"${DB}" -e"${ddl}"
echo "#### load data by INSERT INTO SELECT"
insert_into_select_time=0
insert_into_select_rows=10000000
start=$(date +%s%3N)
if ${clt} -e"insert into ${DB}.hits_insert_into_select select * from clickbench.hits limit ${insert_into_select_rows};"; then
end=$(date +%s%3N)
insert_into_select_time=$(echo "scale=1; (${end} - ${start})/1000" | bc)
else
echo "ERROR: failed to insert into ${DB}.hits_insert_into_select select * from clickbench.hits limit ${insert_into_select_rows};"
return 1
fi
sleep 5
if [[ $(${clt} -D"${DB}" -e"select count(*) from hits_insert_into_select" | sed -n '2p') != "${insert_into_select_rows}" ]]; then echo "ERROR: check load failed" && return 1; fi
echo "#### record load test result"
insert_into_select_speed=$(echo "${insert_into_select_rows} / 1000 / ${insert_into_select_time}" | bc)
export insert_into_select_rows
export insert_into_select_time
export insert_into_select_speed
}
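# worked example (editor's note): insert_into_select_speed is in thousands of rows
# per second, i.e. rows / 1000 / seconds; 10000000 rows in 20.0s gives
#   echo "10000000 / 1000 / 20.0" | bc   # -> 500 (K rows/s)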
echo "#### 1. Restart doris"
if ! restart_doris; then echo "ERROR: Restart doris failed" && exit 1; fi
echo "#### 3. run streamload test"
set_session_variable runtime_filter_mode global
${clt} -e "DROP DATABASE IF EXISTS ${DB}" && sleep 1
${clt} -e "CREATE DATABASE IF NOT EXISTS ${DB}" && sleep 5
if ! stream_load_json; then exit 1; fi
if ! stream_load_orc; then exit 1; fi
if ! stream_load_parquet; then exit 1; fi
if ! insert_into_select; then exit 1; fi
if ! check_load_performance; then exit 1; fi
echo "#### 4. comment result on tpch"
comment_body="Load test result on commit ${commit_id:-} with default conf and session variables"
if [[ -n ${stream_load_json_time} ]]; then comment_body="${comment_body}\n stream load json: ${stream_load_json_time} seconds loaded ${stream_load_json_size} Bytes, about ${stream_load_json_speed} MB/s"; fi
if [[ -n ${stream_load_orc_time} ]]; then comment_body="${comment_body}\n stream load orc: ${stream_load_orc_time} seconds loaded ${stream_load_orc_size} Bytes, about ${stream_load_orc_speed} MB/s"; fi
if [[ -n ${stream_load_parquet_time} ]]; then comment_body="${comment_body}\n stream load parquet: ${stream_load_parquet_time} seconds loaded ${stream_load_parquet_size} Bytes, about ${stream_load_parquet_speed} MB/s"; fi
if [[ -n ${insert_into_select_time} ]]; then comment_body="${comment_body}\n insert into select: ${insert_into_select_time} seconds inserted ${insert_into_select_rows} Rows, about ${insert_into_select_speed}K ops/s"; fi
comment_body=$(echo "${comment_body}" | sed -e ':a;N;$!ba;s/\t/\\t/g;s/\n/\\n/g') # replace all Tab characters with \t and all newlines with \n
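# for illustration, the sed above collapses the multi-line body into one line with
# literal \n escapes, e.g.
#   printf 'line1\nline2' | sed -e ':a;N;$!ba;s/\t/\\t/g;s/\n/\\n/g'   # -> line1\nline2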
create_an_issue_comment_tpch "${pull_request_num:-}" "${comment_body}"
)
exit_flag="$?"
echo "#### 5. check if need backup doris logs"
if [[ ${exit_flag} != "0" ]]; then
print_doris_fe_log
print_doris_be_log
if file_name=$(archive_doris_logs "${pull_request_num}_${commit_id}_doris_logs.tar.gz"); then
upload_doris_log_to_oss "${file_name}"
fi
fi
exit "${exit_flag}"

View File

@ -0,0 +1,156 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Build Step: Command Line
: <<EOF
#!/bin/bash
export DEBUG=true
if [[ -f "${teamcity_build_checkoutDir:-}"/regression-test/pipeline/performance/run-tpcds.sh ]]; then
cd "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/
bash -x run-tpcds.sh
else
echo "Build Step file missing: regression-test/pipeline/performance/run-tpcds.sh" && exit 1
fi
EOF
#####################################################################################
## run-tpcds.sh content ##
# shellcheck source=/dev/null
# check_tpcds_table_rows, restart_doris, set_session_variable, check_tpcds_result
source "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/doris-utils.sh
# shellcheck source=/dev/null
# create_an_issue_comment
source "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/github-utils.sh
# shellcheck source=/dev/null
# upload_doris_log_to_oss
source "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/oss-utils.sh
if ${DEBUG:-false}; then
pull_request_num="28431"
commit_id="5f5c4c80564c76ff4267fc4ce6a5408498ed1ab5"
SF="1"
fi
echo "#### Check env"
if [[ -z "${teamcity_build_checkoutDir}" ||
-z "${pull_request_num}" ||
-z "${commit_id}" ]]; then
echo "ERROR: env teamcity_build_checkoutDir or pull_request_num or commit_id not set"
exit 1
fi
# shellcheck source=/dev/null
source "$(bash "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/get-or-set-tmp-env.sh 'get')"
if ${skip_pipeline:=false}; then echo "INFO: skip build pipeline" && exit 0; else echo "INFO: no skip"; fi
echo "#### Run tpcds test on Doris ####"
DORIS_HOME="${teamcity_build_checkoutDir}/output"
export DORIS_HOME
cold_run_time_threshold=${cold_run_time_threshold:-600000} # ms
hot_run_time_threshold=${hot_run_time_threshold:-240000} # ms
exit_flag=0
(
set -e
shopt -s inherit_errexit
echo "#### 1. Restart doris"
if ! restart_doris; then echo "ERROR: Restart doris failed" && exit 1; fi
echo "#### 2. check if need to load data"
SF=${SF:-"100"} # SCALE FACTOR
TPCDS_DATA_DIR="/data/tpcds/sf_${SF}" # no / at the end
TPCDS_DATA_DIR_LINK="${teamcity_build_checkoutDir}"/tools/tpcds-tools/bin/tpcds-data # no / at the end
db_name="tpcds_sf${SF}"
sed -i "s|^export DB=.*$|export DB='${db_name}'|g" \
"${teamcity_build_checkoutDir}"/tools/tpcds-tools/conf/doris-cluster.conf
if ! check_tpcds_table_rows "${db_name}" "${SF}"; then
echo "INFO: need to load tpcds-sf${SF} data"
if ${force_load_data:-false}; then echo "INFO: force_load_data is true"; else echo "ERROR: force_load_data is false" && exit 1; fi
# prepare data
mkdir -p "${TPCDS_DATA_DIR}"
(
cd "${TPCDS_DATA_DIR}" || exit 1
declare -A table_file_count
if [[ ${SF} == "1" ]]; then
table_file_count=(['income_band']=1 ['ship_mode']=1 ['warehouse']=1 ['reason']=1 ['web_site']=1 ['call_center']=1 ['store']=1 ['promotion']=1 ['household_demographics']=1 ['web_page']=1 ['catalog_page']=1 ['time_dim']=1 ['date_dim']=1 ['item']=1 ['customer_demographics']=10 ['customer_address']=1 ['customer']=1 ['web_returns']=1 ['catalog_returns']=1 ['store_returns']=1 ['inventory']=10 ['web_sales']=1 ['catalog_sales']=1 ['store_sales']=1)
elif [[ ${SF} == "100" ]]; then
table_file_count=(['income_band']=1 ['ship_mode']=1 ['warehouse']=1 ['reason']=1 ['web_site']=1 ['call_center']=1 ['store']=1 ['promotion']=1 ['household_demographics']=1 ['web_page']=1 ['catalog_page']=1 ['time_dim']=1 ['date_dim']=1 ['item']=1 ['customer_demographics']=10 ['customer_address']=10 ['customer']=10 ['web_returns']=10 ['catalog_returns']=10 ['store_returns']=10 ['inventory']=10 ['web_sales']=10 ['catalog_sales']=10 ['store_sales']=10)
fi
for table_name in ${!table_file_count[*]}; do
if [[ ${table_file_count[${table_name}]} -eq 1 ]]; then
url="https://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/tpcds/sf${SF}/${table_name}_1_10.dat.gz"
if [[ -f ${table_name}_1_10.dat ]]; then continue; fi
if ! wget --continue -t3 -q "${url}"; then echo "ERROR: wget --continue ${url}" && exit 1; fi
if ! gzip -d "${table_name}_1_10.dat.gz"; then echo "ERROR: gzip -d ${table_name}_1_10.dat.gz" && exit 1; fi
elif [[ ${table_file_count[${table_name}]} -eq 10 ]]; then
(
for i in {1..10}; do
url="https://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/tpcds/sf${SF}/${table_name}_${i}_10.dat.gz"
if [[ -f ${table_name}_${i}_10.dat ]]; then continue; fi
if ! wget --continue -t3 -q "${url}"; then echo "ERROR: wget --continue ${url}" && exit 1; fi
if ! gzip -d "${table_name}_${i}_10.dat.gz"; then echo "ERROR: gzip -d ${table_name}_${i}_10.dat.gz" && exit 1; fi
done
) &
wait
fi
done
)
# create table and load data
bash "${teamcity_build_checkoutDir}"/tools/tpcds-tools/bin/create-tpcds-tables.sh -s "${SF}"
rm -rf "${TPCDS_DATA_DIR_LINK}"
ln -s "${TPCDS_DATA_DIR}" "${TPCDS_DATA_DIR_LINK}"
bash "${teamcity_build_checkoutDir}"/tools/tpcds-tools/bin/load-tpcds-data.sh -c 10
if ! check_tpcds_table_rows "${db_name}" "${SF}"; then
exit 1
fi
echo "INFO: sleep 10min to wait compaction done"
if ${DEBUG:-false}; then sleep 10s; else sleep 10m; fi
data_reload="true"
fi
echo "#### 3. run tpcds-sf${SF} query"
set_session_variable runtime_filter_mode global
bash "${teamcity_build_checkoutDir}"/tools/tpcds-tools/bin/run-tpcds-queries.sh -s "${SF}" | tee "${teamcity_build_checkoutDir}"/run-tpcds-queries.log
if ! check_tpcds_result "${teamcity_build_checkoutDir}"/run-tpcds-queries.log; then exit 1; fi
line_end=$(sed -n '/^Total hot run time/=' "${teamcity_build_checkoutDir}"/run-tpcds-queries.log)
line_begin=$((line_end - 100))
comment_body="TPC-DS sf${SF} test result on commit ${commit_id:-}, data reload: ${data_reload:-"false"}
run tpcds-sf${SF} query with default conf and session variables
$(sed -n "${line_begin},${line_end}p" "${teamcity_build_checkoutDir}"/run-tpcds-queries.log)"
echo "#### 4. comment result on tpcds"
comment_body=$(echo "${comment_body}" | sed -e ':a;N;$!ba;s/\t/\\t/g;s/\n/\\n/g') # replace all Tab characters with \t and all newlines with \n
create_an_issue_comment_tpcds "${pull_request_num:-}" "${comment_body}"
rm -f result.csv
)
exit_flag="$?"
echo "#### 5. check if need backup doris logs"
if [[ ${exit_flag} != "0" ]]; then
stop_doris
print_doris_fe_log
print_doris_be_log
if file_name=$(archive_doris_logs "${pull_request_num}_${commit_id}_doris_logs.tar.gz"); then
upload_doris_log_to_oss "${file_name}"
fi
fi
exit "${exit_flag}"

View File

@ -0,0 +1,159 @@
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements. See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership. The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License. You may obtain a copy of the License at
#
# http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied. See the License for the
# specific language governing permissions and limitations
# under the License.
# Build Step: Command Line
: <<EOF
#!/bin/bash
export DEBUG=true
if [[ -f "${teamcity_build_checkoutDir:-}"/regression-test/pipeline/performance/run-tpch.sh ]]; then
cd "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/
bash -x run-tpch.sh
else
echo "Build Step file missing: regression-test/pipeline/performance/run-tpch.sh" && exit 1
fi
EOF
#####################################################################################
## run-tpch.sh content ##
# shellcheck source=/dev/null
# check_tpch_table_rows, restart_doris, set_session_variable, check_tpch_result
source "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/doris-utils.sh
# shellcheck source=/dev/null
# create_an_issue_comment
source "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/github-utils.sh
# shellcheck source=/dev/null
# upload_doris_log_to_oss
source "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/oss-utils.sh
if ${DEBUG:-false}; then
pull_request_num="28431"
commit_id="5f5c4c80564c76ff4267fc4ce6a5408498ed1ab5"
SF="1"
fi
echo "#### Check env"
if [[ -z "${teamcity_build_checkoutDir}" ||
-z "${pull_request_num}" ||
-z "${commit_id}" ]]; then
echo "ERROR: env teamcity_build_checkoutDir or pull_request_num or commit_id not set"
exit 1
fi
# shellcheck source=/dev/null
source "$(bash "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/get-or-set-tmp-env.sh 'get')"
if ${skip_pipeline:=false}; then echo "INFO: skip build pipeline" && exit 0; else echo "INFO: no skip"; fi
echo "#### Run tpch test on Doris ####"
DORIS_HOME="${teamcity_build_checkoutDir}/output"
export DORIS_HOME
cold_run_time_threshold=${cold_run_time_threshold:-200000} # ms
hot_run_time_threshold=${hot_run_time_threshold:-50000} # ms
exit_flag=0
(
set -e
shopt -s inherit_errexit
echo "#### 1. Restart doris"
if ! restart_doris; then echo "ERROR: Restart doris failed" && exit 1; fi
echo "#### 2. check if need to load data"
SF=${SF:-"100"} # SCALE FACTOR
TPCH_DATA_DIR="/data/tpch/sf_${SF}" # no / at the end
TPCH_DATA_DIR_LINK="${teamcity_build_checkoutDir}"/tools/tpch-tools/bin/tpch-data # no / at the end
db_name="tpch_sf${SF}"
sed -i "s|^export DB=.*$|export DB='${db_name}'|g" \
"${teamcity_build_checkoutDir}"/tools/tpch-tools/conf/doris-cluster.conf
if ! check_tpch_table_rows "${db_name}" "${SF}"; then
echo "INFO: need to load tpch-sf${SF} data"
if ${force_load_data:-false}; then echo "INFO: force_load_data is true"; else echo "ERROR: force_load_data is false" && exit 1; fi
# prepare data
mkdir -p "${TPCH_DATA_DIR}"
(
cd "${TPCH_DATA_DIR}" || exit 1
declare -A table_file_count
table_file_count=(['region']=1 ['nation']=1 ['supplier']=1 ['customer']=1 ['part']=1 ['partsupp']=10 ['orders']=10 ['lineitem']=10)
for table_name in ${!table_file_count[*]}; do
if [[ ${table_file_count[${table_name}]} -eq 1 ]]; then
url="https://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/tpch/sf${SF}/${table_name}.tbl"
if ! wget --continue -t3 -q "${url}"; then echo "ERROR: wget --continue ${url}" && exit 1; fi
elif [[ ${table_file_count[${table_name}]} -eq 10 ]]; then
(
for i in {1..10}; do
url="https://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/tpch/sf${SF}/${table_name}.tbl.${i}"
if ! wget --continue -t3 -q "${url}"; then echo "ERROR: wget --continue ${url}" && exit 1; fi
done
) &
wait
fi
done
)
# create table and load data
bash "${teamcity_build_checkoutDir}"/tools/tpch-tools/bin/create-tpch-tables.sh -s "${SF}"
rm -rf "${TPCH_DATA_DIR_LINK}"
ln -s "${TPCH_DATA_DIR}" "${TPCH_DATA_DIR_LINK}"
bash "${teamcity_build_checkoutDir}"/tools/tpch-tools/bin/load-tpch-data.sh -c 2
if ! check_tpch_table_rows "${db_name}" "${SF}"; then
exit 1
fi
echo "INFO: sleep 10min to wait compaction done"
if ${DEBUG:-false}; then sleep 10s; else sleep 10m; fi
data_reload="true"
fi
echo "#### 3. run tpch-sf${SF} query"
set_session_variable runtime_filter_mode global
bash "${teamcity_build_checkoutDir}"/tools/tpch-tools/bin/run-tpch-queries.sh -s "${SF}" | tee "${teamcity_build_checkoutDir}"/run-tpch-queries.log
if ! check_tpch_result "${teamcity_build_checkoutDir}"/run-tpch-queries.log; then exit 1; fi
line_end=$(sed -n '/^Total hot run time/=' "${teamcity_build_checkoutDir}"/run-tpch-queries.log)
line_begin=$((line_end - 23))
comment_body="Tpch sf${SF} test result on commit ${commit_id:-}, data reload: ${data_reload:-"false"}
------ Round 1 ----------------------------------
$(sed -n "${line_begin},${line_end}p" "${teamcity_build_checkoutDir}"/run-tpch-queries.log)"
echo "#### 4. run tpch-sf${SF} query with runtime_filter_mode=off"
set_session_variable runtime_filter_mode off
bash "${teamcity_build_checkoutDir}"/tools/tpch-tools/bin/run-tpch-queries.sh | tee "${teamcity_build_checkoutDir}"/run-tpch-queries.log
if ! grep '^Total hot run time' "${teamcity_build_checkoutDir}"/run-tpch-queries.log >/dev/null; then exit 1; fi
line_end=$(sed -n '/^Total hot run time/=' "${teamcity_build_checkoutDir}"/run-tpch-queries.log)
line_begin=$((line_end - 23))
comment_body="${comment_body}
----- Round 2, with runtime_filter_mode=off -----
$(sed -n "${line_begin},${line_end}p" "${teamcity_build_checkoutDir}"/run-tpch-queries.log)"
echo "#### 5. comment result on tpch"
comment_body=$(echo "${comment_body}" | sed -e ':a;N;$!ba;s/\t/\\t/g;s/\n/\\n/g') # replace all Tab characters with \t and all newlines with \n
create_an_issue_comment_tpch "${pull_request_num:-}" "${comment_body}"
rm -f result.csv
)
exit_flag="$?"
echo "#### 5. check if need backup doris logs"
if [[ ${exit_flag} != "0" ]]; then
stop_doris
print_doris_fe_log
print_doris_be_log
if file_name=$(archive_doris_logs "${pull_request_num}_${commit_id}_doris_logs.tar.gz"); then
upload_doris_log_to_oss "${file_name}"
fi
fi
exit "${exit_flag}"

View File

@ -96,19 +96,30 @@ echo "USER: $USER"
echo "PASSWORD: $PASSWORD"
echo "DB: $DB"
pre_set() {
run_sql() {
echo "$*"
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e "$*"
}
pre_set "set global parallel_fragment_exec_instance_num=8;"
pre_set "set global exec_mem_limit=32G;"
pre_set "set global query_timeout=900;"
get_session_variable() {
k="$1"
v=$(mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e"show variables like '${k}'\G" | grep " Value: ")
echo "${v/*Value: /}"
}
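# illustrative usage (editor's note): 'show variables like ...\G' prints a line
# such as '        Value: 900', and ${v/*Value: /} strips everything through
# 'Value: ', so
#   get_session_variable query_timeout   # -> e.g. 900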
_parallel_fragment_exec_instance_num="$(get_session_variable parallel_fragment_exec_instance_num)"
_exec_mem_limit="$(get_session_variable exec_mem_limit)"
_query_timeout="$(get_session_variable query_timeout)"
echo '============================================'
pre_set "show variables"
echo "Optimize session variables"
run_sql "set global parallel_fragment_exec_instance_num=16;"
run_sql "set global exec_mem_limit=32G;"
run_sql "set global query_timeout=900;"
echo '============================================'
pre_set "analyze table hits with sync;"
run_sql "show variables"
echo '============================================'
run_sql "analyze table hits with sync;"
TRIES=3
QUERY_NUM=1
@ -122,7 +133,7 @@ cat ${QUERIES_FILE} | while read query; do
sync
echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null
echo -n "query${QUERY_NUM}: " | tee -a result.csv
echo -n "query${QUERY_NUM}," | tee -a result.csv
for i in $(seq 1 $TRIES); do
RES=$(mysql -vvv -h"$FE_HOST" -u"$USER" -P"$FE_QUERY_PORT" -D"$DB" -e "${query}" | perl -nle 'print $1 if /\((\d+\.\d+)+ sec\)/' || :)
@ -133,3 +144,14 @@ cat ${QUERIES_FILE} | while read query; do
QUERY_NUM=$((QUERY_NUM + 1))
done
cold_run_sum=$(awk -F ',' '{sum+=$2} END {print sum}' result.csv)
best_hot_run_sum=$(awk -F ',' '{if($3<$4){sum+=$3}else{sum+=$4}} END {print sum}' result.csv)
echo "Total cold run time: ${cold_run_sum} ms"
echo "Total hot run time: ${best_hot_run_sum} ms"
echo 'Finish ClickBench queries.'
echo "Restore session variables"
run_sql "set global parallel_fragment_exec_instance_num=${_parallel_fragment_exec_instance_num};"
run_sql "set global exec_mem_limit=${_exec_mem_limit};"
run_sql "set global query_timeout=${_query_timeout};"

View File

@ -123,8 +123,24 @@ run_sql() {
echo "$*"
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e "$*"
}
get_session_variable() {
k="$1"
v=$(mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e"show variables like '${k}'\G" | grep " Value: ")
echo "${v/*Value: /}"
}
backup_session_variables_file="${CURDIR}/../conf/opt/backup_session_variables.sql"
backup_session_variables() {
while IFS= read -r line; do
k="${line/set global /}"
k="${k%=*}"
v=$(get_session_variable "${k}")
echo "set global ${k}=${v};" >>"${backup_session_variables_file}"
done < <(grep -v '^ *#' <"${TPCDS_OPT_CONF}")
}
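# illustrative example (editor's note): assuming TPCDS_OPT_CONF holds lines like
#   set global parallel_fragment_exec_instance_num=16;
# the backup file records each variable's pre-test value, e.g.
#   set global parallel_fragment_exec_instance_num=8;
# and is replayed with 'source' after the queries finish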
backup_session_variables
echo '============================================'
echo "Optimize session variables"
run_sql "source ${TPCDS_OPT_CONF};"
echo '============================================'
run_sql "show variables;"
@ -182,3 +198,7 @@ done
echo "Total cold run time: ${cold_run_sum} ms"
echo "Total hot run time: ${best_hot_run_sum} ms"
echo 'Finish tpcds queries.'
echo "Restore session variables"
run_sql "source ${backup_session_variables_file};"
rm -f "${backup_session_variables_file}"

View File

@ -123,8 +123,24 @@ run_sql() {
echo "$*"
mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e "$*"
}
get_session_variable() {
k="$1"
v=$(mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e"show variables like '${k}'\G" | grep " Value: ")
echo "${v/*Value: /}"
}
backup_session_variables_file="${CURDIR}/../conf/opt/backup_session_variables.sql"
backup_session_variables() {
while IFS= read -r line; do
k="${line/set global /}"
k="${k%=*}"
v=$(get_session_variable "${k}")
echo "set global ${k}=${v};" >>"${backup_session_variables_file}"
done < <(grep -v '^ *#' <"${TPCH_OPT_CONF}")
}
backup_session_variables
echo '============================================'
echo "Optimize session variables"
run_sql "source ${TPCH_OPT_CONF};"
echo '============================================'
run_sql "show variables;"
@ -183,3 +199,7 @@ echo "Total cold run time: ${cold_run_sum} ms"
# the tpch pipeline depends on this 'Total hot run time' string
echo "Total hot run time: ${best_hot_run_sum} ms"
echo 'Finish tpch queries.'
echo "Restore session variables"
run_sql "source ${backup_session_variables_file};"
rm -f "${backup_session_variables_file}"