upgrade clang-format version to 16 move thrift to fe-common fix core dump on pipeline engine when operator canceled and not prepared
153 lines
5.2 KiB
Bash
Executable File
153 lines
5.2 KiB
Bash
Executable File
#!/usr/bin/env bash
# Licensed to the Apache Software Foundation (ASF) under one
# or more contributor license agreements.  See the NOTICE file
# distributed with this work for additional information
# regarding copyright ownership.  The ASF licenses this file
# to you under the Apache License, Version 2.0 (the
# "License"); you may not use this file except in compliance
# with the License.  You may obtain a copy of the License at
#
#   http://www.apache.org/licenses/LICENSE-2.0
#
# Unless required by applicable law or agreed to in writing,
# software distributed under the License is distributed on an
# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
# KIND, either express or implied.  See the License for the
# specific language governing permissions and limitations
# under the License.

##############################################################
# This script is used to load clickbench data into Doris
##############################################################

# Abort on the first failing command and on a failure in any pipeline stage.
set -eo pipefail

# Absolute path of the directory that contains this script.
# `cd` and `pwd` are chained with `&&` so that a failed `cd` makes the
# assignment fail (and the script stop under `set -e`) instead of silently
# yielding the caller's working directory. CDPATH is cleared for the `cd` so
# it can neither redirect the lookup nor print the directory name.
ROOT=$(dirname "$0")
ROOT=$(CDPATH='' cd -- "${ROOT}" && pwd)

CURDIR=${ROOT}
# Directory holding the hits_split* data files. No trailing slash: the code
# that uses it appends "/hits_split<N>" itself.
DATA_DIR=${CURDIR}
# DATA_DIR=/mnt/disk1/stephen/data/clickbench

#######################################
# Print the help text and terminate the script.
# Outputs:  the usage message on stdout, framed by one blank line before
#           and one after
# Returns:  never returns; exits with status 1
#######################################
usage() {
    # One argument per output line; the empty arguments are the blank lines
    # that frame the message.
    printf '%s\n' \
        '' \
        'This script is used to load ClickBench data,' \
        'will use mysql client to connect Doris server which is specified in conf/doris-cluster.conf file.' \
        "Usage: $0" \
        ''
    exit 1
}

# Parse the command line with getopt(1); the only recognised option is -h.
# The program name is quoted so a script path containing spaces is not
# word-split. Only one -o is passed: util-linux getopt keeps just the last
# short-option string, so the former leading `-o ''` had no effect.
OPTS=$(getopt \
    -n "$0" \
    -o 'h' \
    -- "$@")

# Replace the positional parameters with getopt's normalised, quoted output.
eval set -- "${OPTS}"

HELP=0
while true; do
    case "$1" in
    -h)
        HELP=1
        shift
        ;;
    --)
        # End-of-options marker emitted by getopt.
        shift
        break
        ;;
    *)
        echo "Internal error"
        exit 1
        ;;
    esac
done

if [[ "${HELP}" -eq 1 ]]; then
    usage
fi

#######################################
# Verify that a required command-line tool can be executed.
# Arguments:
#   $1 - probe command line, e.g. "curl --version"; it is word-split on
#        purpose so the command and its arguments can be passed as one string
#   $2 - human-readable tool name used in the error message
# Outputs:  whatever the probe prints; an error message on stderr when the
#           probe fails
# Returns:  0 when the probe succeeds; otherwise exits the script with 1
#######################################
check_prerequest() {
    local CMD=$1
    local NAME=$2
    # shellcheck disable=SC2086 # intentional word-splitting of the probe
    if ! ${CMD}; then
        # Name the tool that is actually missing (the message used to say
        # "cURL" for every tool).
        echo "${NAME} is missing. This script depends on ${NAME} to load data to Doris." >&2
        exit 1
    fi
}

# Fail fast when one of the client tools used below cannot be run.
check_prerequest "mysql --version" "mysql"
check_prerequest "curl --version" "curl"
check_prerequest "wget --version" "wget"

# Read the cluster settings. The path is quoted so a checkout directory whose
# name contains spaces still works. The file is expected to define the
# FE_*/BE_*, USER, PASSWORD and DB variables used by the rest of the script.
# shellcheck source=/dev/null
source "${CURDIR}/conf/doris-cluster.conf"

# PIDs of the background wget downloads; signal_handler kills them.
wget_pids=()

echo "FE_HOST: ${FE_HOST}"
echo "FE_HTTP_PORT: ${FE_HTTP_PORT}"
echo "USER: ${USER}"
# Do not print the credential itself: show a mask when a password is set and
# nothing when it is empty.
echo "PASSWORD: ${PASSWORD:+******}"
echo "DB: ${DB}"

#######################################
# Print tuning advice when the Doris FE stream-load timeout or the BE
# streaming-load size limit is below what this data set needs.
# The check is advisory: when a value cannot be fetched or is not a plain
# non-negative integer, the corresponding advice is skipped instead of
# aborting the script.
# Globals:  FE_HOST, FE_QUERY_PORT, USER, BE_HOST, BE_WEBSERVER_PORT (read)
# Outputs:  zero, one or two "advise:" lines on stdout
# Returns:  0
#######################################
function check_doris_conf() {
    local cv

    # `|| true` keeps a failing mysql/awk pipeline from terminating the
    # script under `set -eo pipefail`; whatever was printed is still
    # captured. awk prints the second field of the first matching line.
    cv=$(mysql -h"${FE_HOST}" -P"${FE_QUERY_PORT}" -u"${USER}" \
        -e 'admin show frontend config' |
        awk '/stream_load_default_timeout_second/ && !seen++ {print $2}') || true
    cv=${cv//[[:space:]]/}
    # 10# forces base 10 so a value with leading zeros is not read as octal.
    if [[ "${cv}" =~ ^[0-9]+$ ]] && ((10#${cv} < 3600)); then
        echo "advise: revise your Doris FE's conf to set 'stream_load_default_timeout_second=3600' or above"
    fi

    # The BE /varz page is scanned for a "streaming_load_max_mb=<value>" line.
    cv=$(curl "${BE_HOST}:${BE_WEBSERVER_PORT}/varz" 2>/dev/null |
        awk -F'=' '/streaming_load_max_mb/ && !seen++ {print $2}') || true
    cv=${cv//[[:space:]]/}
    if [[ "${cv}" =~ ^[0-9]+$ ]] && ((10#${cv} < 16000)); then
        echo -e "advise: revise your Doris BE's conf to set 'streaming_load_max_mb=16000' or above and 'flush_thread_num_per_store=5' to speed up load."
    fi
}

#######################################
# Download any missing hits_split0..hits_split9 files into DATA_DIR, then
# stream-load all ten files into the Doris table "hits".
# Globals:
#   DATA_DIR, USER, PASSWORD, FE_HOST, FE_HTTP_PORT, DB (read)
#   wget_pids (appended with the PID of every download that is started)
# Outputs:  progress messages on stdout, errors on stderr
# Returns:  0 on success; 1 when DATA_DIR is missing or a download failed
#           (no file is uploaded in that case)
# NOTE(review): a file that already exists is never re-checked, so a
#   partially downloaded file left by an interrupted run is uploaded as is.
#######################################
function load() {
    local i pid failed=0
    local -a started=()
    local -r base_url='https://doris-test-data.oss-cn-hongkong.aliyuncs.com/ClickBench'
    # local -r base_url='https://doris-test-data.oss-cn-hongkong-internal.aliyuncs.com/ClickBench'

    echo "(1/2) prepare clickbench data file"
    if [[ ! -d "${DATA_DIR}" ]]; then
        echo "data directory ${DATA_DIR} does not exist" >&2
        return 1
    fi

    for i in {0..9}; do
        if [[ ! -f "${DATA_DIR}/hits_split${i}" ]]; then
            echo "will download hits_split${i} to ${DATA_DIR}"
            # --directory-prefix saves the file into DATA_DIR without
            # changing the working directory of this shell.
            wget --continue --directory-prefix="${DATA_DIR}" \
                "${base_url}/hits_split${i}" &
            pid=$!
            started+=("${pid}")
            wget_pids+=("${pid}")
        fi
    done

    echo "wait for download task done..."
    # Wait for every download individually so that a failure is noticed;
    # a bare `wait` would report success whatever the jobs returned.
    for pid in "${started[@]}"; do
        if ! wait "${pid}"; then
            failed=1
        fi
    done
    if ((failed)); then
        echo "failed to download one or more hits_split files" >&2
        return 1
    fi

    echo "(2/2) load clickbench data file ${DATA_DIR}/hits_split[0-9] into Doris"
    for i in {0..9}; do
        printf '\nstart loading hits_split%s\n' "${i}"
        curl --location-trusted \
            -u "${USER}:${PASSWORD}" \
            -T "${DATA_DIR}/hits_split${i}" \
            -H "columns:WatchID,JavaEnable,Title,GoodEvent,EventTime,EventDate,CounterID,ClientIP,RegionID,UserID,CounterClass,OS,UserAgent,URL,Referer,IsRefresh,RefererCategoryID,RefererRegionID,URLCategoryID,URLRegionID,ResolutionWidth,ResolutionHeight,ResolutionDepth,FlashMajor,FlashMinor,FlashMinor2,NetMajor,NetMinor,UserAgentMajor,UserAgentMinor,CookieEnable,JavascriptEnable,IsMobile,MobilePhone,MobilePhoneModel,Params,IPNetworkID,TraficSourceID,SearchEngineID,SearchPhrase,AdvEngineID,IsArtifical,WindowClientWidth,WindowClientHeight,ClientTimeZone,ClientEventTime,SilverlightVersion1,SilverlightVersion2,SilverlightVersion3,SilverlightVersion4,PageCharset,CodeVersion,IsLink,IsDownload,IsNotBounce,FUniqID,OriginalURL,HID,IsOldCounter,IsEvent,IsParameter,DontCountHits,WithHash,HitColor,LocalEventTime,Age,Sex,Income,Interests,Robotness,RemoteIP,WindowName,OpenerName,HistoryLength,BrowserLanguage,BrowserCountry,SocialNetwork,SocialAction,HTTPError,SendTiming,DNSTiming,ConnectTiming,ResponseStartTiming,ResponseEndTiming,FetchTiming,SocialSourceNetworkID,SocialSourcePage,ParamPrice,ParamOrderID,ParamCurrency,ParamCurrencyID,OpenstatServiceName,OpenstatCampaignID,OpenstatAdID,OpenstatSourceID,UTMSource,UTMMedium,UTMCampaign,UTMContent,UTMTerm,FromTag,HasGCLID,RefererHash,URLHash,CLID" \
            "http://${FE_HOST}:${FE_HTTP_PORT}/api/${DB}/hits/_stream_load"
    done
}

#######################################
# Signal trap: stop every background download that was started, then end
# the script.
# Globals:  wget_pids (read) - PIDs of the background wget processes
# Returns:  never returns; exits with status 1
#######################################
function signal_handler() {
    local pid
    for pid in "${wget_pids[@]}"; do
        # A download may already have finished. Ignore the resulting
        # "no such process" error so the remaining downloads are still
        # stopped even when the script runs with `set -e`.
        kill "${pid}" 2>/dev/null || true
    done
    # Without an explicit exit the script would resume after the trap returns.
    exit 1
}

# Stop the background downloads when the script receives INT, QUIT, ABRT or
# TERM (signals 2, 3, 6 and 15).
trap signal_handler INT QUIT ABRT TERM

echo "start..."
start=$(date +%s)
check_doris_conf
load
end=$(date +%s)
# Elapsed wall-clock time of the configuration check plus the load.
echo "load cost time: $((end - start)) seconds"