diff --git a/.licenserc.yaml b/.licenserc.yaml index 7e3207b299973b..ea761f044f46ef 100644 --- a/.licenserc.yaml +++ b/.licenserc.yaml @@ -88,6 +88,7 @@ header: - "conf/mysql_ssl_default_certificate/client_certificate/client-cert.pem" - "conf/mysql_ssl_default_certificate/client_certificate/client-key.pem" - "regression-test/ssl_default_certificate/*" + - "regression-test/pipeline/performance/conf/session_variables" - "extension/beats/go.mod" - "extension/beats/go.sum" - "pytest/hdfs" diff --git a/regression-test/pipeline/common/doris-utils.sh b/regression-test/pipeline/common/doris-utils.sh index dddc1fc5f5d22c..f90b9a077ae478 100644 --- a/regression-test/pipeline/common/doris-utils.sh +++ b/regression-test/pipeline/common/doris-utils.sh @@ -53,12 +53,15 @@ function set_doris_conf_value() { function start_doris_fe() { if [[ ! -d "${DORIS_HOME:-}" ]]; then return 1; fi - if ! java -version >/dev/null; then sudo apt install openjdk-8-jdk -y >/dev/null; fi + if ! java -version >/dev/null || + [[ -z "$(find /usr/lib/jvm -maxdepth 1 -type d -name 'java-8-*')" ]]; then + sudo apt update && sudo apt install openjdk-8-jdk -y >/dev/null + fi JAVA_HOME="$(find /usr/lib/jvm -maxdepth 1 -type d -name 'java-8-*' | sed -n '1p')" export JAVA_HOME "${DORIS_HOME}"/fe/bin/start_fe.sh --daemon - if ! mysql --version >/dev/null; then sudo apt install -y mysql-client; fi + if ! mysql --version >/dev/null; then sudo apt update && sudo apt install -y mysql-client; fi query_port=$(get_doris_conf_value "${DORIS_HOME}"/fe/conf/fe.conf query_port) cl="mysql -h127.0.0.1 -P${query_port} -uroot " local i=1 @@ -71,12 +74,14 @@ function start_doris_fe() { fi done if [[ ${i} -ge 60 ]]; then echo "ERROR: Start Doris Frontend Failed after 2 mins wait..." && return 1; fi - } function start_doris_be() { if [[ ! -d "${DORIS_HOME:-}" ]]; then return 1; fi - if ! java -version >/dev/null; then sudo apt install openjdk-8-jdk -y >/dev/null; fi + if ! 
java -version >/dev/null || + [[ -z "$(find /usr/lib/jvm -maxdepth 1 -type d -name 'java-8-*')" ]]; then + sudo apt update && sudo apt install openjdk-8-jdk -y >/dev/null + fi JAVA_HOME="$(find /usr/lib/jvm -maxdepth 1 -type d -name 'java-8-*' | sed -n '1p')" export JAVA_HOME sysctl -w vm.max_map_count=2000000 && @@ -101,7 +106,7 @@ function start_doris_be() { function add_doris_be_to_fe() { if [[ ! -d "${DORIS_HOME:-}" ]]; then return 1; fi - if ! mysql --version >/dev/null; then sudo apt install -y mysql-client; fi + if ! mysql --version >/dev/null; then sudo apt update && sudo apt install -y mysql-client; fi query_port=$(get_doris_conf_value "${DORIS_HOME}"/fe/conf/fe.conf query_port) heartbeat_service_port=$(get_doris_conf_value "${DORIS_HOME}"/be/conf/be.conf heartbeat_service_port) cl="mysql -h127.0.0.1 -P${query_port} -uroot " @@ -116,7 +121,7 @@ function add_doris_be_to_fe() { echo 'Wait for Backends ready, sleep 2 seconds ...' && sleep 2 fi done - if [[ ${i} -eq 60 ]]; then echo "ERROR: Add Doris Backend Failed after 2 mins wait..." && return 1; fi + if [[ ${i} -ge 60 ]]; then echo "ERROR: Add Doris Backend Failed after 2 mins wait..." && return 1; fi } function stop_doris() { @@ -129,6 +134,15 @@ function stop_doris() { fi } +function restart_doris() { + if stop_doris; then echo; fi + if ! start_doris_fe; then return 1; fi + if ! start_doris_be; then return 1; fi + # wait 10s for doris totally started, otherwise may encounter the error below, + # ERROR 1105 (HY000) at line 102: errCode = 2, detailMessage = Failed to find enough backend, please check the replication num,replication tag and storage medium. + sleep 10s +} + function check_tpch_table_rows() { if [[ ! 
-d "${DORIS_HOME:-}" ]]; then return 1; fi db_name="$1" @@ -138,20 +152,133 @@ function check_tpch_table_rows() { query_port=$(get_doris_conf_value "${DORIS_HOME}"/fe/conf/fe.conf query_port) cl="mysql -h127.0.0.1 -P${query_port} -uroot " declare -A table_rows - if [[ "${scale_factor}" == "100" ]]; then + if [[ "${scale_factor}" == "1" ]]; then + table_rows=(['region']=5 ['nation']=25 ['supplier']=10000 ['customer']=150000 ['part']=200000 ['partsupp']=800000 ['orders']=1500000 ['lineitem']=6001215) + elif [[ "${scale_factor}" == "100" ]]; then table_rows=(['region']=5 ['nation']=25 ['supplier']=1000000 ['customer']=15000000 ['part']=20000000 ['partsupp']=80000000 ['orders']=150000000 ['lineitem']=600037902) else - table_rows=(['region']=5 ['nation']=25 ['supplier']=10000 ['customer']=150000 ['part']=200000 ['partsupp']=800000 ['orders']=1500000 ['lineitem']=6001215) + echo "ERROR: unsupported scale_factor ${scale_factor} for tpch" && return 1 + fi + for table in ${!table_rows[*]}; do + rows_actual=$(${cl} -D"${db_name}" -e"SELECT count(*) FROM ${table}" | sed -n '2p') + rows_expect=${table_rows[${table}]} + if [[ ${rows_actual} -ne ${rows_expect} ]]; then + echo "ERROR: ${table} actual rows: ${rows_actual}, expect rows: ${rows_expect}" && return 1 + fi + done +} + +function check_tpcds_table_rows() { + if [[ ! 
-d "${DORIS_HOME:-}" ]]; then return 1; fi + db_name="$1" + scale_factor="$2" + if [[ -z "${scale_factor}" ]]; then return 1; fi + + query_port=$(get_doris_conf_value "${DORIS_HOME}"/fe/conf/fe.conf query_port) + cl="mysql -h127.0.0.1 -P${query_port} -uroot " + declare -A table_rows + if [[ "${scale_factor}" == "1" ]]; then + table_rows=(['income_band']=20 ['ship_mode']=20 ['warehouse']=5 ['reason']=35 ['web_site']=30 ['call_center']=6 ['store']=12 ['promotion']=300 ['household_demographics']=7200 ['web_page']=60 ['catalog_page']=11718 ['time_dim']=86400 ['date_dim']=73049 ['item']=18000 ['customer_demographics']=1920800 ['customer_address']=50000 ['customer']=100000 ['web_returns']=71763 ['catalog_returns']=144067 ['store_returns']=287514 ['inventory']=11745000 ['web_sales']=719384 ['catalog_sales']=1441548 ['store_sales']=2880404) + elif [[ "${scale_factor}" == "100" ]]; then + table_rows=(['income_band']=20 ['ship_mode']=20 ['warehouse']=15 ['reason']=55 ['web_site']=24 ['call_center']=30 ['store']=402 ['promotion']=1000 ['household_demographics']=7200 ['web_page']=2040 ['catalog_page']=20400 ['time_dim']=86400 ['date_dim']=73049 ['item']=204000 ['customer_demographics']=1920800 ['customer_address']=1000000 ['customer']=2000000 ['web_returns']=7197670 ['catalog_returns']=14404374 ['store_returns']=28795080 ['inventory']=399330000 ['web_sales']=72001237 ['catalog_sales']=143997065 ['store_sales']=287997024) + elif [[ "${scale_factor}" == "1000" ]]; then + table_rows=(['income_band']=20 ['ship_mode']=20 ['warehouse']=20 ['reason']=65 ['web_site']=54 ['call_center']=42 ['store']=1002 ['promotion']=1500 ['household_demographics']=7200 ['web_page']=3000 ['catalog_page']=30000 ['time_dim']=86400 ['date_dim']=73049 ['item']=300000 ['customer_demographics']=1920800 ['customer_address']=6000000 ['customer']=12000000 ['web_returns']=71997522 ['catalog_returns']=143996756 ['store_returns']=287999764 ['inventory']=783000000 ['web_sales']=720000376 
['catalog_sales']=1439980416 ['store_sales']=2879987999) + elif [[ "${scale_factor}" == "3000" ]]; then + table_rows=(['income_band']=20 ['ship_mode']=20 ['warehouse']=22 ['reason']=67 ['web_site']=66 ['call_center']=48 ['store']=1350 ['promotion']=1800 ['household_demographics']=7200 ['web_page']=3600 ['catalog_page']=36000 ['time_dim']=86400 ['date_dim']=73049 ['item']=360000 ['customer_demographics']=1920800 ['customer_address']=15000000 ['customer']=30000000 ['web_returns']=216003761 ['catalog_returns']=432018033 ['store_returns']=863989652 ['inventory']=1033560000 ['web_sales']=2159968881 ['catalog_sales']=4320078880 ['store_sales']=8639936081) + else + echo "ERROR: unsupported scale_factor ${scale_factor} for tpcds" && return 1 fi for table in ${!table_rows[*]}; do rows_actual=$(${cl} -D"${db_name}" -e"SELECT count(*) FROM ${table}" | sed -n '2p') rows_expect=${table_rows[${table}]} if [[ ${rows_actual} -ne ${rows_expect} ]]; then - echo "WARNING: ${table} actual rows: ${rows_actual}, expect rows: ${rows_expect}" && return 1 + echo "ERROR: ${table} actual rows: ${rows_actual}, expect rows: ${rows_expect}" && return 1 + fi + done +} + +function check_clickbench_table_rows() { + if [[ ! -d "${DORIS_HOME:-}" ]]; then return 1; fi + db_name="$1" + if [[ -z "${db_name}" ]]; then return 1; fi + query_port=$(get_doris_conf_value "${DORIS_HOME}"/fe/conf/fe.conf query_port) + cl="mysql -h127.0.0.1 -P${query_port} -uroot " + declare -A table_rows + table_rows=(['hits']=99997497) + if ${DEBUG:-false}; then table_rows=(['hits']=10000); fi + for table in ${!table_rows[*]}; do + rows_actual=$(${cl} -D"${db_name}" -e"SELECT count(*) FROM ${table}" | sed -n '2p') + rows_expect=${table_rows[${table}]} + if [[ ${rows_actual} -ne ${rows_expect} ]]; then + echo "ERROR: ${table} actual rows: ${rows_actual}, expect rows: ${rows_expect}" && return 1 fi done } +function check_tpch_result() { + log_file="$1" + if [[ -z "${log_file}" ]]; then return 1; fi + if ! 
grep '^Total cold run time' "${log_file}" || ! grep '^Total hot run time' "${log_file}"; then + echo "ERROR: can not find 'Total hot run time' in '${log_file}'" + return 1 + else + cold_run_time=$(grep '^Total cold run time' "${log_file}" | awk '{print $5}') + hot_run_time=$(grep '^Total hot run time' "${log_file}" | awk '{print $5}') + fi + # 单位是毫秒 + cold_run_time_threshold=${cold_run_time_threshold:-50000} + hot_run_time_threshold=${hot_run_time_threshold:-42000} + if [[ ${cold_run_time} -gt ${cold_run_time_threshold} || ${hot_run_time} -gt ${hot_run_time_threshold} ]]; then + echo "ERROR: + cold_run_time ${cold_run_time} is greater than the threshold ${cold_run_time_threshold}, + or, hot_run_time ${hot_run_time} is greater than the threshold ${hot_run_time_threshold}" + return 1 + else + echo "INFO: + cold_run_time ${cold_run_time} is less than the threshold ${cold_run_time_threshold}, + hot_run_time ${hot_run_time} is less than the threshold ${hot_run_time_threshold}" + fi +} + +function check_tpcds_result() { + check_tpch_result "$1" +} + +function check_clickbench_query_result() { + echo "TODO" +} + +function check_clickbench_performance_result() { + result_file="$1" + if [[ -z "${result_file}" ]]; then return 1; fi + + empty_query_time="$(awk -F ',' '{if( ($2=="") || ($3=="") || ($4=="") ){print $1}}' "${result_file}")" + if [[ -n ${empty_query_time} ]]; then + echo -e "ERROR: find empty query time of:\n${empty_query_time}" && return 1 + fi + + # 单位是秒 + cold_run_time_threshold=${cold_run_time_threshold:-200} + hot_run_time_threshold=${hot_run_time_threshold:-55} + cold_run_sum=$(awk -F ',' '{sum+=$2} END {print sum}' "${result_file}") + hot_run_time=$(awk -F ',' '{if($3<$4){sum+=$3}else{sum+=$4}} END {print sum}' "${result_file}") + if [[ $(echo "${hot_run_time} > ${hot_run_time_threshold}" | bc) -eq 1 ]] || + [[ $(echo "${cold_run_sum} > ${cold_run_time_threshold}" | bc) -eq 1 ]]; then + echo "ERROR: + cold_run_time ${cold_run_sum} is greater than the threshold 
${cold_run_time_threshold}, + or, hot_run_time ${hot_run_time} is greater than the threshold ${hot_run_time_threshold}" + return 1 + else + echo "INFO: + cold_run_time ${cold_run_sum} is less than the threshold ${cold_run_time_threshold}, + hot_run_time ${hot_run_time} is less than the threshold ${hot_run_time_threshold}" + fi +} + +function check_load_performance() { + echo "TODO" +} + get_session_variable() { if [[ ! -d "${DORIS_HOME:-}" ]]; then return 1; fi usage=" @@ -228,12 +355,11 @@ archive_doris_logs() { if [[ -z ${archive_name} ]]; then echo "ERROR: archive file name required" && return 1; fi if tar -I pigz \ --directory "${DORIS_HOME}" \ - --absolute-names \ -cf "${DORIS_HOME}/${archive_name}" \ - "${DORIS_HOME}"/fe/conf \ - "${DORIS_HOME}"/fe/log \ - "${DORIS_HOME}"/be/conf \ - "${DORIS_HOME}"/be/log; then + fe/conf \ + fe/log \ + be/conf \ + be/log; then echo "${DORIS_HOME}/${archive_name}" else return 1 diff --git a/regression-test/pipeline/common/get-or-set-tmp-env.sh b/regression-test/pipeline/common/get-or-set-tmp-env.sh new file mode 100644 index 00000000000000..4e2c11547bb8b7 --- /dev/null +++ b/regression-test/pipeline/common/get-or-set-tmp-env.sh @@ -0,0 +1,38 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. + +tmp_env_file_path="${PWD}/.my_tmp_env" + +usage() { + echo -e " +Usage: + $0 'get' + $0 'set' \"export skip_pipeline='true'\" + note: 'get' will return env file path; 'set' will add your new item into env file" + exit 1 +} + +if [[ $1 == 'get' ]]; then + if [[ ! -f "${tmp_env_file_path}" ]]; then touch "${tmp_env_file_path}"; fi + echo "${tmp_env_file_path}" +elif [[ $1 == 'set' ]]; then + if [[ -z $2 ]]; then usage; fi + echo "$2" >>"${tmp_env_file_path}" +else + usage +fi diff --git a/regression-test/pipeline/common/github-utils.sh b/regression-test/pipeline/common/github-utils.sh index 3c226955b1552c..ecf7a055df0d25 100644 --- a/regression-test/pipeline/common/github-utils.sh +++ b/regression-test/pipeline/common/github-utils.sh @@ -53,7 +53,33 @@ function create_an_issue_comment_tpch() { local COMMENT_BODY="$2" local machine='aliyun_ecs.c7a.8xlarge_32C64G' COMMENT_BODY=" -TPC-H test result on machine: '${machine}' +TPC-H test result on machine: '${machine}', run with scripts in https://github.com/apache/doris/tree/master/tools/tpch-tools +\`\`\` +${COMMENT_BODY} +\`\`\` +" + create_an_issue_comment "${ISSUE_NUMBER}" "${COMMENT_BODY}" +} + +function create_an_issue_comment_tpcds() { + local ISSUE_NUMBER="$1" + local COMMENT_BODY="$2" + local machine='aliyun_ecs.c7a.8xlarge_32C64G' + COMMENT_BODY=" +TPC-DS test result on machine: '${machine}', run with scripts in https://github.com/apache/doris/tree/master/tools/tpcds-tools +\`\`\` +${COMMENT_BODY} +\`\`\` +" + create_an_issue_comment "${ISSUE_NUMBER}" "${COMMENT_BODY}" +} + +function create_an_issue_comment_clickbench() { + local ISSUE_NUMBER="$1" + local COMMENT_BODY="$2" + local machine='aliyun_ecs.c7a.8xlarge_32C64G' + COMMENT_BODY=" +ClickBench test result on machine: '${machine}', run with scripts in https://github.com/apache/doris/tree/master/tools/clickbench-tools \`\`\` ${COMMENT_BODY} \`\`\` @@ 
-271,3 +297,30 @@ file_changed_ckb() { done echo "return no need" && return 1 } + +file_changed_perf() { + local all_files + all_files=$(cat all_files) + if _only_modified_regression_conf; then echo "return no need" && return 1; fi + if [[ -z ${all_files} ]]; then echo "return need" && return 0; fi + for af in ${all_files}; do + if [[ "${af}" == 'be'* ]] || + [[ "${af}" == 'bin'* ]] || + [[ "${af}" == 'conf'* ]] || + [[ "${af}" == 'fe'* ]] || + [[ "${af}" == 'gensrc'* ]] || + [[ "${af}" == 'thirdparty'* ]] || + [[ "${af}" == 'build.sh' ]] || + [[ "${af}" == 'env.sh' ]] || + [[ "${af}" == 'regression-test/pipeline/common/github-utils.sh' ]] || + [[ "${af}" == 'regression-test/pipeline/common/doris-utils.sh' ]] || + [[ "${af}" == 'regression-test/pipeline/common/oss-utils.sh' ]] || + [[ "${af}" == 'regression-test/pipeline/performance/'* ]] || + [[ "${af}" == 'tools/tpch-tools/bin/run-tpch-queries.sh' ]] || + [[ "${af}" == 'tools/tpcds-tools/bin/run-tpcds-queries.sh' ]] || + [[ "${af}" == 'regression-test/pipeline/tpch/tpch-sf100/'* ]]; then + echo "performance related file changed, return need" && return 0 + fi + done + echo "return no need" && return 1 +} diff --git a/regression-test/pipeline/performance/clean.sh b/regression-test/pipeline/performance/clean.sh new file mode 100644 index 00000000000000..4cd831d3468140 --- /dev/null +++ b/regression-test/pipeline/performance/clean.sh @@ -0,0 +1,47 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Build Step: Command Line +: <&1 | tee build.log" +set +x +succ_symbol="BUILD SUCCESS" +if [[ ! -d output ]] || ! grep "${succ_symbol}" "${teamcity_build_checkoutDir}"/build.log; then + echo -e "ERROR: BUILD FAILED" + exit 1 +fi diff --git a/regression-test/pipeline/performance/conf/README.md b/regression-test/pipeline/performance/conf/README.md new file mode 100644 index 00000000000000..d6b8073fc3c3db --- /dev/null +++ b/regression-test/pipeline/performance/conf/README.md @@ -0,0 +1,5 @@ +## 简介 +这里的 conf 用于社区 tpcds 流水线 +基于 master 分支的默认 conf 文件, +参考:https://github.com/apache/doris/tree/master/conf +添加 fe_custom.conf,be_custom.conf,session_variables 来设置跑流水线的自定义设置。 \ No newline at end of file diff --git a/regression-test/pipeline/performance/conf/be_custom.conf b/regression-test/pipeline/performance/conf/be_custom.conf new file mode 100644 index 00000000000000..f6ac4564857b2a --- /dev/null +++ b/regression-test/pipeline/performance/conf/be_custom.conf @@ -0,0 +1,21 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +priority_networks=127.0.0.1/24 +storage_root_path=/data/doris-storage + +streaming_load_max_mb=102400 diff --git a/regression-test/pipeline/performance/conf/custom_env.sh b/regression-test/pipeline/performance/conf/custom_env.sh new file mode 100644 index 00000000000000..27a4544655eff3 --- /dev/null +++ b/regression-test/pipeline/performance/conf/custom_env.sh @@ -0,0 +1,20 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +export DORIS_TOOLCHAIN=gcc +export BUILD_TYPE=release diff --git a/regression-test/pipeline/performance/conf/fe_custom.conf b/regression-test/pipeline/performance/conf/fe_custom.conf new file mode 100644 index 00000000000000..bd6798505321ee --- /dev/null +++ b/regression-test/pipeline/performance/conf/fe_custom.conf @@ -0,0 +1,28 @@ +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +##################################################################### +## The uppercase properties are read and exported by bin/start_fe.sh. 
+## To see all Frontend configurations, +## see fe/src/org/apache/doris/common/Config.java +##################################################################### + +priority_networks=127.0.0.1/24 +meta_dir=/data/doris-meta + +stream_load_default_timeout_second=3600 +ignore_unknown_metadata_module=true diff --git a/regression-test/pipeline/performance/conf/session_variables b/regression-test/pipeline/performance/conf/session_variables new file mode 100644 index 00000000000000..e69de29bb2d1d6 diff --git a/regression-test/pipeline/performance/deploy.sh b/regression-test/pipeline/performance/deploy.sh new file mode 100644 index 00000000000000..5f2e673f0404a4 --- /dev/null +++ b/regression-test/pipeline/performance/deploy.sh @@ -0,0 +1,115 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+ +# Build Step: Command Line +: < 流水线开始跑,这个时间段中如果有新commit, +这时候流水线 checkout 出来的 commit 就不是触发时的传过来的 commit了, +这种情况不需要跑,预期pr owner会重新触发。" + echo -e "ERROR: PR(${pull_request_num}), + the lastest commit id + ${commit_id_from_checkout} + not equail to the commit_id_from_trigger + ${commit_id_from_trigger} + commit_id_from_trigger is outdate" + exit 1 +fi +# shellcheck source=/dev/null +source "$(bash "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/get-or-set-tmp-env.sh 'get')" +if ${skip_pipeline:=false}; then echo "INFO: skip build pipline" && exit 0; else echo "INFO: no skip"; fi +# shellcheck source=/dev/null +# _get_pr_changed_files file_changed_perf +source "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/github-utils.sh +if _get_pr_changed_files "${pull_request_num}"; then + if ! file_changed_perf; then + bash "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/get-or-set-tmp-env.sh 'set' "export skip_pipeline=true" + exit 0 + fi +fi + +echo "#### 2. check if tpch depending files exist" +if ! 
[[ -f "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/oss-utils.sh && + -f "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/doris-utils.sh && + -f "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/github-utils.sh && + -f "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/get-or-set-tmp-env.sh && + -f "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/conf/be_custom.conf && + -f "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/conf/custom_env.sh && + -f "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/conf/fe_custom.conf && + -f "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/prepare.sh && + -f "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/compile.sh && + -f "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/deploy.sh && + -f "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/run-tpch.sh && + -f "${teamcity_build_checkoutDir}"/regression-test/pipeline/performance/run-tpcds.sh && + -f "${teamcity_build_checkoutDir}"/tools/tpch-tools/bin/run-tpch-queries.sh && + -f "${teamcity_build_checkoutDir}"/tools/tpcds-tools/bin/run-tpcds-queries.sh ]]; then + echo "ERROR: depending files missing" && exit 1 +fi diff --git a/regression-test/pipeline/performance/run-clickbench.sh b/regression-test/pipeline/performance/run-clickbench.sh new file mode 100644 index 00000000000000..225fa802ec6908 --- /dev/null +++ b/regression-test/pipeline/performance/run-clickbench.sh @@ -0,0 +1,323 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Build Step: Command Line +: <>"${backup_session_variables_file}" + done + IFS="${_IFS}" + } + backup_session_variables + mysql -h"${host}" -P"${query_port}" -uroot -e"source ${opt_session_variables_file};" + + echo "#### 1. Restart doris" + if ! restart_doris; then echo "ERROR: Restart doris failed" && exit 1; fi + + echo "#### 2. check if need to load data" + data_home="/data/clickbench" # no / at the end + db_name="clickbench" + if ! check_clickbench_table_rows "${db_name}"; then + echo "INFO: need to load clickbench data" + if ${force_load_data:-false}; then echo "INFO: force_load_data is true"; else echo "ERROR: force_load_data is false" && exit 1; fi + # prepare data + mkdir -p "${data_home}" + + # create table and load data + mysql -h"${host}" -P"${query_port}" -uroot -e "DROP DATABASE IF EXISTS ${db_name}" + mysql -h"${host}" -P"${query_port}" -uroot -e "CREATE DATABASE IF NOT EXISTS ${db_name}" && sleep 10 + mysql -h"${host}" -P"${query_port}" -uroot "${db_name}" -e" + CREATE TABLE IF NOT EXISTS hits ( + CounterID INT NOT NULL, + EventDate DateV2 NOT NULL, + UserID BIGINT NOT NULL, + EventTime DateTimeV2 NOT NULL, + WatchID BIGINT NOT NULL, + JavaEnable SMALLINT NOT NULL, + Title STRING NOT NULL, + GoodEvent SMALLINT NOT NULL, + ClientIP INT NOT NULL, + RegionID INT NOT NULL, + CounterClass SMALLINT NOT NULL, + OS SMALLINT NOT NULL, + UserAgent SMALLINT NOT NULL, + URL STRING NOT NULL, + Referer STRING NOT NULL, + IsRefresh SMALLINT NOT NULL, + RefererCategoryID SMALLINT NOT NULL, + RefererRegionID INT NOT NULL, + URLCategoryID SMALLINT NOT 
NULL, + URLRegionID INT NOT NULL, + ResolutionWidth SMALLINT NOT NULL, + ResolutionHeight SMALLINT NOT NULL, + ResolutionDepth SMALLINT NOT NULL, + FlashMajor SMALLINT NOT NULL, + FlashMinor SMALLINT NOT NULL, + FlashMinor2 STRING NOT NULL, + NetMajor SMALLINT NOT NULL, + NetMinor SMALLINT NOT NULL, + UserAgentMajor SMALLINT NOT NULL, + UserAgentMinor VARCHAR(255) NOT NULL, + CookieEnable SMALLINT NOT NULL, + JavascriptEnable SMALLINT NOT NULL, + IsMobile SMALLINT NOT NULL, + MobilePhone SMALLINT NOT NULL, + MobilePhoneModel STRING NOT NULL, + Params STRING NOT NULL, + IPNetworkID INT NOT NULL, + TraficSourceID SMALLINT NOT NULL, + SearchEngineID SMALLINT NOT NULL, + SearchPhrase STRING NOT NULL, + AdvEngineID SMALLINT NOT NULL, + IsArtifical SMALLINT NOT NULL, + WindowClientWidth SMALLINT NOT NULL, + WindowClientHeight SMALLINT NOT NULL, + ClientTimeZone SMALLINT NOT NULL, + ClientEventTime DateTimeV2 NOT NULL, + SilverlightVersion1 SMALLINT NOT NULL, + SilverlightVersion2 SMALLINT NOT NULL, + SilverlightVersion3 INT NOT NULL, + SilverlightVersion4 SMALLINT NOT NULL, + PageCharset STRING NOT NULL, + CodeVersion INT NOT NULL, + IsLink SMALLINT NOT NULL, + IsDownload SMALLINT NOT NULL, + IsNotBounce SMALLINT NOT NULL, + FUniqID BIGINT NOT NULL, + OriginalURL STRING NOT NULL, + HID INT NOT NULL, + IsOldCounter SMALLINT NOT NULL, + IsEvent SMALLINT NOT NULL, + IsParameter SMALLINT NOT NULL, + DontCountHits SMALLINT NOT NULL, + WithHash SMALLINT NOT NULL, + HitColor CHAR NOT NULL, + LocalEventTime DateTimeV2 NOT NULL, + Age SMALLINT NOT NULL, + Sex SMALLINT NOT NULL, + Income SMALLINT NOT NULL, + Interests SMALLINT NOT NULL, + Robotness SMALLINT NOT NULL, + RemoteIP INT NOT NULL, + WindowName INT NOT NULL, + OpenerName INT NOT NULL, + HistoryLength SMALLINT NOT NULL, + BrowserLanguage STRING NOT NULL, + BrowserCountry STRING NOT NULL, + SocialNetwork STRING NOT NULL, + SocialAction STRING NOT NULL, + HTTPError SMALLINT NOT NULL, + SendTiming INT NOT NULL, + DNSTiming 
INT NOT NULL, + ConnectTiming INT NOT NULL, + ResponseStartTiming INT NOT NULL, + ResponseEndTiming INT NOT NULL, + FetchTiming INT NOT NULL, + SocialSourceNetworkID SMALLINT NOT NULL, + SocialSourcePage STRING NOT NULL, + ParamPrice BIGINT NOT NULL, + ParamOrderID STRING NOT NULL, + ParamCurrency STRING NOT NULL, + ParamCurrencyID SMALLINT NOT NULL, + OpenstatServiceName STRING NOT NULL, + OpenstatCampaignID STRING NOT NULL, + OpenstatAdID STRING NOT NULL, + OpenstatSourceID STRING NOT NULL, + UTMSource STRING NOT NULL, + UTMMedium STRING NOT NULL, + UTMCampaign STRING NOT NULL, + UTMContent STRING NOT NULL, + UTMTerm STRING NOT NULL, + FromTag STRING NOT NULL, + HasGCLID SMALLINT NOT NULL, + RefererHash BIGINT NOT NULL, + URLHash BIGINT NOT NULL, + CLID INT NOT NULL + ) + DUPLICATE KEY (CounterID, EventDate, UserID, EventTime, WatchID) + DISTRIBUTED BY HASH(UserID) BUCKETS 16 + PROPERTIES ( \"replication_num\"=\"1\"); + " + echo "####load data" + if [[ ! -f "${data_home}"/hits.tsv ]] || [[ $(wc -c "${data_home}"/hits.tsv | awk '{print $1}') != '74807831229' ]]; then + cd "${data_home}" + wget --continue 'https://datasets.clickhouse.com/hits_compatible/hits.tsv.gz' + gzip -d hits.tsv.gz + if ${DEBUG:-false}; then head -n 10000 hits.tsv >hits.tsv.10000; fi + cd - + fi + data_file_name="${data_home}/hits.tsv" + if ${DEBUG:-false}; then data_file_name="${data_home}/hits.tsv.10000"; fi + echo "start loading ..." 
+ START=$(date +%s) + curl --location-trusted \ + -u root: \ + -T "${data_file_name}" \ + -H "label:hits_${START}" \ + -H "columns: WatchID,JavaEnable,Title,GoodEvent,EventTime,EventDate,CounterID,ClientIP,RegionID,UserID,CounterClass,OS,UserAgent,URL,Referer,IsRefresh,RefererCategoryID,RefererRegionID,URLCategoryID,URLRegionID,ResolutionWidth,ResolutionHeight,ResolutionDepth,FlashMajor,FlashMinor,FlashMinor2,NetMajor,NetMinor,UserAgentMajor,UserAgentMinor,CookieEnable,JavascriptEnable,IsMobile,MobilePhone,MobilePhoneModel,Params,IPNetworkID,TraficSourceID,SearchEngineID,SearchPhrase,AdvEngineID,IsArtifical,WindowClientWidth,WindowClientHeight,ClientTimeZone,ClientEventTime,SilverlightVersion1,SilverlightVersion2,SilverlightVersion3,SilverlightVersion4,PageCharset,CodeVersion,IsLink,IsDownload,IsNotBounce,FUniqID,OriginalURL,HID,IsOldCounter,IsEvent,IsParameter,DontCountHits,WithHash,HitColor,LocalEventTime,Age,Sex,Income,Interests,Robotness,RemoteIP,WindowName,OpenerName,HistoryLength,BrowserLanguage,BrowserCountry,SocialNetwork,SocialAction,HTTPError,SendTiming,DNSTiming,ConnectTiming,ResponseStartTiming,ResponseEndTiming,FetchTiming,SocialSourceNetworkID,SocialSourcePage,ParamPrice,ParamOrderID,ParamCurrency,ParamCurrencyID,OpenstatServiceName,OpenstatCampaignID,OpenstatAdID,OpenstatSourceID,UTMSource,UTMMedium,UTMCampaign,UTMContent,UTMTerm,FromTag,HasGCLID,RefererHash,URLHash,CLID" \ + "http://localhost:8030/api/${db_name}/hits/_stream_load" + END=$(date +%s) + LOADTIME=$(echo "${END} - ${START}" | bc) + echo "INFO: ClickBench Load data costs ${LOADTIME} seconds" + echo "${LOADTIME}" >clickbench_loadtime + + if ! check_clickbench_table_rows "${db_name}"; then + exit 1 + fi + data_reload="true" + fi + + echo "#### 3. run clickbench query" + bash "${teamcity_build_checkoutDir}"/tools/clickbench-tools/run-clickbench-queries.sh + # result.csv 来自 run-clickbench-queries.sh 的产出 + if ! check_clickbench_performance_result result.csv; then exit 1; fi + if ! 
check_clickbench_query_result; then exit 1; fi + cold_run_sum=$(awk -F ',' '{sum+=$2} END {print sum}' result.csv) + best_hot_run_sum=$(awk -F ',' '{if($3<$4){sum+=$3}else{sum+=$4}} END {print sum}' result.csv) + comment_body="ClickBench test result on commit ${commit_id:-}, data reload: ${data_reload:-"false"} + +$(sed 's|,|\t|g' result.csv) +Total cold run time: ${cold_run_sum} s +Total hot run time: ${best_hot_run_sum} s" + + echo "#### 4. comment result on clickbench" + comment_body=$(echo "${comment_body}" | sed -e ':a;N;$!ba;s/\t/\\t/g;s/\n/\\n/g') # 将所有的 Tab字符替换为\t 换行符替换为\n + create_an_issue_comment_clickbench "${pull_request_num:-}" "${comment_body}" + rm -f result.csv + echo "INFO: Restore session variables" + mysql -h"${host}" -P"${query_port}" -uroot -e "source ${backup_session_variables_file};" + rm -f "${backup_session_variables_file}" +) +exit_flag="$?" + +echo "#### 5. check if need backup doris logs" +if [[ ${exit_flag} != "0" ]]; then + stop_doris + print_doris_fe_log + print_doris_be_log + if file_name=$(archive_doris_logs "${pull_request_num}_${commit_id}_doris_logs.tar.gz"); then + upload_doris_log_to_oss "${file_name}" + fi +fi + +exit "${exit_flag}" diff --git a/regression-test/pipeline/performance/run-load.sh b/regression-test/pipeline/performance/run-load.sh new file mode 100644 index 00000000000000..784ac38aafa21a --- /dev/null +++ b/regression-test/pipeline/performance/run-load.sh @@ -0,0 +1,688 @@ +#!/usr/bin/env bash +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. 
You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. + +# Build Step: Command Line +: </dev/null; then exit 1; fi + line_end=$(sed -n '/^Total hot run time/=' "${teamcity_build_checkoutDir}"/run-tpch-queries.log) + line_begin=$((line_end - 23)) + comment_body="${comment_body} + +----- Round 2, with runtime_filter_mode=off ----- +$(sed -n "${line_begin},${line_end}p" "${teamcity_build_checkoutDir}"/run-tpch-queries.log)" + + echo "#### 5. comment result on tpch" + comment_body=$(echo "${comment_body}" | sed -e ':a;N;$!ba;s/\t/\\t/g;s/\n/\\n/g') # 将所有的 Tab字符替换为\t 换行符替换为\n + create_an_issue_comment_tpch "${pull_request_num:-}" "${comment_body}" + rm -f result.csv +) +exit_flag="$?" + +echo "#### 6. 
check if need backup doris logs" +if [[ ${exit_flag} != "0" ]]; then + stop_doris + print_doris_fe_log + print_doris_be_log + if file_name=$(archive_doris_logs "${pull_request_num}_${commit_id}_doris_logs.tar.gz"); then + upload_doris_log_to_oss "${file_name}" + fi +fi + +exit "${exit_flag}" diff --git a/tools/clickbench-tools/run-clickbench-queries.sh b/tools/clickbench-tools/run-clickbench-queries.sh index 831879b7643e86..0d798e51a00374 100755 --- a/tools/clickbench-tools/run-clickbench-queries.sh +++ b/tools/clickbench-tools/run-clickbench-queries.sh @@ -96,19 +96,30 @@ echo "USER: $USER" echo "PASSWORD: $PASSWORD" echo "DB: $DB" -pre_set() { +run_sql() { echo $@ mysql -h$FE_HOST -u$USER -P$FE_QUERY_PORT -D$DB -e "$@" } -pre_set "set global parallel_fragment_exec_instance_num=8;" -pre_set "set global exec_mem_limit=32G;" -pre_set "set global query_timeout=900;" +get_session_variable() { + k="$1" + v=$(mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e"show variables like '${k}'\G" | grep " Value: ") + echo "${v/*Value: /}" +} + +_parallel_fragment_exec_instance_num="$(get_session_variable parallel_fragment_exec_instance_num)" +_exec_mem_limit="$(get_session_variable exec_mem_limit)" +_query_timeout="$(get_session_variable query_timeout)" echo '============================================' -pre_set "show variables" +echo "Optimize session variables" +run_sql "set global parallel_fragment_exec_instance_num=16;" +run_sql "set global exec_mem_limit=32G;" +run_sql "set global query_timeout=900;" +echo '============================================' +run_sql "show variables" echo '============================================' -pre_set "analyze table hits with sync;" +run_sql "analyze table hits with sync;" TRIES=3 QUERY_NUM=1 @@ -122,7 +133,7 @@ cat ${QUERIES_FILE} | while read query; do sync echo 3 | sudo tee /proc/sys/vm/drop_caches >/dev/null - echo -n "query${QUERY_NUM}: " | tee -a result.csv + echo -n "query${QUERY_NUM}," | tee -a result.csv for i 
in $(seq 1 $TRIES); do RES=$(mysql -vvv -h$FE_HOST -u$USER -P$FE_QUERY_PORT -D$DB -e "${query}" | perl -nle 'print $1 if /\((\d+\.\d+)+ sec\)/' || :) @@ -133,3 +144,14 @@ cat ${QUERIES_FILE} | while read query; do QUERY_NUM=$((QUERY_NUM + 1)) done + +cold_run_sum=$(awk -F ',' '{sum+=$2} END {print sum}' result.csv) +best_hot_run_sum=$(awk -F ',' '{if($3<$4){sum+=$3}else{sum+=$4}} END {print sum}' result.csv) +echo "Total cold run time: ${cold_run_sum} s" +echo "Total hot run time: ${best_hot_run_sum} s" +echo 'Finish ClickBench queries.' + +echo "Restore session variables" +run_sql "set global parallel_fragment_exec_instance_num=${_parallel_fragment_exec_instance_num};" +run_sql "set global exec_mem_limit=${_exec_mem_limit};" +run_sql "set global query_timeout=${_query_timeout};" diff --git a/tools/tpcds-tools/bin/run-tpcds-queries.sh b/tools/tpcds-tools/bin/run-tpcds-queries.sh index 9f6542da6d5fec..f3d8e58bffd00c 100755 --- a/tools/tpcds-tools/bin/run-tpcds-queries.sh +++ b/tools/tpcds-tools/bin/run-tpcds-queries.sh @@ -123,8 +123,24 @@ run_sql() { echo "$*" mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e "$*" } +get_session_variable() { + k="$1" + v=$(mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e"show variables like '${k}'\G" | grep " Value: ") + echo "${v/*Value: /}" +} +backup_session_variables_file="${CURDIR}/../conf/opt/backup_session_variables.sql" +backup_session_variables() { + while IFS= read -r line; do + k="${line/set global /}" + k="${k%=*}" + v=$(get_session_variable "${k}") + echo "set global ${k}=${v};" >>"${backup_session_variables_file}" + done < <(grep -v '^ *#' <"${TPCDS_OPT_CONF}") +} +backup_session_variables echo '============================================' +echo "Optimize session variables" run_sql "source ${TPCDS_OPT_CONF};" echo '============================================' run_sql "show variables;" @@ -182,3 +198,7 @@ done echo "Total cold run time: ${cold_run_sum} ms" echo "Total hot run
time: ${best_hot_run_sum} ms" echo 'Finish tpcds queries.' + +echo "Restore session variables" +run_sql "source ${backup_session_variables_file};" +rm -f "${backup_session_variables_file}" diff --git a/tools/tpch-tools/bin/run-tpch-queries.sh b/tools/tpch-tools/bin/run-tpch-queries.sh index d88062183cdf11..c2e5350a27bbb7 100755 --- a/tools/tpch-tools/bin/run-tpch-queries.sh +++ b/tools/tpch-tools/bin/run-tpch-queries.sh @@ -123,8 +123,24 @@ run_sql() { echo "$*" mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e "$*" } +get_session_variable() { + k="$1" + v=$(mysql -h"${FE_HOST}" -u"${USER}" -P"${FE_QUERY_PORT}" -D"${DB}" -e"show variables like '${k}'\G" | grep " Value: ") + echo "${v/*Value: /}" +} +backup_session_variables_file="${CURDIR}/../conf/opt/backup_session_variables.sql" +backup_session_variables() { + while IFS= read -r line; do + k="${line/set global /}" + k="${k%=*}" + v=$(get_session_variable "${k}") + echo "set global ${k}=${v};" >>"${backup_session_variables_file}" + done < <(grep -v '^ *#' <"${TPCH_OPT_CONF}") +} +backup_session_variables echo '============================================' +echo "Optimize session variables" run_sql "source ${TPCH_OPT_CONF};" echo '============================================' run_sql "show variables;" @@ -183,3 +199,7 @@ echo "Total cold run time: ${cold_run_sum} ms" # tpch 流水线依赖这个'Total hot run time'字符串 echo "Total hot run time: ${best_hot_run_sum} ms" echo 'Finish tpch queries.' + +echo "Restore session variables" +run_sql "source ${backup_session_variables_file};" +rm -f "${backup_session_variables_file}"