Skip to content

Commit

Permalink
[fix](ci) tpch pipeline should not re-load data (#28874)
Browse files Browse the repository at this point in the history
* [fix](ci) tpch pipeline should not re-load data

* 2

---------

Co-authored-by: stephen <[email protected]>
  • Loading branch information
hello-stephen and stephen authored Dec 22, 2023
1 parent b2b209e commit 37faf1b
Show file tree
Hide file tree
Showing 5 changed files with 22 additions and 63 deletions.
Original file line number Diff line number Diff line change
Expand Up @@ -169,7 +169,12 @@ need_run_ckb() {
[[ "${af}" == 'gensrc'* ]] ||
[[ "${af}" == 'thirdparty'* ]] ||
[[ "${af}" == 'build.sh' ]] ||
[[ "${af}" == 'env.sh' ]]; then
[[ "${af}" == 'env.sh' ]] ||
[[ "${af}" == 'regression-test/pipeline/common/github-utils.sh' ]] ||
[[ "${af}" == 'regression-test/pipeline/common/doris-utils.sh' ]] ||
[[ "${af}" == 'regression-test/pipeline/common/oss-utils.sh' ]] ||
[[ "${af}" == 'tools/tpch-tools/bin/run-tpch-queries.sh' ]] ||
[[ "${af}" == 'regression-test/pipeline/tpch/tpch-sf100/'* ]]; then
echo "clickbench performance related file changed, return need" && return 0
fi
done
Expand Down
7 changes: 6 additions & 1 deletion regression-test/pipeline/common/github-utils.sh
Original file line number Diff line number Diff line change
Expand Up @@ -260,7 +260,12 @@ file_changed_ckb() {
[[ "${af}" == 'gensrc'* ]] ||
[[ "${af}" == 'thirdparty'* ]] ||
[[ "${af}" == 'build.sh' ]] ||
[[ "${af}" == 'env.sh' ]]; then
[[ "${af}" == 'env.sh' ]] ||
[[ "${af}" == 'regression-test/pipeline/common/github-utils.sh' ]] ||
[[ "${af}" == 'regression-test/pipeline/common/doris-utils.sh' ]] ||
[[ "${af}" == 'regression-test/pipeline/common/oss-utils.sh' ]] ||
[[ "${af}" == 'tools/tpch-tools/bin/run-tpch-queries.sh' ]] ||
[[ "${af}" == 'regression-test/pipeline/tpch/tpch-sf100/'* ]]; then
echo "clickbench performance related file changed, return need" && return 0
fi
done
Expand Down
29 changes: 8 additions & 21 deletions regression-test/pipeline/tpch/tpch-sf100/deploy.sh
Original file line number Diff line number Diff line change
Expand Up @@ -88,35 +88,22 @@ storage_root_path=$(get_doris_conf_value "${DORIS_HOME}"/be/conf/be.conf storage
mkdir -p "${meta_dir}"
mkdir -p "${storage_root_path}"
if ! start_doris_fe; then
echo "WARNING: Start doris fe failed at first time"
echo "ERROR: Start doris fe failed."
print_doris_fe_log
echo "WARNING: delete meta_dir and storage_root_path, then retry"
rm -rf "${meta_dir:?}/"*
rm -rf "${storage_root_path:?}/"*
if ! start_doris_fe; then
need_backup_doris_logs=true
exit_flag=1
fi
need_backup_doris_logs=true
exit_flag=1
fi
if ! start_doris_be; then
echo "WARNING: Start doris be failed at first time"
echo "ERROR: Start doris be failed."
print_doris_be_log
echo "WARNING: delete storage_root_path, then retry"
rm -rf "${storage_root_path:?}/"*
if ! start_doris_be; then
need_backup_doris_logs=true
exit_flag=1
fi
fi
if ! add_doris_be_to_fe; then
need_backup_doris_logs=true
exit_flag=1
else
# Wait 10s for Doris to finish starting; otherwise we may encounter the error below:
# ERROR 1105 (HY000) at line 102: errCode = 2, detailMessage = Failed to find enough backend, please check the replication num,replication tag and storage medium.
sleep 10s
fi

# Wait 10s for Doris to finish starting; otherwise we may encounter the error below:
# ERROR 1105 (HY000) at line 102: errCode = 2, detailMessage = Failed to find enough backend, please check the replication num,replication tag and storage medium.
sleep 10s

echo "#### 5. set session variables"
echo "TODO"

Expand Down
2 changes: 0 additions & 2 deletions regression-test/pipeline/tpch/tpch-sf100/prepare.sh
Original file line number Diff line number Diff line change
Expand Up @@ -64,8 +64,6 @@ if ! [[ -f "${teamcity_build_checkoutDir}"/regression-test/pipeline/tpch/tpch-sf
-f "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/oss-utils.sh &&
-f "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/doris-utils.sh &&
-f "${teamcity_build_checkoutDir}"/regression-test/pipeline/common/github-utils.sh &&
-f "${teamcity_build_checkoutDir}"/tools/tpch-tools/bin/load-tpch-data.sh &&
-f "${teamcity_build_checkoutDir}"/tools/tpch-tools/bin/create-tpch-tables.sh &&
-f "${teamcity_build_checkoutDir}"/tools/tpch-tools/bin/run-tpch-queries.sh ]]; then
echo "ERROR: depending files missing" && exit 1
fi
40 changes: 2 additions & 38 deletions regression-test/pipeline/tpch/tpch-sf100/run.sh
Original file line number Diff line number Diff line change
Expand Up @@ -80,48 +80,12 @@ check_tpch_result() {

echo "#### 1. check if need to load data"
SF="100" # SCALE FACTOR
if ${DEBUG:-false}; then
SF="100"
fi
TPCH_DATA_DIR="/data/tpch/sf_${SF}" # no / at the end
TPCH_DATA_DIR_LINK="${teamcity_build_checkoutDir}"/tools/tpch-tools/bin/tpch-data # no / at the end
if ${DEBUG:-false}; then SF="1"; fi
db_name="tpch_sf${SF}"
sed -i "s|^export DB=.*$|export DB='${db_name}'|g" \
"${teamcity_build_checkoutDir}"/tools/tpch-tools/conf/doris-cluster.conf
if ! check_tpch_table_rows "${db_name}" "${SF}"; then
echo "INFO: need to load tpch-sf${SF} data"
# prepare data
mkdir -p "${TPCH_DATA_DIR}"
(
cd "${TPCH_DATA_DIR}" || exit 1
declare -A table_file_count
table_file_count=(['region']=1 ['nation']=1 ['supplier']=1 ['customer']=1 ['part']=1 ['partsupp']=10 ['orders']=10 ['lineitem']=10)
for table_name in ${!table_file_count[*]}; do
if [[ ${table_file_count[${table_name}]} -eq 1 ]]; then
url="https://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/tpch/sf${SF}/${table_name}.tbl"
if ! wget --continue -t3 -q "${url}"; then echo "ERROR: wget --continue ${url}" && exit 1; fi
elif [[ ${table_file_count[${table_name}]} -eq 10 ]]; then
(
for i in {1..10}; do
url="https://doris-build-1308700295.cos.ap-beijing.myqcloud.com/regression/tpch/sf${SF}/${table_name}.tbl.${i}"
if ! wget --continue -t3 -q "${url}"; then echo "ERROR: wget --continue ${url}" && exit 1; fi
done
) &
wait
fi
done
)
# create table and load data
sed -i "s|^SCALE_FACTOR=[0-9]\+$|SCALE_FACTOR=${SF}|g" "${teamcity_build_checkoutDir}"/tools/tpch-tools/bin/create-tpch-tables.sh
bash "${teamcity_build_checkoutDir}"/tools/tpch-tools/bin/create-tpch-tables.sh
rm -rf "${TPCH_DATA_DIR_LINK}"
ln -s "${TPCH_DATA_DIR}" "${TPCH_DATA_DIR_LINK}"
bash "${teamcity_build_checkoutDir}"/tools/tpch-tools/bin/load-tpch-data.sh -c 10
if ! check_tpch_table_rows "${db_name}" "${SF}"; then
exit 1
fi
echo "INFO: sleep 10min to wait compaction done" && sleep 10m
data_reload="true"
echo "ERROR: check_tpch_table_rows failed." && exit 1
fi

echo "#### 2. run tpch-sf${SF} query"
Expand Down

0 comments on commit 37faf1b

Please sign in to comment.