-
Notifications
You must be signed in to change notification settings - Fork 42
New issue
Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.
By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.
Already on GitHub? Sign in to your account
Added the running rosa spots ability #349
base: master
Are you sure you want to change the base?
Changes from all commits
File filter
Filter by extension
Conversations
Jump to
Diff view
Diff view
There are no files selected for viewing
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,17 @@ | ||
{ | ||
"aws_profile": "", | ||
"aws_access_key_id": "", | ||
"aws_secret_access_key": "", | ||
"aws_authentication_method": "sts", | ||
"rosa_environment": "staging", | ||
"rosa_cli_version": "container", | ||
"ocm_environment": "stage", | ||
"managed_channel_group": "nightly", | ||
"managed_ocp_version": "latest", | ||
"openshift_worker_count": 24, | ||
"openshift_network_type": "OVNKubernetes", | ||
"openshift_worker_instance_type": "m5.2xlarge", | ||
"machineset_metadata_label_prefix": "machine.openshift.io", | ||
"openshift_workload_node_instance_type": null, | ||
"enable_spot_workers": false | ||
} |
Original file line number | Diff line number | Diff line change |
---|---|---|
|
@@ -31,8 +31,17 @@ _wait_for_nodes_ready(){ | |
export KUBECONFIG=./kubeconfig | ||
ALL_READY_ITERATIONS=0 | ||
ITERATIONS=0 | ||
# Node count is number of workers + 3 infra | ||
NODES_COUNT=$(($2+3)) | ||
if [ "$3" == "rosa-spots=true" ]; then | ||
if [ "$SPOT_POOL_READY" == "true" ]; then | ||
# Node count is number of workers pool + 3 infra | ||
NODES_COUNT=$(($2+3)) | ||
else | ||
NODES_COUNT=$2 | ||
fi | ||
else | ||
# Node count is number of workers + 3 infra | ||
NODES_COUNT=$(($2+3)) | ||
fi | ||
# 30 seconds per node, waiting for all nodes ready to finalize | ||
while [ ${ITERATIONS} -le $((${NODES_COUNT}*5)) ] ; do | ||
NODES_READY_COUNT=$(oc get nodes -l $3 | grep " Ready " | wc -l) | ||
|
@@ -86,7 +95,16 @@ _wait_for_cluster_ready(){ | |
echo "Set end time of prom scrape" | ||
export END_TIME=$(date +"%s") | ||
START_TIMER=$(date +%s) | ||
_wait_for_nodes_ready $1 ${COMPUTE_WORKERS_NUMBER} "node-role.kubernetes.io/worker" | ||
if [ "$ENABLE_SPOT_WORKERS" == "true" ]; then | ||
if [ "$SPOT_POOL_READY" == "true" ]; then | ||
_wait_for_nodes_ready $1 ${COMPUTE_WORKERS_NUMBER} "node-role.kubernetes.io/worker" | ||
else | ||
DEFAULT_WORKER_NODES=3 | ||
There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. Shouldn't the default always be There was a problem hiding this comment. Choose a reason for hiding this comment. The reason will be displayed to describe this comment to others. Learn more. openshift_worker_count=9 |
||
_wait_for_nodes_ready $1 $DEFAULT_WORKER_NODES "node-role.kubernetes.io/worker" | ||
fi | ||
else | ||
_wait_for_nodes_ready $1 ${COMPUTE_WORKERS_NUMBER} "node-role.kubernetes.io/worker" | ||
fi | ||
CURRENT_TIMER=$(date +%s) | ||
# Time since rosa cluster is ready until all nodes are ready | ||
DURATION=$(($CURRENT_TIMER - $START_TIMER)) | ||
|
@@ -129,6 +147,8 @@ setup(){ | |
export MANAGED_CHANNEL_GROUP=$(cat ${json_file} | jq -r .managed_channel_group) | ||
export CLUSTER_NAME=$(cat ${json_file} | jq -r .openshift_cluster_name) | ||
export COMPUTE_WORKERS_NUMBER=$(cat ${json_file} | jq -r .openshift_worker_count) | ||
export COMPUTE_WORKERS_TYPE=$(cat ${json_file} | jq -r .openshift_worker_instance_type) | ||
export ENABLE_SPOT_WORKERS=$(cat ${json_file} | jq -r .enable_spot_workers) | ||
export NETWORK_TYPE=$(cat ${json_file} | jq -r .openshift_network_type) | ||
export ES_SERVER=$(cat ${json_file} | jq -r .es_server) | ||
export UUID=$(uuidgen) | ||
|
@@ -167,16 +187,31 @@ setup(){ | |
return 0 | ||
} | ||
|
||
#######################################
# Create a spot-instance machine pool and wait for its nodes.
# The pool is sized as COMPUTE_WORKERS_NUMBER - 3 and labelled
# "rosa-spots=true"; nothing is done unless ENABLE_SPOT_WORKERS is "true"
# and that size is greater than zero.
# Globals:
#   ENABLE_SPOT_WORKERS, COMPUTE_WORKERS_NUMBER, COMPUTE_WORKERS_TYPE,
#   CLUSTER_NAME (read); SPOT_POOL_READY (exported as "true" after the wait)
# Arguments:
#   None
#######################################
_create_spot_worker_pool(){
  if [ "${ENABLE_SPOT_WORKERS}" != "true" ]; then
    return 0
  fi

  local spot_replicas
  spot_replicas=$((COMPUTE_WORKERS_NUMBER-3))

  # Compare numerically: a string test against "3" lets worker counts of
  # 0-2 through and would request a negative number of replicas.
  if [ "${spot_replicas}" -le 0 ]; then
    return 0
  fi

  rosa create machinepool -c "${CLUSTER_NAME}" --name="${CLUSTER_NAME}-spot-pool" --replicas="${spot_replicas}" --instance-type="${COMPUTE_WORKERS_TYPE}" --labels="rosa-spots=true" --use-spot-instances
  _wait_for_nodes_ready "${CLUSTER_NAME}" "${spot_replicas}" "rosa-spots=true"
  export SPOT_POOL_READY=true
}
|
||
#######################################
# Create the ROSA cluster exactly once, then run postinstall.
# With ENABLE_SPOT_WORKERS="true" the cluster is created without --replicas,
# the script waits for it to be ready, and the spot worker pool is added;
# otherwise the cluster is created with --replicas COMPUTE_WORKERS_NUMBER.
# Globals:
#   json_file, AWS_AUTHENTICATION_METHOD, ENABLE_SPOT_WORKERS,
#   GITHUB_USERNAME, CLUSTER_NAME, ROSA_VERSION, MANAGED_CHANNEL_GROUP,
#   COMPUTE_WORKERS_NUMBER, NETWORK_TYPE (read);
#   COMPUTE_WORKERS_TYPE, CLUSTER_AUTOSCALE, OIDC_CONFIG,
#   INSTALLATION_PARAMS (exported)
# Returns:
#   0
#######################################
install(){
  export COMPUTE_WORKERS_TYPE=$(jq -r .openshift_worker_instance_type "${json_file}")
  export CLUSTER_AUTOSCALE=$(jq -r .cluster_autoscale "${json_file}")
  export OIDC_CONFIG=$(jq -r .oidc_config "${json_file}")
  export INSTALLATION_PARAMS=""
  # Quoted so an empty/unset authentication method is not a test syntax error
  if [ "${AWS_AUTHENTICATION_METHOD}" == "sts" ] ; then
    INSTALLATION_PARAMS="${INSTALLATION_PARAMS} --sts -m auto --yes"
  fi
  INSTALLATION_PARAMS="${INSTALLATION_PARAMS} --multi-az" # Multi AZ is default on hosted-cp cluster

  # Flags shared by both creation paths
  local -a create_args=(
    --tags="User:${GITHUB_USERNAME}"
    --cluster-name "${CLUSTER_NAME}"
    --version "${ROSA_VERSION}"
    --channel-group="${MANAGED_CHANNEL_GROUP}"
    --compute-machine-type "${COMPUTE_WORKERS_TYPE}"
  )

  # INSTALLATION_PARAMS is intentionally unquoted: it holds several
  # space-separated flags that must be split into separate words.
  if [ "${ENABLE_SPOT_WORKERS}" == "true" ]; then
    # No --replicas here; the extra workers come from the spot machine pool
    rosa create cluster "${create_args[@]}" --network-type "${NETWORK_TYPE}" ${INSTALLATION_PARAMS}
    _wait_for_cluster_ready "${CLUSTER_NAME}"
    _create_spot_worker_pool
  else
    rosa create cluster "${create_args[@]}" --replicas "${COMPUTE_WORKERS_NUMBER}" --network-type "${NETWORK_TYPE}" ${INSTALLATION_PARAMS}
  fi
  postinstall
  return 0
}
|
@@ -277,6 +312,7 @@ if [[ "$operation" == "install" ]]; then | |
index_metadata | ||
elif [ "${CLUSTER_STATUS}" == "ready" ] ; then | ||
printf "INFO: Cluster ${CLUSTER_NAME} already installed and ready, reusing..." | ||
_create_spot_worker_pool | ||
postinstall | ||
elif [ "${CLUSTER_STATUS}" == "error" ] ; then | ||
printf "INFO: Cluster ${CLUSTER_NAME} errored, cleaning them now..." | ||
|
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Which condition needs NODES_COUNT=$2?
There was a problem hiding this comment.
Choose a reason for hiding this comment
The reason will be displayed to describe this comment to others. Learn more.
Before SPOT_POOL_READY is set, I need to check whether the spot nodes are ready. So NODES_COUNT=$2 is the number passed to _wait_for_nodes_ready.