Merge branch 'open-horizon:master' into Issue-open-horizon#4115
adhishreekadam authored Aug 26, 2024
2 parents 4ee79b2 + 1949d72 commit 7eccde6
Showing 6 changed files with 92 additions and 95 deletions.
76 changes: 41 additions & 35 deletions agent-install/agent-install.sh
@@ -458,12 +458,28 @@ function get_agent_version_from_repository() {
fi

IFS='/' read -r -a repoarray<<< $IMAGE_ON_EDGE_CLUSTER_REGISTRY
arrayLen=${#repoarray[@]}
if [[ "${repoarray[0]}" == *"docker"* ]]; then
repository_url="https://registry.hub.docker.com/v2/repositories/${repoarray[1]}/${repoarray[2]}/tags"
repository_url="https://registry.hub.docker.com/v2/repositories"
for (( i=1 ; i<$arrayLen ; i++ ));
do
repository_url=$repository_url/${repoarray[i]}
done
repository_url=$repository_url/tags
agent_versions=$(curl $auth $repository_url 2>/dev/null | jq '.results[]["name"] | select(test("testing|latest") | not)' | sort -rV | tr '\n' ',') # "2.31.0-1495","2.31.0-1492","2.31.0-1021"
else
repository_url="https://${repoarray[0]}/v1/repositories/${repoarray[1]}/${repoarray[2]}/tags"
agent_versions=$(curl $auth $repository_url 2>/dev/null | jq 'keys[]' | sort -rV | tr '\n' ',') # "2.31.0-1495","2.31.0-1492","2.31.0-1021"
repository_url="https://${repoarray[0]}/v1/repositories"
for (( i=1 ; i<$arrayLen ; i++ ));
do
repository_url=$repository_url/${repoarray[i]}
done
repository_url=$repository_url/tags
http_code=$( curl $auth $repository_url -o /dev/null -w "%{http_code}" )
if [[ ${http_code} -eq 200 ]]; then
agent_versions=$(curl $auth $repository_url 2>/dev/null | jq 'keys[]' | sort -rV | tr '\n' ',') # "2.31.0-1495","2.31.0-1492","2.31.0-1021"
else
agent_versions=""
fi
fi
IFS=',' read -r -a agent_version_array <<< $agent_versions
highest_var=${agent_version_array[0]}
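
An editor's aside on the hunk above: the old code hard-coded exactly two repository path segments (${repoarray[1]}/${repoarray[2]}), while the new loop appends every segment of the image reference, so registries with deeper paths also resolve; the non-Docker-Hub branch additionally skips tag parsing unless the registry answers HTTP 200. A minimal sketch of the same pattern, assuming a hypothetical image reference with three repository segments:

# [editor's sketch, not part of the commit] registry.example.com/org/team/... is hypothetical.
IMAGE_ON_EDGE_CLUSTER_REGISTRY="registry.example.com/org/team/amd64_anax_k8s"
IFS='/' read -r -a repoarray <<< "$IMAGE_ON_EDGE_CLUSTER_REGISTRY"
repository_url="https://${repoarray[0]}/v1/repositories"
for (( i=1 ; i<${#repoarray[@]} ; i++ )); do
    repository_url=$repository_url/${repoarray[i]}
done
repository_url=$repository_url/tags
# -> https://registry.example.com/v1/repositories/org/team/amd64_anax_k8s/tags
http_code=$(curl "$repository_url" -o /dev/null -w "%{http_code}")   # probe before parsing
[[ $http_code -eq 200 ]] && curl -s "$repository_url" | jq 'keys[]' | sort -rV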
@@ -1298,10 +1314,12 @@ function get_all_variables() {
if [[ -z $IMAGE_ON_EDGE_CLUSTER_REGISTRY ]]; then
log_fatal 1 "A value for \$IMAGE_ON_EDGE_CLUSTER_REGISTRY must be specified"
fi
lastpart=$(echo $IMAGE_ON_EDGE_CLUSTER_REGISTRY | cut -d "/" -f 3) # <arch>_anax_k8s
firstpart="${IMAGE_ON_EDGE_CLUSTER_REGISTRY%/*}" # quay.io/zhangle/horizon
lastpart="${IMAGE_ON_EDGE_CLUSTER_REGISTRY##*/}" # <arch>_anax_k8s
image_arch_in_param=$(echo $lastpart | cut -d "_" -f 1)
if [[ "$image_arch" != "$image_arch_in_param" ]]; then
log_fatal 1 "Cannot use agent image with $image_arch_in_param arch to install on $image_arch cluster, please use agent image with '$image_arch'"
log_warning "Cannot use agent image with $image_arch_in_param arch to install on $image_arch cluster, will change arch to '$image_arch'"
IMAGE_ON_EDGE_CLUSTER_REGISTRY=${firstpart}/${image_arch}_anax_k8s
fi
fi
get_variable AGENT_K8S_IMAGE_TAR_FILE "$DEFAULT_AGENT_K8S_IMAGE_TAR_FILE"
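
Also in the hunk above, an architecture mismatch between the supplied agent image and the cluster is no longer fatal: the script warns and rewrites the image reference for the cluster's arch. A short sketch of the parameter expansions involved, assuming hypothetical values (the quay.io/zhangle/horizon path comes from the inline comments above):

# [editor's sketch, not part of the commit] The arm64/amd64 values are hypothetical.
IMAGE_ON_EDGE_CLUSTER_REGISTRY="quay.io/zhangle/horizon/arm64_anax_k8s"
image_arch="amd64"                                        # arch detected for the cluster
firstpart="${IMAGE_ON_EDGE_CLUSTER_REGISTRY%/*}"          # quay.io/zhangle/horizon
lastpart="${IMAGE_ON_EDGE_CLUSTER_REGISTRY##*/}"          # arm64_anax_k8s
image_arch_in_param=$(echo $lastpart | cut -d "_" -f 1)   # arm64
if [[ "$image_arch" != "$image_arch_in_param" ]]; then
    IMAGE_ON_EDGE_CLUSTER_REGISTRY=${firstpart}/${image_arch}_anax_k8s
fi
echo "$IMAGE_ON_EDGE_CLUSTER_REGISTRY"                    # quay.io/zhangle/horizon/amd64_anax_k8s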
@@ -1438,20 +1456,6 @@ function check_variables() {
log_fatal 1 "HZN_NODE_POLICY file '$HZN_NODE_POLICY' does not exist"
fi

if is_cluster && [[ "$USE_EDGE_CLUSTER_REGISTRY" == "true" ]]; then
parts=$(echo $IMAGE_ON_EDGE_CLUSTER_REGISTRY | awk -F'/' '{print NF}')
if [[ "$parts" != "3" ]]; then
log_fatal 1 "IMAGE_ON_EDGE_CLUSTER_REGISTRY should be this format: <registry-host>/<registry-repo>/<image-name>"
fi
fi

if is_cluster && [[ "$USE_EDGE_CLUSTER_REGISTRY" == "true" ]] && [[ "$ENABLE_AUTO_UPGRADE_CRONJOB" == "true" ]]; then
parts=$(echo $CRONJOB_AUTO_UPGRADE_IMAGE_ON_EDGE_CLUSTER_REGISTRY | awk -F'/' '{print NF}')
if [[ "$parts" != "3" ]]; then
log_fatal 1 "CRONJOB_AUTO_UPGRADE_IMAGE_ON_EDGE_CLUSTER_REGISTRY should be this format: <registry-host>/<registry-repo>/<image-name>"
fi
fi

if [[ -n $AGENT_IMAGE_TAR_FILE && $AGENT_IMAGE_TAR_FILE != *.tar.gz ]]; then
log_fatal 1 "AGENT_IMAGE_TAR_FILE must be in tar.gz format"
fi
@@ -3683,23 +3687,25 @@ function check_cluster_agent_scope() {
# continue to check_agent_deployment_exist() to check scope
AGENT_DEPLOYMENT_EXIST_IN_SAME_NAMESPACE="true"
else
# has agent in other namespace(s). Pick one agent deployment and check scope
# current is cluster scoped agent => error
# current is namespace scoped agent:
# namespace scope agent in other namespace => can proceed to install
# cluster scope agent in other namespace => error
# A namespace-scoped agent is allowed to install in a cluster with cluster-scoped agent or namespace-scoped agents
# A cluster-scoped agent is NOT allowed to be installed in a cluster which already had a cluster-scoped agent
if ! $NAMESPACE_SCOPED; then
log_fatal 3 "One or more agents detected in $namespaces_have_agent. A cluster scoped agent cannot be installed to the same cluster that has agent(s) already"
fi

IFS="," read -ra namespace_array <<< "$namespaces_have_agent"
namespace_to_check=${namespace_array[0]}
local namespace_scoped_env_value_in_use=$($KUBECTL get deployment agent -n ${namespace_to_check} -o json | jq '.spec.template.spec.containers[0].env' | jq -r '.[] | select(.name=="HZN_NAMESPACE_SCOPED").value')
log_debug "Current HZN_NAMESPACE_SCOPED in agent deployment under namespace $namespace_to_check is: $namespace_scoped_env_value_in_use"
log_debug "NAMESPACE_SCOPED passed to this script is: $NAMESPACE_SCOPED" # namespace scoped

if [[ "$namespace_scoped_env_value_in_use" == "" ]] || [[ "$namespace_scoped_env_value_in_use" == "false" ]] ; then
log_fatal 3 "A cluster scoped agent detected in $namespace_to_check. A namespace scoped agent cannot be installed to the same cluster that has a cluster scoped agent"
log_debug "NAMESPACE_SCOPED passed to this script is: $NAMESPACE_SCOPED" # namespace scoped

local namespace_to_check
local namespace_scoped_env_value_in_use

IFS="," read -ra namespace_array <<< "$namespaces_have_agent"
arrayLen=${#namespace_array[@]}
for (( i=0 ; i<$arrayLen ; i++ ));
do
namespace_to_check=${namespace_array[i]}
namespace_scoped_env_value_in_use=$($KUBECTL get deployment agent -n ${namespace_to_check} -o json | jq '.spec.template.spec.containers[0].env' | jq -r '.[] | select(.name=="HZN_NAMESPACE_SCOPED").value')
log_debug "Current HZN_NAMESPACE_SCOPED in agent deployment under namespace $namespace_to_check is: $namespace_scoped_env_value_in_use"
if [[ "$namespace_scoped_env_value_in_use" == "" ]] || [[ "$namespace_scoped_env_value_in_use" == "false" ]] ; then
log_fatal 3 "A cluster scoped agent detected in $namespace_to_check. A cluster scoped agent cannot be installed to the same cluster that already has a cluster scoped agent"
fi
done
fi
fi
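
For reference, the scope probe that the rewritten loop above now runs once per namespace (rather than only for the first namespace found) boils down to a kubectl-plus-jq query; a sketch against a hypothetical namespace:

# [editor's sketch, not part of the commit] "fleet-a" is a hypothetical namespace name.
ns="fleet-a"
scoped=$(kubectl get deployment agent -n "$ns" -o json \
    | jq '.spec.template.spec.containers[0].env' \
    | jq -r '.[] | select(.name=="HZN_NAMESPACE_SCOPED").value')
# An empty value or "false" marks a cluster scoped agent in $ns, which aborts the install;
# "true" lets the loop move on to the next namespace.
echo "HZN_NAMESPACE_SCOPED in $ns: ${scoped:-<unset>}"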

6 changes: 3 additions & 3 deletions agent-install/edgeNodeFiles.sh
@@ -372,6 +372,9 @@ function putOneFileInCss() {

if [ ! -z ${version} ]; then
META_DATA=$( echo "${META_DATA}" | jq --arg VERSION ${version} '. + {version: $VERSION}' )

# Will set the software package version if not set yet
setSoftwarePackageVersion ${version}
fi
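
The jq step added above merges a version field into the object metadata before the file is published to CSS, and setSoftwarePackageVersion is now called at that point (it is dropped further down from putHorizonPkgsInCss). A small illustration of the jq merge, with hypothetical metadata fields and version:

# [editor's sketch, not part of the commit] Field names and version are hypothetical.
META_DATA='{"objectID":"agent-install.sh","objectType":"agent_files"}'
echo "$META_DATA" | jq -c --arg VERSION "2.31.0" '. + {version: $VERSION}'
# -> {"objectID":"agent-install.sh","objectType":"agent_files","version":"2.31.0"}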

if [[ $addExpiration == true ]]; then
@@ -759,9 +762,6 @@ function putHorizonPkgsInCss() {

# Add the tarFile name array for the manifest
addElementToArray $horizonSoftwareFiles $tarFile

# Will set the software package version if not set yet
setSoftwarePackageVersion ${pkgVersion}
fi

# Remove the tar file (it was only needed to put into CSS)
6 changes: 3 additions & 3 deletions anax-in-container/EPEL.repo
@@ -1,6 +1,6 @@
[dl_fedoraproject_org_pub_epel_7_ppc64le_]
name=https://dl.fedoraproject.org/pub/epel/7/ppc64le/
baseurl=https://dl.fedoraproject.org/pub/epel/7/ppc64le/
name=https://dl.fedoraproject.org/pub/epel/9/Everything/ppc64le/
baseurl=https://dl.fedoraproject.org/pub/epel/9/Everything/ppc64le/
enabled=1
gpgcheck=1
gpgkey=https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-7
gpgkey=https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-9
6 changes: 3 additions & 3 deletions anax-in-k8s/EPEL.repo
@@ -1,6 +1,6 @@
[dl_fedoraproject_org_pub_epel_7_ppc64le_]
name=https://dl.fedoraproject.org/pub/epel/7/ppc64le/
baseurl=https://dl.fedoraproject.org/pub/epel/7/ppc64le/
name=https://dl.fedoraproject.org/pub/epel/9/Everything/ppc64le/
baseurl=https://dl.fedoraproject.org/pub/epel/9/Everything/ppc64le/
enabled=1
gpgcheck=1
gpgkey=https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-7
gpgkey=https://dl.fedoraproject.org/pub/epel/RPM-GPG-KEY-EPEL-9
85 changes: 38 additions & 47 deletions anax-in-k8s/cronjobs/auto-upgrade-cronjob.sh
@@ -2,10 +2,9 @@

# Variables for interfacing with agent pod
KUBECTL="kubectl"
POD_ID=$($KUBECTL get pod -l app=agent -n ${AGENT_NAMESPACE} 2>/dev/null | grep "agent-" | cut -d " " -f1 2>/dev/null)

# Timeout value for agent deployment
AGENT_DEPLOYMENT_STATUS_TIMEOUT_SECONDS='75'
AGENT_DEPLOYMENT_STATUS_TIMEOUT_SECONDS='300'

# status.json status options
STATUS_FAILED="failed"
@@ -47,10 +46,12 @@ AGENT_VERBOSITY=4

# Get script flags (should never really run unless testing script manually)
if [[ $AGENT_VERBOSITY -ge $VERB_DEBUG ]]; then echo $(now) "getopts begin"; fi
while getopts "c:h" opt; do
while getopts "c:h:l:" opt; do
case $opt in
h) usage 0
;;
l) AGENT_VERBOSITY="$OPTARG"
;;
\?) echo "Invalid option: -$OPTARG"
usage 1
;;
@@ -266,7 +267,7 @@ function rollback_agent_image() {
current_version=$($KUBECTL get deployment -n ${AGENT_NAMESPACE} agent -o=jsonpath='{$..spec.template.spec.containers[0].image}' | sed 's/.*://')

# Download the agent deployment to a yaml file
log_verbose "Dowloading agent deployment to yaml file..."
log_verbose "Downloading agent deployment to yaml file..."
cmd_output=$( { $KUBECTL get deployment -n ${AGENT_NAMESPACE} agent -o yaml > /tmp/agentbackup/deployment.yaml; } 2>&1 )
rc=$?
if [[ $rc != 0 ]]; then
@@ -416,56 +417,46 @@ function restart_agent_pod() {

#====================== Main ======================

log_info "cronjob under namesapce: $AGENT_NAMESPACE"
log_info "cronjob under namespace: $AGENT_NAMESPACE"

# Sets STATUS_PATH for rest of script
get_status_path

# Check agent deployment/pod status and status.json
pod_status=$($KUBECTL get pods ${POD_ID} --no-headers -o custom-columns=":status.phase")
dep_status=$($KUBECTL rollout status --timeout=${AGENT_DEPLOYMENT_STATUS_TIMEOUT_SECONDS}s deployment/agent -n ${AGENT_NAMESPACE} | grep "successfully rolled out")
log_debug "deployment rollout status: $dep_status"

POD_ID=$($KUBECTL get pod -l app=agent,type!=auto-upgrade-cronjob -n ${AGENT_NAMESPACE} 2>/dev/null | grep "agent-" | cut -d " " -f1 2>/dev/null)
pod_status=$($KUBECTL get pods ${POD_ID} -n ${AGENT_NAMESPACE} --no-headers -o custom-columns=":status.phase" | sed -z 's/\n/ /g;s/ //g' )
log_debug "Pod status: $pod_status"
dep_status=$($KUBECTL rollout status deployment/agent -n ${AGENT_NAMESPACE} | awk '{ print $3 }' | sed 's/successfully/Running/g')
log_debug "Deployment status: $dep_status"
json_status=$(cat $STATUS_PATH | jq '.agentUpgradePolicyStatus.status' | sed 's/\"//g')

# Check deployment/pod status
# Instantaneous state where both could be running....
if [[ "${pod_status}" == "RunningRunning" ]] || [[ "${pod_status}" == "RunningSucceeded" ]]; then
log_info "Agent pod status is $pod_status; Exiting"
write_logs
exit 0
fi
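
A note on the status strings in the check above: the pod phase query now matches every pod with the agent label (excluding the auto-upgrade cronjob pod) and collapses the phase column into one token, so briefly during a rollout, while an old pod is still winding down and the new one is up, the value can read "RunningRunning" or "RunningSucceeded" instead of a single phase. A sketch of the collapsing step, simulating the column kubectl would print for two matched pods:

# [editor's sketch, not part of the commit] Simulated two-row phase column.
printf 'Running\nSucceeded\n' | sed -z 's/\n/ /g;s/ //g'; echo   # -> RunningSucceeded
printf 'Running\nRunning\n'   | sed -z 's/\n/ /g;s/ //g'; echo   # -> RunningRunning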

log_info "Checking if there is any pending agent pod..."
if [[ "$pod_status" == *Pending* ]]; then
log_info "Agent pod is still in pending. Keeping status as \"$CURRENT_STATUS\" and exiting."
write_logs
exit 0
fi

if [[ ! -f $STATUS_PATH ]]; then
log_debug "status file $STATUS_PATH not exist; Exiting."
write_logs
exit 0
fi

json_status=$(cat $STATUS_PATH | jq '.agentUpgradePolicyStatus.status' | sed 's/\"//g') # directory will be deleted by NMP worker if the upgrade is successful
log_debug "Cron Job status: $json_status"
CURRENT_STATUS=$json_status
panic_rollback=false

# Check deployment/pod status
log_info "Checking if agent is running and deployment is successful..."
if [[ "$pod_status" != "Running" || "$dep_status" != "Running" ]]; then

# Should never happen, but if k8s deployment status is "error" and if status.json has "rollback started", set status to "rollback failed" and exit
log_info "Agent is not running. Checking if rollback was already attempted..."
if [[ "$json_status" == "$STATUS_ROLLBACK_STARTED" ]]; then
log_info "Waiting up to $AGENT_DEPLOYMENT_STATUS_TIMEOUT_SECONDS seconds for the agent deployment to complete..."
dep_status=$($KUBECTL rollout status --timeout=${AGENT_DEPLOYMENT_STATUS_TIMEOUT_SECONDS}s deployment/agent -n ${AGENT_NAMESPACE} | grep "successfully rolled out")

# Agent is running, exit and let agent upgrade worker determine if agent upgrade was successful
if [[ ! -z "$dep_status" ]]; then
log_info "Agent pod is running successfully"
log_verbose "Setting the status to \"$STATUS_ROLLBACK_SUCCESSFUL\"..."
echo $(jq --arg updated_status "$STATUS_ROLLBACK_SUCCESSFUL" '.agentUpgradePolicyStatus.status = $updated_status' $STATUS_PATH) > $STATUS_PATH
write_logs
exit 0
else
rollback_failed "Rollback was already attempted, but it failed. Deployment status: $dep_status"
fi
fi

# If k8s deployment status is "error" and status.json is "initiated" status, wait to see if pod is just waiting to spin back up
log_debug "Checking if agent upgrade was initiated..."
if [[ "$json_status" == "$STATUS_INITIATED" ]]; then
log_info "Waiting up to $AGENT_DEPLOYMENT_STATUS_TIMEOUT_SECONDS seconds for the agent deployment to complete..."
dep_status=$($KUBECTL rollout status --timeout=${AGENT_DEPLOYMENT_STATUS_TIMEOUT_SECONDS}s deployment/agent -n ${AGENT_NAMESPACE} | grep "successfully rolled out")

# Agent is running, exit and let agent upgrade worker determine if agent upgrade was successful
if [[ ! -z "$dep_status" ]]; then
log_info "Agent is running. Keeping status as \"$CURRENT_STATUS\" and exiting."
write_logs
exit 0
fi
fi
if [[ "$pod_status" != "Running" || -z "$dep_status" ]]; then # pod is not running, deployment rollout failed
panic_rollback=true

# Should never happen, but if status is "rollback started" and agent is running properly, check agent pod for panic"
@@ -478,7 +469,7 @@ elif [[ "$json_status" == "$STATUS_ROLLBACK_STARTED" ]]; then
if [[ $rc -eq 0 && "$cmd_output" == *"nodeType"*"cluster"* ]]; then
log_info "Agent pod is running successfully."
log_verbose "Setting the status to \"$STATUS_ROLLBACK_SUCCESSFUL\"..."
echo $(jq --arg updated_status "$STATUS_ROLLBACK_SUCCESSFUL" '.agentUpgradePolicyStatus.status = $updated_status' $STATUS_PATH) > $STATUS_PATH
echo $(jq --arg updated_status "$STATUS_ROLLBACK_SUCCESSFUL" '.agentUpgradePolicyStatus.status = $updated_status' $STATUS_PATH) > $STATUS_PATH
write_logs
exit 0
fi
@@ -507,7 +498,7 @@ elif [[ "$json_status" == "$STATUS_INITIATED" ]]; then
elif [[ "$old_image_version" == "null" || "$current_version" == "$old_image_version" ]]; then
# set status to "failed"
log_info "Agent pod is in panic state and the image version was not updated. Setting status to \"$ROLLBACK_FAILED\" and exiting."
echo $(jq --arg updated_status "$ROLLBACK_FAILED" '.agentUpgradePolicyStatus.status = $updated_status' $STATUS_PATH) > $STATUS_PATH
echo $(jq --arg updated_status "$ROLLBACK_FAILED" '.agentUpgradePolicyStatus.status = $updated_status' $STATUS_PATH) > $STATUS_PATH
log_debug "Output of \"hzn node list\": $cmd_output"
write_logs
log_fatal 1 "Agent pod is in panic state and the image version was not updated."
@@ -517,7 +508,7 @@ elif [[ "$json_status" == "$STATUS_INITIATED" ]]; then
cmd_output=$(agent_cmd "hzn node list")
log_info "Agent pod is in panic state. Rollback will be performed."
log_debug "Output of \"hzn node list\": $cmd_output"
update_error_message "Agent pod is in panic state"
update_error_message "Agent pod is in panic state"
fi

panic_rollback=true
Expand Down
8 changes: 4 additions & 4 deletions cli/kube_deployment/kube_deployment.go
@@ -85,9 +85,9 @@ func (p *KubeDeploymentConfigPlugin) Sign(dep map[string]interface{}, privKey *r
delete(dep, "mmsPVC")
}

if pvcSizeVal, ok := mmsPVCConfig["pvcSize"]; ok {
if pvcSizeVal, ok := mmsPVCConfig["pvcSizeGB"]; ok {
pvcSize := int64(pvcSizeVal.(float64))
msgPrinter.Printf("pvcSize: %v\n", pvcSize)
msgPrinter.Printf("pvcSizeGB: %v\n", pvcSize)
}
}

@@ -126,8 +126,8 @@ func (p *KubeDeploymentConfigPlugin) DefaultClusterConfig() interface{} {
return map[string]interface{}{
"operatorYamlArchive": "",
"mmsPVC": map[string]interface{}{
"enable": false,
"pvcSize": 0,
"enable": false,
"pvcSizeGB": 0,
},
}
}
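
Given the rename above (pvcSize becomes pvcSizeGB, making the unit explicit), a cluster deployment configuration is expected to carry the PVC size under the new key. A hedged sketch with hypothetical values, checked with jq:

# [editor's sketch, not part of the commit] Values are hypothetical; keys mirror DefaultClusterConfig above.
cat > /tmp/cluster_deployment.json <<'EOF'
{
  "operatorYamlArchive": "operator.tar.gz",
  "mmsPVC": { "enable": true, "pvcSizeGB": 10 }
}
EOF
jq '.mmsPVC.pvcSizeGB' /tmp/cluster_deployment.json   # prints 10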