Skip to content

Commit

Permalink
[horus] fixed remove pod logic
Browse files: browse the repository at this point in the history
  • Loading branch information
mfordjody committed Sep 28, 2024
1 parent fc00eff commit db87b9b
Show file tree
Hide file tree
Showing 2 changed files with 45 additions and 38 deletions.
79 changes: 43 additions & 36 deletions app/horus/core/horuser/pod_abnormal.go
Original file line number Diff line number Diff line change
Expand Up @@ -30,6 +30,7 @@ import (

const (
ModuleName = "pod_abnormal_clean"
Reason = "clean up"
)

func (h *Horuser) PodAbnormalCleanManager(ctx context.Context) error {
Expand All @@ -52,11 +53,10 @@ func (h *Horuser) PodAbnormalClean(ctx context.Context) {
}

func (h *Horuser) PodsOnCluster(clusterName string) {
var podNamespace string
pods, err := h.Fetch(clusterName, podNamespace, h.cc.PodAbnormal.FieldSelector)
pods, err := h.Fetch(clusterName, h.cc.PodAbnormal.FieldSelector)
if err != nil {
klog.Errorf("Failed to fetch pods on cluster:%v", err)
klog.Infof("clusterName:%v podNamespace:%v", clusterName, podNamespace)
klog.Infof("clusterName:%v", clusterName)
return
}
count := len(pods)
Expand All @@ -66,10 +66,11 @@ func (h *Horuser) PodsOnCluster(clusterName string) {
}
wp := workerpool.New(10)
for index, pod := range pods {
if pod.Status.Phase == corev1.PodRunning || pod.Status.Phase == corev1.PodSucceeded || pod.Status.Phase == corev1.PodFailed {
pod := pod
if pod.Status.Phase == corev1.PodRunning {
continue
}
msg := fmt.Sprintf("\n【集群:%v】\n存活:%d/%d】\n【PodName:%v】\n【Namespace:%v】\n【Phase:%v】\n【节点:%v】\n", clusterName, index+1, count, pod.Name, pod.Namespace, pod.Status.Phase, pod.Spec.NodeName)
msg := fmt.Sprintf("\n【集群:%v】\n【%d/%d】\n【PodName:%v】\n【Namespace:%v】\n【Phase:%v】\n【节点:%v】\n", clusterName, index+1, count, pod.Name, pod.Namespace, pod.Status.Phase, pod.Spec.NodeName)
klog.Infof(msg)

wp.Submit(func() {
Expand All @@ -80,44 +81,50 @@ func (h *Horuser) PodsOnCluster(clusterName string) {
}

func (h *Horuser) PodSingle(pod corev1.Pod, clusterName string) {
var err error
if !pod.DeletionTimestamp.IsZero() {
var err error
action := ""
switch len(pod.Finalizers) {
case 0:
if pod.Name != "" {
return
}
err = h.Evict(pod.Name, pod.Namespace, clusterName)
action = "try patch-finalizer"
default:
if len(pod.Finalizers) > 0 {
time.Sleep(time.Duration(h.cc.PodAbnormal.DoubleSecond) * time.Second)
pass := h.Terminating(clusterName, &pod)
if !pass {
if !h.Terminating(clusterName, &pod) {
klog.Infof("Pod %s is still terminating, skipping.", pod.Name)
return
}
err = h.Finalizer(clusterName, pod.Name, pod.Namespace)
action = "try patch-finalizer"
res := "Success"
err := h.Finalizer(clusterName, pod.Name, pod.Namespace)
if err != nil {
res = fmt.Sprintf("failed:%v", err)
}
today := time.Now().Format("2006-01-02")
msg := fmt.Sprintf("\n【集群:%v】\n【Pod:%v】\n【Namespace:%v】\n【无法删除的 finalizer:%v】\n【处理结果:%v】\n", clusterName, pod.Name, pod.Namespace, err, res)
alert.DingTalkSend(h.cc.PodAbnormal.DingTalk, msg)
write := db.PodDataInfo{
PodName: pod.Name,
PodIP: pod.Status.PodIP,
NodeName: pod.Spec.NodeName,
ClusterName: clusterName,
ModuleName: ModuleName,
Reason: action,
FirstDate: today,
klog.Errorf("Failed to patch finalizer for pod %s: %v", pod.Name, err)
return
}
_, err = write.AddOrGet()
klog.Errorf("write AddOrGet err:%v", err)
klog.Infof("podName:%v", pod.Name)
klog.Infof("Successfully patched finalizer for pod %s", pod.Name)
}
return
}

if len(pod.Finalizers) == 0 && pod.Name != "" {
err := h.Evict(pod.Name, pod.Namespace, clusterName)
if err != nil {
klog.Errorf("Failed to evict pod %s: %v", pod.Name, err)
return
}
klog.Infof("Evicted pod %s successfully", pod.Name)
}
res := "Success"
if err != nil {
res = fmt.Sprintf("failed:%v", err)
}
today := time.Now().Format("2006-01-02")
msg := fmt.Sprintf("\n【集群:%v】\n【Pod:%v】\n【Namespace:%v】\n【清除 finalizer:%v】\n", clusterName, pod.Name, pod.Namespace, res)
alert.DingTalkSend(h.cc.PodAbnormal.DingTalk, msg)
write := db.PodDataInfo{
PodName: pod.Name,
PodIP: pod.Status.PodIP,
NodeName: pod.Spec.NodeName,
ClusterName: clusterName,
ModuleName: ModuleName,
Reason: Reason,
FirstDate: today,
}
_, err = write.AddOrGet()
klog.Errorf("write AddOrGet err:%v", err)
klog.Infof("podName:%v", pod.Name)
return
}
4 changes: 2 additions & 2 deletions app/horus/core/horuser/pod_remove.go
Original file line number Diff line number Diff line change
Expand Up @@ -67,7 +67,7 @@ func (h *Horuser) Terminating(clusterName string, oldPod *corev1.Pod) bool {
return true
}

func (h *Horuser) Fetch(clusterName, podNamespace, fieldSelector string) ([]corev1.Pod, error) {
func (h *Horuser) Fetch(clusterName, fieldSelector string) ([]corev1.Pod, error) {
kubeClient := h.kubeClientMap[clusterName]
if kubeClient == nil {
klog.Errorf("Fetch kubeClient by clusterName empty.")
Expand All @@ -77,7 +77,7 @@ func (h *Horuser) Fetch(clusterName, podNamespace, fieldSelector string) ([]core
ctx, cancel := h.GetK8sContext()
defer cancel()
list := v1.ListOptions{FieldSelector: fieldSelector}
pods, err := kubeClient.CoreV1().Pods(podNamespace).List(ctx, list)
pods, err := kubeClient.CoreV1().Pods("default").List(ctx, list)
if err != nil {
klog.Errorf("Fetch list pod err:%v", err)
klog.Infof("clusterName:%v fieldSelector:%v", clusterName, fieldSelector)
Expand Down

0 comments on commit db87b9b

Please sign in to comment.