Skip to content

Commit

Permalink
[horus] Try to add downtime recovery (#444)
Browse files Browse the repository at this point in the history
  • Loading branch information
mfordjody authored Oct 10, 2024
1 parent 156e5fa commit 1be672d
Show file tree
Hide file tree
Showing 4 changed files with 24 additions and 16 deletions.
1 change: 1 addition & 0 deletions app/horus/base/config/config.go
Original file line number Diff line number Diff line change
Expand Up @@ -73,6 +73,7 @@ type DowntimeConfiguration struct {
KubeMultiple map[string]string `yaml:"kubeMultiple"`
AbnormalityQL []string `yaml:"abnormalityQL"`
AbnormalInfoSystemQL string `yaml:"abnormalInfoSystemQL"`
AbnormalRecoveryQL []string `yaml:"abnormalRecoveryQL"`
AllSystemUser string `yaml:"allSystemUser"`
AllSystemPassword string `yaml:"allSystemPassword"`
DingTalk *DingTalkConfiguration `yaml:"dingTalk"`
Expand Down
32 changes: 17 additions & 15 deletions app/horus/base/db/db.go
Original file line number Diff line number Diff line change
Expand Up @@ -25,21 +25,23 @@ import (
)

// NodeDataInfo is the persisted record describing a node that was flagged by
// one of the horus modules, together with its repair and recovery state.
//
// Each field carries a `json` tag for serialization and, where the column name
// differs from the default mapping, an `xorm` tag naming the database column.
// CreateTime and UpdateTime additionally carry the xorm `created` / `updated`
// markers.
//
// RecoveryQL / RecoveryMark hold the generic recovery query and its state,
// while DownTimeRecoveryQL / DownTimeRecoveryMark hold the equivalent data for
// the downtime module (presumably populated from the `abnormalRecoveryQL`
// configuration list — verify against the caller).
//
// NOTE: every tag must be written as space-separated `key:"value"` pairs. A
// missing quote silently merges the pairs into a single malformed tag, and
// reflect.StructTag.Get then returns the wrong value (or nothing) for the key.
type NodeDataInfo struct {
	Id                   int64     `json:"id"`
	NodeName             string    `json:"node_name" xorm:"node_name"`
	NodeIP               string    `json:"node_ip" xorm:"node_ip"`
	Sn                   string    `json:"sn"`
	ClusterName          string    `json:"cluster_name" xorm:"cluster_name"`
	ModuleName           string    `json:"module_name" xorm:"module_name"`
	Reason               string    `json:"reason"`
	Restart              int       `json:"restart"`
	Repair               int       `json:"repair"`
	RepairTicketUrl      string    `json:"repair_ticket_url" xorm:"repair_ticket_url"`
	FirstDate            string    `json:"first_date" xorm:"first_date"`
	CreateTime           time.Time `json:"create_time" xorm:"create_time created"`
	UpdateTime           time.Time `json:"update_time" xorm:"update_time updated"`
	RecoveryMark         int64     `json:"recovery_mark" xorm:"recovery_mark"`
	RecoveryQL           string    `json:"recovery_ql" xorm:"recovery_ql"`
	DownTimeRecoveryQL   []string  `json:"downtime_recovery_ql" xorm:"downtime_recovery_ql"`
	DownTimeRecoveryMark int64     `json:"downtime_recovery_mark" xorm:"downtime_recovery_mark"`
}

type PodDataInfo struct {
Expand Down
1 change: 1 addition & 0 deletions app/horus/core/horuser/node_downtime.go
Original file line number Diff line number Diff line change
Expand Up @@ -147,6 +147,7 @@ func (h *Horuser) DownTimeNodes(clusterName, addr string) {
NodeIP: nodeIP,
ClusterName: clusterName,
ModuleName: NODE_DOWN,
RecoveryQL: ,

Check failure on line 150 in app/horus/core/horuser/node_downtime.go

View workflow job for this annotation

GitHub Actions / Go fmt

expected operand, found ','

Check failure on line 150 in app/horus/core/horuser/node_downtime.go

View workflow job for this annotation

GitHub Actions / Go Test

syntax error: unexpected comma, expected expression
}
exist, _ := write.Check()
if exist {

Check failure on line 153 in app/horus/core/horuser/node_downtime.go

View workflow job for this annotation

GitHub Actions / Go fmt

missing ',' in composite literal
Expand Down
6 changes: 5 additions & 1 deletion manifests/horus/horus.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -75,6 +75,10 @@ nodeDownTime:
node_os_info{node="%s"}
allSystemUser: "zxj"
allSystemPassword: "1"
abnormalRecoveryQL:
- 100 - (avg by (node) (rate(node_cpu_seconds_total{mode="idle",node="%s"}[5m])) * 100) < 20
- (avg by (node) (node_memory_MemFree_bytes{node="%s"} / node_memory_MemTotal_bytes{node="%s"} )) * 100 < 25
# - node_filesystem_avail_bytes{mountpoint="/"} / node_filesystem_size_bytes{mountpoint="/"} * 100 > 15
kubeMultiple:
cluster: config.1
dingTalk:
Expand All @@ -87,7 +91,7 @@ nodeDownTime:
title: "自定义通知"

podStagnationCleaner:
enabled: false
enabled: true
intervalSecond: 15
doubleSecond: 60
fieldSelector: "status.phase!=Running"
Expand Down

0 comments on commit 1be672d

Please sign in to comment.