From 159f0c9372df6d7a60f00c450f3e4a253c4f5c22 Mon Sep 17 00:00:00 2001 From: Daxin Wang Date: Tue, 15 Oct 2024 19:46:54 +0800 Subject: [PATCH] =?UTF-8?q?fix=20=E5=A4=9A=E4=B8=AA=E5=AE=9E=E4=BE=8B?= =?UTF-8?q?=E6=97=B6=E6=97=A0=E6=B3=95=E6=9F=A5=E8=AF=A2=E9=83=A8=E5=88=86?= =?UTF-8?q?=E5=AE=9E=E4=BE=8B=E6=97=A5=E5=BF=97=E9=94=99=E8=AF=AF=E6=95=B0?= =?UTF-8?q?=E7=9A=84=E9=97=AE=E9=A2=98?= MIME-Version: 1.0 Content-Type: text/plain; charset=UTF-8 Content-Transfer-Encoding: 8bit --- backend/pkg/repository/prometheus/query.go | 675 +++------------------ 1 file changed, 86 insertions(+), 589 deletions(-) diff --git a/backend/pkg/repository/prometheus/query.go b/backend/pkg/repository/prometheus/query.go index 45d6ea2..b150b2f 100644 --- a/backend/pkg/repository/prometheus/query.go +++ b/backend/pkg/repository/prometheus/query.go @@ -25,11 +25,6 @@ const ( AvgLog LogDOD LogWOW - ServiceAvgLog - ServiceInstancePod - ServiceInstanceContainer - ServiceInstancePid - AvgDependencyLatency // 平均外部依赖耗时 Avg1minError Avg1minLatency ) @@ -1013,7 +1008,6 @@ or ) func QueryEndPointPromql(duration string, queryType QueryType, serviceNames string) string { - switch queryType { //突变排序的1m平均指标数据 case Avg1minError: @@ -1102,9 +1096,9 @@ func QueryEndPointPromql(duration string, queryType QueryType, serviceNames stri } else { return fmt.Sprintf(DELAY_SOURCE, duration, duration, duration, duration, duration, duration) } + default: + return "" } - - return "" } func QueryEndPointRangePromql(step string, duration string, queryType QueryType, contentKeys []string) string { @@ -1150,9 +1144,9 @@ func QueryPodPromql(duration string, queryType QueryType, serviceName string, co return fmt.Sprintf(TPS_DOD_BY_POD, contentKey, serviceName, duration, contentKey, serviceName, duration, contentKey, serviceName, duration) case TPSWOW: return fmt.Sprintf(TPS_WOW_BY_POD, contentKey, serviceName, duration, contentKey, serviceName, duration, contentKey, serviceName, duration) - + default: + return "" } - return "" } func QueryPodRangePromql(duration string, queryType QueryType, contentKey string, serviceName string) string { contentKey = EscapeRegexp(contentKey) @@ -1192,9 +1186,9 @@ func QueryContainerIdPromql(duration string, queryType QueryType, serviceName st return fmt.Sprintf(TPS_DOD_BY_CONTAINERID, contentKey, serviceName, duration, contentKey, serviceName, duration, contentKey, serviceName, duration) case TPSWOW: return fmt.Sprintf(TPS_WOW_BY_CONTAINERID, contentKey, serviceName, duration, contentKey, serviceName, duration, contentKey, serviceName, duration) - + default: + return "" } - return "" } func QueryContainerIdRangePromql(duration string, queryType QueryType, contentKey string, serviceName string) string { contentKey = EscapeRegexp(contentKey) @@ -1234,9 +1228,9 @@ func QueryPidPromql(duration string, queryType QueryType, serviceName string, co return fmt.Sprintf(TPS_DOD_BY_PID, contentKey, serviceName, duration, contentKey, serviceName, duration, contentKey, serviceName, duration) case TPSWOW: return fmt.Sprintf(TPS_WOW_BY_PID, contentKey, serviceName, duration, contentKey, serviceName, duration, contentKey, serviceName, duration) - + default: + return "" } - return "" } func QueryPidRangePromql(duration string, queryType QueryType, contentKey string, serviceName string) string { contentKey = EscapeRegexp(contentKey) @@ -1254,9 +1248,18 @@ func QueryPidRangePromql(duration string, queryType QueryType, contentKey string } -func QueryLogPromql(duration string, queryType QueryType, containerIds []string) string { - escapedKeys := make([]string, len(containerIds)) - for i, key := range containerIds { +const queryPodLogCountTemplate = `( + (sum(increase(originx_logparser_level_count_total{pod_name=~"%s",level=~"error|critical"}[%s]) offset %s) by(pod_name) + + + sum(increase(originx_logparser_exception_count_total{pod_name=~"%s"}[%s]) offset %s) by(pod_name)) + or + sum(increase(originx_logparser_level_count_total{pod_name=~"%s",level=~"error|critical"}[%s] offset %s)) by(pod_name) + or + sum(increase(originx_logparser_exception_count_total{pod_name=~"%s"}[%s] offset %s)) by(pod_name))` + +func QueryLogPromql(duration string, queryType QueryType, pods []string) string { + escapedKeys := make([]string, len(pods)) + for i, key := range pods { escapedKeys[i] = EscapeRegexp(key) } // 使用 strings.Join 生成正则表达式模式 @@ -1264,395 +1267,36 @@ func QueryLogPromql(duration string, queryType QueryType, containerIds []string) switch queryType { case AvgLog: - return fmt.Sprintf(`( - sum( - increase( - originx_logparser_level_count_total{ - pod_name=~"%s",level=~"error|critical" - }[%s] - ) - ) by(pod_name) - + - ( - sum( - increase( - originx_logparser_exception_count_total{pod_name=~"%s"}[%s] - ) - ) by(pod_name) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{pod_name=~"%s"}[%s] - ) -) by(pod_name)`, regexPattern, duration, regexPattern, duration, regexPattern, duration) + return fmt.Sprintf(queryPodLogCountTemplate, + regexPattern, duration, "0", regexPattern, duration, "0", + regexPattern, duration, "0", regexPattern, duration, "0") case LogDOD: - return fmt.Sprintf(`(( - sum( - increase( - originx_logparser_level_count_total{ - pod_name=~"%s",level=~"error|critical" - }[%s] - ) - ) by(pod_name) - + - ( - sum( - increase( - originx_logparser_exception_count_total{pod_name=~"%s"}[%s] - ) - ) by(pod_name) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{pod_name=~"%s"}[%s] - ) -) by(pod_name) -( - sum( - increase( - originx_logparser_level_count_total{ - pod_name=~"%s",level=~"error|critical" - }[%s] offset 24h - ) - ) by(pod_name) - + - ( - sum( - increase( - originx_logparser_exception_count_total{pod_name=~"%s"}[%s] offset 24h - ) - ) by(pod_name) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{pod_name=~"%s"}[%s]offset 24h - ) -) by(pod_name))/( - sum( - increase( - originx_logparser_level_count_total{ - pod_name=~"%s",level=~"error|critical" - }[%s]offset 24h - ) - ) by(pod_name) - + - ( - sum( - increase( - originx_logparser_exception_count_total{pod_name=~"%s"}[%s]offset 24h - ) - ) by(pod_name) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{pod_name=~"%s"}[%s]offset 24h - ) -) by(pod_name)`, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration) + return fmt.Sprintf(queryPodLogCountTemplate+"/"+queryPodLogCountTemplate+"-1", + regexPattern, duration, "0", regexPattern, duration, "0", + regexPattern, duration, "0", regexPattern, duration, "0", + regexPattern, duration, "24h", regexPattern, duration, "24h", + regexPattern, duration, "24h", regexPattern, duration, "24h") case LogWOW: - return fmt.Sprintf(`(( - sum( - increase( - originx_logparser_level_count_total{ - pod_name=~"%s",level=~"error|critical" - }[%s] - ) - ) by(pod_name) - + - ( - sum( - increase( - originx_logparser_exception_count_total{pod_name=~"%s"}[%s] - ) - ) by(pod_name) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{pod_name=~"%s"}[%s] - ) -) by(pod_name) -( - sum( - increase( - originx_logparser_level_count_total{ - pod_name=~"%s",level=~"error|critical" - }[%s] offset 7d - ) - ) by(pod_name) - + - ( - sum( - increase( - originx_logparser_exception_count_total{pod_name=~"%s"}[%s] offset 7d - ) - ) by(pod_name) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{pod_name=~"%s"}[%s]offset 7d - ) -) by(pod_name))/( - sum( - increase( - originx_logparser_level_count_total{ - pod_name=~"%s",level=~"error|critical" - }[%s]offset 7d - ) - ) by(pod_name) - + - ( - sum( - increase( - originx_logparser_exception_count_total{pod_name=~"%s"}[%s]offset 7d - ) - ) by(pod_name) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{pod_name=~"%s"}[%s]offset 7d - ) -) by(pod_name)`, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration) - case ServiceAvgLog: - return fmt.Sprintf(`( - sum( - increase( - originx_logparser_level_count_total{ - pod_name=~"%s",level=~"error|critical" - }[%s] - ) - ) by(pod_name) - + - ( - sum( - increase( - originx_logparser_exception_count_total{pod_name=~"%s"}[%s] - ) - ) by(pod_name) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{pod_name=~"%s"}[%s] - ) -)or 0 `, regexPattern, duration, regexPattern, duration, regexPattern, duration) + return fmt.Sprintf(queryPodLogCountTemplate+"/"+queryPodLogCountTemplate+"-1", + regexPattern, duration, "0", regexPattern, duration, "0", + regexPattern, duration, "0", regexPattern, duration, "0", + regexPattern, duration, "7d", regexPattern, duration, "7d", + regexPattern, duration, "7d", regexPattern, duration, "7d") + default: + return "" } - return "" } -func QueryLogByContainerIdPromql(duration string, queryType QueryType, containerIds []string) string { - escapedKeys := make([]string, len(containerIds)) - for i, key := range containerIds { - escapedKeys[i] = EscapeRegexp(key) - } - // 使用 strings.Join 生成正则表达式模式 - regexPattern := strings.Join(escapedKeys, "|") - switch queryType { - case AvgLog: - return fmt.Sprintf(`( - sum( - increase( - originx_logparser_level_count_total{ - container_id=~"%s" - }[%s] - ) - ) by(container_id) +const queryContainerLogCountTemplate = `( + (sum(increase(originx_logparser_level_count_total{container_id=~"%s",level=~"error|critical"}[%s]) offset %s) by(container_id) + - ( - sum( - increase( - originx_logparser_exception_count_total{container_id=~"%s"}[%s] - ) - ) by(container_id) - or - 0 - ) -) + sum(increase(originx_logparser_exception_count_total{container_id=~"%s"}[%s]) offset %s) by(container_id)) + or + sum(increase(originx_logparser_level_count_total{container_id=~"%s",level=~"error|critical"}[%s] offset %s)) by(container_id) or -sum( - increase( - originx_logparser_exception_count_total{container_id=~"%s"}[%s] - ) -) by(container_id)or 0`, regexPattern, duration, regexPattern, duration, regexPattern, duration) - case LogDOD: - return fmt.Sprintf(`(( - sum( - increase( - originx_logparser_level_count_total{ - container_id=~"%s" - }[%s] - ) - ) by(container_id) - + - ( - sum( - increase( - originx_logparser_exception_count_total{container_id=~"%s"}[%s] - ) - ) by(container_id) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{container_id=~"%s"}[%s] - ) -) by(container_id) -( - sum( - increase( - originx_logparser_level_count_total{ - container_id=~"%s" - }[%s] offset 24h - ) - ) by(container_id) - + - ( - sum( - increase( - originx_logparser_exception_count_total{container_id=~"%s"}[%s] offset 24h - ) - ) by(container_id) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{container_id=~"%s"}[%s] offset 24h - ) -) by(container_id))/( - sum( - increase( - originx_logparser_level_count_total{ - container_id=~"%s" - }[%s] offset 24h - ) - ) by(container_id) - + - ( - sum( - increase( - originx_logparser_exception_count_total{container_id=~"%s"}[%s] offset 24h - ) - ) by(container_id) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{container_id=~"%s"}[%s] offset 24h - ) -) by(container_id)`, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration) - case LogWOW: - return fmt.Sprintf(`(( - sum( - increase( - originx_logparser_level_count_total{ - container_id=~"%s" - }[%s] - ) - ) by(container_id) - + - ( - sum( - increase( - originx_logparser_exception_count_total{container_id=~"%s"}[%s] - ) - ) by(container_id) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{container_id=~"%s"}[%s] - ) -) by(container_id) -( - sum( - increase( - originx_logparser_level_count_total{ - container_id=~"%s" - }[%s] offset 7d - ) - ) by(container_id) - + - ( - sum( - increase( - originx_logparser_exception_count_total{container_id=~"%s"}[%s] offset 7d - ) - ) by(container_id) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{container_id=~"%s"}[%s] offset 7d - ) -) by(container_id))/( - sum( - increase( - originx_logparser_level_count_total{ - container_id=~"%s" - }[%s] offset 7d - ) - ) by(container_id) - + - ( - sum( - increase( - originx_logparser_exception_count_total{container_id=~"%s"}[%s] offset 7d - ) - ) by(container_id) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{container_id=~"%s"}[%s] offset 7d - ) -) by(container_id)`, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration) - } - return "" -} -func QueryLogByPidPromql(duration string, queryType QueryType, containerIds []string) string { + sum(increase(originx_logparser_exception_count_total{container_id=~"%s"}[%s] offset %s)) by(container_id))` + +func QueryLogByContainerIdPromql(duration string, queryType QueryType, containerIds []string) string { escapedKeys := make([]string, len(containerIds)) for i, key := range containerIds { escapedKeys[i] = EscapeRegexp(key) @@ -1662,203 +1306,56 @@ func QueryLogByPidPromql(duration string, queryType QueryType, containerIds []st switch queryType { case AvgLog: - return fmt.Sprintf(`( - sum( - increase( - originx_logparser_level_count_total{ - pid=~"%s",level=~"error|critical" - }[%s] - ) - ) by(pid) - + - ( - sum( - increase( - originx_logparser_exception_count_total{pid=~"%s"}[%s] - ) - ) by(pid) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{pid=~"%s"}[%s] - ) -) by(pid) or 0`, regexPattern, duration, regexPattern, duration, regexPattern, duration) + return fmt.Sprintf(queryContainerLogCountTemplate, + regexPattern, duration, "0", regexPattern, duration, "0", + regexPattern, duration, "0", regexPattern, duration, "0") case LogDOD: - return fmt.Sprintf(`(( - sum( - increase( - originx_logparser_level_count_total{ - pid=~"%s",level=~"error|critical" - }[%s] - ) - ) by(pid) - + - ( - sum( - increase( - originx_logparser_exception_count_total{pid=~"%s"}[%s] - ) - ) by(pid) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{pid=~"%s"}[%s] - ) -) by(pid) -( - sum( - increase( - originx_logparser_level_count_total{ - pid=~"%s",level=~"error|critical" - }[%s] offset 24h - ) - ) by(pid) - + - ( - sum( - increase( - originx_logparser_exception_count_total{pid=~"%s"}[%s] offset 24h - ) - ) by(pid) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{pid=~"%s"}[%s]offset 24h - ) -) by(pid))/( - sum( - increase( - originx_logparser_level_count_total{ - pid=~"%s",level=~"error|critical" - }[%s]offset 24h - ) - ) by(pid) - + - ( - sum( - increase( - originx_logparser_exception_count_total{pid=~"%s"}[%s]offset 24h - ) - ) by(pid) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{pid=~"%s"}[%s]offset 24h - ) -) by(pid)`, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration) + return fmt.Sprintf(queryContainerLogCountTemplate+"/"+queryContainerLogCountTemplate+"-1", + regexPattern, duration, "0", regexPattern, duration, "0", + regexPattern, duration, "0", regexPattern, duration, "0", + regexPattern, duration, "24h", regexPattern, duration, "24h", + regexPattern, duration, "24h", regexPattern, duration, "24h") case LogWOW: - return fmt.Sprintf(`(( - sum( - increase( - originx_logparser_level_count_total{ - pid=~"%s",level=~"error|critical" - }[%s] - ) - ) by(pid) - + - ( - sum( - increase( - originx_logparser_exception_count_total{pid=~"%s"}[%s] - ) - ) by(pid) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{pid=~"%s"}[%s] - ) -) by(pid) -( - sum( - increase( - originx_logparser_level_count_total{ - pid=~"%s",level=~"error|critical" - }[%s] offset 7d - ) - ) by(pid) - + - ( - sum( - increase( - originx_logparser_exception_count_total{pid=~"%s"}[%s] offset 7d - ) - ) by(pid) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{pid=~"%s"}[%s]offset 7d - ) -) by(pid))/( - sum( - increase( - originx_logparser_level_count_total{ - pid=~"%s",level=~"error|critical" - }[%s]offset 7d - ) - ) by(pid) - + - ( - sum( - increase( - originx_logparser_exception_count_total{pid=~"%s"}[%s]offset 7d - ) - ) by(pid) - or - 0 - ) -) - or -sum( - increase( - originx_logparser_exception_count_total{pid=~"%s"}[%s]offset 7d - ) -) by(pid)`, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration, regexPattern, duration) + return fmt.Sprintf(queryContainerLogCountTemplate+"/"+queryContainerLogCountTemplate+"-1", + regexPattern, duration, "0", regexPattern, duration, "0", + regexPattern, duration, "0", regexPattern, duration, "0", + regexPattern, duration, "7d", regexPattern, duration, "7d", + regexPattern, duration, "7d", regexPattern, duration, "7d") + default: + return "" } - return "" } -const SERVICES_POD_INSTANCE = `sum(kindling_span_trace_duration_nanoseconds_count{svc_name=~"%s"}) by (pod,svc_name,node_name,pid)` - -const SERVICES_CONTAINER_INSTANCE = `sum(kindling_span_trace_duration_nanoseconds_count{svc_name=~"%s",pod=""}) by (container_id,svc_name,node_name,pid)` - -const SERVICES_PID_INSTANCE = `sum(kindling_span_trace_duration_nanoseconds_count{svc_name=~"%s",pod="",container_id=""}) by (pid,svc_name,node_name)` +const queryPidLogCountTemplate = `( + (sum(increase(originx_logparser_level_count_total{pid=~"%s",level=~"error|critical"}[%s]) offset %s) by(pid) + + + sum(increase(originx_logparser_exception_count_total{pid=~"%s"}[%s]) offset %s) by(pid)) + or + sum(increase(originx_logparser_level_count_total{pid=~"%s",level=~"error|critical"}[%s] offset %s)) by(pid) + or + sum(increase(originx_logparser_exception_count_total{pid=~"%s"}[%s] offset %s)) by(pid))` -func QueryServiceInstancePromql(queryType QueryType, svcNames []string) string { - escapedKeys := make([]string, len(svcNames)) - for i, key := range svcNames { - escapedKeys[i] = EscapeRegexp(key) - } +func QueryLogByPidPromql(duration string, queryType QueryType, pids []string) string { // 使用 strings.Join 生成正则表达式模式 - regexPattern := strings.Join(escapedKeys, "|") + regexPattern := strings.Join(pids, "|") switch queryType { - case ServiceInstancePod: - return fmt.Sprintf(SERVICES_POD_INSTANCE, regexPattern) - case ServiceInstanceContainer: - return fmt.Sprintf(SERVICES_CONTAINER_INSTANCE, regexPattern) - case ServiceInstancePid: - return fmt.Sprintf(SERVICES_PID_INSTANCE, regexPattern) + case AvgLog: + return fmt.Sprintf(queryPidLogCountTemplate, + regexPattern, duration, "0", regexPattern, duration, "0", + regexPattern, duration, "0", regexPattern, duration, "0") + case LogDOD: + return fmt.Sprintf(queryPidLogCountTemplate+"/"+queryPidLogCountTemplate+"-1", + regexPattern, duration, "0", regexPattern, duration, "0", + regexPattern, duration, "0", regexPattern, duration, "0", + regexPattern, duration, "24h", regexPattern, duration, "24h", + regexPattern, duration, "24h", regexPattern, duration, "24h") + case LogWOW: + return fmt.Sprintf(queryPidLogCountTemplate+"/"+queryPidLogCountTemplate+"-1", + regexPattern, duration, "0", regexPattern, duration, "0", + regexPattern, duration, "0", regexPattern, duration, "0", + regexPattern, duration, "7d", regexPattern, duration, "7d", + regexPattern, duration, "7d", regexPattern, duration, "7d") + default: + return "" } - return "" }