From 17e19a79daefd22d4a4d54f22a0a647220363c70 Mon Sep 17 00:00:00 2001 From: Andre Ziviani Date: Sat, 18 Mar 2023 13:18:20 -0300 Subject: [PATCH] feat: Add support for spot instances --- exporter/{ondemand.go => ec2.go} | 45 +++++++++++++++++++++++++++----- exporter/fargate.go | 10 ++++--- exporter/kubernetes.go | 24 +++++++++++------ exporter/metrics.go | 14 +++++----- exporter/types.go | 21 ++++++++++----- 5 files changed, 82 insertions(+), 32 deletions(-) rename exporter/{ondemand.go => ec2.go} (73%) diff --git a/exporter/ondemand.go b/exporter/ec2.go similarity index 73% rename from exporter/ondemand.go rename to exporter/ec2.go index 2e1b114..da20b2e 100644 --- a/exporter/ondemand.go +++ b/exporter/ec2.go @@ -33,6 +33,7 @@ func (m *Metrics) GetInstances(ctx context.Context) { m.getInstances(ctx) m.GetOnDemandPricing(ctx) + m.GetSpotPricing(ctx) } func (m *Metrics) getInstances(ctx context.Context) { @@ -47,10 +48,11 @@ func (m *Metrics) getInstances(ctx context.Context) { } for _, instance := range instances.InstanceTypes { m.Instances[string(instance.InstanceType)] = &Instance{ - Memory: aws.ToInt64(instance.MemoryInfo.SizeInMiB), - VCpu: aws.ToInt32(instance.VCpuInfo.DefaultVCpus), - Kind: "ec2", - Type: string(instance.InstanceType), + Memory: aws.ToInt64(instance.MemoryInfo.SizeInMiB), + VCpu: aws.ToInt32(instance.VCpuInfo.DefaultVCpus), + Type: string(instance.InstanceType), + OnDemandCost: &Ec2Cost{}, + SpotCost: make(map[string]*Ec2Cost, 0), } } } @@ -133,11 +135,40 @@ func (m *Metrics) GetOnDemandPricing(ctx context.Context) { vcpu, memory := m.getNormalizedCost(value, tmp.Product.Attributes["instanceType"]) - m.Instances[tmp.Product.Attributes["instanceType"]].Cost = value - m.Instances[tmp.Product.Attributes["instanceType"]].VCpuCost = vcpu - m.Instances[tmp.Product.Attributes["instanceType"]].MemoryCost = memory + m.Instances[tmp.Product.Attributes["instanceType"]].OnDemandCost.Type = "ondemand" + m.Instances[tmp.Product.Attributes["instanceType"]].OnDemandCost.Total = value + m.Instances[tmp.Product.Attributes["instanceType"]].OnDemandCost.VCpu = vcpu + m.Instances[tmp.Product.Attributes["instanceType"]].OnDemandCost.Memory = memory } } } + +func (m *Metrics) GetSpotPricing(ctx context.Context) { + config := m.awsconfig + + ec2Svc := ec2.NewFromConfig(config) + + pag := ec2.NewDescribeSpotPriceHistoryPaginator( + ec2Svc, + &ec2.DescribeSpotPriceHistoryInput{ + StartTime: aws.Time(time.Now()), + ProductDescriptions: []string{"Linux/UNIX"}, + }) + + for pag.HasMorePages() { + history, err := pag.NextPage(ctx) + if err != nil { + panic(err.Error()) + } + + for _, price := range history.SpotPriceHistory { + value, _ := strconv.ParseFloat(*price.SpotPrice, 64) + + vcpu, memory := m.getNormalizedCost(value, string(price.InstanceType)) + + m.Instances[string(price.InstanceType)].SpotCost[aws.ToString(price.AvailabilityZone)] = &Ec2Cost{Type: "spot", Total: value, VCpu: vcpu, Memory: memory} + } + } +} diff --git a/exporter/fargate.go b/exporter/fargate.go index 9df5c25..348dc87 100644 --- a/exporter/fargate.go +++ b/exporter/fargate.go @@ -23,7 +23,7 @@ func (m *Metrics) GetFargatePricing(ctx context.Context) { pricingSvc := pricing.NewFromConfig(config) - m.Instances["fargate"] = &Instance{Type: "fargate", Kind: "fargate"} + m.Instances["fargate"] = &Instance{Type: "fargate"} pag := pricing.NewGetProductsPaginator( pricingSvc, @@ -62,10 +62,14 @@ func (m *Metrics) GetFargatePricing(ctx context.Context) { value, _ := strconv.ParseFloat(tmp.Terms.OnDemand[skuOnDemand].PriceDimensions[skuOnDemandPerHour].PricePerUnit["USD"], 64) description := tmp.Terms.OnDemand[skuOnDemand].PriceDimensions[skuOnDemandPerHour].Description + + if m.Instances["fargate"].OnDemandCost == nil { + m.Instances["fargate"].OnDemandCost = &Ec2Cost{Type: "fargate"} + } if strings.Contains(description, "AWS Fargate - vCPU - ") { - m.Instances["fargate"].VCpuCost = value + m.Instances["fargate"].OnDemandCost.VCpu = value } else if strings.Contains(description, "AWS Fargate - Memory - ") { - m.Instances["fargate"].MemoryCost = value + m.Instances["fargate"].OnDemandCost.Memory = value } } } diff --git a/exporter/kubernetes.go b/exporter/kubernetes.go index 8aadcde..19fde8e 100644 --- a/exporter/kubernetes.go +++ b/exporter/kubernetes.go @@ -119,7 +119,7 @@ func (m *Metrics) podCreated(obj interface{}) { resources := m.mergeResources(pod.Spec.Containers) if m.Nodes[pod.Spec.NodeName] != nil { - if m.Nodes[pod.Spec.NodeName].Instance.Kind == "fargate" { + if m.Nodes[pod.Spec.NodeName].Instance.Type == "fargate" { // fargate allocates more resources than requested and charges accordingly // the allocation size is exposed as an annotation // https://docs.aws.amazon.com/eks/latest/userguide/fargate-pod-configuration.html @@ -220,11 +220,19 @@ func (m *Metrics) nodeCreated(obj interface{}) { } if _, ok := node.ObjectMeta.Labels["node.kubernetes.io/instance-type"]; ok { + // EC2 tmp.Instance = m.Instances[node.ObjectMeta.Labels["node.kubernetes.io/instance-type"]] - } else if _, ok := node.Labels["eks.amazonaws.com/compute-type"]; ok { - if node.Labels["eks.amazonaws.com/compute-type"] == "fargate" { - tmp.Instance = m.Instances["fargate"] + + if _, ok := node.ObjectMeta.Labels["karpenter.sh/capacity-type"]; ok && node.ObjectMeta.Labels["karpenter.sh/capacity-type"] == "spot" { + // Node managed by Karpenter and is Spot + tmp.Cost = tmp.Instance.SpotCost[tmp.AZ] + } else { + tmp.Cost = tmp.Instance.OnDemandCost } + } else if _, ok := node.Labels["eks.amazonaws.com/compute-type"]; ok && node.Labels["eks.amazonaws.com/compute-type"] == "fargate" { + // Fargate + tmp.Instance = m.Instances["fargate"] + tmp.Cost = tmp.Instance.OnDemandCost } m.nodesMtx.Lock() @@ -290,12 +298,12 @@ func (m *Metrics) updatePodCost(pod *Pod) { } // convert bytes to GB - pod.MemoryCost = float64(pod.Usage.Memory.Value()) / 1024 / 1024 / 1024 * pod.Node.Instance.MemoryCost - pod.MemoryRequestsCost = float64(pod.Resources.Memory.Value()) / 1024 / 1024 / 1024 * pod.Node.Instance.MemoryCost + pod.MemoryCost = float64(pod.Usage.Memory.Value()) / 1024 / 1024 / 1024 * pod.Node.Cost.Memory + pod.MemoryRequestsCost = float64(pod.Resources.Memory.Value()) / 1024 / 1024 / 1024 * pod.Node.Cost.Memory //convert millicore to core - pod.VCpuCost = float64(pod.Usage.Cpu.MilliValue()) / 1000 * pod.Node.Instance.VCpuCost - pod.VCpuRequestsCost = float64(pod.Resources.Cpu.MilliValue()) / 1000 * pod.Node.Instance.VCpuCost + pod.VCpuCost = float64(pod.Usage.Cpu.MilliValue()) / 1000 * pod.Node.Cost.VCpu + pod.VCpuRequestsCost = float64(pod.Resources.Cpu.MilliValue()) / 1000 * pod.Node.Cost.VCpu pod.Cost = max(pod.MemoryCost, pod.MemoryRequestsCost) + max(pod.VCpuCost, pod.VCpuRequestsCost) } diff --git a/exporter/metrics.go b/exporter/metrics.go index 33fa672..668b45a 100644 --- a/exporter/metrics.go +++ b/exporter/metrics.go @@ -67,7 +67,7 @@ func (m *Metrics) Collect(ch chan<- prometheus.Metric) { m.podsMtx.Lock() m.GetUsageCost() - podLabels := []string{"pod", "namespace", "node", "kind", "type"} + podLabels := []string{"pod", "namespace", "node", "type", "lifecycle"} if len(m.addPodLabels) > 0 { for _, v := range m.addPodLabels { podLabels = append(podLabels, sanitizeLabel(v)) @@ -75,7 +75,7 @@ func (m *Metrics) Collect(ch chan<- prometheus.Metric) { } for _, pod := range m.Pods { - podLabelValues := []string{pod.Name, pod.Namespace, pod.Node.Name, pod.Node.Instance.Kind, pod.Node.Instance.Type} + podLabelValues := []string{pod.Name, pod.Namespace, pod.Node.Name, pod.Node.Instance.Type, pod.Node.Cost.Type} for _, l := range m.addPodLabels { podLabelValues = append(podLabelValues, pod.Labels[l]) } @@ -137,7 +137,7 @@ func (m *Metrics) Collect(ch chan<- prometheus.Metric) { } m.podsMtx.Unlock() - nodeLabels := []string{"node", "region", "az", "kind", "type"} + nodeLabels := []string{"node", "region", "az", "type", "lifecycle"} if len(m.addNodeLabels) > 0 { for _, v := range m.addNodeLabels { nodeLabels = append(nodeLabels, sanitizeLabel(v)) @@ -145,7 +145,7 @@ func (m *Metrics) Collect(ch chan<- prometheus.Metric) { } for _, node := range m.Nodes { - nodeLabelValues := []string{node.Name, node.Region, node.AZ, node.Instance.Type, node.Instance.Kind} + nodeLabelValues := []string{node.Name, node.Region, node.AZ, node.Instance.Type, node.Cost.Type} for _, l := range m.addNodeLabels { nodeLabelValues = append(nodeLabelValues, node.Labels[l]) } @@ -157,7 +157,7 @@ func (m *Metrics) Collect(ch chan<- prometheus.Metric) { nodeLabels, nil, ), prometheus.GaugeValue, - node.Instance.Cost, + node.Cost.Total, nodeLabelValues..., ) @@ -168,7 +168,7 @@ func (m *Metrics) Collect(ch chan<- prometheus.Metric) { nodeLabels, nil, ), prometheus.GaugeValue, - node.Instance.VCpuCost, + node.Cost.VCpu, nodeLabelValues..., ) @@ -179,7 +179,7 @@ func (m *Metrics) Collect(ch chan<- prometheus.Metric) { nodeLabels, nil, ), prometheus.GaugeValue, - node.Instance.MemoryCost, + node.Cost.Memory, nodeLabelValues..., ) } diff --git a/exporter/types.go b/exporter/types.go index 34d5e3f..fea37d2 100644 --- a/exporter/types.go +++ b/exporter/types.go @@ -32,14 +32,20 @@ type Metrics struct { addNodeLabels []string } +type Ec2Cost struct { + Type string + Total float64 + VCpu float64 + Memory float64 +} + type Instance struct { - Kind string - Type string - VCpu int32 - Memory int64 - Cost float64 - VCpuCost float64 - MemoryCost float64 + //Kind string + Type string + VCpu int32 + Memory int64 + OnDemandCost *Ec2Cost + SpotCost map[string]*Ec2Cost } type Pod struct { @@ -62,6 +68,7 @@ type Node struct { AZ string Region string Instance *Instance + Cost *Ec2Cost } type PodResources struct {