Skip to content

Commit

Permalink
feat: implement centralized otel collector via terraform
Browse files Browse the repository at this point in the history
- closes #expanso-planning/issues/429
  • Loading branch information
frrist committed Jan 30, 2024
1 parent 4468b24 commit d6a85d5
Show file tree
Hide file tree
Showing 23 changed files with 452 additions and 3 deletions.
18 changes: 18 additions & 0 deletions ops/metrics/terraform/cloud-init/cloud-init.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,18 @@
#cloud-config
#
# This document is rendered as a template before it reaches the instance:
# each content placeholder below is replaced with the base64-encoded body
# of the corresponding file.

write_files:
  # OpenTelemetry collector configuration
  - path: /etc/otel-collector.yaml
    encoding: b64
    owner: root:root
    permissions: "0600"
    content: |
      ${otel_config_file}
  # systemd unit file for the collector service
  - path: /etc/systemd/system/otel.service
    encoding: b64
    owner: root:root
    permissions: "0600"
    content: |
      ${otel_service_file}
6 changes: 6 additions & 0 deletions ops/metrics/terraform/gcp/backend.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,6 @@
// Remote state: kept in a GCS bucket, under the "terraform" object prefix.
terraform {
  backend "gcs" {
    prefix = "terraform"
    bucket = "bacalhau-otel-collector-infra-state"
  }
}
27 changes: 27 additions & 0 deletions ops/metrics/terraform/gcp/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,27 @@
// Google provider; project, region and zone all come from root variables.
provider "google" {
  project = var.gcp_project_id
  region  = var.gcp_region
  zone    = var.gcp_zone
}

// VPC network, subnetwork, and firewall rules for the collector.
module "gcp_network" {
  source = "./modules/network"

  region      = var.gcp_region
  subnet_cidr = "10.0.0.0/16"
}

// The collector VM, attached to the network created by module.gcp_network.
module "otel_collector_instance" {
  source = "./modules/compute_instances/otelcollector"

  // placement
  zone       = var.gcp_zone
  network    = module.gcp_network.vpc_network_name
  subnetwork = module.gcp_network.subnetwork_name

  // machine shape
  boot_image                   = var.gcp_boot_image
  otel_collector_instance_type = var.otel_collector_machine_type

  // credentials substituted into the collector configuration template
  grafana_prometheus_username = var.grafana_prometheus_username
  grafana_prometheus_password = var.grafana_prometheus_password

  // passed as an empty string
  cloud_init_content = ""
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,67 @@
// Single Compute Engine VM that hosts the OpenTelemetry collector.
resource "google_compute_instance" "otel_collector" {
  name         = "bacalhau-otel-collector"
  zone         = var.zone
  machine_type = var.otel_collector_instance_type

  boot_disk {
    initialize_params {
      size  = var.boot_size
      image = var.boot_image
    }
  }

  network_interface {
    subnetwork = var.subnetwork
    network    = var.network

    // Empty access_config: an ephemeral public IP will be assigned.
    // TODO: this is where a static IP could be assigned so the instance can
    // be fronted with DNS.
    access_config {
    }
  }

  // user-data carries the rendered cloud-init document; startup-script
  // carries the templated start script.
  metadata = {
    user-data      = data.cloudinit_config.otel_collector_cloud_init.rendered
    startup-script = local.otel_start_script
  }
}

// Rendered instance files. Every template is read from the instance_files
// directory four levels above this module.
locals {
  // Collector configuration, with the Grafana credentials substituted in.
  otel_config_content = templatefile("${path.module}/../../../../instance_files/otel-collector-config.yaml", {
    grafana_prometheus_password = var.grafana_prometheus_password
    grafana_prometheus_username = var.grafana_prometheus_username
  })

  // systemd unit for the collector; no template arguments are passed yet.
  otel_service_content = templatefile("${path.module}/../../../../instance_files/otel.service", {})

  // Instance start script; no template arguments are passed yet.
  otel_start_script = templatefile("${path.module}/../../../../instance_files/start.sh", {})
}

// Wraps the cloud-config template in a single-part cloud-init document,
// emitted as plain text (neither gzipped nor base64-encoded).
data "cloudinit_config" "otel_collector_cloud_init" {
  base64_encode = false
  gzip          = false

  part {
    content_type = "text/cloud-config"
    filename     = "cloud-config.yaml"

    // File bodies are handed to the template base64-encoded.
    content = templatefile("${path.module}/../../../../cloud-init/cloud-init.yml", {
      otel_config_file  = base64encode(local.otel_config_content)
      otel_service_file = base64encode(local.otel_service_content)
    })
  }
}
Original file line number Diff line number Diff line change
@@ -0,0 +1,8 @@
// Public (NAT) IP of the collector's first network interface.
// The resource is a single instance; the full splat `[*]` wraps it in a
// one-element list, so the output keeps its list shape. This replaces the
// legacy attribute-only splat (`.*` with `.0` indices).
output "otel_collector_public_ips" {
  value = google_compute_instance.otel_collector[*].network_interface[0].access_config[0].nat_ip
}

// Internal IP of the collector's first network interface.
// The resource is a single instance; the full splat `[*]` wraps it in a
// one-element list, so the output keeps its list shape. This replaces the
// legacy attribute-only splat (`.*` with `.0` indices).
output "otel_collector_private_ips" {
  value = google_compute_instance.otel_collector[*].network_interface[0].network_ip
}

Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
// Machine type used for the collector VM.
variable "otel_collector_instance_type" {
  type        = string
  description = "The instance type for the otel collector"
}

// Zone the collector VM is created in.
variable "zone" {
  type        = string
  description = "The zone in which to provision instances"
}

// Boot disk size; passed to initialize_params.size.
variable "boot_size" {
  type        = number
  default     = 50
  description = "The size of the boot disk"
}

// Boot disk image; passed to initialize_params.image.
variable "boot_image" {
  type        = string
  description = "The boot image for the instances"
}

// NOTE(review): nothing visible in this module reads this variable; the
// cloud-init document is built by the cloudinit_config data source instead.
// The empty default makes the argument optional while keeping existing
// callers, which pass a value explicitly, working unchanged.
variable "cloud_init_content" {
  description = "Content of the cloud-init script"
  type        = string
  default     = ""
}

// VPC network for the instance's network interface.
variable "network" {
  type        = string
  description = "The VPC network to attach to the instances"
}

// Subnetwork for the instance's network interface.
variable "subnetwork" {
  type        = string
  description = "The subnetwork to attach to the instances"
}

// Basic-auth user name substituted into the collector configuration template.
variable "grafana_prometheus_username" {
  description = "username for hosted grafana prometheus"
  type        = string
}

// Basic-auth password substituted into the collector configuration template.
// Marked sensitive so Terraform redacts the variable's value in its CLI
// output. The value is still written to state and, via the rendered
// cloud-init document, to the instance's user-data metadata.
variable "grafana_prometheus_password" {
  description = "password for hosted grafana prometheus"
  type        = string
  sensitive   = true
}
52 changes: 52 additions & 0 deletions ops/metrics/terraform/gcp/modules/network/main.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,52 @@
// VPC network for the collector; the name is prefixed with the region.
resource "google_compute_network" "vpc_network" {
  auto_create_subnetworks = var.auto_subnets
  name                    = "${var.region}-bacalhau-otel-vpc-network"
}

// Explicit subnetwork inside the VPC, using the caller-supplied CIDR block.
resource "google_compute_subnetwork" "subnetwork" {
  name          = "${var.region}-bacalhau-otel-subnetwork"
  network       = google_compute_network.vpc_network.name
  region        = var.region
  ip_cidr_range = var.subnet_cidr
}

// Reserved regional address for the collector.
// NOTE(review): no instance in the visible configuration attaches this
// address; confirm whether it is meant to back the VM's access_config.
resource "google_compute_address" "otel_collector_ip" {
  region = var.region
  name   = "otel-collector-ip"
}

// Egress rule: allows ICMP and the configured TCP ports out of the VPC.
resource "google_compute_firewall" "google_firewall_egress" {
  name      = "bacalhau-otel-firewall-egress"
  network   = google_compute_network.vpc_network.name
  direction = "EGRESS"

  allow {
    protocol = "icmp"
  }

  allow {
    protocol = "tcp"
    ports    = var.egress_tcp_ports
  }

  // Egress rules are scoped by where traffic is going, so the ranges belong
  // in destination_ranges rather than source_ranges. The variable keeps its
  // existing name so callers of this module are unaffected.
  destination_ranges = var.egress_source_ranges
}

// Ingress rule: allows ICMP and the configured TCP ports into the VPC from
// the configured source ranges.
resource "google_compute_firewall" "bacalhau_protocol_firewall_ingress" {
  name      = "bacalhau-otel-firewall-ingress"
  direction = "INGRESS"
  network   = google_compute_network.vpc_network.name

  source_ranges = var.ingress_source_ranges

  allow {
    protocol = "tcp"
    ports    = var.ingress_tcp_ports
  }

  allow {
    protocol = "icmp"
  }
}
11 changes: 11 additions & 0 deletions ops/metrics/terraform/gcp/modules/network/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,11 @@
// Name of the VPC network created by this module.
output "vpc_network_name" {
  value = google_compute_network.vpc_network.name
}

// Name of the subnetwork created by this module.
output "subnetwork_name" {
  value = google_compute_subnetwork.subnetwork.name
}

// Reserved address for the collector, under a name that matches the resource.
output "otel_collector_ip" {
  value = google_compute_address.otel_collector_ip.address
}

// Same address under its original output name, kept so existing references
// continue to resolve.
output "requester_ip" {
  value = google_compute_address.otel_collector_ip.address
}
53 changes: 53 additions & 0 deletions ops/metrics/terraform/gcp/modules/network/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,53 @@
// Region for the subnetwork and reserved address; also prefixes resource names.
variable "region" {
  type        = string
  description = "The region to host the network in"
}

// CIDR range of the explicitly created subnetwork.
variable "subnet_cidr" {
  type        = string
  description = "The CIDR block for the subnet"
}

// Passed to auto_create_subnetworks on the VPC network.
variable "auto_subnets" {
  type        = bool
  default     = true
  description = "When true GCP will automatically create subnetworks"
}

//
// Egress settings
//

// TCP ports the egress firewall rule allows.
variable "egress_tcp_ports" {
  type        = list(string)
  description = "List of TCP ports for egress rules"
  default = [
    "443", // Grafana
  ]
}

// Address ranges applied to the egress firewall rule.
variable "egress_source_ranges" {
  type        = list(string)
  description = "Source ranges for egress rules"
  default     = ["0.0.0.0/0"]
}

//
// Ingress settings
//

// TCP ports the ingress firewall rule allows.
variable "ingress_tcp_ports" {
  type        = list(string)
  description = "List of TCP ports for ingress rules"
  default = [
    "22",   // SSH
    "4318", // OpenTelemetry collector
  ]
}

// Source ranges the ingress firewall rule accepts traffic from.
// NOTE(review): the default admits any IPv4 address, including to the SSH
// port above; confirm this exposure is intended.
variable "ingress_source_ranges" {
  type        = list(string)
  description = "Source ranges for ingress rules"
  default     = ["0.0.0.0/0"]
}
3 changes: 3 additions & 0 deletions ops/metrics/terraform/gcp/outputs.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,3 @@
// Re-exports the instance module's public IP output at the root.
output "otel_collector_public_ip" {
  value = module.otel_collector_instance.otel_collector_public_ips
}
35 changes: 35 additions & 0 deletions ops/metrics/terraform/gcp/variables.tf
Original file line number Diff line number Diff line change
@@ -0,0 +1,35 @@
// Project the Google provider operates in.
variable "gcp_project_id" {
  type        = string
  description = "GCP Project ID"
}

// Region for the provider and the network module.
variable "gcp_region" {
  type        = string
  description = "GCP Region"
}

// Zone for the provider and the collector instance.
variable "gcp_zone" {
  type        = string
  description = "GCP Zone"
}

// Boot image for the collector instance.
// NOTE(review): the default points at an interim (non-LTS) Ubuntu release;
// confirm that this image family is still published.
variable "gcp_boot_image" {
  type        = string
  default     = "projects/ubuntu-os-cloud/global/images/family/ubuntu-2304-amd64"
  description = "Boot image for GCP instances"
}

// Machine type handed to the collector instance module.
variable "otel_collector_machine_type" {
  type        = string
  description = "Machine type for collector instances"
}

// Basic-auth user name forwarded to the collector instance module.
variable "grafana_prometheus_username" {
  description = "username for hosted grafana prometheus"
  type        = string
}

// Basic-auth password forwarded to the collector instance module.
// Marked sensitive so Terraform redacts the value in its CLI output; the
// value is still stored in state.
variable "grafana_prometheus_password" {
  description = "password for hosted grafana prometheus"
  type        = string
  sensitive   = true
}
7 changes: 7 additions & 0 deletions ops/metrics/terraform/gcp/vars.tfvars
Original file line number Diff line number Diff line change
@@ -0,0 +1,7 @@
// Provider target
gcp_project_id = "forrest-dev-407420"
gcp_region     = "us-west1"
gcp_zone       = "us-west1-b"

// Collector instance
gcp_boot_image              = "projects/forrest-dev-407420/global/images/bacalhau-ubuntu-2004-lts-test"
otel_collector_machine_type = "e2-standard-4"

// Credentials substituted into the collector configuration template;
// left empty here.
grafana_prometheus_username = ""
grafana_prometheus_password = ""
45 changes: 45 additions & 0 deletions ops/metrics/terraform/instance_files/otel-collector-config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# OpenTelemetry collector configuration, rendered as a Terraform template.
# Pipeline: OTLP/HTTP metrics in -> processors -> Prometheus remote write
# (basic auth) plus a logging exporter.

extensions:
  basicauth/prometheus:
    client_auth:
      # jsonencode renders each credential as a quoted, escaped string, so an
      # all-digit user name stays a string and characters such as ':' or '#'
      # cannot break the YAML.
      username: ${jsonencode(grafana_prometheus_username)}
      password: ${jsonencode(grafana_prometheus_password)}

receivers:
  otlp:
    protocols:
      http:
        # listen on every interface, port 4318
        endpoint: "0.0.0.0:4318"

exporters:
  # NOTE(review): newer collector releases replace this exporter and its
  # loglevel setting; confirm against the collector version installed on the
  # instance before upgrading.
  logging:
    loglevel: debug
  prometheusremotewrite:
    endpoint: https://prometheus-us-central1.grafana.net/api/prom/push
    auth:
      authenticator: basicauth/prometheus
    resource_to_telemetry_conversion:
      enabled: true

processors:
  batch:
  memory_limiter:
    check_interval: 5s
    limit_mib: 4000
    spike_limit_mib: 500
  resource:
    attributes:
      # tag every resource with the name of this collector
      - key: service.collector
        value: bacalhau-otel-collector
        action: insert
  attributes/metrics:
    actions:
      # drop attributes whose key matches net.sock*
      - pattern: net\.sock.+
        action: delete

service:
  extensions: [basicauth/prometheus]
  pipelines:
    metrics:
      receivers: [otlp]
      processors: [memory_limiter, resource, attributes/metrics, batch]
      exporters: [prometheusremotewrite, logging]
Loading

0 comments on commit d6a85d5

Please sign in to comment.