Skip to content

Commit

Permalink
Merge pull request #103 from grafana/health-checks
Browse files Browse the repository at this point in the history
add health checks for all components
  • Loading branch information
zeitlinger authored Sep 17, 2024
2 parents dcc0107 + cb2081b commit f45032e
Show file tree
Hide file tree
Showing 3 changed files with 28 additions and 9 deletions.
1 change: 1 addition & 0 deletions RELEASING.md
Original file line number Diff line number Diff line change
Expand Up @@ -3,3 +3,4 @@
1. Go to https://github.com/grafana/docker-otel-lgtm/releases/new
2. Click on "Choose a tag", enter the tag name (e.g. `v0.1.0`), and click "Create a new tag".
3. Click on "Generate release notes" to auto-generate the release notes based on the commits since the last release.
4. Click on "Publish release".
12 changes: 6 additions & 6 deletions docker/otelcol-config.yaml
Original file line number Diff line number Diff line change
Expand Up @@ -28,11 +28,11 @@ exporters:
endpoint: http://localhost:3100/otlp
tls:
insecure: true
logging/metrics:
debug/metrics:
verbosity: detailed
logging/traces:
debug/traces:
verbosity: detailed
logging/logs:
debug/logs:
verbosity: detailed

service:
Expand All @@ -41,14 +41,14 @@ service:
receivers: [otlp]
processors: [batch]
exporters: [otlphttp/traces]
#exporters: [otlphttp/traces,logging/traces]
#exporters: [otlphttp/traces,debug/traces]
metrics:
receivers: [otlp,prometheus/collector]
processors: [batch]
exporters: [otlphttp/metrics]
#exporters: [otlphttp/metrics,logging/metrics]
#exporters: [otlphttp/metrics,debug/metrics]
logs:
receivers: [otlp]
processors: [batch]
exporters: [otlphttp/logs]
#exporters: [otlphttp/logs,logging/logs]
#exporters: [otlphttp/logs,debug/logs]
24 changes: 21 additions & 3 deletions docker/run-all.sh
Original file line number Diff line number Diff line change
Expand Up @@ -8,10 +8,28 @@

echo "Waiting for the OpenTelemetry collector and the Grafana LGTM stack to start up..."

# This waits for collector metrics to be available in Prometheus.
# TODO: Also curl Grafana, Loki, Tempo to be sure all services are up.
while ! curl -sg 'http://localhost:9090/api/v1/query?query=up{job="opentelemetry-collector"}' | jq -r .data.result[0].value[1] | grep '1' > /dev/null ; do
#######################################
# Block until an HTTP endpoint answers with status 200.
# Polls once per second and prints a progress line for every failed attempt.
# Arguments:
#   $1 - human-readable service name, used only in the log messages
#   $2 - URL to poll
# Outputs:
#   progress and success messages on stdout
#######################################
wait_ready() {
  # Keep both values function-local so a call never overwrites same-named
  # variables in the calling script.
  local service=$1
  local url=$2

  # curl stays inside the test on purpose: its exit status is ignored there, so
  # a refused connection just yields a non-200 code and the loop retries.
  while [[ "$(curl -o /dev/null -sg "${url}" -w "%{response_code}")" != "200" ]]; do
    echo "Waiting for ${service} to start up..."
    sleep 1
  done
  echo "${service} is up and running."
}

# Wait for each backend in turn; every entry is "<service name>|<health URL>".
readiness_checks=(
  "Grafana|http://localhost:3000/api/health"
  "Loki|http://localhost:3100/ready"
  "Prometheus|http://localhost:9090/api/v1/status/runtimeinfo"
  "Tempo|http://localhost:3200/ready"
)
for readiness_check in "${readiness_checks[@]}"; do
  # Text before the first '|' is the name, everything after it is the URL.
  wait_ready "${readiness_check%%|*}" "${readiness_check#*|}"
done

# The collector may not expose its own Prometheus endpoint if the config has
# been replaced, so query the otelcol_process_uptime_total metric through
# Prometheus instead. A numeric sample shows the collector is up and,
# indirectly, that Prometheus is answering queries.
# The jq filter is quoted so the shell never treats "[0]" / "[1]" as glob
# patterns. While no sample exists jq prints "null", which the digit match
# rejects, so the loop keeps waiting.
while ! curl -sg 'http://localhost:9090/api/v1/query?query=otelcol_process_uptime_total{}' \
  | jq -r '.data.result[0].value[1]' \
  | grep '[0-9]' > /dev/null; do
  echo "Waiting for the OpenTelemetry collector to start up..."
  sleep 1
done

echo "The OpenTelemetry collector and the Grafana LGTM stack are up and running."
Expand Down

0 comments on commit f45032e

Please sign in to comment.