diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index a829bb9dcfe2..fa0eb454d29d 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,5 +1,5 @@ { - "image": "grafana/loki-build-image:0.29.0", + "image": "grafana/loki-build-image:0.33.0", "containerEnv": { "BUILD_IN_CONTAINER": "false" }, diff --git a/.drone/drone.jsonnet b/.drone/drone.jsonnet index 96acc3cd2b5b..6dcea160a78c 100644 --- a/.drone/drone.jsonnet +++ b/.drone/drone.jsonnet @@ -496,7 +496,7 @@ local manifest_ecr(apps, archs) = pipeline('manifest-ecr') { ], }; -local build_image_tag = '0.32.0'; +local build_image_tag = '0.33.0'; [ pipeline('loki-build-image-' + arch) { workspace: { @@ -640,6 +640,7 @@ local build_image_tag = '0.32.0'; 'GIT_TARGET_BRANCH="$DRONE_TARGET_BRANCH"', ]) { depends_on: ['loki'], when: onPRs }, make('validate-example-configs', container=false) { depends_on: ['loki'] }, + make('validate-dev-cluster-config', container=false) { depends_on: ['loki'] }, make('check-example-config-doc', container=false) { depends_on: ['clone'] }, { name: 'build-docs-website', diff --git a/.drone/drone.yml b/.drone/drone.yml index d4d91d242433..d45f7898a085 100644 --- a/.drone/drone.yml +++ b/.drone/drone.yml @@ -15,7 +15,7 @@ steps: dry_run: true repo: grafana/loki-build-image tags: - - 0.32.0-amd64 + - 0.33.0-amd64 when: event: - pull_request @@ -33,7 +33,7 @@ steps: from_secret: docker_password repo: grafana/loki-build-image tags: - - 0.32.0-amd64 + - 0.33.0-amd64 username: from_secret: docker_username when: @@ -68,7 +68,7 @@ steps: dry_run: true repo: grafana/loki-build-image tags: - - 0.32.0-arm64 + - 0.33.0-arm64 when: event: - pull_request @@ -86,7 +86,7 @@ steps: from_secret: docker_password repo: grafana/loki-build-image tags: - - 0.32.0-arm64 + - 0.33.0-arm64 username: from_secret: docker_username when: @@ -118,7 +118,7 @@ steps: password: from_secret: docker_password spec: .drone/docker-manifest-build-image.tmpl - target: loki-build-image:0.32.0 + target: loki-build-image:0.33.0 username: from_secret: docker_username when: @@ -182,14 +182,14 @@ steps: depends_on: - clone environment: {} - image: grafana/loki-build-image:0.31.2 + image: grafana/loki-build-image:0.33.0 name: check-drone-drift - commands: - make BUILD_IN_CONTAINER=false check-generated-files depends_on: - clone environment: {} - image: grafana/loki-build-image:0.31.2 + image: grafana/loki-build-image:0.33.0 name: check-generated-files - commands: - cd .. 
@@ -199,7 +199,7 @@ steps: depends_on: - clone environment: {} - image: grafana/loki-build-image:0.31.2 + image: grafana/loki-build-image:0.33.0 name: clone-target-branch when: event: @@ -210,14 +210,14 @@ steps: - clone-target-branch - check-generated-files environment: {} - image: grafana/loki-build-image:0.31.2 + image: grafana/loki-build-image:0.33.0 name: test - commands: - cd ../loki-target-branch && BUILD_IN_CONTAINER=false make test depends_on: - clone-target-branch environment: {} - image: grafana/loki-build-image:0.31.2 + image: grafana/loki-build-image:0.33.0 name: test-target-branch when: event: @@ -230,7 +230,7 @@ steps: - test - test-target-branch environment: {} - image: grafana/loki-build-image:0.31.2 + image: grafana/loki-build-image:0.33.0 name: compare-coverage when: event: @@ -248,7 +248,7 @@ steps: TOKEN: from_secret: github_token USER: grafanabot - image: grafana/loki-build-image:0.31.2 + image: grafana/loki-build-image:0.33.0 name: report-coverage when: event: @@ -258,7 +258,7 @@ steps: depends_on: - check-generated-files environment: {} - image: grafana/loki-build-image:0.31.2 + image: grafana/loki-build-image:0.33.0 name: lint - commands: - make BUILD_IN_CONTAINER=false check-mod @@ -266,7 +266,7 @@ steps: - test - lint environment: {} - image: grafana/loki-build-image:0.31.2 + image: grafana/loki-build-image:0.33.0 name: check-mod - commands: - apk add make bash && make lint-scripts @@ -277,21 +277,21 @@ steps: depends_on: - check-generated-files environment: {} - image: grafana/loki-build-image:0.31.2 + image: grafana/loki-build-image:0.33.0 name: loki - commands: - make BUILD_IN_CONTAINER=false check-doc depends_on: - loki environment: {} - image: grafana/loki-build-image:0.31.2 + image: grafana/loki-build-image:0.33.0 name: check-doc - commands: - make BUILD_IN_CONTAINER=false check-format GIT_TARGET_BRANCH="$DRONE_TARGET_BRANCH" depends_on: - loki environment: {} - image: grafana/loki-build-image:0.31.2 + image: grafana/loki-build-image:0.33.0 name: check-format when: event: @@ -301,14 +301,21 @@ steps: depends_on: - loki environment: {} - image: grafana/loki-build-image:0.31.2 + image: grafana/loki-build-image:0.33.0 name: validate-example-configs +- commands: + - make BUILD_IN_CONTAINER=false validate-dev-cluster-config + depends_on: + - loki + environment: {} + image: grafana/loki-build-image:0.33.0 + name: validate-dev-cluster-config - commands: - make BUILD_IN_CONTAINER=false check-example-config-doc depends_on: - clone environment: {} - image: grafana/loki-build-image:0.31.2 + image: grafana/loki-build-image:0.33.0 name: check-example-config-doc - commands: - mkdir -p /hugo/content/docs/loki/latest @@ -341,7 +348,7 @@ steps: depends_on: - clone environment: {} - image: grafana/loki-build-image:0.31.2 + image: grafana/loki-build-image:0.33.0 name: loki-mixin-check when: event: @@ -366,7 +373,7 @@ steps: depends_on: - clone environment: {} - image: grafana/loki-build-image:0.31.2 + image: grafana/loki-build-image:0.33.0 name: documentation-helm-reference-check trigger: ref: @@ -1772,7 +1779,7 @@ steps: NFPM_SIGNING_KEY: from_secret: gpg_private_key NFPM_SIGNING_KEY_FILE: /drone/src/private-key.key - image: grafana/loki-build-image:0.31.2 + image: grafana/loki-build-image:0.33.0 name: write-key - commands: - make BUILD_IN_CONTAINER=false packages @@ -1780,7 +1787,7 @@ steps: NFPM_PASSPHRASE: from_secret: gpg_passphrase NFPM_SIGNING_KEY_FILE: /drone/src/private-key.key - image: grafana/loki-build-image:0.31.2 + image: grafana/loki-build-image:0.33.0 name: 
test packaging - commands: - ./tools/packaging/verify-deb-install.sh @@ -1806,7 +1813,7 @@ steps: NFPM_PASSPHRASE: from_secret: gpg_passphrase NFPM_SIGNING_KEY_FILE: /drone/src/private-key.key - image: grafana/loki-build-image:0.31.2 + image: grafana/loki-build-image:0.33.0 name: publish when: event: @@ -1841,7 +1848,7 @@ steps: from_secret: docker_password DOCKER_USERNAME: from_secret: docker_username - image: grafana/loki-build-image:0.31.2 + image: grafana/loki-build-image:0.33.0 name: build and push privileged: true volumes: @@ -2106,6 +2113,6 @@ kind: secret name: gpg_private_key --- kind: signature -hmac: 8ae9cff1a379503d0b568f727d9c12bcb486a5e8d1fc3271deea32f07939baf1 +hmac: fe7669a21410ae5f2d1ad6b6205fdc582af874f65f7bd6a679731a88174e3a1c ... diff --git a/.github/workflows/checks.yml b/.github/workflows/checks.yml index 987df91c8c3e..cb9bc3a867ee 100644 --- a/.github/workflows/checks.yml +++ b/.github/workflows/checks.yml @@ -6,7 +6,7 @@ jobs: env: BUILD_IN_CONTAINER: false container: - image: grafana/loki-build-image:0.32.0 + image: grafana/loki-build-image:0.33.0 steps: - uses: actions/checkout@v4 - run: git config --global --add safe.directory "$GITHUB_WORKSPACE" diff --git a/CHANGELOG.md b/CHANGELOG.md index dc758f28e665..edca779fefa5 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -47,12 +47,15 @@ * [11545](https://github.com/grafana/loki/pull/11545) **dannykopping** Force correct memcached timeout when fetching chunks. * [11589](https://github.com/grafana/loki/pull/11589) **ashwanthgoli** Results Cache: Adds `query_length_served` cache stat to measure the length of the query served from cache. * [11535](https://github.com/grafana/loki/pull/11535) **dannykopping** Query Frontend: Allow customisable splitting of queries which overlap the `query_ingester_within` window to reduce query pressure on ingesters. +* [11654](https://github.com/grafana/loki/pull/11654) **dannykopping** Cache: atomically check background cache size limit correctly. ##### Fixes * [11074](https://github.com/grafana/loki/pull/11074) **hainenber** Fix panic in lambda-promtail due to mishandling of empty DROP_LABELS env var. * [11195](https://github.com/grafana/loki/pull/11195) **canuteson** Generate tsdb_shipper storage_config even if using_boltdb_shipper is false +* [9831](https://github.com/grafana/loki/pull/9831) **sijmenhuizenga**: Fix Promtail excludepath not evaluated on newly added files. * [11551](https://github.com/grafana/loki/pull/11551) **dannykopping** Do not reflect label names in request metrics' "route" label. * [11601](https://github.com/grafana/loki/pull/11601) **dannykopping** Ruler: Fixed a panic that can be caused by concurrent read-write access of tenant configs when there are a large amount of rules. +* [11606](https://github.com/grafana/loki/pull/11606) **dannykopping** Fixed regression adding newlines to HTTP error response bodies which may break client integrations. ##### Changes @@ -78,6 +81,7 @@ * [10677](https://github.com/grafana/loki/pull/10677) **chaudum** Remove deprecated `stream_lag_labels` setting from both the `options` and `client` configuration sections. * [10689](https://github.com/grafana/loki/pull/10689) **dylanguedes**: Ingester: Make jitter to be 20% of flush check period instead of 1%. +* [11420](https://github.com/grafana/loki/pull/11420) **zry98**: Show a clearer reason in "disable watchConfig" log message when server is disabled. 
##### Fixes diff --git a/Makefile b/Makefile index 23a22d3e55bc..d311ed1c4f3c 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ DOCKER_IMAGE_DIRS := $(patsubst %/Dockerfile,%,$(DOCKERFILES)) BUILD_IN_CONTAINER ?= true # ensure you run `make drone` after changing this -BUILD_IMAGE_VERSION ?= 0.31.2 +BUILD_IMAGE_VERSION ?= 0.33.0 # Docker image info IMAGE_PREFIX ?= grafana @@ -801,6 +801,9 @@ EXAMPLES_SKIP_VALIDATION_FLAG := "doc-example:skip-validation=true" validate-example-configs: loki for f in $$(grep -rL $(EXAMPLES_SKIP_VALIDATION_FLAG) $(EXAMPLES_YAML_PATH)/*.yaml); do echo "Validating provided example config: $$f" && ./cmd/loki/loki -config.file=$$f -verify-config || exit 1; done +validate-dev-cluster-config: loki + ./cmd/loki/loki -config.file=./tools/dev/loki-boltdb-storage-s3/config/loki.yaml -verify-config + # Dynamically generate ./docs/sources/configure/examples.md using the example configs that we provide. # This target should be run if any of our example configs change. generate-example-config-doc: @@ -836,14 +839,16 @@ dev-k3d-down: # Trivy is used to scan images for vulnerabilities .PHONY: trivy -trivy: loki-image +trivy: loki-image build-image trivy i $(IMAGE_PREFIX)/loki:$(IMAGE_TAG) + trivy i $(IMAGE_PREFIX)/loki-build-image:$(IMAGE_TAG) trivy fs go.mod # Synk is also used to scan for vulnerabilities, and detects things that trivy might miss .PHONY: snyk -snyk: loki-image - snyk container test $(IMAGE_PREFIX)/loki:$(IMAGE_TAG) +snyk: loki-image build-image + snyk container test $(IMAGE_PREFIX)/loki:$(IMAGE_TAG) --file=cmd/loki/Dockerfile + snyk container test $(IMAGE_PREFIX)/loki-build-image:$(IMAGE_TAG) --file=loki-build-image/Dockerfile snyk code test .PHONY: scan-vulnerabilities diff --git a/clients/cmd/docker-driver/Dockerfile b/clients/cmd/docker-driver/Dockerfile index 5fe3fae2c97d..2b5baab318ad 100644 --- a/clients/cmd/docker-driver/Dockerfile +++ b/clients/cmd/docker-driver/Dockerfile @@ -1,4 +1,4 @@ -ARG BUILD_IMAGE=grafana/loki-build-image:0.31.2 +ARG BUILD_IMAGE=grafana/loki-build-image:0.33.0 # Directories in this file are referenced from the root of the project not this folder # This file is intended to be called from the root like so: # docker build -t grafana/loki -f cmd/loki/Dockerfile . @@ -9,7 +9,7 @@ COPY . /src/loki WORKDIR /src/loki RUN make clean && make BUILD_IN_CONTAINER=false clients/cmd/docker-driver/docker-driver -FROM alpine:3.18.4 +FROM alpine:3.18.5 RUN apk add --update --no-cache ca-certificates tzdata COPY --from=build /src/loki/clients/cmd/docker-driver/docker-driver /bin/docker-driver WORKDIR /bin/ diff --git a/clients/cmd/docker-driver/README.md b/clients/cmd/docker-driver/README.md index e95675b2ef6e..c56874ad3cd1 100644 --- a/clients/cmd/docker-driver/README.md +++ b/clients/cmd/docker-driver/README.md @@ -23,4 +23,4 @@ To build and contribute. you will need: To build the driver you can use `make docker-driver`, then you can install this driver using `make docker-driver-enable`. If you want to uninstall the driver simply run `make docker-driver-clean`. -Make you update the [documentation](../../docs/sources/clients/docker-driver/) accordingly when submitting a new change. +Make sure you update the [documentation](../../docs/sources/send-data/docker-driver/) accordingly when submitting a new change. 
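For context on the Makefile change above: the new `validate-dev-cluster-config` target simply runs the freshly built Loki binary against the dev-cluster config with `-verify-config`, the same way `validate-example-configs` does for the example configs. A rough local equivalent is sketched below; it assumes `make loki` has already produced `./cmd/loki/loki`.

```bash
# Illustrative only: roughly what `make validate-dev-cluster-config` runs.
# Assumes the loki binary has already been built at ./cmd/loki/loki.
./cmd/loki/loki \
  -config.file=./tools/dev/loki-boltdb-storage-s3/config/loki.yaml \
  -verify-config
```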
diff --git a/clients/cmd/promtail/Dockerfile.cross b/clients/cmd/promtail/Dockerfile.cross index 92784390105a..084186e1a3d5 100644 --- a/clients/cmd/promtail/Dockerfile.cross +++ b/clients/cmd/promtail/Dockerfile.cross @@ -1,4 +1,4 @@ -ARG BUILD_IMAGE=grafana/loki-build-image:0.29.3 +ARG BUILD_IMAGE=grafana/loki-build-image:0.33.0 # Directories in this file are referenced from the root of the project not this folder # This file is intended to be called from the root like so: # docker build -t grafana/promtail -f clients/cmd/promtail/Dockerfile . diff --git a/clients/cmd/promtail/Dockerfile.debug b/clients/cmd/promtail/Dockerfile.debug index 1ff864251982..97e191cf4982 100644 --- a/clients/cmd/promtail/Dockerfile.debug +++ b/clients/cmd/promtail/Dockerfile.debug @@ -2,14 +2,14 @@ # This file is intended to be called from the root like so: # docker build -t grafana/promtail -f clients/cmd/promtail/Dockerfile.debug . -FROM grafana/loki-build-image:0.29.3 as build +FROM grafana/loki-build-image:0.33.0 as build ARG GOARCH="amd64" COPY . /src/loki WORKDIR /src/loki RUN make clean && make BUILD_IN_CONTAINER=false PROMTAIL_JOURNAL_ENABLED=true promtail-debug -FROM alpine:3.18.4 +FROM alpine:3.18.5 RUN apk add --update --no-cache ca-certificates tzdata COPY --from=build /src/loki/clients/cmd/promtail/promtail-debug /usr/bin/promtail-debug COPY --from=build /usr/bin/dlv /usr/bin/dlv diff --git a/clients/pkg/promtail/promtail.go b/clients/pkg/promtail/promtail.go index 1586e2d97115..1ef3368a697e 100644 --- a/clients/pkg/promtail/promtail.go +++ b/clients/pkg/promtail/promtail.go @@ -255,25 +255,29 @@ func (p *Promtail) watchConfig() { level.Warn(p.logger).Log("msg", "disable watchConfig", "reason", "Promtail newConfig func is Empty") return } - promtailServer, ok := p.server.(*server.PromtailServer) - if !ok { - level.Warn(p.logger).Log("msg", "disable watchConfig", "reason", "promtailServer cast fail") + switch srv := p.server.(type) { + case *server.NoopServer: + level.Warn(p.logger).Log("msg", "disable watchConfig", "reason", "Promtail server is disabled") return - } - level.Warn(p.logger).Log("msg", "enable watchConfig") - hup := make(chan os.Signal, 1) - signal.Notify(hup, syscall.SIGHUP) - for { - select { - case <-hup: - _ = p.reload() - case rc := <-promtailServer.Reload(): - if err := p.reload(); err != nil { - rc <- err - } else { - rc <- nil + case *server.PromtailServer: + level.Warn(p.logger).Log("msg", "enable watchConfig") + hup := make(chan os.Signal, 1) + signal.Notify(hup, syscall.SIGHUP) + for { + select { + case <-hup: + _ = p.reload() + case rc := <-srv.Reload(): + if err := p.reload(); err != nil { + rc <- err + } else { + rc <- nil + } } } + default: + level.Warn(p.logger).Log("msg", "disable watchConfig", "reason", "Unknown Promtail server type") + return } } diff --git a/clients/pkg/promtail/server/server.go b/clients/pkg/promtail/server/server.go index 4eb61361d10a..1b47247630e0 100644 --- a/clients/pkg/promtail/server/server.go +++ b/clients/pkg/promtail/server/server.go @@ -321,25 +321,25 @@ func computeExternalURL(u string, port int) (*url.URL, error) { return eu, nil } -type noopServer struct { +type NoopServer struct { log log.Logger sigs chan os.Signal } -func newNoopServer(log log.Logger) *noopServer { - return &noopServer{ +func newNoopServer(log log.Logger) *NoopServer { + return &NoopServer{ log: log, sigs: make(chan os.Signal, 1), } } -func (s *noopServer) Run() error { +func (s *NoopServer) Run() error { signal.Notify(s.sigs, syscall.SIGINT, syscall.SIGTERM) sig 
:= <-s.sigs level.Info(s.log).Log("msg", "received shutdown signal", "sig", sig) return nil } -func (s *noopServer) Shutdown() { +func (s *NoopServer) Shutdown() { s.sigs <- syscall.SIGTERM } diff --git a/clients/pkg/promtail/targets/file/filetarget.go b/clients/pkg/promtail/targets/file/filetarget.go index 9c363f9d38a6..2c52cbead922 100644 --- a/clients/pkg/promtail/targets/file/filetarget.go +++ b/clients/pkg/promtail/targets/file/filetarget.go @@ -350,6 +350,20 @@ func (t *FileTarget) startTailing(ps []string) { continue } + if t.pathExclude != "" { + matched, err := doublestar.Match(t.pathExclude, p) + if err != nil { + level.Error(t.logger).Log("msg", "ignoring file, exclude pattern match failed", "error", err, "filename", p, "pathExclude", t.pathExclude) + t.metrics.totalBytes.DeleteLabelValues(p) + continue + } + if matched { + level.Info(t.logger).Log("msg", "ignoring file", "error", "file matches exclude pattern", "filename", p, "pathExclude", t.pathExclude) + t.metrics.totalBytes.DeleteLabelValues(p) + continue + } + } + var reader Reader if t.decompressCfg != nil && t.decompressCfg.Enabled { level.Debug(t.logger).Log("msg", "reading from compressed file", "filename", p) diff --git a/clients/pkg/promtail/targets/file/filetarget_test.go b/clients/pkg/promtail/targets/file/filetarget_test.go index 31b48c75b670..f3cde7bf819a 100644 --- a/clients/pkg/promtail/targets/file/filetarget_test.go +++ b/clients/pkg/promtail/targets/file/filetarget_test.go @@ -475,6 +475,7 @@ func TestHandleFileCreationEvent(t *testing.T) { positionsFileName := filepath.Join(dirName, "positions.yml") logDir := filepath.Join(dirName, "log") logFile := filepath.Join(logDir, "test1.log") + logFileIgnored := filepath.Join(logDir, "test.donot.log") if err := os.MkdirAll(logDir, 0750); err != nil { t.Fatal(err) @@ -511,7 +512,8 @@ func TestHandleFileCreationEvent(t *testing.T) { } }() - target, err := NewFileTarget(metrics, logger, client, ps, path, "", nil, nil, &Config{ + pathExclude := "**/*.donot.log" + target, err := NewFileTarget(metrics, logger, client, ps, path, pathExclude, nil, nil, &Config{ // To handle file creation event from channel, set enough long time as sync period SyncPeriod: 10 * time.Minute, }, DefaultWatchConig, fakeFileHandler, fakeTargetHandler, "", nil) @@ -523,10 +525,18 @@ func TestHandleFileCreationEvent(t *testing.T) { if err != nil { t.Fatal(err) } + _, err = os.Create(logFileIgnored) + if err != nil { + t.Fatal(err) + } fakeFileHandler <- fsnotify.Event{ Name: logFile, Op: fsnotify.Create, } + fakeFileHandler <- fsnotify.Event{ + Name: logFileIgnored, + Op: fsnotify.Create, + } requireEventually(t, func() bool { return len(target.readers) == 1 }, "Expected tails to be 1 at this point in the test...") diff --git a/cmd/logcli/Dockerfile b/cmd/logcli/Dockerfile index c273d8cc70e4..3ec342895089 100644 --- a/cmd/logcli/Dockerfile +++ b/cmd/logcli/Dockerfile @@ -4,7 +4,7 @@ COPY . /src/loki WORKDIR /src/loki RUN make clean && make BUILD_IN_CONTAINER=false logcli -FROM alpine:3.18.4 +FROM alpine:3.18.5 RUN apk add --no-cache ca-certificates diff --git a/cmd/logql-analyzer/Dockerfile b/cmd/logql-analyzer/Dockerfile index d434281ce411..87b6893ae5a3 100644 --- a/cmd/logql-analyzer/Dockerfile +++ b/cmd/logql-analyzer/Dockerfile @@ -4,7 +4,7 @@ COPY . 
/src/loki WORKDIR /src/loki RUN make clean && CGO_ENABLED=0 go build ./cmd/logql-analyzer/ -FROM alpine:3.18.4 +FROM alpine:3.18.5 RUN apk add --no-cache ca-certificates diff --git a/cmd/loki-canary-boringcrypto/Dockerfile b/cmd/loki-canary-boringcrypto/Dockerfile index e6793f2a6d0e..0c4086911632 100644 --- a/cmd/loki-canary-boringcrypto/Dockerfile +++ b/cmd/loki-canary-boringcrypto/Dockerfile @@ -5,7 +5,7 @@ WORKDIR /src/loki RUN go env GOARCH > /goarch RUN make clean && make GOARCH=$(cat /goarch) BUILD_IN_CONTAINER=true GOEXPERIMENT=boringcrypto loki-canary-boringcrypto -FROM alpine:3.18.4 +FROM alpine:3.18.5 RUN apk add --update --no-cache ca-certificates RUN apk add --no-cache libc6-compat COPY --from=build /src/loki/cmd/loki-canary-boringcrypto/loki-canary-boringcrypto /usr/bin/loki-canary diff --git a/cmd/loki-canary/Dockerfile b/cmd/loki-canary/Dockerfile index 017bf6083df9..7f44b73ab5e6 100644 --- a/cmd/loki-canary/Dockerfile +++ b/cmd/loki-canary/Dockerfile @@ -4,7 +4,7 @@ COPY . /src/loki WORKDIR /src/loki RUN make clean && make BUILD_IN_CONTAINER=false loki-canary -FROM alpine:3.18.4 +FROM alpine:3.18.5 RUN apk add --update --no-cache ca-certificates COPY --from=build /src/loki/cmd/loki-canary/loki-canary /usr/bin/loki-canary ENTRYPOINT [ "/usr/bin/loki-canary" ] diff --git a/cmd/loki-canary/Dockerfile.cross b/cmd/loki-canary/Dockerfile.cross index 6815f45dcbf1..20077b196a8f 100644 --- a/cmd/loki-canary/Dockerfile.cross +++ b/cmd/loki-canary/Dockerfile.cross @@ -1,4 +1,4 @@ -ARG BUILD_IMAGE=grafana/loki-build-image:0.29.3 +ARG BUILD_IMAGE=grafana/loki-build-image:0.33.0 # Directories in this file are referenced from the root of the project not this folder # This file is intended to be called from the root like so: # docker build -t grafana/promtail -f cmd/promtail/Dockerfile . @@ -12,7 +12,7 @@ COPY . /src/loki WORKDIR /src/loki RUN make clean && GOARCH=$(cat /goarch) GOARM=$(cat /goarm) make BUILD_IN_CONTAINER=false loki-canary -FROM alpine:3.18.4 +FROM alpine:3.18.5 RUN apk add --update --no-cache ca-certificates COPY --from=build /src/loki/cmd/loki-canary/loki-canary /usr/bin/loki-canary ENTRYPOINT [ "/usr/bin/loki-canary" ] diff --git a/cmd/loki/Dockerfile b/cmd/loki/Dockerfile index 520600e75982..4f8fc3961c90 100644 --- a/cmd/loki/Dockerfile +++ b/cmd/loki/Dockerfile @@ -4,7 +4,7 @@ COPY . /src/loki WORKDIR /src/loki RUN make clean && make BUILD_IN_CONTAINER=false loki -FROM alpine:3.18.4 +FROM alpine:3.18.5 RUN apk add --no-cache ca-certificates libcap diff --git a/cmd/loki/Dockerfile.cross b/cmd/loki/Dockerfile.cross index 134683f615bb..d7bd233100e6 100644 --- a/cmd/loki/Dockerfile.cross +++ b/cmd/loki/Dockerfile.cross @@ -1,4 +1,4 @@ -ARG BUILD_IMAGE=grafana/loki-build-image:0.29.3 +ARG BUILD_IMAGE=grafana/loki-build-image:0.33.0 # Directories in this file are referenced from the root of the project not this folder # This file is intended to be called from the root like so: # docker build -t grafana/loki -f cmd/loki/Dockerfile . @@ -12,7 +12,7 @@ COPY . 
/src/loki WORKDIR /src/loki RUN make clean && GOARCH=$(cat /goarch) GOARM=$(cat /goarm) make BUILD_IN_CONTAINER=false loki -FROM alpine:3.18.4 +FROM alpine:3.18.5 RUN apk add --no-cache ca-certificates diff --git a/cmd/loki/Dockerfile.debug b/cmd/loki/Dockerfile.debug index 539dfdf90b26..fe64eb120823 100644 --- a/cmd/loki/Dockerfile.debug +++ b/cmd/loki/Dockerfile.debug @@ -1,4 +1,4 @@ -ARG BUILD_IMAGE=grafana/loki-build-image:0.29.3 +ARG BUILD_IMAGE=grafana/loki-build-image:0.33.0 # Directories in this file are referenced from the root of the project not this folder # This file is intended to be called from the root like so: # docker build -t grafana/loki -f cmd/loki/Dockerfile.debug . @@ -15,7 +15,7 @@ WORKDIR /src/loki RUN make clean && \ GOARCH=$(cat /goarch) GOARM=$(cat /goarm) make BUILD_IN_CONTAINER=false loki-debug -FROM alpine:3.18.4 +FROM alpine:3.18.5 RUN apk add --update --no-cache ca-certificates COPY --from=build /src/loki/cmd/loki/loki-debug /usr/bin/loki-debug COPY --from=goenv /go/bin/dlv /usr/bin/dlv diff --git a/cmd/migrate/Dockerfile b/cmd/migrate/Dockerfile index 3fe4bbdc7a4e..44ffe26513df 100644 --- a/cmd/migrate/Dockerfile +++ b/cmd/migrate/Dockerfile @@ -3,7 +3,7 @@ COPY . /src/loki WORKDIR /src/loki RUN make clean && make BUILD_IN_CONTAINER=false migrate -FROM alpine:3.18.4 +FROM alpine:3.18.5 RUN apk add --update --no-cache ca-certificates COPY --from=build /src/loki/cmd/migrate/migrate /usr/bin/migrate #ENTRYPOINT [ "/usr/bin/migrate" ] diff --git a/cmd/querytee/Dockerfile b/cmd/querytee/Dockerfile index a750c0efeb98..858a4d66b971 100644 --- a/cmd/querytee/Dockerfile +++ b/cmd/querytee/Dockerfile @@ -4,7 +4,7 @@ COPY . /src/loki WORKDIR /src/loki RUN make clean && make BUILD_IN_CONTAINER=false loki-querytee -FROM alpine:3.18.4 +FROM alpine:3.18.5 RUN apk add --update --no-cache ca-certificates COPY --from=build /src/loki/cmd/querytee/querytee /usr/bin/querytee ENTRYPOINT [ "/usr/bin/querytee" ] diff --git a/cmd/querytee/Dockerfile.cross b/cmd/querytee/Dockerfile.cross index f759f5403f47..94d2665c6a61 100644 --- a/cmd/querytee/Dockerfile.cross +++ b/cmd/querytee/Dockerfile.cross @@ -1,4 +1,4 @@ -ARG BUILD_IMAGE=grafana/loki-build-image:0.29.3 +ARG BUILD_IMAGE=grafana/loki-build-image:0.33.0 # Directories in this file are referenced from the root of the project not this folder # This file is intended to be called from the root like so: # docker build -t grafana/promtail -f cmd/promtail/Dockerfile . @@ -12,7 +12,7 @@ COPY . /src/loki WORKDIR /src/loki RUN make clean && GOARCH=$(cat /goarch) GOARM=$(cat /goarm) make BUILD_IN_CONTAINER=false loki-querytee -FROM alpine:3.18.4 +FROM alpine:3.18.5 RUN apk add --update --no-cache ca-certificates COPY --from=build /src/loki/cmd/querytee/querytee /usr/bin/querytee ENTRYPOINT [ "/usr/bin/querytee" ] diff --git a/docs/sources/community/contributing.md b/docs/sources/community/contributing.md index 5baaa66205fb..f5918dfe82f4 100644 --- a/docs/sources/community/contributing.md +++ b/docs/sources/community/contributing.md @@ -30,8 +30,10 @@ $ git commit -m "docs: fix spelling error" $ git push -u fork HEAD ``` -Note that if you downloaded Loki using `go get`, the message `package github.com/grafana/loki: no Go files in /go/src/github.com/grafana/loki` +{{% admonition type="note" %}} +If you downloaded Loki using `go get`, the message `package github.com/grafana/loki: no Go files in /go/src/github.com/grafana/loki` is normal and requires no actions to resolve. 
+{{% /admonition %}} ### Building diff --git a/docs/sources/community/design-documents/2021-01-Ordering-Constraint-Removal.md b/docs/sources/community/design-documents/2021-01-Ordering-Constraint-Removal.md index df3e511d50d6..64e59e21d07d 100644 --- a/docs/sources/community/design-documents/2021-01-Ordering-Constraint-Removal.md +++ b/docs/sources/community/design-documents/2021-01-Ordering-Constraint-Removal.md @@ -129,7 +129,10 @@ The performance losses against the current approach includes: Loki regularly combines multiple blocks into a chunk and "flushes" it to storage. In order to ensure that reads over flushed chunks remain as performant as possible, we will re-order a possibly-overlapping set of blocks into a set of blocks that maintain monotonically increasing order between them. From the perspective of the rest of Loki’s components (queriers/rulers fetching chunks from storage), nothing has changed. -**Note: In the case that data for a stream is ingested in order, this is effectively a no-op, making it well optimized for in-order writes (which is both the requirement and default in Loki currently). Thus, this should have little performance impact on ordered data while enabling Loki to ingest unordered data.** +{{% admonition type="note" %}} +**In the case that data for a stream is ingested in order, this is effectively a no-op, making it well optimized for in-order writes (which is both the requirement and default in Loki currently). Thus, this should have little performance impact on ordered data while enabling Loki to ingest unordered data.** +{{% /admonition %}} + #### Chunk Durations @@ -150,7 +153,9 @@ The second is simple to implement and an effective way to ensure Loki can ingest We also cut chunks according to the `sync_period`. The first timestamp ingested past this bound will trigger a cut. This process aids in increasing chunk determinism and therefore our deduplication ratio in object storage because chunks are [content addressed](https://en.wikipedia.org/wiki/Content-addressable_storage). With the removal of our ordering constraint, it's possible that in some cases the synchronization method will not be as effective, such as during concurrent writes to the same stream across this bound. -**Note: It's important to mention that this is possible today with the current ordering constraint, but we'll be increasing the likelihood by removing it** +{{% admonition type="note" %}} +**It's important to mention that this is possible today with the current ordering constraint, but we'll be increasing the likelihood by removing it.** +{{% /admonition %}} ``` Figure 5 diff --git a/docs/sources/community/maintaining/release-loki-build-image.md b/docs/sources/community/maintaining/release-loki-build-image.md index ceeb799f4c68..d6e1f15b1d81 100644 --- a/docs/sources/community/maintaining/release-loki-build-image.md +++ b/docs/sources/community/maintaining/release-loki-build-image.md @@ -16,19 +16,21 @@ if any changes were made in the folder `./loki-build-image/`. ## Step 1 -1. create a branch with the desired changes to the Dockerfile -2. update the version tag of the `loki-build-image` pipeline defined in `.drone/drone.jsonnet` (search for `pipeline('loki-build-image')`) to a new version number (try to follow semver) -3. run `DRONE_SERVER=https://drone.grafana.net/ DRONE_TOKEN= make drone` and commit the changes to the same branch - 1. the `` is your personal drone token, which can be found by navigating to https://drone.grafana.net/account. -4. create a PR -5. 
once approved and merged to `main`, the image with the new version is built and published - - **Note:** keep an eye on https://drone.grafana.net/grafana/loki for the build after merging ([example](https://drone.grafana.net/grafana/loki/17760/1/2)) +1. Create a branch with the desired changes to the Dockerfile. +2. Update the version tag of the `loki-build-image` pipeline defined in `.drone/drone.jsonnet` (search for `pipeline('loki-build-image')`) to a new version number (try to follow semver). +3. Run `DRONE_SERVER=https://drone.grafana.net/ DRONE_TOKEN= make drone` and commit the changes to the same branch. + 1. The `` is your personal drone token, which can be found by navigating to https://drone.grafana.net/account. +4. Create a PR. +5. Once approved and merged to `main`, the image with the new version is built and published. + {{% admonition type="note" %}} + Keep an eye on https://drone.grafana.net/grafana/loki for the build after merging ([example](https://drone.grafana.net/grafana/loki/17760/1/2)). + {{% /admonition %}} ## Step 2 -1. create a branch -2. update the `BUILD_IMAGE_VERSION` variable in the `Makefile` -3. run `loki-build-image/version-updater.sh ` to update all the references -4. run `DRONE_SERVER=https://drone.grafana.net/ DRONE_TOKEN= make drone` to update the Drone config to use the new build image -5. create a new PR +1. Create a branch. +2. Update the `BUILD_IMAGE_VERSION` variable in the `Makefile`. +3. Run `loki-build-image/version-updater.sh ` to update all the references. +4. Run `DRONE_SERVER=https://drone.grafana.net/ DRONE_TOKEN= make drone` to update the Drone config to use the new build image. +5. Create a new PR. diff --git a/docs/sources/get-started/deployment-modes.md b/docs/sources/get-started/deployment-modes.md index d0b590086cfe..5b4766f65253 100644 --- a/docs/sources/get-started/deployment-modes.md +++ b/docs/sources/get-started/deployment-modes.md @@ -75,11 +75,14 @@ For release 2.9 the components are: - Ruler - Table Manager (deprecated) -TIP: You can see the complete list of targets for your version of Loki by running Loki with the flag `-list-targets`, for example: +{{% admonition type="tip" %}} +You can see the complete list of targets for your version of Loki by running Loki with the flag `-list-targets`, for example: ```bash docker run docker.io/grafana/loki:2.9.2 -config.file=/etc/loki/local-config.yaml -list-targets ``` +{{% /admonition %}} + ![Microservices mode diagram](../microservices-mode.png "Microservices mode") Running components as individual microservices provides more granularity, letting you scale each component as individual microservices, to better match your specific use case. diff --git a/docs/sources/get-started/labels/_index.md b/docs/sources/get-started/labels/_index.md index 12c4bc6852e3..e33f36d91f41 100644 --- a/docs/sources/get-started/labels/_index.md +++ b/docs/sources/get-started/labels/_index.md @@ -28,9 +28,10 @@ See [structured metadata]({{< relref "./structured-metadata" >}}) for more infor Loki places the same restrictions on label naming as [Prometheus](https://prometheus.io/docs/concepts/data_model/#metric-names-and-labels): -> It may contain ASCII letters and digits, as well as underscores and colons. It must match the regex `[a-zA-Z_:][a-zA-Z0-9_:]*`. -> -> Note: The colons are reserved for user defined recording rules. They should not be used by exporters or direct instrumentation. +- It may contain ASCII letters and digits, as well as underscores and colons. 
It must match the regex `[a-zA-Z_:][a-zA-Z0-9_:]*`. +- The colons are reserved for user defined recording rules. They should not be used by exporters or direct instrumentation. +- Unsupported characters in the label should be converted to an underscore. For example, the label `app.kubernetes.io/name` should be written as `app_kubernetes_io_name`. + ## Loki labels demo diff --git a/docs/sources/operations/authentication.md b/docs/sources/operations/authentication.md index 4235959b1f2c..96081dbab52e 100644 --- a/docs/sources/operations/authentication.md +++ b/docs/sources/operations/authentication.md @@ -18,10 +18,11 @@ A list of open-source reverse proxies you can use: - [OAuth2 proxy](https://github.com/oauth2-proxy/oauth2-proxy) - [HAProxy](https://www.haproxy.org/) -Note that when using Loki in multi-tenant mode, Loki requires the HTTP header +{{% admonition type="note" %}} +When using Loki in multi-tenant mode, Loki requires the HTTP header `X-Scope-OrgID` to be set to a string identifying the tenant; the responsibility of populating this value should be handled by the authenticating reverse proxy. -For more information, read the [multi-tenancy]({{< relref "./multi-tenancy" >}}) documentation. +For more information, read the [multi-tenancy]({{< relref "./multi-tenancy" >}}) documentation.{{% /admonition %}} For information on authenticating Promtail, see the documentation for [how to configure Promtail]({{< relref "../send-data/promtail/configuration" >}}). diff --git a/docs/sources/operations/recording-rules.md b/docs/sources/operations/recording-rules.md index dfef1bf0e470..afac69b75e27 100644 --- a/docs/sources/operations/recording-rules.md +++ b/docs/sources/operations/recording-rules.md @@ -30,9 +30,12 @@ is that Prometheus will, for example, reject a remote-write request with 100 sam When the `ruler` starts up, it will load the WALs for the tenants who have recording rules. These WAL files are stored on disk and are loaded into memory. -Note: WALs are loaded one at a time upon start-up. This is a current limitation of the Loki ruler. +{{% admonition type="note" %}} +WALs are loaded one at a time upon start-up. This is a current limitation of the Loki ruler. For this reason, it is adviseable that the number of rule groups serviced by a ruler be kept to a reasonable size, since _no rule evaluation occurs while WAL replay is in progress (this includes alerting rules)_. +{{% /admonition %}} + ### Truncation @@ -52,8 +55,11 @@ excessively large due to truncation. ## Scaling See Mimir's guide for [configuring Grafana Mimir hash rings](/docs/mimir/latest/configure/configure-hash-rings/) for scaling the ruler using a ring. -Note: the `ruler` shards by rule _group_, not by individual rules. This is an artifact of the fact that Prometheus + +{{% admonition type="note" %}} +The `ruler` shards by rule _group_, not by individual rules. This is an artifact of the fact that Prometheus recording rules need to run in order since one recording rule can reuse another - but this is not possible in Loki. 
+{{% /admonition %}} ## Deployment diff --git a/docs/sources/operations/storage/boltdb-shipper.md b/docs/sources/operations/storage/boltdb-shipper.md index 7f29e1c23a86..df32b95f3eed 100644 --- a/docs/sources/operations/storage/boltdb-shipper.md +++ b/docs/sources/operations/storage/boltdb-shipper.md @@ -7,7 +7,7 @@ weight: 200 # Single Store BoltDB (boltdb-shipper) {{% admonition type="note" %}} -Note that single store BoltDB Shipper is a legacy storage option and is not recommended for new deployments. The [TSDB]({{< relref "./tsdb" >}}) index is the recommended index. +Single store BoltDB Shipper is a legacy storage option and is not recommended for new deployments. The [TSDB]({{< relref "./tsdb" >}}) index is the recommended index. {{% /admonition %}} BoltDB Shipper lets you run Grafana Loki without any dependency on NoSQL stores for storing index. @@ -75,7 +75,10 @@ they both having shipped files for day `18371` and `18372` with prefix `loki_ind └── ingester-1-1587254400.gz ... ``` -**Note:** We also add a timestamp to names of the files to randomize the names to avoid overwriting files when running Ingesters with same name and not have a persistent storage. Timestamps not shown here for simplification. + +{{% admonition type="note" %}} +Loki also adds a timestamp to the names of the files to randomize them and avoid overwriting files when running Ingesters with the same name and without persistent storage. Timestamps are not shown here for simplicity. +{{% /admonition %}} Let us talk about more in depth about how both Ingesters and Queriers work when running them with BoltDB Shipper. @@ -86,7 +89,9 @@ and the BoltDB Shipper looks for new and updated files in that directory at 1 mi When running Loki in microservices mode, there could be multiple ingesters serving write requests. Each ingester generates BoltDB files locally. -**Note:** To avoid any loss of index when an ingester crashes, we recommend running ingesters as a statefulset (when using Kubernetes) with a persistent storage for storing index files. +{{% admonition type="note" %}} +To avoid any loss of index when an ingester crashes, we recommend running ingesters as a StatefulSet (when using Kubernetes) with a persistent storage for storing index files. +{{% /admonition %}} When chunks are flushed, they are available for reads in the object store instantly. The index is not available instantly, since we upload every 15 minutes with the BoltDB shipper. Ingesters expose a new RPC for letting queriers query the ingester's local index for chunks which were recently flushed, but its index might not be available yet with queriers. @@ -132,7 +137,9 @@ While using `boltdb-shipper` avoid configuring WriteDedupe cache since it is use Compactor is a BoltDB Shipper specific service that reduces the index size by deduping the index and merging all the files to a single file per table. We recommend running a Compactor since a single Ingester creates 96 files per day which include a lot of duplicate index entries and querying multiple files per table adds up the overall query latency. -**Note:** There should be only 1 compactor instance running at a time that otherwise could create problems and may lead to data loss. +{{% admonition type="note" %}} +There should be only one compactor instance running at a time; otherwise it could create problems and may lead to data loss.
+{{% /admonition %}} Example compactor configuration with GCS: diff --git a/docs/sources/operations/storage/table-manager/_index.md b/docs/sources/operations/storage/table-manager/_index.md index 81b835a11382..0e6ba42cc71f 100644 --- a/docs/sources/operations/storage/table-manager/_index.md +++ b/docs/sources/operations/storage/table-manager/_index.md @@ -145,9 +145,11 @@ number_of_tables_to_keep = floor(retention_period / table_period) + 1 ![retention](./table-manager-retention.png) +{{% admonition type="note" %}} It's important to note that - due to the internal implementation - the table `period` and `retention_period` **must** be multiples of `24h` in order to get the expected behavior. +{{% /admonition %}} For detailed information on configuring the retention, refer to the [Loki Storage Retention]({{< relref "../retention" >}}) diff --git a/docs/sources/operations/storage/wal.md b/docs/sources/operations/storage/wal.md index 45f8c396ccca..6baf78adc5f4 100644 --- a/docs/sources/operations/storage/wal.md +++ b/docs/sources/operations/storage/wal.md @@ -21,13 +21,13 @@ The Write Ahead Log in Loki takes a few particular tradeoffs compared to other W In the event the WAL is corrupted/partially deleted, Loki will not be able to recover all of its data. In this case, Loki will attempt to recover any data it can, but will not prevent Loki from starting. -Note: the Prometheus metric `loki_ingester_wal_corruptions_total` can be used to track and alert when this happens. +You can use the Prometheus metric `loki_ingester_wal_corruptions_total` to track and alert when this happens. 1) No space left on disk In the event the underlying WAL disk is full, Loki will not fail incoming writes, but neither will it log them to the WAL. In this case, the persistence guarantees across process restarts will not hold. -Note: the Prometheus metric `loki_ingester_wal_disk_full_failures_total` can be used to track and alert when this happens. +You can use the Prometheus metric `loki_ingester_wal_disk_full_failures_total` to track and alert when this happens. ### Backpressure diff --git a/docs/sources/query/logcli.md b/docs/sources/query/logcli.md index 0d870c44150d..297730a589ee 100644 --- a/docs/sources/query/logcli.md +++ b/docs/sources/query/logcli.md @@ -70,9 +70,11 @@ without needing a username and password: export LOKI_ADDR=http://localhost:3100 ``` -> Note: If you are running Loki behind a proxy server and you have -> authentication configured, you will also have to pass in LOKI_USERNAME -> and LOKI_PASSWORD, LOKI_BEARER_TOKEN or LOKI_BEARER_TOKEN_FILE accordingly. +{{% admonition type="note" %}} +If you are running Loki behind a proxy server and you have +authentication configured, you will also have to pass in LOKI_USERNAME +and LOKI_PASSWORD, LOKI_BEARER_TOKEN or LOKI_BEARER_TOKEN_FILE accordingly. +{{% /admonition %}} ```bash $ logcli labels job @@ -512,7 +514,9 @@ You can consume log lines from your `stdin` instead of Loki servers. Say you have log files in your local, and just want to do run some LogQL queries for that, `--stdin` flag can help. -**NOTE: Currently it doesn't support any type of metric queries** +{{% admonition type="note" %}} +Currently it doesn't support any type of metric queries. +{{% /admonition %}} You may have to use `stdin` flag for several reasons 1. Quick way to check and validate a LogQL expressions. 
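A hedged illustration of the `--stdin` workflow described above (the log file path and label value are hypothetical; the `--stdin` flag and `query` command are taken from the surrounding docs):

```bash
# Sketch: run a LogQL filter query over a local log file instead of querying
# a Loki server. The file path and label matcher here are hypothetical.
cat /var/log/app.log | logcli --stdin query '{job="app"} |= "error"'
```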
diff --git a/docs/sources/query/template_functions.md b/docs/sources/query/template_functions.md index 0d2097a0fc23..5b660fa786da 100644 --- a/docs/sources/query/template_functions.md +++ b/docs/sources/query/template_functions.md @@ -470,7 +470,7 @@ Signature: `min(a interface{}, i ...interface{}) int64` Example: ```template -{{ max 1 2 3 }} //output 1 +{{ min 1 2 3 }} //output 1 ``` ## maxf diff --git a/docs/sources/send-data/fluentbit/_index.md b/docs/sources/send-data/fluentbit/_index.md index 3e29bf367794..c9088fdc8f88 100644 --- a/docs/sources/send-data/fluentbit/_index.md +++ b/docs/sources/send-data/fluentbit/_index.md @@ -22,6 +22,43 @@ docker run -v /var/log:/var/log \ grafana/fluent-bit-plugin-loki:latest ``` +Alternatively, you can run the fluent-bit container using the [Docker Hub](https://hub.docker.com/r/fluent/fluent-bit) image: + +### Docker Container Logs + +To ship logs from Docker containers to Grafana Cloud using Fluent Bit, you can use the Fluent Bit Docker image and configure it to forward logs directly to Grafana Cloud's Loki. Below is a step-by-step guide on setting up Fluent Bit for this purpose. + +#### Prerequisites + +- Docker is installed on your machine. +- You have a Grafana Cloud account with access to Loki. + +#### Configuration + +1. Create a Fluent Bit configuration file named `fluent-bit.conf` with the following content, which defines the input from Docker container logs and sets up the output to send logs to your Grafana Cloud Loki instance: + + ```ini + [SERVICE] + Flush 1 + Log_Level info + + [INPUT] + Name tail + Path /var/lib/docker/containers/*/*.log + Parser docker + Tag docker.* + + [OUTPUT] + Name loki + Match * + Host logs-prod-006.grafana.net + Port 443 + TLS On + TLS.Verify On + HTTP_User 478625 + HTTP_Passwd YOUR_GRAFANA_CLOUD_API_KEY + Labels job=fluentbit + ``` + ### Kubernetes You can run Fluent Bit as a [Daemonset](https://kubernetes.io/docs/concepts/workloads/controllers/daemonset/) to collect all your Kubernetes workload logs. diff --git a/docs/sources/send-data/lambda-promtail/_index.md b/docs/sources/send-data/lambda-promtail/_index.md index 0d39a75f143e..783b5d231bb9 100644 --- a/docs/sources/send-data/lambda-promtail/_index.md +++ b/docs/sources/send-data/lambda-promtail/_index.md @@ -99,7 +99,9 @@ Ephemeral jobs can quite easily run afoul of cardinality best practices. During For those using Cloudwatch and wishing to test out Loki in a low-risk way, this workflow allows piping Cloudwatch logs to Loki regardless of the event source (EC2, Kubernetes, Lambda, ECS, etc) without setting up a set of Promtail daemons across their infrastructure. However, running Promtail as a daemon on your infrastructure is the best-practice deployment strategy in the long term for flexibility, reliability, performance, and cost. -Note: Propagating logs from Cloudwatch to Loki means you'll still need to _pay_ for Cloudwatch. +{{% admonition type="note" %}} +Propagating logs from Cloudwatch to Loki means you'll still need to _pay_ for Cloudwatch. +{{% /admonition %}} ### VPC Flow logs @@ -163,7 +165,9 @@ Incoming logs can have seven special labels assigned to them which can be used i ### Promtail labels -Note: This section is relevant if running Promtail between lambda-promtail and the end Loki deployment and was used to circumvent `out of order` problems prior to the v2.4 Loki release which removed the ordering constraint.
+{{% admonition type="note" %}} +This section is relevant if running Promtail between lambda-promtail and the end Loki deployment and was used to circumvent `out of order` problems prior to the v2.4 Loki release which removed the ordering constraint. +{{% /admonition %}} As stated earlier, this workflow moves the worst case stream cardinality from `number_of_log_streams` -> `number_of_log_groups` * `number_of_promtails`. For this reason, each Promtail must have a unique label attached to logs it processes (ideally via something like `--client.external-labels=promtail=${HOSTNAME}`) and it's advised to run a small number of Promtails behind a load balancer according to your throughput and redundancy needs. @@ -191,7 +195,9 @@ The provided Terraform and CloudFormation files are meant to cover the default u ## Example Promtail Config -Note: this should be run in conjunction with a Promtail-specific label attached, ideally via a flag argument like `--client.external-labels=promtail=${HOSTNAME}`. It will receive writes via the push-api on ports `3500` (http) and `3600` (grpc). +{{% admonition type="note" %}} +This should be run in conjunction with a Promtail-specific label attached, ideally via a flag argument like `--client.external-labels=promtail=${HOSTNAME}`. It will receive writes via the push-api on ports `3500` (http) and `3600` (grpc). +{{% /admonition %}} ```yaml server: diff --git a/docs/sources/send-data/promtail/cloud/ec2/_index.md b/docs/sources/send-data/promtail/cloud/ec2/_index.md index 18434d734bac..8f90523018ee 100644 --- a/docs/sources/send-data/promtail/cloud/ec2/_index.md +++ b/docs/sources/send-data/promtail/cloud/ec2/_index.md @@ -287,5 +287,5 @@ Let's head back to Grafana and verify that your Promtail logs are available in G [live tailing]: https://grafana.com/docs/grafana/latest/features/datasources/loki/#live-tailing [systemd]: ../../../installation/helm#run-promtail-with-systemd-journal-support [journald]: https://www.freedesktop.org/software/systemd/man/systemd-journald.service.html -[final config]: https://github.com/grafana/loki/blob/main/docs/sources/clients/aws/ec2/promtail-ec2-final.yaml +[final config]: https://github.com/grafana/loki/blob/main/docs/sources/send-data/promtail/cloud/ec2/promtail-ec2-final.yaml [relabeling]: https://prometheus.io/docs/prometheus/latest/configuration/configuration/#relabel_config diff --git a/docs/sources/send-data/promtail/cloud/ecs/_index.md b/docs/sources/send-data/promtail/cloud/ecs/_index.md index e9eaca48f52a..5b933c2ec611 100644 --- a/docs/sources/send-data/promtail/cloud/ecs/_index.md +++ b/docs/sources/send-data/promtail/cloud/ecs/_index.md @@ -39,7 +39,7 @@ We will also need an [IAM Role to run containers][ecs iam] with, let's create a > You might already have this `ecsTaskExecutionRole` role in your AWS account if that's the case you can skip this step. 
```bash -curl https://raw.githubusercontent.com/grafana/loki/main/docs/sources/clients/aws/ecs/ecs-role.json > ecs-role.json +curl https://raw.githubusercontent.com/grafana/loki/main/docs/sources/send-data/promtail/cloud/ecs/ecs-role.json > ecs-role.json aws iam create-role --role-name ecsTaskExecutionRole --assume-role-policy-document file://ecs-role.json { diff --git a/docs/sources/send-data/promtail/cloud/eks/_index.md b/docs/sources/send-data/promtail/cloud/eks/_index.md index 450ba562da53..bc6ad6f366d8 100644 --- a/docs/sources/send-data/promtail/cloud/eks/_index.md +++ b/docs/sources/send-data/promtail/cloud/eks/_index.md @@ -233,7 +233,7 @@ helm upgrade promtail loki/promtail -n monitoring -f values.yaml And deploy the `eventrouter` using: ```bash -kubectl create -f https://raw.githubusercontent.com/grafana/loki/main/docs/sources/clients/aws/eks/eventrouter.yaml +kubectl create -f https://raw.githubusercontent.com/grafana/loki/main/docs/sources/send-data/promtail/cloud/eks/eventrouter.yaml ``` You should see output similar to the following: ```bash diff --git a/docs/sources/send-data/promtail/stages/drop.md b/docs/sources/send-data/promtail/stages/drop.md index 2acc2443ba85..77b66020bb23 100644 --- a/docs/sources/send-data/promtail/stages/drop.md +++ b/docs/sources/send-data/promtail/stages/drop.md @@ -126,7 +126,9 @@ Would drop this log line: #### Drop old log lines -**NOTE** For `older_than` to work, you must be using the [timestamp]({{< relref "./timestamp" >}}) stage to set the timestamp from the ingested log line _before_ applying the `drop` stage. +{{% admonition type="note" %}} +For `older_than` to work, you must be using the [timestamp]({{< relref "./timestamp" >}}) stage to set the timestamp from the ingested log line _before_ applying the `drop` stage. +{{% /admonition %}} Given the pipeline: diff --git a/docs/sources/send-data/promtail/stages/json.md b/docs/sources/send-data/promtail/stages/json.md index 2f3c1bd44c73..6babe1f60700 100644 --- a/docs/sources/send-data/promtail/stages/json.md +++ b/docs/sources/send-data/promtail/stages/json.md @@ -134,5 +134,7 @@ The following key-value pairs would be created in the set of extracted data: - `stream`: `stderr` - `timestamp`: `2019-04-30T02:12:41.8443515` -Note that referring to `grpc.stream` without the combination of double quotes +{{% admonition type="note" %}} +Referring to `grpc.stream` without the combination of double quotes wrapped in single quotes will not work properly. +{{% /admonition %}} diff --git a/docs/sources/setup/install/local.md b/docs/sources/setup/install/local.md index 91d972b6368e..dbdeb8ca3a16 100644 --- a/docs/sources/setup/install/local.md +++ b/docs/sources/setup/install/local.md @@ -34,10 +34,10 @@ The configuration specifies running Loki as a single binary. 1. Navigate to the [release page](https://github.com/grafana/loki/releases/). 2. Scroll down to the Assets section under the version that you want to install. 3. Download the Loki and Promtail .zip files that correspond to your system. - **Note:** Do not download LogCLI or Loki Canary at this time. `LogCLI` allows you to run Loki queries in a command line interface. [Loki Canary]({{< relref "../../operations/loki-canary" >}}) is a tool to audit Loki performance. + Do not download LogCLI or Loki Canary at this time. `LogCLI` allows you to run Loki queries in a command line interface. [Loki Canary]({{< relref "../../operations/loki-canary" >}}) is a tool to audit Loki performance. 4. Unzip the package contents into the same directory. 
This is where the two programs will run. 5. In the command line, change directory (`cd` on most systems) to the directory with Loki and Promtail. Copy and paste the commands below into your command line to download generic configuration files. - **Note:** Use the corresponding Git refs that match your downloaded Loki version to get the correct configuration file. For example, if you are using Loki version 2.9.2, you need to use the `https://raw.githubusercontent.com/grafana/loki/v2.9.2/cmd/loki/loki-local-config.yaml` URL to download the configuration file that corresponds to the Loki version you aim to run. + Use the corresponding Git refs that match your downloaded Loki version to get the correct configuration file. For example, if you are using Loki version 2.9.2, you need to use the `https://raw.githubusercontent.com/grafana/loki/v2.9.2/cmd/loki/loki-local-config.yaml` URL to download the configuration file that corresponds to the Loki version you aim to run. ``` wget https://raw.githubusercontent.com/grafana/loki/main/cmd/loki/loki-local-config.yaml diff --git a/docs/sources/setup/upgrade/_index.md b/docs/sources/setup/upgrade/_index.md index e9483e521940..663201820e1e 100644 --- a/docs/sources/setup/upgrade/_index.md +++ b/docs/sources/setup/upgrade/_index.md @@ -372,8 +372,10 @@ limits_config: retention_period: 744h ``` -**Note:** In previous versions, the zero value of `0` or `0s` will result in **immediate deletion of all logs**, +{{% admonition type="note" %}} +In previous versions, the zero value of `0` or `0s` will result in **immediate deletion of all logs**, only in 2.8 and forward releases does the zero value disable retention. +{{% /admonition %}} #### metrics.go log line `subqueries` replaced with `splits` and `shards` @@ -387,7 +389,9 @@ In 2.8 we no longer include `subqueries` in metrics.go, it does still exist in t Instead, now you can use `splits` to see how many split by time intervals were created and `shards` to see the total number of shards created for a query. -Note: currently not every query can be sharded and a shards value of zero is a good indicator the query was not able to be sharded. +{{% admonition type="note" %}} +Currently not every query can be sharded and a shards value of zero is a good indicator the query was not able to be sharded. +{{% /admonition %}} ### Promtail @@ -418,7 +422,9 @@ ruler: #### query-frontend Kubernetes headless service changed to load balanced service -*Note:* This is relevant only if you are using [jsonnet for deploying Loki in Kubernetes](/docs/loki/latest/installation/tanka/) +{{% admonition type="note" %}} +This is relevant only if you are using [jsonnet for deploying Loki in Kubernetes](/docs/loki/latest/installation/tanka/). +{{% /admonition %}} The `query-frontend` Kubernetes service was previously headless and was used for two purposes: * Distributing the Loki query requests amongst all the available Query Frontend pods. @@ -951,7 +957,9 @@ In Loki 2.2 we changed the internal version of our chunk format from v2 to v3, t This makes it important to first upgrade to 2.0, 2.0.1, or 2.1 **before** upgrading to 2.2 so that if you need to rollback for any reason you can do so easily. -**Note:** 2.0 and 2.0.1 are identical in every aspect except 2.0.1 contains the code necessary to read the v3 chunk format. Therefor if you are on 2.0 and ugrade to 2.2, if you want to rollback, you must rollback to 2.0.1. 
+{{% admonition type="note" %}} +2.0 and 2.0.1 are identical in every aspect except 2.0.1 contains the code necessary to read the v3 chunk format. Therefore, if you are on 2.0 and upgrade to 2.2 and later want to roll back, you must roll back to 2.0.1. +{{% /admonition %}} ### Loki Config @@ -1095,9 +1103,14 @@ This likely only affects a small portion of tanka users because the default sche } ``` ->**NOTE** If you had set `index_period_hours` to a value other than 168h (the previous default) you must update this in the above config `period:` to match what you chose. +{{% admonition type="note" %}} +If you had set `index_period_hours` to a value other than 168h (the previous default), you must update this in the above config `period:` to match what you chose. +{{% /admonition %}} + ->**NOTE** We have changed the default index store to `boltdb-shipper` it's important to add `using_boltdb_shipper: false,` until you are ready to change (if you want to change) +{{% admonition type="note" %}} +We have changed the default index store to `boltdb-shipper`; it's important to add `using_boltdb_shipper: false,` until you are ready to change (if you want to change). +{{% /admonition %}} Changing the jsonnet config to use the `boltdb-shipper` type is the same as [below](#upgrading-schema-to-use-boltdb-shipper-andor-v11-schema) where you need to add a new schema section. @@ -1139,9 +1152,9 @@ _THIS BEING SAID_ we are not expecting problems, our testing so far has not unco Report any problems via GitHub issues or reach us on the #loki slack channel. -**Note if are using boltdb-shipper and were running with high availability and separate filesystems** - -This was a poorly documented and even more experimental mode we toyed with using boltdb-shipper. For now we removed the documentation and also any kind of support for this mode. +{{% admonition type="note" %}} +If you are using boltdb-shipper and were running with high availability and separate filesystems, this was a poorly documented and even more experimental mode we toyed with using boltdb-shipper. For now we removed the documentation and also any kind of support for this mode. +{{% /admonition %}} To use boltdb-shipper in 2.0 you need a shared storage (S3, GCS, etc), the mode of running with separate filesystem stores in HA using a ring is not officially supported. @@ -1284,7 +1297,9 @@ schema_config: ``` If you are not on `schema: v11` this would be a good opportunity to make that change _in the new schema config_ also. -**NOTE** If the current time in your timezone is after midnight UTC already, set the date one additional day forward. +{{% admonition type="note" %}} +If the current time in your timezone is after midnight UTC already, set the date one additional day forward. +{{% /admonition %}} There was also a significant overhaul to how boltdb-shipper internals, this should not be visible to a user but as this feature is experimental and under development bug are possible! @@ -1343,7 +1358,9 @@ Defaulting to `gcs,bigtable` was confusing for anyone using ksonnet with other s ## 1.5.0 -Note: The required upgrade path outlined for version 1.4.0 below is still true for moving to 1.5.0 from any release older than 1.4.0 (e.g. 1.3.0->1.5.0 needs to also look at the 1.4.0 upgrade requirements). +{{% admonition type="note" %}} +The required upgrade path outlined for version 1.4.0 below is still true for moving to 1.5.0 from any release older than 1.4.0 (e.g. 1.3.0 -> 1.5.0 needs to also look at the 1.4.0 upgrade requirements).
+{{% /admonition %}} ### Breaking config changes! @@ -1397,7 +1414,9 @@ Not every environment will allow this capability however, it's possible to restr #### Filesystem -**Note the location Loki is looking for files with the provided config in the docker image has changed** +{{% admonition type="note" %}} +The location Loki is looking for files with the provided config in the docker image has changed. +{{% /admonition %}} In 1.4.0 and earlier the included config file in the docker container was using directories: @@ -1498,7 +1517,7 @@ The other config changes should not be relevant to Loki. The newly vendored version of Cortex removes code related to de-normalized tokens in the ring. What you need to know is this: -*Note:* A "shared ring" as mentioned below refers to using *consul* or *etcd* for values in the following config: +A "shared ring" as mentioned below refers to using *consul* or *etcd* for values in the following config: ```yaml kvstore: @@ -1517,14 +1536,14 @@ There are two options for upgrade if you are not on version 1.3.0 and are using OR -**Note:** If you are running a single binary you only need to add this flag to your single binary command. +- If you are running a single binary you only need to add this flag to your single binary command. 1. Add the following configuration to your ingesters command: `-ingester.normalise-tokens=true` 1. Restart your ingesters with this config 1. Proceed with upgrading to v1.4.0 1. Remove the config option (only do this after everything is running v1.4.0) -**Note:** It's also possible to enable this flag via config file, see the [`lifecycler_config`](https://github.com/grafana/loki/tree/v1.3.0/docs/configuration#lifecycler_config) configuration option. +It is also possible to enable this flag via config file, see the [`lifecycler_config`](https://github.com/grafana/loki/tree/v1.3.0/docs/configuration#lifecycler_config) configuration option. If using the Helm Loki chart: diff --git a/docs/sources/storage/_index.md b/docs/sources/storage/_index.md index 81a767e1add3..bbbebf756fc7 100644 --- a/docs/sources/storage/_index.md +++ b/docs/sources/storage/_index.md @@ -82,7 +82,9 @@ You may use any substitutable services, such as those that implement the S3 API Cassandra is a popular database and one of Loki's possible chunk stores and is production safe. -> **Note:** This storage type for chunks is deprecated and may be removed in future major versions of Loki. +{{< collapse title="Title of hidden content" >}} +This storage type for chunks is deprecated and may be removed in future major versions of Loki. +{{< /collapse >}} ## Index storage @@ -90,19 +92,25 @@ Cassandra is a popular database and one of Loki's possible chunk stores and is p Cassandra can also be utilized for the index store and aside from the [boltdb-shipper]({{< relref "../operations/storage/boltdb-shipper" >}}), it's the only non-cloud offering that can be used for the index that's horizontally scalable and has configurable replication. It's a good candidate when you already run Cassandra, are running on-prem, or do not wish to use a managed cloud offering. -> **Note:** This storage type for indexes is deprecated and may be removed in future major versions of Loki. +{{< collapse title="Title of hidden content" >}} +This storage type for indexes is deprecated and may be removed in future major versions of Loki. +{{< /collapse >}} ### BigTable (deprecated) Bigtable is a cloud database offered by Google. 
It is a good candidate for a managed index store if you're already using it (due to its heavy fixed costs) or wish to run in GCP. -> **Note:** This storage type for indexes is deprecated and may be removed in future major versions of Loki. +{{< collapse title="Title of hidden content" >}} +This storage type for indexes is deprecated and may be removed in future major versions of Loki. +{{< /collapse >}} ### DynamoDB (deprecated) DynamoDB is a cloud database offered by AWS. It is a good candidate for a managed index store, especially if you're already running in AWS. -> **Note:** This storage type for indexes is deprecated and may be removed in future major versions of Loki. +{{< collapse title="Title of hidden content" >}} +This storage type for indexes is deprecated and may be removed in future major versions of Loki. +{{< /collapse >}} #### Rate limiting @@ -112,7 +120,9 @@ DynamoDB is susceptible to rate limiting, particularly due to overconsuming what BoltDB is an embedded database on disk. It is not replicated and thus cannot be used for high availability or clustered Loki deployments, but is commonly paired with a `filesystem` chunk store for proof of concept deployments, trying out Loki, and development. The [boltdb-shipper]({{< relref "../operations/storage/boltdb-shipper" >}}) aims to support clustered deployments using `boltdb` as an index. -> **Note:** This storage type for indexes is deprecated and may be removed in future major versions of Loki. +{{< collapse title="Title of hidden content" >}} +This storage type for indexes is deprecated and may be removed in future major versions of Loki. +{{< /collapse >}} ## Schema Config @@ -428,7 +438,9 @@ storage_config: ### On premise deployment (Cassandra+Cassandra) -> **Note:** Cassandra as storage backend for chunks and indexes is deprecated. +{{< collapse title="Title of hidden content" >}} +Cassandra as storage backend for chunks and indexes is deprecated. +{{< /collapse >}} **Keeping this for posterity, but this is likely not a common config. 
Cassandra should work and could be faster in some situations but is likely much more expensive.** diff --git a/loki-build-image/Dockerfile b/loki-build-image/Dockerfile index 5dba1a33347d..a0263270f66f 100644 --- a/loki-build-image/Dockerfile +++ b/loki-build-image/Dockerfile @@ -15,7 +15,7 @@ RUN BIN=$([ "$TARGETARCH" = "arm64" ] && echo "helm-docs_Linux_arm64" || echo "h curl -L "https://github.com/norwoodj/helm-docs/releases/download/v1.11.2/$BIN.tar.gz" | tar zx && \ install -t /usr/local/bin helm-docs -FROM alpine:3.18.4 as lychee +FROM alpine:3.18.5 as lychee ARG TARGETARCH ARG LYCHEE_VER="0.7.0" RUN apk add --no-cache curl && \ @@ -24,18 +24,18 @@ RUN apk add --no-cache curl && \ mv /tmp/lychee /usr/bin/lychee && \ rm -rf "/tmp/linux-$TARGETARCH" /tmp/lychee-$LYCHEE_VER.tgz -FROM alpine:3.18.4 as golangci +FROM alpine:3.18.5 as golangci RUN apk add --no-cache curl && \ cd / && \ curl -sfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s v1.55.1 -FROM alpine:3.18.4 as buf +FROM alpine:3.18.5 as buf ARG TARGETOS RUN apk add --no-cache curl && \ curl -sSL "https://github.com/bufbuild/buf/releases/download/v1.4.0/buf-$TARGETOS-$(uname -m)" -o "/usr/bin/buf" && \ chmod +x "/usr/bin/buf" -FROM alpine:3.18.4 as docker +FROM alpine:3.18.5 as docker RUN apk add --no-cache docker-cli docker-cli-buildx FROM golang:1.21.3-bullseye as drone diff --git a/loki-build-image/README.md b/loki-build-image/README.md index 735d5a2e6aa6..26cc4c148259 100644 --- a/loki-build-image/README.md +++ b/loki-build-image/README.md @@ -2,7 +2,11 @@ ## Versions -### 0.30.18 +### 0.33.0 + +- Update to Alpine 3.18.5 + +### 0.30.1 - Update to Go version 1.21.3 diff --git a/operator/CHANGELOG.md b/operator/CHANGELOG.md index f5fb473b7b8f..9ea61a0dba4e 100644 --- a/operator/CHANGELOG.md +++ b/operator/CHANGELOG.md @@ -1,5 +1,6 @@ ## Main +- [11481](https://github.com/grafana/loki/pull/11481) **JoaoBraveCoding**: Adds AWS STS support - [11533](https://github.com/grafana/loki/pull/11533) **periklis**: Add serviceaccount per LokiStack resource - [11158](https://github.com/grafana/loki/pull/11158) **btaani**: operator: Add warning for old schema configuration - [11473](https://github.com/grafana/loki/pull/11473) **JoaoBraveCoding**: Adds structured metadata dashboards diff --git a/operator/Dockerfile.cross b/operator/Dockerfile.cross index 5a3986c95c98..ba2b4319a922 100644 --- a/operator/Dockerfile.cross +++ b/operator/Dockerfile.cross @@ -1,4 +1,4 @@ -ARG BUILD_IMAGE=grafana/loki-build-image:0.30.1 +ARG BUILD_IMAGE=grafana/loki-build-image:0.33.0 FROM golang:1.20.10-alpine as goenv RUN go env GOARCH > /goarch && \ diff --git a/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml index 36151790a209..2915af504fd3 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: docker.io/grafana/loki-operator:0.5.0 - createdAt: "2023-12-12T09:22:19Z" + createdAt: "2024-01-10T18:25:00Z" description: The Community Loki Operator provides Kubernetes native deployment and management of Loki and related logging components. 
features.operators.openshift.io/disconnected: "true" @@ -1591,6 +1591,12 @@ spec: - alertmanagers verbs: - patch + - apiGroups: + - monitoring.coreos.com + resources: + - alertmanagers/api + verbs: + - create - apiGroups: - monitoring.coreos.com resources: diff --git a/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml index 322bc606611f..b78b8f6d30b9 100644 --- a/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: docker.io/grafana/loki-operator:0.5.0 - createdAt: "2023-12-12T09:22:17Z" + createdAt: "2024-01-10T18:24:59Z" description: The Community Loki Operator provides Kubernetes native deployment and management of Loki and related logging components. operators.operatorframework.io/builder: operator-sdk-unknown @@ -1571,6 +1571,12 @@ spec: - alertmanagers verbs: - patch + - apiGroups: + - monitoring.coreos.com + resources: + - alertmanagers/api + verbs: + - create - apiGroups: - monitoring.coreos.com resources: diff --git a/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml index f4a951400e94..b0fca996ce78 100644 --- a/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: quay.io/openshift-logging/loki-operator:0.1.0 - createdAt: "2023-12-12T09:22:21Z" + createdAt: "2024-01-10T18:25:02Z" description: | The Loki Operator for OCP provides a means for configuring and managing a Loki stack for cluster logging. 
## Prerequisites and Requirements @@ -1576,6 +1576,12 @@ spec: - alertmanagers verbs: - patch + - apiGroups: + - monitoring.coreos.com + resources: + - alertmanagers/api + verbs: + - create - apiGroups: - monitoring.coreos.com resources: diff --git a/operator/config/rbac/role.yaml b/operator/config/rbac/role.yaml index d7b881ef8e33..09dc60b8c33b 100644 --- a/operator/config/rbac/role.yaml +++ b/operator/config/rbac/role.yaml @@ -175,6 +175,12 @@ rules: - alertmanagers verbs: - patch +- apiGroups: + - monitoring.coreos.com + resources: + - alertmanagers/api + verbs: + - create - apiGroups: - monitoring.coreos.com resources: diff --git a/operator/controllers/loki/lokistack_controller.go b/operator/controllers/loki/lokistack_controller.go index 49b5bdab069e..487390d7287b 100644 --- a/operator/controllers/loki/lokistack_controller.go +++ b/operator/controllers/loki/lokistack_controller.go @@ -123,6 +123,7 @@ type LokiStackReconciler struct { // +kubebuilder:rbac:groups=rbac.authorization.k8s.io,resources=clusterrolebindings;clusterroles;roles;rolebindings,verbs=get;list;watch;create;update;patch;delete // +kubebuilder:rbac:groups=monitoring.coreos.com,resources=servicemonitors;prometheusrules,verbs=get;list;watch;create;update;delete // +kubebuilder:rbac:groups=monitoring.coreos.com,resources=alertmanagers,verbs=patch +// +kubebuilder:rbac:groups=monitoring.coreos.com,resources=alertmanagers/api,verbs=create // +kubebuilder:rbac:urls=/api/v2/alerts,verbs=create // +kubebuilder:rbac:groups=coordination.k8s.io,resources=leases,verbs=get;create;update // +kubebuilder:rbac:groups=networking.k8s.io,resources=ingresses,verbs=get;list;watch;create;update diff --git a/operator/internal/handlers/internal/storage/secrets.go b/operator/internal/handlers/internal/storage/secrets.go index 0e027be8f326..1341728e7cec 100644 --- a/operator/internal/handlers/internal/storage/secrets.go +++ b/operator/internal/handlers/internal/storage/secrets.go @@ -126,37 +126,62 @@ func extractGCSConfigSecret(s *corev1.Secret) (*storage.GCSStorageConfig, error) func extractS3ConfigSecret(s *corev1.Secret) (*storage.S3StorageConfig, error) { // Extract and validate mandatory fields - endpoint := s.Data[storage.KeyAWSEndpoint] - if len(endpoint) == 0 { - return nil, kverrors.New("missing secret field", "field", storage.KeyAWSEndpoint) - } buckets := s.Data[storage.KeyAWSBucketNames] if len(buckets) == 0 { return nil, kverrors.New("missing secret field", "field", storage.KeyAWSBucketNames) } - id := s.Data[storage.KeyAWSAccessKeyID] - if len(id) == 0 { - return nil, kverrors.New("missing secret field", "field", storage.KeyAWSAccessKeyID) - } - secret := s.Data[storage.KeyAWSAccessKeySecret] - if len(secret) == 0 { - return nil, kverrors.New("missing secret field", "field", storage.KeyAWSAccessKeySecret) - } - // Extract and validate optional fields - region := s.Data[storage.KeyAWSRegion] + var ( + // Fields related with static authentication + endpoint = s.Data[storage.KeyAWSEndpoint] + id = s.Data[storage.KeyAWSAccessKeyID] + secret = s.Data[storage.KeyAWSAccessKeySecret] + // Fields related with STS authentication + roleArn = s.Data[storage.KeyAWSRoleArn] + audience = s.Data[storage.KeyAWSAudience] + // Optional fields + region = s.Data[storage.KeyAWSRegion] + ) sseCfg, err := extractS3SSEConfig(s.Data) if err != nil { return nil, err } - return &storage.S3StorageConfig{ - Endpoint: string(endpoint), - Buckets: string(buckets), - Region: string(region), - SSE: sseCfg, - }, nil + cfg := &storage.S3StorageConfig{ + Buckets: 
string(buckets), + Region: string(region), + SSE: sseCfg, + } + + switch { + case len(roleArn) == 0: + cfg.Endpoint = string(endpoint) + + if len(endpoint) == 0 { + return nil, kverrors.New("missing secret field", "field", storage.KeyAWSEndpoint) + } + if len(id) == 0 { + return nil, kverrors.New("missing secret field", "field", storage.KeyAWSAccessKeyID) + } + if len(secret) == 0 { + return nil, kverrors.New("missing secret field", "field", storage.KeyAWSAccessKeySecret) + } + + return cfg, nil + // TODO(JoaoBraveCoding) For CCO integration here we will first check if we get a secret, OS use-case + case len(roleArn) != 0: // Extract STS from user provided values + cfg.STS = true + cfg.Audience = string(audience) + + // In the STS case region is not an optional field + if len(region) == 0 { + return nil, kverrors.New("missing secret field", "field", storage.KeyAWSRegion) + } + return cfg, nil + default: + return nil, kverrors.New("missing secret fields for static or sts authentication") + } } func extractS3SSEConfig(d map[string][]byte) (storage.S3SSEConfig, error) { diff --git a/operator/internal/handlers/internal/storage/secrets_test.go b/operator/internal/handlers/internal/storage/secrets_test.go index eea31fbd522c..46ddc133f9f4 100644 --- a/operator/internal/handlers/internal/storage/secrets_test.go +++ b/operator/internal/handlers/internal/storage/secrets_test.go @@ -320,6 +320,40 @@ func TestS3Extract(t *testing.T) { }, }, }, + { + name: "STS missing region", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Data: map[string][]byte{ + "bucketnames": []byte("this,that"), + "role_arn": []byte("role"), + }, + }, + wantErr: true, + }, + { + name: "STS with region", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Data: map[string][]byte{ + "bucketnames": []byte("this,that"), + "role_arn": []byte("role"), + "region": []byte("here"), + }, + }, + }, + { + name: "STS all set", + secret: &corev1.Secret{ + ObjectMeta: metav1.ObjectMeta{Name: "test"}, + Data: map[string][]byte{ + "bucketnames": []byte("this,that"), + "role_arn": []byte("role"), + "region": []byte("here"), + "audience": []byte("audience"), + }, + }, + }, } for _, tst := range table { tst := tst diff --git a/operator/internal/handlers/lokistack_create_or_update.go b/operator/internal/handlers/lokistack_create_or_update.go index 7735888a79b7..49c84af4dcf4 100644 --- a/operator/internal/handlers/lokistack_create_or_update.go +++ b/operator/internal/handlers/lokistack_create_or_update.go @@ -88,6 +88,7 @@ func CreateOrUpdateLokiStack( Requeue: false, } } + objStore.OpenShiftEnabled = fg.OpenShift.Enabled storageSchemas, err := storageoptions.BuildSchemaConfig( time.Now().UTC(), diff --git a/operator/internal/manifests/internal/config/build_test.go b/operator/internal/manifests/internal/config/build_test.go index 537ec84bf71a..27f7b7ed02cb 100644 --- a/operator/internal/manifests/internal/config/build_test.go +++ b/operator/internal/manifests/internal/config/build_test.go @@ -5418,3 +5418,187 @@ analytics: }) } } + +func TestBuild_ConfigAndRuntimeConfig_STS(t *testing.T) { + objStorageConfig := storage.Options{ + SharedStore: lokiv1.ObjectStorageSecretS3, + S3: &storage.S3StorageConfig{ + STS: true, + Region: "my-region", + Buckets: "my-bucket", + }, + Schemas: []lokiv1.ObjectStorageSchema{ + { + Version: lokiv1.ObjectStorageSchemaV11, + EffectiveDate: "2020-10-01", + }, + }, + } + expStorageConfig := ` + s3: + s3: s3://my-region/my-bucket + s3forcepathstyle: false` + + expCfg := ` 
+--- +auth_enabled: true +chunk_store_config: + chunk_cache_config: + embedded_cache: + enabled: true + max_size_mb: 500 +common: + storage: +${STORAGE_CONFIG} + compactor_grpc_address: loki-compactor-grpc-lokistack-dev.default.svc.cluster.local:9095 + ring: + kvstore: + store: memberlist + heartbeat_period: 5s + heartbeat_timeout: 1m + instance_port: 9095 +compactor: + compaction_interval: 2h + working_directory: /tmp/loki/compactor +frontend: + tail_proxy_url: http://loki-querier-http-lokistack-dev.default.svc.cluster.local:3100 + compress_responses: true + max_outstanding_per_tenant: 4096 + log_queries_longer_than: 5s +frontend_worker: + frontend_address: loki-query-frontend-grpc-lokistack-dev.default.svc.cluster.local:9095 + grpc_client_config: + max_send_msg_size: 104857600 + match_max_concurrent: true +ingester: + chunk_block_size: 262144 + chunk_encoding: snappy + chunk_idle_period: 1h + chunk_retain_period: 5m + chunk_target_size: 2097152 + flush_op_timeout: 10m + lifecycler: + final_sleep: 0s + join_after: 30s + num_tokens: 512 + ring: + replication_factor: 1 + max_chunk_age: 2h + max_transfer_retries: 0 + wal: + enabled: true + dir: /tmp/wal + replay_memory_ceiling: 2500 +ingester_client: + grpc_client_config: + max_recv_msg_size: 67108864 + remote_timeout: 1s +# NOTE: Keep the order of keys as in Loki docs +# to enable easy diffs when vendoring newer +# Loki releases. +# (See https://grafana.com/docs/loki/latest/configuration/#limits_config) +# +# Values for not exposed fields are taken from the grafana/loki production +# configuration manifests. +# (See https://github.com/grafana/loki/blob/main/production/ksonnet/loki/config.libsonnet) +limits_config: + ingestion_rate_strategy: global + ingestion_rate_mb: 4 + ingestion_burst_size_mb: 6 + max_label_name_length: 1024 + max_label_value_length: 2048 + max_label_names_per_series: 30 + reject_old_samples: true + reject_old_samples_max_age: 168h + creation_grace_period: 10m + enforce_metric_name: false + # Keep max_streams_per_user always to 0 to default + # using max_global_streams_per_user always. 
+ # (See https://github.com/grafana/loki/blob/main/pkg/ingester/limiter.go#L73) + max_streams_per_user: 0 + max_line_size: 256000 + max_entries_limit_per_query: 5000 + max_global_streams_per_user: 0 + max_chunks_per_query: 2000000 + max_query_length: 721h + max_query_parallelism: 32 + tsdb_max_query_parallelism: 512 + max_query_series: 500 + cardinality_limit: 100000 + max_streams_matchers_per_query: 1000 + max_cache_freshness_per_query: 10m + per_stream_rate_limit: 3MB + per_stream_rate_limit_burst: 15MB + split_queries_by_interval: 30m + query_timeout: 1m + allow_structured_metadata: true +memberlist: + abort_if_cluster_join_fails: true + advertise_port: 7946 + bind_port: 7946 + join_members: + - loki-gossip-ring-lokistack-dev.default.svc.cluster.local:7946 + max_join_backoff: 1m + max_join_retries: 10 + min_join_backoff: 1s +querier: + engine: + max_look_back_period: 30s + extra_query_delay: 0s + max_concurrent: 2 + query_ingesters_within: 3h + tail_max_duration: 1h +query_range: + align_queries_with_step: true + cache_results: true + max_retries: 5 + results_cache: + cache: + embedded_cache: + enabled: true + max_size_mb: 500 + parallelise_shardable_queries: true +schema_config: + configs: + - from: "2020-10-01" + index: + period: 24h + prefix: index_ + object_store: s3 + schema: v11 + store: boltdb-shipper +server: + graceful_shutdown_timeout: 5s + grpc_server_min_time_between_pings: '10s' + grpc_server_ping_without_stream_allowed: true + grpc_server_max_concurrent_streams: 1000 + grpc_server_max_recv_msg_size: 104857600 + grpc_server_max_send_msg_size: 104857600 + http_listen_port: 3100 + http_server_idle_timeout: 30s + http_server_read_timeout: 30s + http_server_write_timeout: 10m0s + log_level: info +storage_config: + boltdb_shipper: + active_index_directory: /tmp/loki/index + cache_location: /tmp/loki/index_cache + cache_ttl: 24h + resync_interval: 5m + shared_store: s3 + index_gateway_client: + server_address: dns:///loki-index-gateway-grpc-lokistack-dev.default.svc.cluster.local:9095 +tracing: + enabled: false +analytics: + reporting_enabled: true +` + expCfg = strings.Replace(expCfg, "${STORAGE_CONFIG}", expStorageConfig, 1) + + opts := defaultOptions() + opts.ObjectStorage = objStorageConfig + + cfg, _, err := Build(opts) + require.NoError(t, err) + require.YAMLEq(t, expCfg, string(cfg)) +} diff --git a/operator/internal/manifests/internal/config/loki-config.yaml b/operator/internal/manifests/internal/config/loki-config.yaml index a11191627d37..61c0de401dc1 100644 --- a/operator/internal/manifests/internal/config/loki-config.yaml +++ b/operator/internal/manifests/internal/config/loki-config.yaml @@ -24,11 +24,17 @@ common: {{- end }} {{- with .ObjectStorage.S3 }} s3: + {{- if .STS }} + s3: "s3://{{.Region}}/{{.Buckets}}" + s3forcepathstyle: false + {{- else }} s3: {{ .Endpoint }} bucketnames: {{ .Buckets }} region: {{ .Region }} access_key_id: ${AWS_ACCESS_KEY_ID} secret_access_key: ${AWS_ACCESS_KEY_SECRET} + s3forcepathstyle: true + {{- end }} {{- with .SSE }} {{- if .Type }} sse: @@ -42,7 +48,6 @@ common: {{- end}} {{- end }} {{- end }} - s3forcepathstyle: true {{- end }} {{- with .ObjectStorage.Swift }} swift: diff --git a/operator/internal/manifests/openshift/rbac.go b/operator/internal/manifests/openshift/rbac.go index ebd464274c43..46e5837a2c26 100644 --- a/operator/internal/manifests/openshift/rbac.go +++ b/operator/internal/manifests/openshift/rbac.go @@ -108,6 +108,17 @@ func BuildRulerClusterRole(opts Options) *rbacv1.ClusterRole { "create", }, }, + { + APIGroups: 
[]string{ + "monitoring.coreos.com", + }, + Resources: []string{ + "alertmanagers/api", + }, + Verbs: []string{ + "create", + }, + }, }, } } diff --git a/operator/internal/manifests/storage/configure.go b/operator/internal/manifests/storage/configure.go index a4606789d967..a48f07ff5b46 100644 --- a/operator/internal/manifests/storage/configure.go +++ b/operator/internal/manifests/storage/configure.go @@ -8,6 +8,7 @@ import ( "github.com/imdario/mergo" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + "k8s.io/utils/pointer" lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" ) @@ -125,7 +126,14 @@ func ensureObjectStoreCredentials(p *corev1.PodSpec, opts Options) corev1.PodSpe MountPath: secretDirectory, }) - container.Env = append(container.Env, staticAuthCredentials(opts)...) + if managedAuthEnabled(opts) { + setSATokenPath(&opts) + container.Env = append(container.Env, managedAuthCredentials(opts)...) + volumes = append(volumes, saTokenVolume(opts)) + container.VolumeMounts = append(container.VolumeMounts, saTokenVolumeMount(opts)) + } else { + container.Env = append(container.Env, staticAuthCredentials(opts)...) + } container.Env = append(container.Env, serverSideEncryption(opts)...) return corev1.PodSpec{ @@ -168,6 +176,18 @@ func staticAuthCredentials(opts Options) []corev1.EnvVar { } } +func managedAuthCredentials(opts Options) []corev1.EnvVar { + switch opts.SharedStore { + case lokiv1.ObjectStorageSecretS3: + return []corev1.EnvVar{ + envVarFromSecret(EnvAWSRoleArn, opts.SecretName, KeyAWSRoleArn), + envVarFromValue(EnvAWSWebIdentityTokenFile, path.Join(opts.S3.WebIdentityTokenFile, "token")), + } + default: + return []corev1.EnvVar{} + } +} + func serverSideEncryption(opts Options) []corev1.EnvVar { secretName := opts.SecretName switch opts.SharedStore { @@ -236,3 +256,65 @@ func envVarFromValue(name, value string) corev1.EnvVar { Value: value, } } + +func managedAuthEnabled(opts Options) bool { + switch opts.SharedStore { + case lokiv1.ObjectStorageSecretS3: + return opts.S3 != nil && opts.S3.STS + default: + return false + } +} + +func setSATokenPath(opts *Options) { + switch opts.SharedStore { + case lokiv1.ObjectStorageSecretS3: + opts.S3.WebIdentityTokenFile = saTokenVolumeK8sDirectory + if opts.OpenShiftEnabled { + opts.S3.WebIdentityTokenFile = saTokenVolumeOcpDirectory + } + } +} + +func saTokenVolumeMount(opts Options) corev1.VolumeMount { + var tokenPath string + switch opts.SharedStore { + case lokiv1.ObjectStorageSecretS3: + tokenPath = opts.S3.WebIdentityTokenFile + } + return corev1.VolumeMount{ + Name: saTokenVolumeName, + MountPath: tokenPath, + } +} + +func saTokenVolume(opts Options) corev1.Volume { + var audience string + storeType := opts.SharedStore + switch storeType { + case lokiv1.ObjectStorageSecretS3: + audience = awsDefaultAudience + if opts.S3.Audience != "" { + audience = opts.S3.Audience + } + if opts.OpenShiftEnabled { + audience = awsOpenShiftAudience + } + } + return corev1.Volume{ + Name: saTokenVolumeName, + VolumeSource: corev1.VolumeSource{ + Projected: &corev1.ProjectedVolumeSource{ + Sources: []corev1.VolumeProjection{ + { + ServiceAccountToken: &corev1.ServiceAccountTokenProjection{ + ExpirationSeconds: pointer.Int64(saTokenExpiration), + Path: corev1.ServiceAccountTokenKey, + Audience: audience, + }, + }, + }, + }, + }, + } +} diff --git a/operator/internal/manifests/storage/configure_test.go b/operator/internal/manifests/storage/configure_test.go index 6614453df22d..8cf82d8c65db 100644 --- 
a/operator/internal/manifests/storage/configure_test.go +++ b/operator/internal/manifests/storage/configure_test.go @@ -6,6 +6,7 @@ import ( "github.com/stretchr/testify/require" appsv1 "k8s.io/api/apps/v1" corev1 "k8s.io/api/core/v1" + "k8s.io/utils/pointer" lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" ) @@ -297,6 +298,191 @@ func TestConfigureDeploymentForStorageType(t *testing.T) { }, }, }, + { + desc: "object storage S3 in STS Mode", + opts: Options{ + SecretName: "test", + SharedStore: lokiv1.ObjectStorageSecretS3, + S3: &S3StorageConfig{ + STS: true, + Audience: "test", + }, + }, + dpl: &appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "loki-ingester", + }, + }, + }, + }, + }, + }, + want: &appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "loki-ingester", + VolumeMounts: []corev1.VolumeMount{ + { + Name: "test", + ReadOnly: false, + MountPath: "/etc/storage/secrets", + }, + { + Name: saTokenVolumeName, + ReadOnly: false, + MountPath: "/var/run/secrets/kubernetes.io/serviceaccount", + }, + }, + Env: []corev1.EnvVar{ + { + Name: EnvAWSRoleArn, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeyAWSRoleArn, + }, + }, + }, + { + Name: "AWS_WEB_IDENTITY_TOKEN_FILE", + Value: "/var/run/secrets/kubernetes.io/serviceaccount/token", + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "test", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: "test", + }, + }, + }, + { + Name: saTokenVolumeName, + VolumeSource: corev1.VolumeSource{ + Projected: &corev1.ProjectedVolumeSource{ + Sources: []corev1.VolumeProjection{ + { + ServiceAccountToken: &corev1.ServiceAccountTokenProjection{ + Audience: "test", + ExpirationSeconds: pointer.Int64(3600), + Path: corev1.ServiceAccountTokenKey, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + { + desc: "object storage S3 in STS Mode in OpenShift", + opts: Options{ + SecretName: "test", + OpenShiftEnabled: true, + SharedStore: lokiv1.ObjectStorageSecretS3, + S3: &S3StorageConfig{ + STS: true, + Audience: "test", + }, + }, + dpl: &appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "loki-ingester", + }, + }, + }, + }, + }, + }, + want: &appsv1.Deployment{ + Spec: appsv1.DeploymentSpec{ + Template: corev1.PodTemplateSpec{ + Spec: corev1.PodSpec{ + Containers: []corev1.Container{ + { + Name: "loki-ingester", + VolumeMounts: []corev1.VolumeMount{ + { + Name: "test", + ReadOnly: false, + MountPath: "/etc/storage/secrets", + }, + { + Name: saTokenVolumeName, + ReadOnly: false, + MountPath: "/var/run/secrets/openshift/serviceaccount", + }, + }, + Env: []corev1.EnvVar{ + { + Name: EnvAWSRoleArn, + ValueFrom: &corev1.EnvVarSource{ + SecretKeyRef: &corev1.SecretKeySelector{ + LocalObjectReference: corev1.LocalObjectReference{ + Name: "test", + }, + Key: KeyAWSRoleArn, + }, + }, + }, + { + Name: "AWS_WEB_IDENTITY_TOKEN_FILE", + Value: "/var/run/secrets/openshift/serviceaccount/token", + }, + }, + }, + }, + Volumes: []corev1.Volume{ + { + Name: "test", + VolumeSource: corev1.VolumeSource{ + Secret: &corev1.SecretVolumeSource{ + SecretName: "test", + }, + }, + }, + { + Name: 
saTokenVolumeName, + VolumeSource: corev1.VolumeSource{ + Projected: &corev1.ProjectedVolumeSource{ + Sources: []corev1.VolumeProjection{ + { + ServiceAccountToken: &corev1.ServiceAccountTokenProjection{ + Audience: "openshift", + ExpirationSeconds: pointer.Int64(3600), + Path: corev1.ServiceAccountTokenKey, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, + }, { desc: "object storage S3 with SSE KMS encryption context", opts: Options{ diff --git a/operator/internal/manifests/storage/options.go b/operator/internal/manifests/storage/options.go index 8767f576848a..7ecf7f78b425 100644 --- a/operator/internal/manifests/storage/options.go +++ b/operator/internal/manifests/storage/options.go @@ -16,9 +16,10 @@ type Options struct { Swift *SwiftStorageConfig AlibabaCloud *AlibabaCloudStorageConfig - SecretName string - SecretSHA1 string - TLS *TLSConfig + SecretName string + SecretSHA1 string + TLS *TLSConfig + OpenShiftEnabled bool } // AzureStorageConfig for Azure storage config @@ -35,10 +36,13 @@ type GCSStorageConfig struct { // S3StorageConfig for S3 storage config type S3StorageConfig struct { - Endpoint string - Region string - Buckets string - SSE S3SSEConfig + Endpoint string + Region string + Buckets string + WebIdentityTokenFile string + Audience string + STS bool + SSE S3SSEConfig } type S3SSEType string diff --git a/operator/internal/manifests/storage/var.go b/operator/internal/manifests/storage/var.go index aae6e1ea0e58..bbd377a43f46 100644 --- a/operator/internal/manifests/storage/var.go +++ b/operator/internal/manifests/storage/var.go @@ -9,8 +9,12 @@ const ( EnvAWSAccessKeyID = "AWS_ACCESS_KEY_ID" // EnvAWSAccessKeySecret is the environment variable to specify the AWS client secret to access S3. EnvAWSAccessKeySecret = "AWS_ACCESS_KEY_SECRET" - // EnvAWSSseKmsEncryptionContext is the environment variable to specity the AWS KMS encryption context when using type SSE-KMS. + // EnvAWSSseKmsEncryptionContext is the environment variable to specify the AWS KMS encryption context when using type SSE-KMS. EnvAWSSseKmsEncryptionContext = "AWS_SSE_KMS_ENCRYPTION_CONTEXT" + // EnvAWSRoleArn is the environment variable to specify the AWS role ARN secret for the federated identity workflow. + EnvAWSRoleArn = "AWS_ROLE_ARN" + // EnvAWSWebIdentityToken is the environment variable to specify the path to the web identity token file used in the federated identity workflow. + EnvAWSWebIdentityTokenFile = "AWS_WEB_IDENTITY_TOKEN_FILE" // EnvAzureStorageAccountName is the environment variable to specify the Azure storage account name to access the container. EnvAzureStorageAccountName = "AZURE_STORAGE_ACCOUNT_NAME" // EnvAzureStorageAccountKey is the environment variable to specify the Azure storage account key to access the container. @@ -47,6 +51,10 @@ const ( KeyAWSSseKmsEncryptionContext = "sse_kms_encryption_context" // KeyAWSSseKmsKeyID is the secret data key for the AWS SSE KMS key id. KeyAWSSseKmsKeyID = "sse_kms_key_id" + // KeyAWSRoleArn is the secret data key for the AWS STS role ARN. + KeyAWSRoleArn = "role_arn" + // KeyAWSAudience is the audience for the AWS STS workflow. + KeyAWSAudience = "audience" // KeyAzureStorageAccountKey is the secret data key for the Azure storage account key. KeyAzureStorageAccountKey = "account_key" @@ -93,7 +101,15 @@ const ( // KeySwiftPassword is the secret data key for the OpenStack Swift password. 
KeySwiftUsername = "username" + saTokenVolumeK8sDirectory = "/var/run/secrets/kubernetes.io/serviceaccount" + saTokenVolumeOcpDirectory = "/var/run/secrets/openshift/serviceaccount" + saTokenVolumeName = "bound-sa-token" + saTokenExpiration = 3600 + secretDirectory = "/etc/storage/secrets" storageTLSVolume = "storage-tls" caDirectory = "/etc/storage/ca" + + awsDefaultAudience = "sts.amazonaws.com" + awsOpenShiftAudience = "openshift" ) diff --git a/pkg/compactor/compactor.go b/pkg/compactor/compactor.go index 07e8389c5b84..ca8323863370 100644 --- a/pkg/compactor/compactor.go +++ b/pkg/compactor/compactor.go @@ -643,6 +643,10 @@ func (c *Compactor) CompactTable(ctx context.Context, tableName string, applyRet level.Error(util_log.Logger).Log("msg", "failed to compact files", "table", tableName, "err", err) return err } + + if !applyRetention { + c.metrics.skippedCompactingLockedTables.WithLabelValues(tableName).Set(0) + } return nil } diff --git a/pkg/compactor/compactor_test.go b/pkg/compactor/compactor_test.go index 17df04029073..9f3f23424f2d 100644 --- a/pkg/compactor/compactor_test.go +++ b/pkg/compactor/compactor_test.go @@ -420,7 +420,12 @@ func TestCompactor_TableLocking(t *testing.T) { if tc.applyRetention { require.Equal(t, float64(0), testutil.ToFloat64(compactor.metrics.skippedCompactingLockedTables.WithLabelValues(tc.lockTable))) } else { - require.Equal(t, float64(1), testutil.ToFloat64(compactor.metrics.skippedCompactingLockedTables.WithLabelValues(tc.lockTable))) + // we only lock table during first run so second run should reset the skip count metric to 0 + skipCount := float64(0) + if n == 1 { + skipCount = 1 + } + require.Equal(t, skipCount, testutil.ToFloat64(compactor.metrics.skippedCompactingLockedTables.WithLabelValues(tc.lockTable))) } } diff --git a/pkg/compactor/metrics.go b/pkg/compactor/metrics.go index 96fc9b16541e..d396017d7ac3 100644 --- a/pkg/compactor/metrics.go +++ b/pkg/compactor/metrics.go @@ -18,7 +18,7 @@ type metrics struct { applyRetentionOperationDurationSeconds prometheus.Gauge applyRetentionLastSuccess prometheus.Gauge compactorRunning prometheus.Gauge - skippedCompactingLockedTables *prometheus.CounterVec + skippedCompactingLockedTables *prometheus.GaugeVec } func newMetrics(r prometheus.Registerer) *metrics { @@ -58,10 +58,10 @@ func newMetrics(r prometheus.Registerer) *metrics { Name: "compactor_running", Help: "Value will be 1 if compactor is currently running on this instance", }), - skippedCompactingLockedTables: promauto.With(r).NewCounterVec(prometheus.CounterOpts{ + skippedCompactingLockedTables: promauto.With(r).NewGaugeVec(prometheus.GaugeOpts{ Namespace: "loki_compactor", - Name: "skipped_compacting_locked_table_total", - Help: "Count of uncompacted tables being skipped due to them being locked by retention", + Name: "locked_table_successive_compaction_skips", + Help: "Number of times uncompacted tables were consecutively skipped due to them being locked by retention", }, []string{"table_name"}), } diff --git a/pkg/compactor/retention/retention.go b/pkg/compactor/retention/retention.go index 04fe89ecddb2..703e7e0182e6 100644 --- a/pkg/compactor/retention/retention.go +++ b/pkg/compactor/retention/retention.go @@ -203,7 +203,7 @@ func markForDelete( // Mark the chunk for deletion only if it is completely deleted, or this is the last table that the chunk is index in. 
// For a partially deleted chunk, if we delete the source chunk before all the tables which index it are processed then // the retention would fail because it would fail to find it in the storage. - if filterFunc == nil || c.Through <= tableInterval.End { + if filterFunc == nil || c.From >= tableInterval.Start { if err := marker.Put(c.ChunkID); err != nil { return false, err } diff --git a/pkg/compactor/retention/retention_test.go b/pkg/compactor/retention/retention_test.go index 59355e1c9e08..36faaaf332dc 100644 --- a/pkg/compactor/retention/retention_test.go +++ b/pkg/compactor/retention/retention_test.go @@ -175,11 +175,16 @@ func Test_Retention(t *testing.T) { } } -type noopWriter struct{} +type noopWriter struct { + count int64 +} -func (noopWriter) Put(_ []byte) error { return nil } -func (noopWriter) Count() int64 { return 0 } -func (noopWriter) Close() error { return nil } +func (n *noopWriter) Put(_ []byte) error { + n.count++ + return nil +} +func (n *noopWriter) Count() int64 { return n.count } +func (n *noopWriter) Close() error { return nil } func Test_EmptyTable(t *testing.T) { schema := allSchemas[0] @@ -197,11 +202,11 @@ func Test_EmptyTable(t *testing.T) { tables := store.indexTables() require.Len(t, tables, 1) // Set a very low retention to make sure all chunks are marked for deletion which will create an empty table. - empty, _, err := markForDelete(context.Background(), 0, tables[0].name, noopWriter{}, tables[0], NewExpirationChecker(&fakeLimits{perTenant: map[string]retentionLimit{"1": {retentionPeriod: time.Second}, "2": {retentionPeriod: time.Second}}}), nil, util_log.Logger) + empty, _, err := markForDelete(context.Background(), 0, tables[0].name, &noopWriter{}, tables[0], NewExpirationChecker(&fakeLimits{perTenant: map[string]retentionLimit{"1": {retentionPeriod: time.Second}, "2": {retentionPeriod: time.Second}}}), nil, util_log.Logger) require.NoError(t, err) require.True(t, empty) - _, _, err = markForDelete(context.Background(), 0, tables[0].name, noopWriter{}, newTable("test"), NewExpirationChecker(&fakeLimits{}), nil, util_log.Logger) + _, _, err = markForDelete(context.Background(), 0, tables[0].name, &noopWriter{}, newTable("test"), NewExpirationChecker(&fakeLimits{}), nil, util_log.Logger) require.Equal(t, err, errNoChunksFound) } @@ -632,6 +637,7 @@ func TestMarkForDelete_SeriesCleanup(t *testing.T) { expectedDeletedSeries []map[uint64]struct{} expectedEmpty []bool expectedModified []bool + numChunksDeleted []int64 }{ { name: "no chunk and series deleted", @@ -652,6 +658,9 @@ func TestMarkForDelete_SeriesCleanup(t *testing.T) { expectedModified: []bool{ false, }, + numChunksDeleted: []int64{ + 0, + }, }, { name: "chunk deleted with filter but no lines matching", @@ -675,6 +684,9 @@ func TestMarkForDelete_SeriesCleanup(t *testing.T) { expectedModified: []bool{ false, }, + numChunksDeleted: []int64{ + 0, + }, }, { name: "only one chunk in store which gets deleted", @@ -695,6 +707,9 @@ func TestMarkForDelete_SeriesCleanup(t *testing.T) { expectedModified: []bool{ true, }, + numChunksDeleted: []int64{ + 1, + }, }, { name: "only one chunk in store which gets partially deleted", @@ -723,6 +738,9 @@ func TestMarkForDelete_SeriesCleanup(t *testing.T) { expectedModified: []bool{ true, }, + numChunksDeleted: []int64{ + 1, + }, }, { name: "one of two chunks deleted", @@ -747,6 +765,9 @@ func TestMarkForDelete_SeriesCleanup(t *testing.T) { expectedModified: []bool{ true, }, + numChunksDeleted: []int64{ + 1, + }, }, { name: "one of two chunks partially deleted", @@ 
-779,6 +800,9 @@ func TestMarkForDelete_SeriesCleanup(t *testing.T) { expectedModified: []bool{ true, }, + numChunksDeleted: []int64{ + 1, + }, }, { name: "one big chunk partially deleted for yesterdays table without rewrite", @@ -802,6 +826,9 @@ func TestMarkForDelete_SeriesCleanup(t *testing.T) { expectedModified: []bool{ true, true, }, + numChunksDeleted: []int64{ + 1, 0, + }, }, { name: "one big chunk partially deleted for yesterdays table with rewrite", @@ -825,6 +852,9 @@ func TestMarkForDelete_SeriesCleanup(t *testing.T) { expectedModified: []bool{ true, true, }, + numChunksDeleted: []int64{ + 1, 0, + }, }, } { t.Run(tc.name, func(t *testing.T) { @@ -847,10 +877,12 @@ func TestMarkForDelete_SeriesCleanup(t *testing.T) { seriesCleanRecorder := newSeriesCleanRecorder(table) cr := newChunkRewriter(store.chunkClient, table.name, table) - empty, isModified, err := markForDelete(context.Background(), 0, table.name, noopWriter{}, seriesCleanRecorder, expirationChecker, cr, util_log.Logger) + marker := &noopWriter{} + empty, isModified, err := markForDelete(context.Background(), 0, table.name, marker, seriesCleanRecorder, expirationChecker, cr, util_log.Logger) require.NoError(t, err) require.Equal(t, tc.expectedEmpty[i], empty) require.Equal(t, tc.expectedModified[i], isModified) + require.Equal(t, tc.numChunksDeleted[i], marker.count) require.EqualValues(t, tc.expectedDeletedSeries[i], seriesCleanRecorder.deletedSeries[userID]) } @@ -884,7 +916,7 @@ func TestDeleteTimeout(t *testing.T) { context.Background(), tc.timeout, table.name, - noopWriter{}, + &noopWriter{}, newSeriesCleanRecorder(table), expirationChecker, newChunkRewriter(store.chunkClient, table.name, table), @@ -925,7 +957,7 @@ func TestMarkForDelete_DropChunkFromIndex(t *testing.T) { require.Len(t, tables, 8) for i, table := range tables { - empty, _, err := markForDelete(context.Background(), 0, table.name, noopWriter{}, table, + empty, _, err := markForDelete(context.Background(), 0, table.name, &noopWriter{}, table, NewExpirationChecker(fakeLimits{perTenant: map[string]retentionLimit{"1": {retentionPeriod: retentionPeriod}}}), nil, util_log.Logger) require.NoError(t, err) if i == 7 { diff --git a/pkg/logql/downstream.go b/pkg/logql/downstream.go index b7d37390d11f..76594dc040c2 100644 --- a/pkg/logql/downstream.go +++ b/pkg/logql/downstream.go @@ -285,11 +285,11 @@ func (ev DownstreamEvaluator) Downstream(ctx context.Context, queries []Downstre type errorQuerier struct{} func (errorQuerier) SelectLogs(_ context.Context, _ SelectLogParams) (iter.EntryIterator, error) { - return nil, errors.New("unimplemented") + return nil, errors.New("SelectLogs unimplemented: the query-frontend cannot evaluate an expression that selects logs. this is likely a bug in the query engine. please contact your system operator") } func (errorQuerier) SelectSamples(_ context.Context, _ SelectSampleParams) (iter.SampleIterator, error) { - return nil, errors.New("unimplemented") + return nil, errors.New("SelectSamples unimplemented: the query-frontend cannot evaluate an expression that selects samples. this is likely a bug in the query engine. 
please contact your system operator") } func NewDownstreamEvaluator(downstreamer Downstreamer) *DownstreamEvaluator { diff --git a/pkg/logql/downstream_test.go b/pkg/logql/downstream_test.go index 218957f862bb..426722a55459 100644 --- a/pkg/logql/downstream_test.go +++ b/pkg/logql/downstream_test.go @@ -55,6 +55,14 @@ func TestMappingEquivalence(t *testing.T) { {`avg_over_time({a=~".+"} | logfmt | unwrap value [1s])`, false}, {`avg_over_time({a=~".+"} | logfmt | unwrap value [1s]) by (a)`, true}, {`quantile_over_time(0.99, {a=~".+"} | logfmt | unwrap value [1s])`, true}, + { + ` + (quantile_over_time(0.99, {a=~".+"} | logfmt | unwrap value [1s]) by (a) > 1) + and + avg by (a) (rate({a=~".+"}[1s])) + `, + false, + }, // topk prefers already-seen values in tiebreakers. Since the test data generates // the same log lines for each series & the resulting promql.Vectors aren't deterministically // sorted by labels, we don't expect this to pass. diff --git a/pkg/logql/log/fmt.go b/pkg/logql/log/fmt.go index 9257834eee34..34a1bb32c5e4 100644 --- a/pkg/logql/log/fmt.go +++ b/pkg/logql/log/fmt.go @@ -222,8 +222,12 @@ func (lf *LineFormatter) Process(ts int64, line []byte, lbs *LabelsBuilder) ([]b lf.currentTs = ts // map now is taking from a pool - m := lbs.Map() - defer smp.Put(m) + m, ret := lbs.Map() + defer func() { + if ret { // if we return the base map from the labels builder we should not put it back in the pool + smp.Put(m) + } + }() if err := lf.Template.Execute(lf.buf, m); err != nil { lbs.SetErr(errTemplateFormat) lbs.SetErrorDetails(err.Error()) diff --git a/pkg/logql/log/fmt_test.go b/pkg/logql/log/fmt_test.go index 06cf20c37606..637caec29a46 100644 --- a/pkg/logql/log/fmt_test.go +++ b/pkg/logql/log/fmt_test.go @@ -2,6 +2,7 @@ package log import ( "fmt" + "sync" "testing" "time" @@ -923,3 +924,48 @@ func TestInvalidUnixTimes(t *testing.T) { _, err = unixToTime("464") require.Error(t, err) } + +func TestMapPoolPanic(_ *testing.T) { + wg := sync.WaitGroup{} + wg.Add(1) + wgFinished := sync.WaitGroup{} + + ls := labels.FromStrings("cluster", "us-central-0") + builder := NewBaseLabelsBuilder().ForLabels(ls, ls.Hash()) + // this specific line format was part of the query that first alerted us to the panic caused by map pooling in the label/line formatter Process functions + tmpl := `[1m{{if .level }}{{alignRight 5 .level}}{{else if .severity}}{{alignRight 5 .severity}}{{end}}[0m [90m[{{alignRight 10 .resources_service_instance_id}}{{if .attributes_thread_name}}/{{alignRight 20 .attributes_thread_name}}{{else if eq "java" .resources_telemetry_sdk_language }} {{end}}][0m [36m{{if .instrumentation_scope_name }}{{alignRight 40 .instrumentation_scope_name}}{{end}}[0m {{.body}} {{if .traceid}} [37m[3m[traceid={{.traceid}}]{{end}}` + a := newMustLineFormatter(tmpl) + a.Process(0, + []byte("logger=sqlstore.metrics traceID=XXXXXXXXXXXXXXXXXXXXXXXXXXXX t=2024-01-04T23:58:47.696779826Z level=debug msg=\"query finished\" status=success elapsedtime=1.523571ms sql=\"some SQL query\" error=null"), + builder, + ) + + for i := 0; i < 100; i++ { + wgFinished.Add(1) + go func() { + wg.Wait() + a := newMustLineFormatter(tmpl) + a.Process(0, + []byte("logger=sqlstore.metrics traceID=XXXXXXXXXXXXXXXXXXXXXXXXXXXX t=2024-01-04T23:58:47.696779826Z level=debug msg=\"query finished\" status=success elapsedtime=1.523571ms sql=\"some SQL query\" error=null"), + builder, + ) + wgFinished.Done() + }() + } + for i := 0; i < 100; i++ { + wgFinished.Add(1) + j := i + go func() { + wg.Wait() + m := smp.Get() + for k, v := 
range m { + m[k] = fmt.Sprintf("%s%d", v, j) + } + smp.Put(m) + wgFinished.Done() + + }() + } + wg.Done() + wgFinished.Wait() +} diff --git a/pkg/logql/log/labels.go b/pkg/logql/log/labels.go index 76a1ae0d7d5e..2144abdf5d7e 100644 --- a/pkg/logql/log/labels.go +++ b/pkg/logql/log/labels.go @@ -495,22 +495,21 @@ func (b *LabelsBuilder) IntoMap(m map[string]string) { } } -func (b *LabelsBuilder) Map() map[string]string { +func (b *LabelsBuilder) Map() (map[string]string, bool) { if !b.hasDel() && !b.hasAdd() && !b.HasErr() { if b.baseMap == nil { b.baseMap = b.base.Map() } - return b.baseMap + return b.baseMap, false } b.buf = b.UnsortedLabels(b.buf) // todo should we also cache maps since limited by the result ? // Maps also don't create a copy of the labels. res := smp.Get() - clear(res) for _, l := range b.buf { res[l.Name] = l.Value } - return res + return res, true } // LabelsResult returns the LabelsResult from the builder. diff --git a/pkg/logql/optimize.go b/pkg/logql/optimize.go index 2f9c80a64f91..9b885b0fd229 100644 --- a/pkg/logql/optimize.go +++ b/pkg/logql/optimize.go @@ -8,7 +8,7 @@ func optimizeSampleExpr(expr syntax.SampleExpr) (syntax.SampleExpr, error) { // we skip sharding AST for now, it's not easy to clone them since they are not part of the language. expr.Walk(func(e syntax.Expr) { switch e.(type) { - case *ConcatSampleExpr, *DownstreamSampleExpr, *QuantileSketchEvalExpr, *QuantileSketchMergeExpr: + case *ConcatSampleExpr, DownstreamSampleExpr, *QuantileSketchEvalExpr, *QuantileSketchMergeExpr: skip = true return } diff --git a/pkg/logql/shardmapper.go b/pkg/logql/shardmapper.go index 4bee2616bf03..e8d78a438c9b 100644 --- a/pkg/logql/shardmapper.go +++ b/pkg/logql/shardmapper.go @@ -62,9 +62,7 @@ func (m ShardMapper) Parse(parsed syntax.Expr) (noop bool, bytesPerShard uint64, return false, 0, nil, err } - originalStr := parsed.String() - mappedStr := mapped.String() - noop = originalStr == mappedStr + noop = isNoOp(parsed, mapped) if noop { m.metrics.ParsedQueries.WithLabelValues(NoopKey).Inc() } else { @@ -97,32 +95,62 @@ func (m ShardMapper) Map(expr syntax.Expr, r *downstreamRecorder) (syntax.Expr, case *syntax.RangeAggregationExpr: return m.mapRangeAggregationExpr(e, r) case *syntax.BinOpExpr: - lhsMapped, lhsBytesPerShard, err := m.Map(e.SampleExpr, r) - if err != nil { - return nil, 0, err - } - rhsMapped, rhsBytesPerShard, err := m.Map(e.RHS, r) - if err != nil { - return nil, 0, err - } - lhsSampleExpr, ok := lhsMapped.(syntax.SampleExpr) - if !ok { - return nil, 0, badASTMapping(lhsMapped) - } - rhsSampleExpr, ok := rhsMapped.(syntax.SampleExpr) - if !ok { - return nil, 0, badASTMapping(rhsMapped) + return m.mapBinOpExpr(e, r) + default: + return nil, 0, errors.Errorf("unexpected expr type (%T) for ASTMapper type (%T) ", expr, m) + } +} + +func (m ShardMapper) mapBinOpExpr(e *syntax.BinOpExpr, r *downstreamRecorder) (*syntax.BinOpExpr, uint64, error) { + // In a BinOp expression both sides need to be either executed locally or wrapped + // into a downstream expression to be executed on the querier, since the default + // evaluator on the query frontend cannot select logs or samples. + // However, it can evaluate literals and vectors. 
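	// For example (an illustrative sketch; see the mapping tests for the exact shapes):
	// in `quantile_over_time(0.99, {a="foo"} | unwrap bytes [1s]) by (a, b) > 1`
	// the quantile side is not sharded here, so it is wrapped in a
	// DownstreamSampleExpr and evaluated by a querier, while the literal `1`
	// stays with the frontend evaluator.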
+ + // check if LHS is shardable by mapping the tree + // only wrap in downstream expression if the mapping is a no-op and the + // expression is a vector or literal + lhsMapped, lhsBytesPerShard, err := m.Map(e.SampleExpr, r) + if err != nil { + return nil, 0, err + } + if isNoOp(e.SampleExpr, lhsMapped) && !isLiteralOrVector(lhsMapped) { + lhsMapped = DownstreamSampleExpr{ + shard: nil, + SampleExpr: e.SampleExpr, } - e.SampleExpr = lhsSampleExpr - e.RHS = rhsSampleExpr + } - // We take the maximum bytes per shard of both sides of the operation - bytesPerShard := uint64(math.Max(int(lhsBytesPerShard), int(rhsBytesPerShard))) + // check if RHS is shardable by mapping the tree + // only wrap in downstream expression if the mapping is a no-op and the + // expression is a vector or literal + rhsMapped, rhsBytesPerShard, err := m.Map(e.RHS, r) + if err != nil { + return nil, 0, err + } + if isNoOp(e.RHS, rhsMapped) && !isLiteralOrVector(rhsMapped) { + // TODO: check if literal or vector + rhsMapped = DownstreamSampleExpr{ + shard: nil, + SampleExpr: e.RHS, + } + } - return e, bytesPerShard, nil - default: - return nil, 0, errors.Errorf("unexpected expr type (%T) for ASTMapper type (%T) ", expr, m) + lhsSampleExpr, ok := lhsMapped.(syntax.SampleExpr) + if !ok { + return nil, 0, badASTMapping(lhsMapped) } + rhsSampleExpr, ok := rhsMapped.(syntax.SampleExpr) + if !ok { + return nil, 0, badASTMapping(rhsMapped) + } + e.SampleExpr = lhsSampleExpr + e.RHS = rhsSampleExpr + + // We take the maximum bytes per shard of both sides of the operation + bytesPerShard := uint64(math.Max(int(lhsBytesPerShard), int(rhsBytesPerShard))) + + return e, bytesPerShard, nil } func (m ShardMapper) mapLogSelectorExpr(expr syntax.LogSelectorExpr, r *downstreamRecorder) (syntax.LogSelectorExpr, uint64, error) { @@ -338,11 +366,7 @@ var rangeMergeMap = map[string]string{ func (m ShardMapper) mapRangeAggregationExpr(expr *syntax.RangeAggregationExpr, r *downstreamRecorder) (syntax.SampleExpr, uint64, error) { if !expr.Shardable() { - exprStats, err := m.shards.GetStats(expr) - if err != nil { - return nil, 0, err - } - return expr, exprStats.Bytes, nil + return noOp(expr, m.shards) } switch expr.Operation { @@ -437,7 +461,7 @@ func (m ShardMapper) mapRangeAggregationExpr(expr *syntax.RangeAggregationExpr, return nil, 0, err } if shards == 0 || !m.quantileOverTimeSharding { - return m.mapSampleExpr(expr, r) + return noOp(expr, m.shards) } // quantile_over_time() by (foo) -> @@ -465,11 +489,29 @@ func (m ShardMapper) mapRangeAggregationExpr(expr *syntax.RangeAggregationExpr, default: // don't shard if there's not an appropriate optimization - exprStats, err := m.shards.GetStats(expr) - if err != nil { - return nil, 0, err - } - return expr, exprStats.Bytes, nil + return noOp(expr, m.shards) + } +} + +func noOp[E syntax.Expr](expr E, shards ShardResolver) (E, uint64, error) { + exprStats, err := shards.GetStats(expr) + if err != nil { + var empty E + return empty, 0, err + } + return expr, exprStats.Bytes, nil +} + +func isNoOp(left syntax.Expr, right syntax.Expr) bool { + return left.String() == right.String() +} + +func isLiteralOrVector(e syntax.Expr) bool { + switch e.(type) { + case *syntax.VectorExpr, *syntax.LiteralExpr: + return true + default: + return false } } diff --git a/pkg/logql/shardmapper_test.go b/pkg/logql/shardmapper_test.go index 517fb1aedb42..96955109a941 100644 --- a/pkg/logql/shardmapper_test.go +++ b/pkg/logql/shardmapper_test.go @@ -418,7 +418,7 @@ func TestMappingStrings(t *testing.T) { } func 
TestMapping(t *testing.T) { - m := NewShardMapper(ConstantShards(2), nilShardMetrics, []string{ShardQuantileOverTime}) + m := NewShardMapper(ConstantShards(2), nilShardMetrics, []string{}) for _, tc := range []struct { in string @@ -1340,12 +1340,186 @@ func TestMapping(t *testing.T) { }, }, }, + { + in: `quantile_over_time(0.8, {foo="bar"} | unwrap bytes [5m]) by (cluster)`, + expr: &syntax.RangeAggregationExpr{ + Operation: syntax.OpRangeTypeQuantile, + Params: float64p(0.8), + Left: &syntax.LogRange{ + Left: &syntax.MatchersExpr{ + Mts: []*labels.Matcher{mustNewMatcher(labels.MatchEqual, "foo", "bar")}, + }, + Unwrap: &syntax.UnwrapExpr{ + Identifier: "bytes", + }, + Interval: 5 * time.Minute, + }, + Grouping: &syntax.Grouping{ + Groups: []string{"cluster"}, + }, + }, + }, + { + in: ` + quantile_over_time(0.99, {a="foo"} | unwrap bytes [1s]) by (b) + and + sum by (b) (rate({a="bar"}[1s])) + `, + expr: &syntax.BinOpExpr{ + SampleExpr: DownstreamSampleExpr{ + SampleExpr: &syntax.RangeAggregationExpr{ + Operation: syntax.OpRangeTypeQuantile, + Params: float64p(0.99), + Left: &syntax.LogRange{ + Left: &syntax.MatchersExpr{ + Mts: []*labels.Matcher{mustNewMatcher(labels.MatchEqual, "a", "foo")}, + }, + Unwrap: &syntax.UnwrapExpr{ + Identifier: "bytes", + }, + Interval: 1 * time.Second, + }, + Grouping: &syntax.Grouping{ + Groups: []string{"b"}, + }, + }, + }, + RHS: &syntax.VectorAggregationExpr{ + Left: &ConcatSampleExpr{ + DownstreamSampleExpr: DownstreamSampleExpr{ + shard: &astmapper.ShardAnnotation{ + Shard: 0, + Of: 2, + }, + SampleExpr: &syntax.VectorAggregationExpr{ + Left: &syntax.RangeAggregationExpr{ + Left: &syntax.LogRange{ + Left: &syntax.MatchersExpr{ + Mts: []*labels.Matcher{mustNewMatcher(labels.MatchEqual, "a", "bar")}, + }, + Interval: 1 * time.Second, + }, + Operation: syntax.OpRangeTypeRate, + }, + Grouping: &syntax.Grouping{ + Groups: []string{"b"}, + }, + Params: 0, + Operation: syntax.OpTypeSum, + }, + }, + next: &ConcatSampleExpr{ + DownstreamSampleExpr: DownstreamSampleExpr{ + shard: &astmapper.ShardAnnotation{ + Shard: 1, + Of: 2, + }, + SampleExpr: &syntax.VectorAggregationExpr{ + Left: &syntax.RangeAggregationExpr{ + Left: &syntax.LogRange{ + Left: &syntax.MatchersExpr{ + Mts: []*labels.Matcher{mustNewMatcher(labels.MatchEqual, "a", "bar")}, + }, + Interval: 1 * time.Second, + }, + Operation: syntax.OpRangeTypeRate, + }, + Grouping: &syntax.Grouping{ + Groups: []string{"b"}, + }, + Params: 0, + Operation: syntax.OpTypeSum, + }, + }, + next: nil, + }, + }, + Grouping: &syntax.Grouping{ + Groups: []string{"b"}, + }, + Operation: syntax.OpTypeSum, + }, + Op: syntax.OpTypeAnd, + Opts: &syntax.BinOpOptions{ + ReturnBool: false, + VectorMatching: &syntax.VectorMatching{}, + }, + }, + }, + { + in: `quantile_over_time(0.99, {a="foo"} | unwrap bytes [1s]) by (a, b) > 1`, + expr: &syntax.BinOpExpr{ + SampleExpr: DownstreamSampleExpr{ + SampleExpr: &syntax.RangeAggregationExpr{ + Operation: syntax.OpRangeTypeQuantile, + Params: float64p(0.99), + Left: &syntax.LogRange{ + Left: &syntax.MatchersExpr{ + Mts: []*labels.Matcher{mustNewMatcher(labels.MatchEqual, "a", "foo")}, + }, + Unwrap: &syntax.UnwrapExpr{ + Identifier: "bytes", + }, + Interval: 1 * time.Second, + }, + Grouping: &syntax.Grouping{ + Groups: []string{"a", "b"}, + }, + }, + }, + RHS: &syntax.LiteralExpr{ + Val: 1, + }, + Op: syntax.OpTypeGT, + Opts: &syntax.BinOpOptions{ + ReturnBool: false, + VectorMatching: &syntax.VectorMatching{}, + }, + }, + }, + { + in: `1 < quantile_over_time(0.99, {a="foo"} | unwrap 
bytes [1s]) by (a, b)`, + expr: &syntax.BinOpExpr{ + SampleExpr: &syntax.LiteralExpr{ + Val: 1, + }, + RHS: DownstreamSampleExpr{ + SampleExpr: &syntax.RangeAggregationExpr{ + Operation: syntax.OpRangeTypeQuantile, + Params: float64p(0.99), + Left: &syntax.LogRange{ + Left: &syntax.MatchersExpr{ + Mts: []*labels.Matcher{mustNewMatcher(labels.MatchEqual, "a", "foo")}, + }, + Unwrap: &syntax.UnwrapExpr{ + Identifier: "bytes", + }, + Interval: 1 * time.Second, + }, + Grouping: &syntax.Grouping{ + Groups: []string{"a", "b"}, + }, + }, + }, + Op: syntax.OpTypeLT, + Opts: &syntax.BinOpOptions{ + ReturnBool: false, + VectorMatching: &syntax.VectorMatching{}, + }, + }, + }, } { t.Run(tc.in, func(t *testing.T) { ast, err := syntax.ParseExpr(tc.in) require.Equal(t, tc.err, err) mapped, _, err := m.Map(ast, nilShardMetrics.downstreamRecorder()) + switch e := mapped.(type) { + case syntax.SampleExpr: + optimized, err := optimizeSampleExpr(e) + require.NoError(t, err) + require.Equal(t, mapped.String(), optimized.String()) + } require.Equal(t, tc.err, err) require.Equal(t, tc.expr.String(), mapped.String()) @@ -1420,3 +1594,7 @@ func TestStringTrimming(t *testing.T) { }) } } + +func float64p(v float64) *float64 { + return &v +} diff --git a/pkg/querier/http_test.go b/pkg/querier/http_test.go index c234a1947955..180e82c6b07d 100644 --- a/pkg/querier/http_test.go +++ b/pkg/querier/http_test.go @@ -45,7 +45,7 @@ func TestTailHandler(t *testing.T) { handler.ServeHTTP(rr, req) require.Equal(t, http.StatusBadRequest, rr.Code) - require.Equal(t, "multiple org IDs present\n", rr.Body.String()) + require.Equal(t, "multiple org IDs present", rr.Body.String()) } type slowConnectionSimulator struct { diff --git a/pkg/querier/queryrange/serialize_test.go b/pkg/querier/queryrange/serialize_test.go index 6f276f39b273..f926da9f19b5 100644 --- a/pkg/querier/queryrange/serialize_test.go +++ b/pkg/querier/queryrange/serialize_test.go @@ -104,7 +104,7 @@ func TestResponseFormat(t *testing.T) { url: "/loki/wrong/path", response: nil, expectedCode: http.StatusNotFound, - expectedRespone: "unknown request path: /loki/wrong/path\n", + expectedRespone: "unknown request path: /loki/wrong/path", }, } { t.Run(fmt.Sprintf("%s returns the expected format", tc.url), func(t *testing.T) { diff --git a/pkg/ruler/registry.go b/pkg/ruler/registry.go index a9c6734efea3..adb4f7cf8667 100644 --- a/pkg/ruler/registry.go +++ b/pkg/ruler/registry.go @@ -179,6 +179,9 @@ func (r *walRegistry) stop() { } func (r *walRegistry) getTenantConfig(tenant string) (instance.Config, error) { + r.overridesMu.Lock() + defer r.overridesMu.Unlock() + conf, err := r.config.WAL.Clone() if err != nil { return instance.Config{}, err @@ -225,9 +228,6 @@ func (r *walRegistry) getTenantConfig(tenant string) (instance.Config, error) { } func (r *walRegistry) getTenantRemoteWriteConfig(tenant string, base RemoteWriteConfig) (*RemoteWriteConfig, error) { - r.overridesMu.Lock() - defer r.overridesMu.Unlock() - overrides, err := base.Clone() if err != nil { return nil, fmt.Errorf("error generating tenant remote-write config: %w", err) diff --git a/pkg/storage/chunk/cache/background.go b/pkg/storage/chunk/cache/background.go index 16feb62551f5..299444c6a54e 100644 --- a/pkg/storage/chunk/cache/background.go +++ b/pkg/storage/chunk/cache/background.go @@ -148,8 +148,11 @@ func (c *backgroundCache) Store(ctx context.Context, keys []string, bufs [][]byt } size := bgWrite.size() - newSize := c.size.Load() + int64(size) + // prospectively add new size + newSize := 
c.size.Add(int64(size)) if newSize > int64(c.sizeLimit) { + // subtract it since we've exceeded the limit + c.size.Sub(int64(size)) c.failStore(ctx, size, num, "queue at byte size limit") return nil } } diff --git a/pkg/util/server/error.go b/pkg/util/server/error.go index fc04218d5a73..65cb430bb3f3 100644 --- a/pkg/util/server/error.go +++ b/pkg/util/server/error.go @@ -3,6 +3,7 @@ package server import ( "context" "errors" + "fmt" "net/http" "github.com/grafana/dskit/httpgrpc" @@ -29,7 +30,10 @@ const ( // WriteError write a go error with the correct status code. func WriteError(err error, w http.ResponseWriter) { status, cerr := ClientHTTPStatusAndError(err) - http.Error(w, cerr.Error(), status) + w.Header().Set("Content-Type", "text/plain; charset=utf-8") + w.Header().Set("X-Content-Type-Options", "nosniff") + w.WriteHeader(status) + fmt.Fprint(w, cerr.Error()) } // ClientHTTPStatusAndError returns error and http status that is "safe" to return to client without diff --git a/pkg/util/server/error_test.go b/pkg/util/server/error_test.go index 1fe15b0322e4..47b2453f1492 100644 --- a/pkg/util/server/error_test.go +++ b/pkg/util/server/error_test.go @@ -56,7 +56,7 @@ func Test_writeError(t *testing.T) { require.Equal(t, tt.expectedStatus, rec.Result().StatusCode) b, err := io.ReadAll(rec.Result().Body) require.NoError(t, err) - require.Equal(t, tt.msg, string(b[:len(b)-1])) + require.EqualValues(t, tt.msg, b) }) t.Run(tt.name+"-roundtrip", func(t *testing.T) { @@ -68,7 +68,7 @@ func Test_writeError(t *testing.T) { require.Equal(t, tt.expectedStatus, rec.Result().StatusCode) b, err := io.ReadAll(rec.Result().Body) require.NoError(t, err) - require.Equal(t, tt.msg, string(b[:len(b)-1])) + require.EqualValues(t, tt.msg, b) }) } } diff --git a/production/helm/loki/CHANGELOG.md b/production/helm/loki/CHANGELOG.md index 626523e1bae4..ca04f5d18ce5 100644 --- a/production/helm/loki/CHANGELOG.md +++ b/production/helm/loki/CHANGELOG.md @@ -13,6 +13,10 @@ Entries should include a reference to the pull request that introduced the chang [//]: # ( : do not remove this line. This locator is used by the CI pipeline to automatically create a changelog entry for each new Loki release. Add other chart versions and respective changelog entries bellow this line.) +## 5.41.5 + +- [BUGFIX] Added "swift" object storage type to resolve Loki Helm chart error. 
+ ## 5.41.4 - [CHANGE] Use `/ingester/shutdown?terminate=false` for write `preStop` hook diff --git a/production/helm/loki/Chart.yaml b/production/helm/loki/Chart.yaml index 095e2745a364..1e08c0c8f0d1 100644 --- a/production/helm/loki/Chart.yaml +++ b/production/helm/loki/Chart.yaml @@ -3,7 +3,7 @@ name: loki description: Helm chart for Grafana Loki in simple, scalable mode type: application appVersion: 2.9.3 -version: 5.41.4 +version: 5.41.5 home: https://grafana.github.io/helm-charts sources: - https://github.com/grafana/loki diff --git a/production/helm/loki/README.md b/production/helm/loki/README.md index 2857f553e13f..ec3360d378d7 100644 --- a/production/helm/loki/README.md +++ b/production/helm/loki/README.md @@ -1,6 +1,6 @@ # loki -![Version: 5.41.4](https://img.shields.io/badge/Version-5.41.4-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.9.3](https://img.shields.io/badge/AppVersion-2.9.3-informational?style=flat-square) +![Version: 5.41.5](https://img.shields.io/badge/Version-5.41.5-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 2.9.3](https://img.shields.io/badge/AppVersion-2.9.3-informational?style=flat-square) Helm chart for Grafana Loki in simple, scalable mode diff --git a/production/helm/loki/src/helm-test/Dockerfile b/production/helm/loki/src/helm-test/Dockerfile index 012e48b84a38..cf4420a2a68d 100644 --- a/production/helm/loki/src/helm-test/Dockerfile +++ b/production/helm/loki/src/helm-test/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.20.4 as build +FROM golang:1.21.3 as build # build via Makefile target helm-test-image in root # Makefile. Building from this directory will not be @@ -7,7 +7,7 @@ COPY . /src/loki WORKDIR /src/loki RUN make clean && make BUILD_IN_CONTAINER=false helm-test -FROM alpine:3.18.4 +FROM alpine:3.18.5 RUN apk add --update --no-cache ca-certificates=20230506-r0 COPY --from=build /src/loki/production/helm/loki/src/helm-test/helm-test /usr/bin/helm-test ENTRYPOINT [ "/usr/bin/helm-test" ] diff --git a/production/helm/loki/templates/_helpers.tpl b/production/helm/loki/templates/_helpers.tpl index 08e4dd063bab..9ebcc6351957 100644 --- a/production/helm/loki/templates/_helpers.tpl +++ b/production/helm/loki/templates/_helpers.tpl @@ -597,7 +597,7 @@ Create the service endpoint including port for MinIO. 
{{/* Determine if deployment is using object storage */}} {{- define "loki.isUsingObjectStorage" -}} -{{- or (eq .Values.loki.storage.type "gcs") (eq .Values.loki.storage.type "s3") (eq .Values.loki.storage.type "azure") -}} +{{- or (eq .Values.loki.storage.type "gcs") (eq .Values.loki.storage.type "s3") (eq .Values.loki.storage.type "azure") (eq .Values.loki.storage.type "swift") -}} {{- end -}} {{/* Configure the correct name for the memberlist service */}} diff --git a/tools/dev/loki-boltdb-storage-s3/config/loki.yaml b/tools/dev/loki-boltdb-storage-s3/config/loki.yaml index 83149885fe85..ea0cf186e269 100644 --- a/tools/dev/loki-boltdb-storage-s3/config/loki.yaml +++ b/tools/dev/loki-boltdb-storage-s3/config/loki.yaml @@ -67,7 +67,6 @@ ingester_client: remote_timeout: 1s limits_config: cardinality_limit: 100000 - enforce_metric_name: false ingestion_burst_size_mb: 5 ingestion_rate_mb: 2 ingestion_rate_strategy: global diff --git a/tools/dev/loki-boltdb-storage-s3/dev.dockerfile b/tools/dev/loki-boltdb-storage-s3/dev.dockerfile index 4a2a420fd093..f975c76d102c 100644 --- a/tools/dev/loki-boltdb-storage-s3/dev.dockerfile +++ b/tools/dev/loki-boltdb-storage-s3/dev.dockerfile @@ -2,7 +2,7 @@ FROM golang:1.20.4 ENV CGO_ENABLED=0 RUN go install github.com/go-delve/delve/cmd/dlv@v1.21.1 -FROM alpine:3.18.4 +FROM alpine:3.18.5 RUN mkdir /loki WORKDIR /loki diff --git a/tools/lambda-promtail/Dockerfile b/tools/lambda-promtail/Dockerfile index bac1cdf258f2..dc4b7601499b 100644 --- a/tools/lambda-promtail/Dockerfile +++ b/tools/lambda-promtail/Dockerfile @@ -12,7 +12,7 @@ RUN go mod download RUN go build -o ./main -tags lambda.norpc -ldflags="-s -w" lambda-promtail/*.go -FROM alpine:3.18.4 +FROM alpine:3.18.5 WORKDIR /app diff --git a/tools/tsdb/bloom-tester/Dockerfile b/tools/tsdb/bloom-tester/Dockerfile index d5f45d54da35..6ff54394edc7 100644 --- a/tools/tsdb/bloom-tester/Dockerfile +++ b/tools/tsdb/bloom-tester/Dockerfile @@ -6,7 +6,7 @@ WORKDIR /src/bloom-tester RUN make bloom-tester -FROM alpine:3.18.4 +FROM alpine:3.18.5 RUN apk add --update --no-cache ca-certificates COPY --from=build /src/bloom-tester/tools/tsdb/bloom-tester/bloom-tester /usr/bin/bloom-tester ENTRYPOINT [ "/usr/bin/bloom-tester", "--config.file=/etc/loki/config.yaml" ]
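
A note on the `shardmapper.go` hunks: a side of a binary operation is wrapped in `DownstreamSampleExpr` only when mapping it was a no-op (it could not be sharded) and it is not a literal or vector, which would be cheap to evaluate in place. The following is a minimal, self-contained sketch of that decision using toy stand-ins for `syntax.Expr`, not Loki's actual types:

```go
package main

import "fmt"

// Expr stands in for syntax.Expr; only String() matters for the no-op check.
type Expr interface{ String() string }

type literal struct{ s string }

func (l literal) String() string { return l.s }

type query struct{ s string }

func (q query) String() string { return q.s }

// downstream marks an expression that must run as-is on a single downstream querier.
type downstream struct{ inner Expr }

func (d downstream) String() string { return "downstream<" + d.inner.String() + ">" }

// isNoOp mirrors the patch: the mapping changed nothing if the string forms match.
func isNoOp(before, after Expr) bool { return before.String() == after.String() }

func isLiteral(e Expr) bool { _, ok := e.(literal); return ok }

// wrapIfUnshardable wraps a no-op mapping in a downstream expression,
// leaving literals (and, in the real code, vectors) untouched.
func wrapIfUnshardable(before, after Expr) Expr {
	if isNoOp(before, after) && !isLiteral(after) {
		return downstream{inner: before}
	}
	return after
}

func main() {
	q := query{`quantile_over_time(0.99, {a="foo"} | unwrap bytes [1s]) by (b)`}
	fmt.Println(wrapIfUnshardable(q, q))                       // downstream<quantile_over_time(...)>
	fmt.Println(wrapIfUnshardable(literal{"1"}, literal{"1"})) // 1
}
```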
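A note on the `background.go` hunk: the queue-size check moves from "load, add, compare" to a prospective atomic `Add` that is undone when the limit is exceeded, so two concurrent `Store` calls can no longer both observe the same old size and overshoot the limit together. Below is a minimal sketch of the same pattern under assumed names (`queue`, `tryReserve`); it uses the standard library's `sync/atomic.Int64` and adds a negative delta, whereas the patched code calls `Sub` on the cache's own atomic counter:

```go
package main

import (
	"fmt"
	"sync/atomic"
)

// queue models the background cache's byte accounting.
type queue struct {
	size      atomic.Int64
	sizeLimit int64
}

// tryReserve reserves size bytes if they fit under the limit.
func (q *queue) tryReserve(size int64) bool {
	// Prospectively add the new size; Add is atomic, so two concurrent
	// callers cannot both observe the same "old" value.
	newSize := q.size.Add(size)
	if newSize > q.sizeLimit {
		// Undo the reservation since the limit was exceeded.
		q.size.Add(-size)
		return false
	}
	return true
}

func main() {
	q := &queue{sizeLimit: 100}
	fmt.Println(q.tryReserve(60)) // true
	fmt.Println(q.tryReserve(60)) // false: would exceed the limit
	fmt.Println(q.size.Load())    // 60
}
```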
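A note on the `error.go` hunk and the related test changes: the expected bodies in `http_test.go`, `serialize_test.go`, and `error_test.go` lose their trailing newline (and the `b[:len(b)-1]` trimming) because `http.Error` always terminates the message with `\n`, while the new `WriteError` sets the same headers and writes the body verbatim. A small illustrative sketch, assuming a hypothetical helper name `writeErrorDirect`:

```go
package main

import (
	"fmt"
	"net/http"
	"net/http/httptest"
)

// writeErrorDirect mirrors the approach taken by the patched WriteError:
// set the headers http.Error would set, then write the message without
// appending a newline.
func writeErrorDirect(w http.ResponseWriter, msg string, status int) {
	w.Header().Set("Content-Type", "text/plain; charset=utf-8")
	w.Header().Set("X-Content-Type-Options", "nosniff")
	w.WriteHeader(status)
	fmt.Fprint(w, msg)
}

func main() {
	// http.Error appends "\n" to the body...
	rec1 := httptest.NewRecorder()
	http.Error(rec1, "multiple org IDs present", http.StatusBadRequest)

	// ...while writing directly does not.
	rec2 := httptest.NewRecorder()
	writeErrorDirect(rec2, "multiple org IDs present", http.StatusBadRequest)

	fmt.Printf("http.Error body: %q\n", rec1.Body.String()) // "multiple org IDs present\n"
	fmt.Printf("direct body:     %q\n", rec2.Body.String()) // "multiple org IDs present"
}
```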