diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index fa0eb454d29d9..a8145a27bac7f 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,5 +1,5 @@ { - "image": "grafana/loki-build-image:0.33.0", + "image": "grafana/loki-build-image:0.33.1", "containerEnv": { "BUILD_IN_CONTAINER": "false" }, diff --git a/.drone/drone.jsonnet b/.drone/drone.jsonnet index 9d0589fe22a8e..c9084a3eb37a6 100644 --- a/.drone/drone.jsonnet +++ b/.drone/drone.jsonnet @@ -151,30 +151,6 @@ local arch_image(arch, tags='') = { }], }; -local promtail_win() = pipeline('promtail-windows') { - platform: { - os: 'windows', - arch: 'amd64', - version: '1809', - }, - steps: [ - { - name: 'identify-runner', - image: 'golang:1.21.3-windowsservercore-1809', - commands: [ - 'Write-Output $env:DRONE_RUNNER_NAME', - ], - }, - { - name: 'test', - image: 'golang:1.21.3-windowsservercore-1809', - commands: [ - 'go test .\\clients\\pkg\\promtail\\targets\\windows\\... -v', - ], - }, - ], -}; - local querytee() = pipeline('querytee-amd64') + arch_image('amd64', 'main') { steps+: [ // publish for tag or main @@ -400,7 +376,7 @@ local manifest_ecr(apps, archs) = pipeline('manifest-ecr') { ], }; -local build_image_tag = '0.33.0'; +local build_image_tag = '0.33.1'; [ pipeline('loki-build-image-' + arch) { workspace: { @@ -474,23 +450,6 @@ local build_image_tag = '0.33.0'; }, ], }, - pipeline('mixins') { - workspace: { - base: '/src', - path: 'loki', - }, - steps: [ - make('lint-jsonnet', container=false) { - // Docker image defined at https://github.com/grafana/jsonnet-libs/tree/master/build - image: 'grafana/jsonnet-build:c8b75df', - depends_on: ['clone'], - }, - make('loki-mixin-check', container=false) { - depends_on: ['clone'], - when: onPRs + onPath('production/loki-mixin/**'), - }, - ], - }, pipeline('documentation-checks') { workspace: { base: '/src', @@ -654,7 +613,6 @@ local build_image_tag = '0.33.0'; }, ], }, - promtail_win(), 
logql_analyzer(), pipeline('docker-driver') { trigger+: onTagOrMain, diff --git a/.drone/drone.yml b/.drone/drone.yml index ccac7a2c6ce51..07d0e23b62149 100644 --- a/.drone/drone.yml +++ b/.drone/drone.yml @@ -17,7 +17,7 @@ steps: from_secret: docker_password repo: grafana/loki-build-image tags: - - 0.33.0-amd64 + - 0.33.1-amd64 username: from_secret: docker_username when: @@ -54,7 +54,7 @@ steps: from_secret: docker_password repo: grafana/loki-build-image tags: - - 0.33.0-arm64 + - 0.33.1-arm64 username: from_secret: docker_username when: @@ -86,7 +86,7 @@ steps: password: from_secret: docker_password spec: .drone/docker-manifest-build-image.tmpl - target: loki-build-image:0.33.0 + target: loki-build-image:0.33.1 username: from_secret: docker_username when: @@ -132,38 +132,6 @@ workspace: path: loki --- kind: pipeline -name: mixins -steps: -- commands: - - make BUILD_IN_CONTAINER=false lint-jsonnet - depends_on: - - clone - environment: {} - image: grafana/jsonnet-build:c8b75df - name: lint-jsonnet -- commands: - - make BUILD_IN_CONTAINER=false loki-mixin-check - depends_on: - - clone - environment: {} - image: grafana/loki-build-image:0.33.0 - name: loki-mixin-check - when: - event: - - pull_request - paths: - - production/loki-mixin/** -trigger: - ref: - - refs/heads/main - - refs/heads/k??? 
- - refs/tags/v* - - refs/pull/*/head -workspace: - base: /src - path: loki ---- -kind: pipeline name: documentation-checks steps: - commands: @@ -171,7 +139,7 @@ steps: depends_on: - clone environment: {} - image: grafana/loki-build-image:0.33.0 + image: grafana/loki-build-image:0.33.1 name: documentation-helm-reference-check trigger: ref: @@ -1070,28 +1038,6 @@ trigger: - refs/tags/v* --- kind: pipeline -name: promtail-windows -platform: - arch: amd64 - os: windows - version: "1809" -steps: -- commands: - - Write-Output $env:DRONE_RUNNER_NAME - image: golang:1.21.3-windowsservercore-1809 - name: identify-runner -- commands: - - go test .\clients\pkg\promtail\targets\windows\... -v - image: golang:1.21.3-windowsservercore-1809 - name: test -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline name: logql-analyzer platform: arch: amd64 @@ -1139,7 +1085,7 @@ steps: from_secret: docker_password DOCKER_USERNAME: from_secret: docker_username - image: grafana/loki-build-image:0.33.0 + image: grafana/loki-build-image:0.33.1 name: build and push privileged: true volumes: @@ -1362,6 +1308,6 @@ kind: secret name: gpg_private_key --- kind: signature -hmac: 32b44aecaad0258ed9494225595e1016a56bea960bcd0b15b2db3449bed957e0 +hmac: e0940674c7a2b5ae47c6509b0bc97dc594a054e5b881fd1962b81837d6b1dee6 ... 
diff --git a/.github/jsonnetfile.json b/.github/jsonnetfile.json index cd4469eb6e501..605c762e9f8cb 100644 --- a/.github/jsonnetfile.json +++ b/.github/jsonnetfile.json @@ -8,7 +8,7 @@ "subdir": "workflows" } }, - "version": "release-1.10.x" + "version": "main" } ], "legacyImports": true diff --git a/.github/jsonnetfile.lock.json b/.github/jsonnetfile.lock.json index ee1f7b9596b4b..395ab9190e3e1 100644 --- a/.github/jsonnetfile.lock.json +++ b/.github/jsonnetfile.lock.json @@ -8,8 +8,8 @@ "subdir": "workflows" } }, - "version": "c005223f58b83f288b655dde5bcfeff7490c7aa5", - "sum": "5K+r6Bsb8JMR1ytQjSObjvHFpH7SJBi5D4ysSwvC4/g=" + "version": "634945b73e8eed4f5161ec08810178ddeca7505b", + "sum": "BOnwSjzyOjWwv9ikwJSAgPBNnYHTU2PEDJ0PWY6nr7I=" } ], "legacyImports": false diff --git a/.github/pull_request_template.md b/.github/pull_request_template.md index 439dfa637939c..ba6f4dbe60157 100644 --- a/.github/pull_request_template.md +++ b/.github/pull_request_template.md @@ -9,8 +9,7 @@ Fixes # - [ ] Reviewed the [`CONTRIBUTING.md`](https://github.com/grafana/loki/blob/main/CONTRIBUTING.md) guide (**required**) - [ ] Documentation added - [ ] Tests updated -- [ ] `CHANGELOG.md` updated - - [ ] If the change is worth mentioning in the release notes, add `add-to-release-notes` label +- [ ] Title matches the required conventional commits format, see [here](https://www.conventionalcommits.org/en/v1.0.0/) - [ ] Changes that require user attention or interaction to upgrade are documented in `docs/sources/setup/upgrade/_index.md` - [ ] For Helm chart changes bump the Helm chart version in `production/helm/loki/Chart.yaml` and update `production/helm/loki/CHANGELOG.md` and `production/helm/loki/README.md`. 
[Example PR](https://github.com/grafana/loki/commit/d10549e3ece02120974929894ee333d07755d213) - [ ] If the change is deprecating or removing a configuration option, update the `deprecated-config.yaml` and `deleted-config.yaml` files respectively in the `tools/deprecated-config-checker` directory. [Example PR](https://github.com/grafana/loki/pull/10840/commits/0d4416a4b03739583349934b96f272fb4f685d15) diff --git a/.github/release-workflows.jsonnet b/.github/release-workflows.jsonnet index ae1f868fa651e..bf13bb5da1bbb 100644 --- a/.github/release-workflows.jsonnet +++ b/.github/release-workflows.jsonnet @@ -1,58 +1,99 @@ local lokiRelease = import 'workflows/main.jsonnet'; local build = lokiRelease.build; + +local releaseLibRef = std.filter( + function(dep) dep.source.git.remote == 'https://github.com/grafana/loki-release.git', + (import 'jsonnetfile.json').dependencies +)[0].version; + +local checkTemplate = 'grafana/loki-release/.github/workflows/check.yml@%s' % releaseLibRef; + +local imageJobs = { + loki: build.image('loki', 'cmd/loki'), + fluentd: build.image('fluent-plugin-loki', 'clients/cmd/fluentd', platform=['linux/amd64']), + 'fluent-bit': build.image('fluent-bit-plugin-loki', 'clients/cmd/fluent-bit', platform=['linux/amd64']), + logstash: build.image('logstash-output-loki', 'clients/cmd/logstash', platform=['linux/amd64']), + logcli: build.image('logcli', 'cmd/logcli'), + 'loki-canary': build.image('loki-canary', 'cmd/loki-canary'), + 'loki-canary-boringcrypto': build.image('loki-canary-boringcrypto', 'cmd/loki-canary-boringcrypto'), + promtail: build.image('promtail', 'clients/cmd/promtail'), + querytee: build.image('loki-query-tee', 'cmd/querytee', platform=['linux/amd64']), +}; + +local buildImage = 'grafana/loki-build-image:0.33.1'; +local golangCiLintVersion = 'v1.55.1'; + +local imageBuildTimeoutMin = 40; +local imagePrefix = 'grafana'; + { 'patch-release-pr.yml': std.manifestYamlDoc( lokiRelease.releasePRWorkflow( - imageJobs={ - loki: 
build.image('loki', 'cmd/loki'), - fluentd: build.image('fluentd', 'clients/cmd/fluentd', platform=['linux/amd64']), - 'fluent-bit': build.image('fluent-bit', 'clients/cmd/fluent-bit', platform=['linux/amd64']), - logstash: build.image('logstash', 'clients/cmd/logstash', platform=['linux/amd64']), - logcli: build.image('logcli', 'cmd/logcli'), - 'loki-canary': build.image('loki-canary', 'cmd/loki-canary'), - 'loki-canary-boringcrypto': build.image('loki-canary-boringcrypto', 'cmd/loki-canary-boringcrypto'), - 'loki-operator': build.image('loki-operator', 'operator', context='release/operator', platform=['linux/amd64']), - promtail: build.image('promtail', 'clients/cmd/promtail'), - querytee: build.image('querytee', 'cmd/querytee', platform=['linux/amd64']), - }, branches=['release-[0-9]+.[0-9]+.x'], - checkTemplate='grafana/loki-release/.github/workflows/check.yml@release-1.10.x', - imagePrefix='grafana', + buildImage=buildImage, + checkTemplate=checkTemplate, + golangCiLintVersion=golangCiLintVersion, + imageBuildTimeoutMin=imageBuildTimeoutMin, + imageJobs=imageJobs, + imagePrefix=imagePrefix, + releaseLibRef=releaseLibRef, releaseRepo='grafana/loki', skipArm=false, skipValidation=false, + useGitHubAppToken=true, versioningStrategy='always-bump-patch', - ), false, false + ) + { + name: 'Prepare Patch Release PR', + }, false, false ), 'minor-release-pr.yml': std.manifestYamlDoc( lokiRelease.releasePRWorkflow( - imageJobs={ - loki: build.image('loki', 'cmd/loki'), - fluentd: build.image('fluentd', 'clients/cmd/fluentd', platform=['linux/amd64']), - 'fluent-bit': build.image('fluent-bit', 'clients/cmd/fluent-bit', platform=['linux/amd64']), - logstash: build.image('logstash', 'clients/cmd/logstash', platform=['linux/amd64']), - logcli: build.image('logcli', 'cmd/logcli'), - 'loki-canary': build.image('loki-canary', 'cmd/loki-canary'), - 'loki-canary-boringcrypto': build.image('loki-canary-boringcrypto', 'cmd/loki-canary-boringcrypto'), - 'loki-operator': 
build.image('loki-operator', 'operator', context='release/operator', platform=['linux/amd64']), - promtail: build.image('promtail', 'clients/cmd/promtail'), - querytee: build.image('querytee', 'cmd/querytee', platform=['linux/amd64']), - }, branches=['k[0-9]+'], - checkTemplate='grafana/loki-release/.github/workflows/check.yml@release-1.10.x', - imagePrefix='grafana', + buildImage=buildImage, + checkTemplate=checkTemplate, + golangCiLintVersion=golangCiLintVersion, + imageBuildTimeoutMin=imageBuildTimeoutMin, + imageJobs=imageJobs, + imagePrefix=imagePrefix, + releaseLibRef=releaseLibRef, releaseRepo='grafana/loki', skipArm=false, skipValidation=false, + useGitHubAppToken=true, versioningStrategy='always-bump-minor', - ), false, false + ) + { + name: 'Prepare Minor Release PR from Weekly', + }, false, false ), 'release.yml': std.manifestYamlDoc( lokiRelease.releaseWorkflow( - branches=['release-[0-9]+.[0-9]+.x', 'k[0-9]+'], + branches=['release-[0-9]+.[0-9]+.x', 'k[0-9]+', 'main'], getDockerCredsFromVault=true, imagePrefix='grafana', + releaseLibRef=releaseLibRef, releaseRepo='grafana/loki', + useGitHubAppToken=false, ), false, false ), + 'check.yml': std.manifestYamlDoc({ + name: 'check', + on: { + pull_request: {}, + push: { + branches: ['main'], + }, + }, + jobs: { + check: { + uses: checkTemplate, + with: { + build_image: buildImage, + golang_ci_lint_version: golangCiLintVersion, + release_lib_ref: releaseLibRef, + skip_validation: false, + use_github_app_token: true, + }, + }, + }, + }), } diff --git a/.github/vendor/github.com/grafana/loki-release/workflows/build.libsonnet b/.github/vendor/github.com/grafana/loki-release/workflows/build.libsonnet index cdd6b82463e4b..1857836d66655 100644 --- a/.github/vendor/github.com/grafana/loki-release/workflows/build.libsonnet +++ b/.github/vendor/github.com/grafana/loki-release/workflows/build.libsonnet @@ -8,6 +8,7 @@ local releaseLibStep = common.releaseLibStep; image: function( name, path, + dockerfile='Dockerfile', 
context='release', platform=[ 'linux/amd64', @@ -42,24 +43,69 @@ local releaseLibStep = common.releaseLibStep; |||), step.new('Build and export', 'docker/build-push-action@v5') - + step.withTimeoutMinutes(25) + + step.withTimeoutMinutes('${{ fromJSON(env.BUILD_TIMEOUT) }}') + step.withIf('${{ fromJSON(needs.version.outputs.pr_created) }}') + + step.withEnv({ + IMAGE_TAG: '${{ needs.version.outputs.version }}', + }) + step.with({ context: context, - file: 'release/%s/Dockerfile' % path, + file: 'release/%s/%s' % [path, dockerfile], platforms: '${{ matrix.platform }}', tags: '${{ env.IMAGE_PREFIX }}/%s:${{ needs.version.outputs.version }}-${{ steps.platform.outputs.platform_short }}' % [name], outputs: 'type=docker,dest=release/images/%s-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar' % name, + 'build-args': 'IMAGE_TAG=${{ needs.version.outputs.version }}', }), step.new('upload artifacts', 'google-github-actions/upload-cloud-storage@v2') + step.withIf('${{ fromJSON(needs.version.outputs.pr_created) }}') + step.with({ path: 'release/images/%s-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar' % name, - destination: 'loki-build-artifacts/${{ github.sha }}/images', //TODO: make bucket configurable + destination: '${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}/images', //TODO: make bucket configurable process_gcloudignore: false, }), ]), + + weeklyImage: function( + name, + path, + dockerfile='Dockerfile', + context='release', + platform=[ + 'linux/amd64', + 'linux/arm64', + 'linux/arm', + ] + ) + job.new() + + job.withSteps([ + common.fetchReleaseLib, + common.fetchReleaseRepo, + common.setupNode, + + step.new('Set up QEMU', 'docker/setup-qemu-action@v3'), + step.new('set up docker buildx', 'docker/setup-buildx-action@v3'), + step.new('Login to DockerHub (from vault)', 'grafana/shared-workflows/actions/dockerhub-login@main'), + + releaseStep('Get weekly version') + + step.withId('weekly-version') + + 
step.withRun(||| + echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT + |||), + + step.new('Build and push', 'docker/build-push-action@v5') + + step.withTimeoutMinutes('${{ fromJSON(env.BUILD_TIMEOUT) }}') + + step.with({ + context: context, + file: 'release/%s/%s' % [path, dockerfile], + platforms: '%s' % std.join(',', platform), + push: true, + tags: '${{ env.IMAGE_PREFIX }}/%s:${{ steps.weekly-version.outputs.version }}' % [name], + 'build-args': 'IMAGE_TAG=${{ steps.weekly-version.outputs.version }}', + }), + ]), + + version: job.new() + job.withSteps([ @@ -67,19 +113,44 @@ local releaseLibStep = common.releaseLibStep; common.fetchReleaseRepo, common.setupNode, common.extractBranchName, + common.githubAppToken, + common.setToken, releaseLibStep('get release version') + step.withId('version') + step.withRun(||| npm install - npm exec -- release-please release-pr \ - --consider-all-branches \ - --dry-run \ - --dry-run-output release.json \ - --release-type simple \ - --repo-url="${{ env.RELEASE_REPO }}" \ - --target-branch "${{ steps.extract_branch.outputs.branch }}" \ - --token="${{ secrets.GH_TOKEN }}" \ - --versioning-strategy "${{ env.VERSIONING_STRATEGY }}" + + if [[ -z "${{ env.RELEASE_AS }}" ]]; then + npm exec -- release-please release-pr \ + --consider-all-branches \ + --dry-run \ + --dry-run-output release.json \ + --group-pull-request-title-pattern "chore\${scope}: release\${component} \${version}" \ + --manifest-file .release-please-manifest.json \ + --pull-request-title-pattern "chore\${scope}: release\${component} \${version}" \ + --release-type simple \ + --repo-url "${{ env.RELEASE_REPO }}" \ + --separate-pull-requests false \ + --target-branch "${{ steps.extract_branch.outputs.branch }}" \ + --token "${{ steps.github_app_token.outputs.token }}" \ + --versioning-strategy "${{ env.VERSIONING_STRATEGY }}" + else + npm exec -- release-please release-pr \ + --consider-all-branches \ + --dry-run \ + --dry-run-output release.json \ + 
--group-pull-request-title-pattern "chore\${scope}: release\${component} \${version}" \ + --manifest-file .release-please-manifest.json \ + --pull-request-title-pattern "chore\${scope}: release\${component} \${version}" \ + --release-type simple \ + --repo-url "${{ env.RELEASE_REPO }}" \ + --separate-pull-requests false \ + --target-branch "${{ steps.extract_branch.outputs.branch }}" \ + --token "${{ steps.github_app_token.outputs.token }}" \ + --release-as "${{ env.RELEASE_AS }}" + fi + + cat release.json if [[ `jq length release.json` -gt 1 ]]; then echo 'release-please would create more than 1 PR, so cannot determine correct version' @@ -102,11 +173,12 @@ local releaseLibStep = common.releaseLibStep; pr_created: '${{ steps.version.outputs.pr_created }}', }), - dist: function(buildImage, skipArm=true) + dist: function(buildImage, skipArm=true, useGCR=false, makeTargets=['dist', 'packages']) job.new() + job.withSteps([ common.fetchReleaseRepo, common.googleAuth, + common.setupGoogleCloudSdk, step.new('get nfpm signing keys', 'grafana/shared-workflows/actions/get-vault-secrets@main') + step.withId('get-secrets') + step.with({ @@ -117,6 +189,7 @@ local releaseLibStep = common.releaseLibStep; }), releaseStep('build artifacts') + + step.withIf('${{ fromJSON(needs.version.outputs.pr_created) }}') + step.withEnv({ BUILD_IN_CONTAINER: false, DRONE_TAG: '${{ needs.version.outputs.version }}', @@ -125,30 +198,41 @@ local releaseLibStep = common.releaseLibStep; SKIP_ARM: skipArm, }) //TODO: the workdir here is loki specific - + step.withRun(||| - cat < $NFPM_SIGNING_KEY_FILE - make dist packages - EOF - ||| % buildImage), - - step.new('upload build artifacts', 'google-github-actions/upload-cloud-storage@v2') + + step.withRun( + ( + if useGCR then ||| + gcloud auth configure-docker + ||| else '' + ) + + ||| + cat < $NFPM_SIGNING_KEY_FILE + make %s + EOF + ||| % [buildImage, std.join(' ', makeTargets)] + ), + + step.new('upload artifacts', 
'google-github-actions/upload-cloud-storage@v2') + + step.withIf('${{ fromJSON(needs.version.outputs.pr_created) }}') + step.with({ path: 'release/dist', - destination: 'loki-build-artifacts/${{ github.sha }}', //TODO: make bucket configurable + destination: '${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}', //TODO: make bucket configurable process_gcloudignore: false, }), - ]), + ]) + + job.withOutputs({ + version: '${{ needs.version.outputs.version }}', + }), } diff --git a/.github/vendor/github.com/grafana/loki-release/workflows/common.libsonnet b/.github/vendor/github.com/grafana/loki-release/workflows/common.libsonnet index e3346f2bd5e4e..48cca43890ed7 100644 --- a/.github/vendor/github.com/grafana/loki-release/workflows/common.libsonnet +++ b/.github/vendor/github.com/grafana/loki-release/workflows/common.libsonnet @@ -61,8 +61,8 @@ withEnv: function(env) { env: env, }, - withSecrets: function(env) { - secrets: env, + withSecrets: function(secrets) { + secrets: secrets, }, }, @@ -86,6 +86,7 @@ + $.step.with({ repository: 'grafana/loki-release', path: 'lib', + ref: '${{ env.RELEASE_LIB_REF }}', }), setupNode: $.step.new('setup node', 'actions/setup-node@v4') @@ -121,4 +122,39 @@ + $.step.withRun(||| git config --global --add safe.directory "$GITHUB_WORKSPACE" |||), + + githubAppToken: $.step.new('get github app token', 'actions/github-app-token@v1') + + $.step.withId('get_github_app_token') + + $.step.withIf('${{ fromJSON(env.USE_GITHUB_APP_TOKEN) }}') + + $.step.with({ + 'app-id': '${{ secrets.APP_ID }}', + 'private-key': '${{ secrets.APP_PRIVATE_KEY }}', + // By setting owner, we should get access to all repositories in current owner's installation: https://github.com/marketplace/actions/create-github-app-token#create-a-token-for-all-repositories-in-the-current-owners-installation + owner: '${{ github.repository_owner }}', + }), + + setToken: $.step.new('set github token') + + $.step.withId('github_app_token') + + $.step.withRun(||| + if [[ 
"${USE_GITHUB_APP_TOKEN}" == "true" ]]; then + echo "token=${{ steps.get_github_app_token.outputs.token }}" >> $GITHUB_OUTPUT + else + echo "token=${{ secrets.GH_TOKEN }}" >> $GITHUB_OUTPUT + fi + |||), + + validationJob: function(useGCR=false) + $.job.new() + + $.job.withContainer({ + image: '${{ inputs.build_image }}', + } + if useGCR then { + credentials: { + username: '_json_key', + password: '${{ secrets.GCS_SERVICE_ACCOUNT_KEY }}', + }, + } else {}) + + $.job.withEnv({ + BUILD_IN_CONTAINER: false, + SKIP_VALIDATION: '${{ inputs.skip_validation }}', + }), } diff --git a/.github/vendor/github.com/grafana/loki-release/workflows/main.jsonnet b/.github/vendor/github.com/grafana/loki-release/workflows/main.jsonnet index 0a033b81221ff..d274d21a0571d 100644 --- a/.github/vendor/github.com/grafana/loki-release/workflows/main.jsonnet +++ b/.github/vendor/github.com/grafana/loki-release/workflows/main.jsonnet @@ -5,16 +5,27 @@ build: import 'build.libsonnet', release: import 'release.libsonnet', validate: import 'validate.libsonnet', + validateGel: import 'validate-gel.libsonnet', releasePRWorkflow: function( branches=['release-[0-9]+.[0-9]+.x', 'k[0-9]+'], + buildArtifactsBucket='loki-build-artifacts', buildImage='grafana/loki-build-image:0.33.0', + changelogPath='CHANGELOG.md', checkTemplate='./.github/workflows/check.yml', + distMakeTargets=['dist', 'packages'], + dryRun=false, dockerUsername='grafana', + golangCiLintVersion='v1.55.1', + imageBuildTimeoutMin=25, imageJobs={}, imagePrefix='grafana', + releaseAs=null, + releaseLibRef='main', releaseRepo='grafana/loki-release', - skipArm=true, + skipArm=false, skipValidation=false, + useGitHubAppToken=true, + useGCR=false, versioningStrategy='always-bump-patch', ) { name: 'create release PR', @@ -32,31 +43,51 @@ group: 'create-release-pr-${{ github.sha }}', }, env: { - RELEASE_REPO: releaseRepo, + BUILD_ARTIFACTS_BUCKET: buildArtifactsBucket, + BUILD_TIMEOUT: imageBuildTimeoutMin, + CHANGELOG_PATH: changelogPath, 
DOCKER_USERNAME: dockerUsername, + DRY_RUN: dryRun, IMAGE_PREFIX: imagePrefix, + RELEASE_LIB_REF: releaseLibRef, + RELEASE_REPO: releaseRepo, SKIP_VALIDATION: skipValidation, + USE_GITHUB_APP_TOKEN: useGitHubAppToken, VERSIONING_STRATEGY: versioningStrategy, - }, + } + if releaseAs != null then { + RELEASE_AS: releaseAs, + } else {}, local validationSteps = ['check'], jobs: { check: {} + $.job.withUses(checkTemplate) + $.job.with({ skip_validation: skipValidation, - }), + build_image: buildImage, + golang_ci_lint_version: golangCiLintVersion, + release_lib_ref: releaseLibRef, + use_github_app_token: useGitHubAppToken, + }) + + if useGCR then $.job.withSecrets({ + GCS_SERVICE_ACCOUNT_KEY: '${{ secrets.GCS_SERVICE_ACCOUNT_KEY }}', + }) else {}, version: $.build.version + $.common.job.withNeeds(validationSteps), - dist: $.build.dist(buildImage, skipArm) + $.common.job.withNeeds(['version']), + dist: $.build.dist(buildImage, skipArm, useGCR, distMakeTargets) + $.common.job.withNeeds(['version']), } + std.mapWithKey(function(name, job) job + $.common.job.withNeeds(['version']), imageJobs) + { local buildImageSteps = ['dist'] + std.objectFields(imageJobs), 'create-release-pr': $.release.createReleasePR + $.common.job.withNeeds(buildImageSteps), }, }, releaseWorkflow: function( - releaseRepo='grafana/loki-release', - dockerUsername='grafana', - imagePrefix='grafana', branches=['release-[0-9].[0-9].x', 'k[0-9]*'], - getDockerCredsFromVault=false + buildArtifactsBucket='loki-build-artifacts', + dockerUsername='grafanabot', + getDockerCredsFromVault=false, + imagePrefix='grafana', + publishBucket='', + publishToGCS=false, + releaseLibRef='main', + releaseRepo='grafana/loki-release', + useGitHubAppToken=true, ) { name: 'create release', on: { @@ -73,28 +104,58 @@ group: 'create-release-${{ github.sha }}', }, env: { - RELEASE_REPO: releaseRepo, + BUILD_ARTIFACTS_BUCKET: buildArtifactsBucket, IMAGE_PREFIX: imagePrefix, + RELEASE_LIB_REF: releaseLibRef, + RELEASE_REPO: 
releaseRepo, + USE_GITHUB_APP_TOKEN: useGitHubAppToken, + } + if publishToGCS then { + PUBLISH_BUCKET: publishBucket, + PUBLISH_TO_GCS: true, + } else { + PUBLISH_TO_GCS: false, }, jobs: { shouldRelease: $.release.shouldRelease, createRelease: $.release.createRelease, publishImages: $.release.publishImages(getDockerCredsFromVault, dockerUsername), + publishRelease: $.release.publishRelease, }, }, - check: function( - buildImage='grafana/loki-build-image:0.33.0', - ) { + check: { name: 'check', on: { workflow_call: { inputs: { + build_image: { + description: 'loki build image to use', + required: true, + type: 'string', + }, skip_validation: { default: false, description: 'skip validation steps', required: false, type: 'boolean', }, + golang_ci_lint_version: { + default: 'v1.55.1', + description: 'version of golangci-lint to use', + required: false, + type: 'string', + }, + release_lib_ref: { + default: 'main', + description: 'git ref of release library to use', + required: false, + type: 'string', + }, + use_github_app_token: { + default: true, + description: 'whether to use the GitHub App token for GH_TOKEN secret', + required: false, + type: 'boolean', + }, }, }, }, @@ -106,6 +167,67 @@ concurrency: { group: 'check-${{ github.sha }}', }, - jobs: $.validate(buildImage), + env: { + RELEASE_LIB_REF: '${{ inputs.release_lib_ref }}', + USE_GITHUB_APP_TOKEN: '${{ inputs.use_github_app_token }}', + }, + jobs: $.validate, + }, + checkGel: { + name: 'check', + on: { + workflow_call: { + inputs: { + build_image: { + description: 'loki build image to use', + required: true, + type: 'string', + }, + skip_validation: { + default: false, + description: 'skip validation steps', + required: false, + type: 'boolean', + }, + golang_ci_lint_version: { + default: 'v1.55.1', + description: 'version of golangci-lint to use', + required: false, + type: 'string', + }, + release_lib_ref: { + default: 'main', + description: 'git ref of release library to use', + required: false, + type: 
'string', + }, + use_github_app_token: { + default: true, + description: 'whether to use the GitHub App token for GH_TOKEN secret', + required: false, + type: 'boolean', + }, + }, + secrets: { + GCS_SERVICE_ACCOUNT_KEY: { + description: 'GCS service account key', + required: true, + }, + }, + }, + }, + permissions: { + contents: 'write', + 'pull-requests': 'write', + 'id-token': 'write', + }, + concurrency: { + group: 'check-${{ github.sha }}', + }, + env: { + RELEASE_LIB_REF: '${{ inputs.release_lib_ref }}', + USE_GITHUB_APP_TOKEN: '${{ inputs.use_github_app_token }}', + }, + jobs: $.validateGel, }, } diff --git a/.github/vendor/github.com/grafana/loki-release/workflows/release.libsonnet b/.github/vendor/github.com/grafana/loki-release/workflows/release.libsonnet index 6bf2daa8f0334..62f065b40288a 100644 --- a/.github/vendor/github.com/grafana/loki-release/workflows/release.libsonnet +++ b/.github/vendor/github.com/grafana/loki-release/workflows/release.libsonnet @@ -9,7 +9,7 @@ local releaseLibStep = common.releaseLibStep; // sha to release and pull aritfacts from. If you need to change this, make sure // to change it in both places. 
//TODO: make bucket configurable -local pullRequestFooter = 'Merging this PR will release the [artifacts](https://console.cloud.google.com/storage/browser/loki-build-artifacts/${SHA}) of ${SHA}'; +local pullRequestFooter = 'Merging this PR will release the [artifacts](https://console.cloud.google.com/storage/browser/${BUILD_ARTIFACTS_BUCKET}/${SHA}) of ${SHA}'; { createReleasePR: @@ -19,6 +19,8 @@ local pullRequestFooter = 'Merging this PR will release the [artifacts](https:// common.fetchReleaseLib, common.setupNode, common.extractBranchName, + common.githubAppToken, + common.setToken, releaseLibStep('release please') + step.withId('release') @@ -30,19 +32,23 @@ local pullRequestFooter = 'Merging this PR will release the [artifacts](https:// //TODO backport action should not bring over autorelease: pending label + step.withRun(||| npm install - echo "Pull request footer: %s" npm exec -- release-please release-pr \ + --changelog-path "${CHANGELOG_PATH}" \ --consider-all-branches \ - --label "backport main,autorelease: pending,type/docs" \ + --group-pull-request-title-pattern "chore\${scope}: release\${component} \${version}" \ + --label "backport main,autorelease: pending,product-approved" \ + --manifest-file .release-please-manifest.json \ --pull-request-footer "%s" \ + --pull-request-title-pattern "chore\${scope}: release\${component} \${version}" \ + --release-as "${{ needs.dist.outputs.version }}" \ --release-type simple \ --repo-url "${{ env.RELEASE_REPO }}" \ - --target-branch "${{ steps.extract_branch.outputs.branch }}" \ - --token "${{ secrets.GH_TOKEN }}" \ - --versioning-strategy "${{ env.VERSIONING_STRATEGY }}" \ --separate-pull-requests false \ - --debug - ||| % [pullRequestFooter, pullRequestFooter]), + --target-branch "${{ steps.extract_branch.outputs.branch }}" \ + --token "${{ steps.github_app_token.outputs.token }}" \ + --dry-run ${{ fromJSON(env.DRY_RUN) }} + + ||| % pullRequestFooter), ]), shouldRelease: job.new() @@ -61,6 +67,8 @@ local 
pullRequestFooter = 'Merging this PR will release the [artifacts](https:// shouldRelease: '${{ steps.should_release.outputs.shouldRelease }}', sha: '${{ steps.should_release.outputs.sha }}', name: '${{ steps.should_release.outputs.name }}', + prNumber: '${{ steps.should_release.outputs.prNumber }}', + isLatest: '${{ steps.should_release.outputs.isLatest }}', branch: '${{ steps.extract_branch.outputs.branch }}', }), @@ -73,6 +81,8 @@ local pullRequestFooter = 'Merging this PR will release the [artifacts](https:// common.setupNode, common.googleAuth, common.setupGoogleCloudSdk, + common.githubAppToken, + common.setToken, // exits with code 1 if the url does not match // meaning there are no artifacts for that sha @@ -80,33 +90,67 @@ local pullRequestFooter = 'Merging this PR will release the [artifacts](https:// releaseStep('download binaries') + step.withRun(||| echo "downloading binaries to $(pwd)/dist" - gsutil cp -r gs://loki-build-artifacts/${{ needs.shouldRelease.outputs.sha }}/dist . + gsutil cp -r gs://${BUILD_ARTIFACTS_BUCKET}/${{ needs.shouldRelease.outputs.sha }}/dist . 
+ |||), + + releaseStep('check if release exists') + + step.withId('check_release') + + step.withEnv({ + GH_TOKEN: '${{ steps.github_app_token.outputs.token }}', + }) + + step.withRun(||| + set +e + isDraft="$(gh release view --json="isDraft" --jq=".isDraft" ${{ needs.shouldRelease.outputs.name }} 2>&1)" + set -e + if [[ "$isDraft" == "release not found" ]]; then + echo "exists=false" >> $GITHUB_OUTPUT + else + echo "exists=true" >> $GITHUB_OUTPUT + fi + + if [[ "$isDraft" == "true" ]]; then + echo "draft=true" >> $GITHUB_OUTPUT + fi |||), releaseLibStep('create release') + step.withId('release') + + step.withIf('${{ !fromJSON(steps.check_release.outputs.exists) }}') + step.withRun(||| npm install npm exec -- release-please github-release \ --draft \ --release-type simple \ - --repo-url="${{ env.RELEASE_REPO }}" \ + --repo-url "${{ env.RELEASE_REPO }}" \ --target-branch "${{ needs.shouldRelease.outputs.branch }}" \ - --token="${{ secrets.GH_TOKEN }}" + --token "${{ steps.github_app_token.outputs.token }}" \ + --shas-to-tag "${{ needs.shouldRelease.outputs.prNumber }}:${{ needs.shouldRelease.outputs.sha }}" |||), releaseStep('upload artifacts') + step.withId('upload') + step.withEnv({ - GH_TOKEN: '${{ secrets.GH_TOKEN }}', + GH_TOKEN: '${{ steps.github_app_token.outputs.token }}', }) + step.withRun(||| - gh release upload ${{ needs.shouldRelease.outputs.name }} dist/* - gh release edit ${{ needs.shouldRelease.outputs.name }} --draft=false + gh release upload --clobber ${{ needs.shouldRelease.outputs.name }} dist/* |||), + + step.new('release artifacts', 'google-github-actions/upload-cloud-storage@v2') + + step.withIf('${{ fromJSON(env.PUBLISH_TO_GCS) }}') + + step.with({ + path: 'release/dist', + destination: '${{ env.PUBLISH_BUCKET }}', + parent: false, + process_gcloudignore: false, + }), ]) + job.withOutputs({ sha: '${{ needs.shouldRelease.outputs.sha }}', + name: '${{ needs.shouldRelease.outputs.name }}', + isLatest: '${{ needs.shouldRelease.outputs.isLatest 
}}', + draft: '${{ steps.check_release.outputs.draft }}', + exists: '${{ steps.check_release.outputs.exists }}', }), publishImages: function(getDockerCredsFromVault=false, dockerUsername='grafanabot') @@ -141,4 +185,20 @@ local pullRequestFooter = 'Merging this PR will release the [artifacts](https:// }), ] ), + + publishRelease: job.new() + + job.withNeeds(['createRelease', 'publishImages']) + + job.withSteps([ + common.fetchReleaseRepo, + common.githubAppToken, + common.setToken, + releaseStep('publish release') + + step.withIf('${{ !fromJSON(needs.createRelease.outputs.exists) || (needs.createRelease.outputs.draft && fromJSON(needs.createRelease.outputs.draft)) }}') + + step.withEnv({ + GH_TOKEN: '${{ steps.github_app_token.outputs.token }}', + }) + + step.withRun(||| + gh release edit ${{ needs.createRelease.outputs.name }} --draft=false --latest=${{ needs.createRelease.outputs.isLatest }} + |||), + ]), } diff --git a/.github/vendor/github.com/grafana/loki-release/workflows/validate-gel.libsonnet b/.github/vendor/github.com/grafana/loki-release/workflows/validate-gel.libsonnet new file mode 100644 index 0000000000000..af741478a7cbe --- /dev/null +++ b/.github/vendor/github.com/grafana/loki-release/workflows/validate-gel.libsonnet @@ -0,0 +1,97 @@ +local common = import 'common.libsonnet'; +local job = common.job; +local step = common.step; +local _validationJob = common.validationJob; + +local setupValidationDeps = function(job) job { + steps: [ + common.checkout, + common.fetchReleaseLib, + common.fixDubiousOwnership, + step.new('install dependencies') + + step.withIf('${{ !fromJSON(env.SKIP_VALIDATION) }}') + + step.withRun(||| + apt update + apt install -qy tar xz-utils + |||), + step.new('install shellcheck', './lib/actions/install-binary') + + step.withIf('${{ !fromJSON(env.SKIP_VALIDATION) }}') + + step.with({ + binary: 'shellcheck', + version: '0.9.0', + download_url: 
'https://github.com/koalaman/shellcheck/releases/download/v${version}/shellcheck-v${version}.linux.x86_64.tar.xz', + tarball_binary_path: '*/${binary}', + smoke_test: '${binary} --version', + tar_args: 'xvf', + }), + step.new('install jsonnetfmt', './lib/actions/install-binary') + + step.withIf('${{ !fromJSON(env.SKIP_VALIDATION) }}') + + step.with({ + binary: 'jsonnetfmt', + version: '0.18.0', + download_url: 'https://github.com/google/go-jsonnet/releases/download/v${version}/go-jsonnet_${version}_Linux_x86_64.tar.gz', + tarball_binary_path: '${binary}', + smoke_test: '${binary} --version', + }), + ] + job.steps, +}; + +local validationJob = _validationJob(true); + + +{ + local validationMakeStep = function(name, target) + step.new(name) + + step.withIf('${{ !fromJSON(env.SKIP_VALIDATION) }}') + + step.withRun(common.makeTarget(target)), + + test: setupValidationDeps( + validationJob + + job.withSteps([ + validationMakeStep('test', 'test'), + ]) + ), + + lint: setupValidationDeps( + validationJob + + job.withSteps( + [ + step.new('golangci-lint', 'golangci/golangci-lint-action@08e2f20817b15149a52b5b3ebe7de50aff2ba8c5') + + step.withIf('${{ !fromJSON(env.SKIP_VALIDATION) }}') + + step.with({ + version: '${{ inputs.golang_ci_lint_version }}', + 'only-new-issues': true, + args: '--skip-files cmd/enterprise-logs/fips.go', + }), + ], + ) + ), + + check: setupValidationDeps( + validationJob + + job.withSteps([ + validationMakeStep('build enterprise logs', 'all'), + validationMakeStep('check docs', 'check-docs'), + ]) + { + steps+: [ + step.new('build docs website') + + step.withIf('${{ !fromJSON(env.SKIP_VALIDATION) }}') + + step.withRun(||| + cat <> $GITHUB_OUTPUT working-directory: "release" + - id: "get_github_app_token" + if: "${{ fromJSON(env.USE_GITHUB_APP_TOKEN) }}" + name: "get github app token" + uses: "actions/github-app-token@v1" + with: + app-id: "${{ secrets.APP_ID }}" + owner: "${{ github.repository_owner }}" + private-key: "${{ secrets.APP_PRIVATE_KEY }}" 
+ - id: "github_app_token" + name: "set github token" + run: | + if [[ "${USE_GITHUB_APP_TOKEN}" == "true" ]]; then + echo "token=${{ steps.get_github_app_token.outputs.token }}" >> $GITHUB_OUTPUT + else + echo "token=${{ secrets.GH_TOKEN }}" >> $GITHUB_OUTPUT + fi - env: SHA: "${{ github.sha }}" id: "release" name: "release please" run: | npm install - echo "Pull request footer: Merging this PR will release the [artifacts](https://console.cloud.google.com/storage/browser/loki-build-artifacts/${SHA}) of ${SHA}" npm exec -- release-please release-pr \ + --changelog-path "${CHANGELOG_PATH}" \ --consider-all-branches \ - --label "backport main,autorelease: pending,type/docs" \ - --pull-request-footer "Merging this PR will release the [artifacts](https://console.cloud.google.com/storage/browser/loki-build-artifacts/${SHA}) of ${SHA}" \ + --group-pull-request-title-pattern "chore\${scope}: release\${component} \${version}" \ + --label "backport main,autorelease: pending,product-approved" \ + --manifest-file .release-please-manifest.json \ + --pull-request-footer "Merging this PR will release the [artifacts](https://console.cloud.google.com/storage/browser/${BUILD_ARTIFACTS_BUCKET}/${SHA}) of ${SHA}" \ + --pull-request-title-pattern "chore\${scope}: release\${component} \${version}" \ + --release-as "${{ needs.dist.outputs.version }}" \ --release-type simple \ --repo-url "${{ env.RELEASE_REPO }}" \ - --target-branch "${{ steps.extract_branch.outputs.branch }}" \ - --token "${{ secrets.GH_TOKEN }}" \ - --versioning-strategy "${{ env.VERSIONING_STRATEGY }}" \ --separate-pull-requests false \ - --debug + --target-branch "${{ steps.extract_branch.outputs.branch }}" \ + --token "${{ steps.github_app_token.outputs.token }}" \ + --dry-run ${{ fromJSON(env.DRY_RUN) }} + working-directory: "lib" dist: needs: - "version" + outputs: + version: "${{ needs.version.outputs.version }}" runs-on: "ubuntu-latest" steps: - name: "pull code to release" @@ -78,6 +110,10 @@ jobs: uses: 
"google-github-actions/auth@v2" with: credentials_json: "${{ secrets.GCS_SERVICE_ACCOUNT_KEY }}" + - name: "Set up Cloud SDK" + uses: "google-github-actions/setup-gcloud@v2" + with: + version: ">= 452.0.0" - id: "get-secrets" name: "get nfpm signing keys" uses: "grafana/shared-workflows/actions/get-vault-secrets@main" @@ -91,6 +127,7 @@ jobs: IMAGE_TAG: "${{ needs.version.outputs.version }}" NFPM_SIGNING_KEY_FILE: "nfpm-private-key.key" SKIP_ARM: false + if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "build artifacts" run: | cat < $NFPM_SIGNING_KEY_FILE make dist packages EOF working-directory: "release" - - name: "upload build artifacts" + - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" + name: "upload artifacts" uses: "google-github-actions/upload-cloud-storage@v2" with: - destination: "loki-build-artifacts/${{ github.sha }}" + destination: "${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}" path: "release/dist" process_gcloudignore: false fluent-bit: @@ -125,6 +163,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "pull code to release" uses: "actions/checkout@v4" @@ -152,22 +191,25 @@ jobs: echo "platform=${platform}" >> $GITHUB_OUTPUT echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT working-directory: "release" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" + - env: + IMAGE_TAG: "${{ needs.version.outputs.version }}" + if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" - timeout-minutes: 25 + timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" uses: "docker/build-push-action@v5" with: + build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" file: "release/clients/cmd/fluent-bit/Dockerfile" - outputs: "type=docker,dest=release/images/fluent-bit-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" + outputs: 
"type=docker,dest=release/images/fluent-bit-plugin-loki-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" platforms: "${{ matrix.platform }}" - tags: "${{ env.IMAGE_PREFIX }}/fluent-bit:${{ needs.version.outputs.version }}-${{ steps.platform.outputs.platform_short }}" + tags: "${{ env.IMAGE_PREFIX }}/fluent-bit-plugin-loki:${{ needs.version.outputs.version }}-${{ steps.platform.outputs.platform_short }}" - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "upload artifacts" uses: "google-github-actions/upload-cloud-storage@v2" with: - destination: "loki-build-artifacts/${{ github.sha }}/images" - path: "release/images/fluent-bit-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" + destination: "${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}/images" + path: "release/images/fluent-bit-plugin-loki-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" process_gcloudignore: false strategy: fail-fast: true @@ -183,6 +225,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "pull code to release" uses: "actions/checkout@v4" @@ -210,22 +253,25 @@ jobs: echo "platform=${platform}" >> $GITHUB_OUTPUT echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT working-directory: "release" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" + - env: + IMAGE_TAG: "${{ needs.version.outputs.version }}" + if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" - timeout-minutes: 25 + timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" uses: "docker/build-push-action@v5" with: + build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" file: "release/clients/cmd/fluentd/Dockerfile" - outputs: "type=docker,dest=release/images/fluentd-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" + 
outputs: "type=docker,dest=release/images/fluent-plugin-loki-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" platforms: "${{ matrix.platform }}" - tags: "${{ env.IMAGE_PREFIX }}/fluentd:${{ needs.version.outputs.version }}-${{ steps.platform.outputs.platform_short }}" + tags: "${{ env.IMAGE_PREFIX }}/fluent-plugin-loki:${{ needs.version.outputs.version }}-${{ steps.platform.outputs.platform_short }}" - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "upload artifacts" uses: "google-github-actions/upload-cloud-storage@v2" with: - destination: "loki-build-artifacts/${{ github.sha }}/images" - path: "release/images/fluentd-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" + destination: "${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}/images" + path: "release/images/fluent-plugin-loki-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" process_gcloudignore: false strategy: fail-fast: true @@ -241,6 +287,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "pull code to release" uses: "actions/checkout@v4" @@ -268,11 +315,14 @@ jobs: echo "platform=${platform}" >> $GITHUB_OUTPUT echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT working-directory: "release" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" + - env: + IMAGE_TAG: "${{ needs.version.outputs.version }}" + if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" - timeout-minutes: 25 + timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" uses: "docker/build-push-action@v5" with: + build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" file: "release/cmd/logcli/Dockerfile" outputs: "type=docker,dest=release/images/logcli-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" @@ -282,7 +332,7 @@ jobs: 
name: "upload artifacts" uses: "google-github-actions/upload-cloud-storage@v2" with: - destination: "loki-build-artifacts/${{ github.sha }}/images" + destination: "${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}/images" path: "release/images/logcli-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" process_gcloudignore: false strategy: @@ -301,6 +351,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "pull code to release" uses: "actions/checkout@v4" @@ -328,22 +379,25 @@ jobs: echo "platform=${platform}" >> $GITHUB_OUTPUT echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT working-directory: "release" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" + - env: + IMAGE_TAG: "${{ needs.version.outputs.version }}" + if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" - timeout-minutes: 25 + timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" uses: "docker/build-push-action@v5" with: + build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" file: "release/clients/cmd/logstash/Dockerfile" - outputs: "type=docker,dest=release/images/logstash-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" + outputs: "type=docker,dest=release/images/logstash-output-loki-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" platforms: "${{ matrix.platform }}" - tags: "${{ env.IMAGE_PREFIX }}/logstash:${{ needs.version.outputs.version }}-${{ steps.platform.outputs.platform_short }}" + tags: "${{ env.IMAGE_PREFIX }}/logstash-output-loki:${{ needs.version.outputs.version }}-${{ steps.platform.outputs.platform_short }}" - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "upload artifacts" uses: "google-github-actions/upload-cloud-storage@v2" with: - destination: "loki-build-artifacts/${{ github.sha }}/images" 
- path: "release/images/logstash-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" + destination: "${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}/images" + path: "release/images/logstash-output-loki-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" process_gcloudignore: false strategy: fail-fast: true @@ -359,6 +413,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "pull code to release" uses: "actions/checkout@v4" @@ -386,11 +441,14 @@ jobs: echo "platform=${platform}" >> $GITHUB_OUTPUT echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT working-directory: "release" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" + - env: + IMAGE_TAG: "${{ needs.version.outputs.version }}" + if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" - timeout-minutes: 25 + timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" uses: "docker/build-push-action@v5" with: + build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" file: "release/cmd/loki/Dockerfile" outputs: "type=docker,dest=release/images/loki-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" @@ -400,7 +458,7 @@ jobs: name: "upload artifacts" uses: "google-github-actions/upload-cloud-storage@v2" with: - destination: "loki-build-artifacts/${{ github.sha }}/images" + destination: "${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}/images" path: "release/images/loki-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" process_gcloudignore: false strategy: @@ -419,6 +477,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "pull code to release" uses: "actions/checkout@v4" @@ -446,11 +505,14 @@ jobs: echo "platform=${platform}" >> $GITHUB_OUTPUT 
echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT working-directory: "release" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" + - env: + IMAGE_TAG: "${{ needs.version.outputs.version }}" + if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" - timeout-minutes: 25 + timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" uses: "docker/build-push-action@v5" with: + build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" file: "release/cmd/loki-canary/Dockerfile" outputs: "type=docker,dest=release/images/loki-canary-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" @@ -460,7 +522,7 @@ jobs: name: "upload artifacts" uses: "google-github-actions/upload-cloud-storage@v2" with: - destination: "loki-build-artifacts/${{ github.sha }}/images" + destination: "${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}/images" path: "release/images/loki-canary-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" process_gcloudignore: false strategy: @@ -479,6 +541,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "pull code to release" uses: "actions/checkout@v4" @@ -506,11 +569,14 @@ jobs: echo "platform=${platform}" >> $GITHUB_OUTPUT echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT working-directory: "release" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" + - env: + IMAGE_TAG: "${{ needs.version.outputs.version }}" + if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" - timeout-minutes: 25 + timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" uses: "docker/build-push-action@v5" with: + build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" file: "release/cmd/loki-canary-boringcrypto/Dockerfile" outputs: 
"type=docker,dest=release/images/loki-canary-boringcrypto-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" @@ -520,7 +586,7 @@ jobs: name: "upload artifacts" uses: "google-github-actions/upload-cloud-storage@v2" with: - destination: "loki-build-artifacts/${{ github.sha }}/images" + destination: "${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}/images" path: "release/images/loki-canary-boringcrypto-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" process_gcloudignore: false strategy: @@ -530,64 +596,6 @@ jobs: - "linux/amd64" - "linux/arm64" - "linux/arm" - loki-operator: - needs: - - "version" - runs-on: "ubuntu-latest" - steps: - - name: "pull release library code" - uses: "actions/checkout@v4" - with: - path: "lib" - repository: "grafana/loki-release" - - name: "pull code to release" - uses: "actions/checkout@v4" - with: - path: "release" - repository: "${{ env.RELEASE_REPO }}" - - name: "setup node" - uses: "actions/setup-node@v4" - with: - node-version: 20 - - name: "auth gcs" - uses: "google-github-actions/auth@v2" - with: - credentials_json: "${{ secrets.GCS_SERVICE_ACCOUNT_KEY }}" - - name: "Set up QEMU" - uses: "docker/setup-qemu-action@v3" - - name: "set up docker buildx" - uses: "docker/setup-buildx-action@v3" - - id: "platform" - name: "parse image platform" - run: | - mkdir -p images - - platform="$(echo "${{ matrix.platform}}" | sed "s/\(.*\)\/\(.*\)/\1-\2/")" - echo "platform=${platform}" >> $GITHUB_OUTPUT - echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT - working-directory: "release" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" - name: "Build and export" - timeout-minutes: 25 - uses: "docker/build-push-action@v5" - with: - context: "release/operator" - file: "release/operator/Dockerfile" - outputs: "type=docker,dest=release/images/loki-operator-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" - 
platforms: "${{ matrix.platform }}" - tags: "${{ env.IMAGE_PREFIX }}/loki-operator:${{ needs.version.outputs.version }}-${{ steps.platform.outputs.platform_short }}" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" - name: "upload artifacts" - uses: "google-github-actions/upload-cloud-storage@v2" - with: - destination: "loki-build-artifacts/${{ github.sha }}/images" - path: "release/images/loki-operator-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" - process_gcloudignore: false - strategy: - fail-fast: true - matrix: - platform: - - "linux/amd64" promtail: needs: - "version" @@ -597,6 +605,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "pull code to release" uses: "actions/checkout@v4" @@ -624,11 +633,14 @@ jobs: echo "platform=${platform}" >> $GITHUB_OUTPUT echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT working-directory: "release" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" + - env: + IMAGE_TAG: "${{ needs.version.outputs.version }}" + if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" - timeout-minutes: 25 + timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" uses: "docker/build-push-action@v5" with: + build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" file: "release/clients/cmd/promtail/Dockerfile" outputs: "type=docker,dest=release/images/promtail-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" @@ -638,7 +650,7 @@ jobs: name: "upload artifacts" uses: "google-github-actions/upload-cloud-storage@v2" with: - destination: "loki-build-artifacts/${{ github.sha }}/images" + destination: "${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}/images" path: "release/images/promtail-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" process_gcloudignore: false 
strategy: @@ -657,6 +669,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "pull code to release" uses: "actions/checkout@v4" @@ -684,22 +697,25 @@ jobs: echo "platform=${platform}" >> $GITHUB_OUTPUT echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT working-directory: "release" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" + - env: + IMAGE_TAG: "${{ needs.version.outputs.version }}" + if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" - timeout-minutes: 25 + timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" uses: "docker/build-push-action@v5" with: + build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" file: "release/cmd/querytee/Dockerfile" - outputs: "type=docker,dest=release/images/querytee-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" + outputs: "type=docker,dest=release/images/loki-query-tee-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" platforms: "${{ matrix.platform }}" - tags: "${{ env.IMAGE_PREFIX }}/querytee:${{ needs.version.outputs.version }}-${{ steps.platform.outputs.platform_short }}" + tags: "${{ env.IMAGE_PREFIX }}/loki-query-tee:${{ needs.version.outputs.version }}-${{ steps.platform.outputs.platform_short }}" - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "upload artifacts" uses: "google-github-actions/upload-cloud-storage@v2" with: - destination: "loki-build-artifacts/${{ github.sha }}/images" - path: "release/images/querytee-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" + destination: "${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}/images" + path: "release/images/loki-query-tee-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" process_gcloudignore: false strategy: fail-fast: true @@ -718,6 +734,7 @@ 
jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "pull code to release" uses: "actions/checkout@v4" @@ -733,19 +750,58 @@ jobs: run: | echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> $GITHUB_OUTPUT working-directory: "release" + - id: "get_github_app_token" + if: "${{ fromJSON(env.USE_GITHUB_APP_TOKEN) }}" + name: "get github app token" + uses: "actions/github-app-token@v1" + with: + app-id: "${{ secrets.APP_ID }}" + owner: "${{ github.repository_owner }}" + private-key: "${{ secrets.APP_PRIVATE_KEY }}" + - id: "github_app_token" + name: "set github token" + run: | + if [[ "${USE_GITHUB_APP_TOKEN}" == "true" ]]; then + echo "token=${{ steps.get_github_app_token.outputs.token }}" >> $GITHUB_OUTPUT + else + echo "token=${{ secrets.GH_TOKEN }}" >> $GITHUB_OUTPUT + fi - id: "version" name: "get release version" run: | npm install - npm exec -- release-please release-pr \ - --consider-all-branches \ - --dry-run \ - --dry-run-output release.json \ - --release-type simple \ - --repo-url="${{ env.RELEASE_REPO }}" \ - --target-branch "${{ steps.extract_branch.outputs.branch }}" \ - --token="${{ secrets.GH_TOKEN }}" \ - --versioning-strategy "${{ env.VERSIONING_STRATEGY }}" + + if [[ -z "${{ env.RELEASE_AS }}" ]]; then + npm exec -- release-please release-pr \ + --consider-all-branches \ + --dry-run \ + --dry-run-output release.json \ + --group-pull-request-title-pattern "chore\${scope}: release\${component} \${version}" \ + --manifest-file .release-please-manifest.json \ + --pull-request-title-pattern "chore\${scope}: release\${component} \${version}" \ + --release-type simple \ + --repo-url "${{ env.RELEASE_REPO }}" \ + --separate-pull-requests false \ + --target-branch "${{ steps.extract_branch.outputs.branch }}" \ + --token "${{ steps.github_app_token.outputs.token }}" \ + --versioning-strategy "${{ env.VERSIONING_STRATEGY }}" + else + npm exec -- release-please release-pr \ 
+ --consider-all-branches \ + --dry-run \ + --dry-run-output release.json \ + --group-pull-request-title-pattern "chore\${scope}: release\${component} \${version}" \ + --manifest-file .release-please-manifest.json \ + --pull-request-title-pattern "chore\${scope}: release\${component} \${version}" \ + --release-type simple \ + --repo-url "${{ env.RELEASE_REPO }}" \ + --separate-pull-requests false \ + --target-branch "${{ steps.extract_branch.outputs.branch }}" \ + --token "${{ steps.github_app_token.outputs.token }}" \ + --release-as "${{ env.RELEASE_AS }}" + fi + + cat release.json if [[ `jq length release.json` -gt 1 ]]; then echo 'release-please would create more than 1 PR, so cannot determine correct version' @@ -762,7 +818,7 @@ jobs: echo "pr_created=true" >> $GITHUB_OUTPUT fi working-directory: "lib" -name: "create release PR" +name: "Prepare Minor Release PR from Weekly" "on": push: branches: diff --git a/.github/workflows/nix-ci.yaml b/.github/workflows/nix-ci.yaml index 70e418425b9c2..d5900d7fa03b7 100644 --- a/.github/workflows/nix-ci.yaml +++ b/.github/workflows/nix-ci.yaml @@ -29,7 +29,7 @@ jobs: - uses: cachix/install-nix-action@v22 with: nix_path: nixpkgs=channel:nixos-unstable + - run: nix build --print-build-logs .#promtail - run: nix build --print-build-logs .#logcli - run: nix build --print-build-logs .#loki - run: nix build --print-build-logs .#loki-canary - - run: nix build --print-build-logs .#promtail diff --git a/.github/workflows/operator-check-prepare-release-commit.yml b/.github/workflows/operator-check-prepare-release-commit.yml new file mode 100644 index 0000000000000..bbc27643c1abe --- /dev/null +++ b/.github/workflows/operator-check-prepare-release-commit.yml @@ -0,0 +1,42 @@ +name: operator-check-prepare-release-commit + +on: + pull_request: + paths: + - 'operator/**' + branches: + - main + +jobs: + check-commit: + runs-on: ubuntu-latest + if: | + github.event.pull_request.head.ref == 
'release-please--branches--main--components--operator' && + contains(github.event.pull_request.title, 'chore( operator): community release') + steps: + - name: Extract release version + id: pr_semver + run: | + PR_TITLE="${{ github.event.pull_request.title }}" + SEMVER=$(echo "$PR_TITLE" | sed -n 's/^chore( operator): community release \([0-9]\+\.[0-9]\+\.[0-9]\+\)$/\1/p') + echo "semver=$SEMVER" >> $GITHUB_OUTPUT + + - name: Checkout code + uses: actions/checkout@v4 + with: + ref: main + path: "release" + + - name: Check main commits for prepare release commit + id: check_commit + env: + GH_TOKEN: ${{ secrets.GH_TOKEN }} + working-directory: "release" + run: | + COMMIT=$(gh search commits "chore(operator): prepare community release v${{ steps.pr_semver.outputs.semver }}") + if [ -n "$COMMIT" ]; then + echo "Prepare release commit found." + else + echo "No prepare release commit found for the release version ${{ steps.pr_semver.outputs.semver }}" + exit 1 + fi diff --git a/.github/workflows/operator-publish-operator-hub.yml b/.github/workflows/operator-publish-operator-hub.yml new file mode 100644 index 0000000000000..c3fa69b466298 --- /dev/null +++ b/.github/workflows/operator-publish-operator-hub.yml @@ -0,0 +1,23 @@ +name: "Publish release on operator hub" +on: + release: + types: [published] + +jobs: + operator-hub-prod-release: + if: startsWith(github.event.release.tag_name, 'operator/') + uses: ./.github/workflows/operator-reusable-hub-release.yml + with: + org: redhat-openshift-ecosystem + repo: community-operators-prod + secrets: + GRAFANABOT_GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} + + operator-hub-community-release: + if: startsWith(github.event.release.tag_name, 'operator/') + uses: ./.github/workflows/operator-reusable-hub-release.yml + with: + org: k8s-operatorhub + repo: community-operators + secrets: + GRAFANABOT_GITHUB_TOKEN: ${{ secrets.GH_TOKEN }} diff --git a/.github/workflows/operator-release-please.yml 
b/.github/workflows/operator-release-please.yml new file mode 100644 index 0000000000000..77be2bc58a237 --- /dev/null +++ b/.github/workflows/operator-release-please.yml @@ -0,0 +1,42 @@ +name: operator-release-please + +on: + push: + paths: + - 'operator/**' + branches: + - main + +permissions: + contents: write + pull-requests: write + +jobs: + releasePlease: + runs-on: ubuntu-latest + outputs: + release_created: ${{ steps.release.outputs.operator--release_created }} + release_name: ${{ steps.release.outputs.operator--tag_name }} + steps: + - uses: google-github-actions/release-please-action@v4 + id: release + with: + path: operator + config-file: operator/release-please-config.json + token: ${{ secrets.GH_TOKEN }} + publishRelease: + needs: + - "releasePlease" + runs-on: ubuntu-latest + if: ${{ needs.releasePlease.outputs.release_created }} + steps: + - name: "pull code to release" + uses: "actions/checkout@v4" + with: + path: "release" + - name: "publish release" + env: + GH_TOKEN: ${{ secrets.GH_TOKEN }} + working-directory: "release" + run: | + gh release edit "${{ needs.releasePlease.outputs.release_name }}" --draft=false --latest=false \ No newline at end of file diff --git a/.github/workflows/operator-reusable-hub-release.yml b/.github/workflows/operator-reusable-hub-release.yml new file mode 100644 index 0000000000000..862d072401dd3 --- /dev/null +++ b/.github/workflows/operator-reusable-hub-release.yml @@ -0,0 +1,108 @@ +name: Reusable - Create operator hub pull request + +on: + workflow_call: + inputs: + org: + type: string + required: true + repo: + type: string + required: true + secrets: + GRAFANABOT_GITHUB_TOKEN: + required: true + +jobs: + create-operator-pull-request: + runs-on: ubuntu-latest + steps: + - name: Set redhat-openshift-ecosystem specific variables + if: ${{ inputs.org == 'redhat-openshift-ecosystem' }} + env: + ocpDir: '-openshift' + ocpSupportedVersions: 'v4.12' + run: | + echo "ocpDir=${ocpDir}" >> $GITHUB_ENV + echo 
"ocpSupportedVersions=${ocpSupportedVersions}" >> $GITHUB_ENV + + - name: Set version as env variable + env: + TAG: ${{ github.ref_name }} + run: | + echo $TAG + TAG=${TAG:10} # remove "operator/v" (prefix) + echo version=${TAG} >> $GITHUB_ENV # update GitHub ENV vars + + - name: Sync fork + env: + GH_TOKEN: ${{ secrets.GRAFANABOT_GITHUB_TOKEN }} + run: | + # synchronizing the fork is fast, and avoids the need to fetch the full upstream repo + # (fetching the upstream repo with "--depth 1" would lead to "shallow update not allowed" + # error when pushing back to the origin repo) + gh repo sync grafanabot/${{ inputs.repo }} \ + --source ${{ inputs.org }}/${{ inputs.repo }} \ + --force + + - name: Checkout operatorhub repo + uses: actions/checkout@v4 + with: + repository: grafanabot/${{ inputs.repo }} + token: ${{ secrets.GRAFANABOT_GITHUB_TOKEN }} + + - name: Checkout loki to tmp/ directory + uses: actions/checkout@v4 + with: + repository: grafana/loki + token: ${{ secrets.GRAFANABOT_GITHUB_TOKEN }} + path: tmp/ + + - name: Update version + env: + VERSION: ${{ env.version }} + OCP_DIR: ${{ env.ocpDir || ''}} + run: | + mkdir operators/loki-operator/${VERSION} + cp -R ./tmp/operator/bundle/community${OCP_DIR}/* operators/loki-operator/${VERSION} + rm -f "operators/loki-operator/${VERSION}/bundle.Dockerfile" + rm -rf ./tmp + + - name: Add OpenShift Supported versions to redhat catalog + if: ${{ inputs.org == 'redhat-openshift-ecosystem' }} + uses: fjogeleit/yaml-update-action@main + with: + valueFile: "operators/loki-operator/${{ env.version }}/metadata/annotations.yaml" + propertyPath: "annotations['com.redhat.openshift.versions']" + value: ${{ env.ocpSupportedVersions }} + commitChange: false + + - name: Use CLA approved github bot + run: | + git config user.name grafanabot + git config user.email 43478413+grafanabot@users.noreply.github.com + + - name: Create pull request against ${{ inputs.org }}/${{ inputs.repo }} + env: + VERSION: ${{ env.version }} + GH_TOKEN: 
${{ secrets.GRAFANABOT_GITHUB_TOKEN }} + run: | + message="Update the loki-operator to $VERSION" + body="Release loki-operator \`$VERSION\`. + + cc @periklis @xperimental @JoaoBraveCoding @btaani @shwetaap + " + branch="update-loki-operator-to-${VERSION}" + + # gh pr create doesn't have a way to explicitly specify different head and base + # repositories currently, but it will implicitly pick up the head from a different + # repository if you set up a tracking branch + + git checkout -b $branch + git add -A + git commit -s -m "$message" + git push -f --set-upstream origin $branch + gh pr create --title "$message" \ + --body "$body" \ + --repo ${{ inputs.org }}/${{ inputs.repo }} \ + --base main diff --git a/.github/workflows/patch-release-pr.yml b/.github/workflows/patch-release-pr.yml index 411fff87d4105..831ca48b4087d 100644 --- a/.github/workflows/patch-release-pr.yml +++ b/.github/workflows/patch-release-pr.yml @@ -1,16 +1,26 @@ concurrency: group: "create-release-pr-${{ github.sha }}" env: + BUILD_ARTIFACTS_BUCKET: "loki-build-artifacts" + BUILD_TIMEOUT: 40 + CHANGELOG_PATH: "CHANGELOG.md" DOCKER_USERNAME: "grafana" + DRY_RUN: false IMAGE_PREFIX: "grafana" + RELEASE_LIB_REF: "main" RELEASE_REPO: "grafana/loki" SKIP_VALIDATION: false + USE_GITHUB_APP_TOKEN: true VERSIONING_STRATEGY: "always-bump-patch" jobs: check: - uses: "grafana/loki-release/.github/workflows/check.yml@release-1.10.x" + uses: "grafana/loki-release/.github/workflows/check.yml@main" with: + build_image: "grafana/loki-build-image:0.33.1" + golang_ci_lint_version: "v1.55.1" + release_lib_ref: "main" skip_validation: false + use_github_app_token: true create-release-pr: needs: - "dist" @@ -21,7 +31,6 @@ jobs: - "loki" - "loki-canary" - "loki-canary-boringcrypto" - - "loki-operator" - "promtail" - "querytee" runs-on: "ubuntu-latest" @@ -35,6 +44,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "setup node" uses: 
"actions/setup-node@v4" @@ -45,28 +55,50 @@ jobs: run: | echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> $GITHUB_OUTPUT working-directory: "release" + - id: "get_github_app_token" + if: "${{ fromJSON(env.USE_GITHUB_APP_TOKEN) }}" + name: "get github app token" + uses: "actions/github-app-token@v1" + with: + app-id: "${{ secrets.APP_ID }}" + owner: "${{ github.repository_owner }}" + private-key: "${{ secrets.APP_PRIVATE_KEY }}" + - id: "github_app_token" + name: "set github token" + run: | + if [[ "${USE_GITHUB_APP_TOKEN}" == "true" ]]; then + echo "token=${{ steps.get_github_app_token.outputs.token }}" >> $GITHUB_OUTPUT + else + echo "token=${{ secrets.GH_TOKEN }}" >> $GITHUB_OUTPUT + fi - env: SHA: "${{ github.sha }}" id: "release" name: "release please" run: | npm install - echo "Pull request footer: Merging this PR will release the [artifacts](https://console.cloud.google.com/storage/browser/loki-build-artifacts/${SHA}) of ${SHA}" npm exec -- release-please release-pr \ + --changelog-path "${CHANGELOG_PATH}" \ --consider-all-branches \ - --label "backport main,autorelease: pending,type/docs" \ - --pull-request-footer "Merging this PR will release the [artifacts](https://console.cloud.google.com/storage/browser/loki-build-artifacts/${SHA}) of ${SHA}" \ + --group-pull-request-title-pattern "chore\${scope}: release\${component} \${version}" \ + --label "backport main,autorelease: pending,product-approved" \ + --manifest-file .release-please-manifest.json \ + --pull-request-footer "Merging this PR will release the [artifacts](https://console.cloud.google.com/storage/browser/${BUILD_ARTIFACTS_BUCKET}/${SHA}) of ${SHA}" \ + --pull-request-title-pattern "chore\${scope}: release\${component} \${version}" \ + --release-as "${{ needs.dist.outputs.version }}" \ --release-type simple \ --repo-url "${{ env.RELEASE_REPO }}" \ - --target-branch "${{ steps.extract_branch.outputs.branch }}" \ - --token "${{ secrets.GH_TOKEN }}" \ - --versioning-strategy "${{ 
env.VERSIONING_STRATEGY }}" \ --separate-pull-requests false \ - --debug + --target-branch "${{ steps.extract_branch.outputs.branch }}" \ + --token "${{ steps.github_app_token.outputs.token }}" \ + --dry-run ${{ fromJSON(env.DRY_RUN) }} + working-directory: "lib" dist: needs: - "version" + outputs: + version: "${{ needs.version.outputs.version }}" runs-on: "ubuntu-latest" steps: - name: "pull code to release" @@ -78,6 +110,10 @@ jobs: uses: "google-github-actions/auth@v2" with: credentials_json: "${{ secrets.GCS_SERVICE_ACCOUNT_KEY }}" + - name: "Set up Cloud SDK" + uses: "google-github-actions/setup-gcloud@v2" + with: + version: ">= 452.0.0" - id: "get-secrets" name: "get nfpm signing keys" uses: "grafana/shared-workflows/actions/get-vault-secrets@main" @@ -91,6 +127,7 @@ jobs: IMAGE_TAG: "${{ needs.version.outputs.version }}" NFPM_SIGNING_KEY_FILE: "nfpm-private-key.key" SKIP_ARM: false + if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "build artifacts" run: | cat < $NFPM_SIGNING_KEY_FILE make dist packages EOF working-directory: "release" - - name: "upload build artifacts" + - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" + name: "upload artifacts" uses: "google-github-actions/upload-cloud-storage@v2" with: - destination: "loki-build-artifacts/${{ github.sha }}" + destination: "${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}" path: "release/dist" process_gcloudignore: false fluent-bit: @@ -125,6 +163,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "pull code to release" uses: "actions/checkout@v4" @@ -152,22 +191,25 @@ jobs: echo "platform=${platform}" >> $GITHUB_OUTPUT echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT working-directory: "release" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" + - env: + IMAGE_TAG: "${{ needs.version.outputs.version }}" + if: "${{ 
fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" - timeout-minutes: 25 + timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" uses: "docker/build-push-action@v5" with: + build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" file: "release/clients/cmd/fluent-bit/Dockerfile" - outputs: "type=docker,dest=release/images/fluent-bit-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" + outputs: "type=docker,dest=release/images/fluent-bit-plugin-loki-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" platforms: "${{ matrix.platform }}" - tags: "${{ env.IMAGE_PREFIX }}/fluent-bit:${{ needs.version.outputs.version }}-${{ steps.platform.outputs.platform_short }}" + tags: "${{ env.IMAGE_PREFIX }}/fluent-bit-plugin-loki:${{ needs.version.outputs.version }}-${{ steps.platform.outputs.platform_short }}" - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "upload artifacts" uses: "google-github-actions/upload-cloud-storage@v2" with: - destination: "loki-build-artifacts/${{ github.sha }}/images" - path: "release/images/fluent-bit-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" + destination: "${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}/images" + path: "release/images/fluent-bit-plugin-loki-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" process_gcloudignore: false strategy: fail-fast: true @@ -183,6 +225,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "pull code to release" uses: "actions/checkout@v4" @@ -210,22 +253,25 @@ jobs: echo "platform=${platform}" >> $GITHUB_OUTPUT echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT working-directory: "release" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" + - env: + IMAGE_TAG: "${{ needs.version.outputs.version }}" 
+ if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" - timeout-minutes: 25 + timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" uses: "docker/build-push-action@v5" with: + build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" file: "release/clients/cmd/fluentd/Dockerfile" - outputs: "type=docker,dest=release/images/fluentd-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" + outputs: "type=docker,dest=release/images/fluent-plugin-loki-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" platforms: "${{ matrix.platform }}" - tags: "${{ env.IMAGE_PREFIX }}/fluentd:${{ needs.version.outputs.version }}-${{ steps.platform.outputs.platform_short }}" + tags: "${{ env.IMAGE_PREFIX }}/fluent-plugin-loki:${{ needs.version.outputs.version }}-${{ steps.platform.outputs.platform_short }}" - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "upload artifacts" uses: "google-github-actions/upload-cloud-storage@v2" with: - destination: "loki-build-artifacts/${{ github.sha }}/images" - path: "release/images/fluentd-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" + destination: "${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}/images" + path: "release/images/fluent-plugin-loki-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" process_gcloudignore: false strategy: fail-fast: true @@ -241,6 +287,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "pull code to release" uses: "actions/checkout@v4" @@ -268,11 +315,14 @@ jobs: echo "platform=${platform}" >> $GITHUB_OUTPUT echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT working-directory: "release" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" + - env: + IMAGE_TAG: "${{ needs.version.outputs.version }}" + if: "${{ 
fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" - timeout-minutes: 25 + timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" uses: "docker/build-push-action@v5" with: + build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" file: "release/cmd/logcli/Dockerfile" outputs: "type=docker,dest=release/images/logcli-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" @@ -282,7 +332,7 @@ jobs: name: "upload artifacts" uses: "google-github-actions/upload-cloud-storage@v2" with: - destination: "loki-build-artifacts/${{ github.sha }}/images" + destination: "${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}/images" path: "release/images/logcli-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" process_gcloudignore: false strategy: @@ -301,6 +351,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "pull code to release" uses: "actions/checkout@v4" @@ -328,22 +379,25 @@ jobs: echo "platform=${platform}" >> $GITHUB_OUTPUT echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT working-directory: "release" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" + - env: + IMAGE_TAG: "${{ needs.version.outputs.version }}" + if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" - timeout-minutes: 25 + timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" uses: "docker/build-push-action@v5" with: + build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" file: "release/clients/cmd/logstash/Dockerfile" - outputs: "type=docker,dest=release/images/logstash-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" + outputs: "type=docker,dest=release/images/logstash-output-loki-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" platforms: "${{ matrix.platform }}" - 
tags: "${{ env.IMAGE_PREFIX }}/logstash:${{ needs.version.outputs.version }}-${{ steps.platform.outputs.platform_short }}" + tags: "${{ env.IMAGE_PREFIX }}/logstash-output-loki:${{ needs.version.outputs.version }}-${{ steps.platform.outputs.platform_short }}" - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "upload artifacts" uses: "google-github-actions/upload-cloud-storage@v2" with: - destination: "loki-build-artifacts/${{ github.sha }}/images" - path: "release/images/logstash-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" + destination: "${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}/images" + path: "release/images/logstash-output-loki-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" process_gcloudignore: false strategy: fail-fast: true @@ -359,6 +413,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "pull code to release" uses: "actions/checkout@v4" @@ -386,11 +441,14 @@ jobs: echo "platform=${platform}" >> $GITHUB_OUTPUT echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT working-directory: "release" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" + - env: + IMAGE_TAG: "${{ needs.version.outputs.version }}" + if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" - timeout-minutes: 25 + timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" uses: "docker/build-push-action@v5" with: + build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" file: "release/cmd/loki/Dockerfile" outputs: "type=docker,dest=release/images/loki-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" @@ -400,7 +458,7 @@ jobs: name: "upload artifacts" uses: "google-github-actions/upload-cloud-storage@v2" with: - destination: "loki-build-artifacts/${{ github.sha }}/images" + destination: "${{ 
env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}/images" path: "release/images/loki-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" process_gcloudignore: false strategy: @@ -419,6 +477,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "pull code to release" uses: "actions/checkout@v4" @@ -446,11 +505,14 @@ jobs: echo "platform=${platform}" >> $GITHUB_OUTPUT echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT working-directory: "release" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" + - env: + IMAGE_TAG: "${{ needs.version.outputs.version }}" + if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" - timeout-minutes: 25 + timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" uses: "docker/build-push-action@v5" with: + build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" file: "release/cmd/loki-canary/Dockerfile" outputs: "type=docker,dest=release/images/loki-canary-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" @@ -460,7 +522,7 @@ jobs: name: "upload artifacts" uses: "google-github-actions/upload-cloud-storage@v2" with: - destination: "loki-build-artifacts/${{ github.sha }}/images" + destination: "${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}/images" path: "release/images/loki-canary-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" process_gcloudignore: false strategy: @@ -479,6 +541,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "pull code to release" uses: "actions/checkout@v4" @@ -506,11 +569,14 @@ jobs: echo "platform=${platform}" >> $GITHUB_OUTPUT echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT working-directory: "release" - - if: "${{ 
fromJSON(needs.version.outputs.pr_created) }}" + - env: + IMAGE_TAG: "${{ needs.version.outputs.version }}" + if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" - timeout-minutes: 25 + timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" uses: "docker/build-push-action@v5" with: + build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" file: "release/cmd/loki-canary-boringcrypto/Dockerfile" outputs: "type=docker,dest=release/images/loki-canary-boringcrypto-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" @@ -520,7 +586,7 @@ jobs: name: "upload artifacts" uses: "google-github-actions/upload-cloud-storage@v2" with: - destination: "loki-build-artifacts/${{ github.sha }}/images" + destination: "${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}/images" path: "release/images/loki-canary-boringcrypto-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" process_gcloudignore: false strategy: @@ -530,64 +596,6 @@ jobs: - "linux/amd64" - "linux/arm64" - "linux/arm" - loki-operator: - needs: - - "version" - runs-on: "ubuntu-latest" - steps: - - name: "pull release library code" - uses: "actions/checkout@v4" - with: - path: "lib" - repository: "grafana/loki-release" - - name: "pull code to release" - uses: "actions/checkout@v4" - with: - path: "release" - repository: "${{ env.RELEASE_REPO }}" - - name: "setup node" - uses: "actions/setup-node@v4" - with: - node-version: 20 - - name: "auth gcs" - uses: "google-github-actions/auth@v2" - with: - credentials_json: "${{ secrets.GCS_SERVICE_ACCOUNT_KEY }}" - - name: "Set up QEMU" - uses: "docker/setup-qemu-action@v3" - - name: "set up docker buildx" - uses: "docker/setup-buildx-action@v3" - - id: "platform" - name: "parse image platform" - run: | - mkdir -p images - - platform="$(echo "${{ matrix.platform}}" | sed "s/\(.*\)\/\(.*\)/\1-\2/")" - echo "platform=${platform}" >> $GITHUB_OUTPUT - echo "platform_short=$(echo 
${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT - working-directory: "release" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" - name: "Build and export" - timeout-minutes: 25 - uses: "docker/build-push-action@v5" - with: - context: "release/operator" - file: "release/operator/Dockerfile" - outputs: "type=docker,dest=release/images/loki-operator-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" - platforms: "${{ matrix.platform }}" - tags: "${{ env.IMAGE_PREFIX }}/loki-operator:${{ needs.version.outputs.version }}-${{ steps.platform.outputs.platform_short }}" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" - name: "upload artifacts" - uses: "google-github-actions/upload-cloud-storage@v2" - with: - destination: "loki-build-artifacts/${{ github.sha }}/images" - path: "release/images/loki-operator-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" - process_gcloudignore: false - strategy: - fail-fast: true - matrix: - platform: - - "linux/amd64" promtail: needs: - "version" @@ -597,6 +605,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "pull code to release" uses: "actions/checkout@v4" @@ -624,11 +633,14 @@ jobs: echo "platform=${platform}" >> $GITHUB_OUTPUT echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT working-directory: "release" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" + - env: + IMAGE_TAG: "${{ needs.version.outputs.version }}" + if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" - timeout-minutes: 25 + timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" uses: "docker/build-push-action@v5" with: + build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" file: "release/clients/cmd/promtail/Dockerfile" outputs: "type=docker,dest=release/images/promtail-${{ 
needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" @@ -638,7 +650,7 @@ jobs: name: "upload artifacts" uses: "google-github-actions/upload-cloud-storage@v2" with: - destination: "loki-build-artifacts/${{ github.sha }}/images" + destination: "${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}/images" path: "release/images/promtail-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" process_gcloudignore: false strategy: @@ -657,6 +669,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "pull code to release" uses: "actions/checkout@v4" @@ -684,22 +697,25 @@ jobs: echo "platform=${platform}" >> $GITHUB_OUTPUT echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT working-directory: "release" - - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" + - env: + IMAGE_TAG: "${{ needs.version.outputs.version }}" + if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" - timeout-minutes: 25 + timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" uses: "docker/build-push-action@v5" with: + build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" file: "release/cmd/querytee/Dockerfile" - outputs: "type=docker,dest=release/images/querytee-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" + outputs: "type=docker,dest=release/images/loki-query-tee-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" platforms: "${{ matrix.platform }}" - tags: "${{ env.IMAGE_PREFIX }}/querytee:${{ needs.version.outputs.version }}-${{ steps.platform.outputs.platform_short }}" + tags: "${{ env.IMAGE_PREFIX }}/loki-query-tee:${{ needs.version.outputs.version }}-${{ steps.platform.outputs.platform_short }}" - if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "upload artifacts" uses: 
"google-github-actions/upload-cloud-storage@v2" with: - destination: "loki-build-artifacts/${{ github.sha }}/images" - path: "release/images/querytee-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" + destination: "${{ env.BUILD_ARTIFACTS_BUCKET }}/${{ github.sha }}/images" + path: "release/images/loki-query-tee-${{ needs.version.outputs.version}}-${{ steps.platform.outputs.platform }}.tar" process_gcloudignore: false strategy: fail-fast: true @@ -718,6 +734,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "pull code to release" uses: "actions/checkout@v4" @@ -733,19 +750,58 @@ jobs: run: | echo "branch=${GITHUB_HEAD_REF:-${GITHUB_REF#refs/heads/}}" >> $GITHUB_OUTPUT working-directory: "release" + - id: "get_github_app_token" + if: "${{ fromJSON(env.USE_GITHUB_APP_TOKEN) }}" + name: "get github app token" + uses: "actions/github-app-token@v1" + with: + app-id: "${{ secrets.APP_ID }}" + owner: "${{ github.repository_owner }}" + private-key: "${{ secrets.APP_PRIVATE_KEY }}" + - id: "github_app_token" + name: "set github token" + run: | + if [[ "${USE_GITHUB_APP_TOKEN}" == "true" ]]; then + echo "token=${{ steps.get_github_app_token.outputs.token }}" >> $GITHUB_OUTPUT + else + echo "token=${{ secrets.GH_TOKEN }}" >> $GITHUB_OUTPUT + fi - id: "version" name: "get release version" run: | npm install - npm exec -- release-please release-pr \ - --consider-all-branches \ - --dry-run \ - --dry-run-output release.json \ - --release-type simple \ - --repo-url="${{ env.RELEASE_REPO }}" \ - --target-branch "${{ steps.extract_branch.outputs.branch }}" \ - --token="${{ secrets.GH_TOKEN }}" \ - --versioning-strategy "${{ env.VERSIONING_STRATEGY }}" + + if [[ -z "${{ env.RELEASE_AS }}" ]]; then + npm exec -- release-please release-pr \ + --consider-all-branches \ + --dry-run \ + --dry-run-output release.json \ + --group-pull-request-title-pattern "chore\${scope}: 
release\${component} \${version}" \ + --manifest-file .release-please-manifest.json \ + --pull-request-title-pattern "chore\${scope}: release\${component} \${version}" \ + --release-type simple \ + --repo-url "${{ env.RELEASE_REPO }}" \ + --separate-pull-requests false \ + --target-branch "${{ steps.extract_branch.outputs.branch }}" \ + --token "${{ steps.github_app_token.outputs.token }}" \ + --versioning-strategy "${{ env.VERSIONING_STRATEGY }}" + else + npm exec -- release-please release-pr \ + --consider-all-branches \ + --dry-run \ + --dry-run-output release.json \ + --group-pull-request-title-pattern "chore\${scope}: release\${component} \${version}" \ + --manifest-file .release-please-manifest.json \ + --pull-request-title-pattern "chore\${scope}: release\${component} \${version}" \ + --release-type simple \ + --repo-url "${{ env.RELEASE_REPO }}" \ + --separate-pull-requests false \ + --target-branch "${{ steps.extract_branch.outputs.branch }}" \ + --token "${{ steps.github_app_token.outputs.token }}" \ + --release-as "${{ env.RELEASE_AS }}" + fi + + cat release.json if [[ `jq length release.json` -gt 1 ]]; then echo 'release-please would create more than 1 PR, so cannot determine correct version' @@ -762,7 +818,7 @@ jobs: echo "pr_created=true" >> $GITHUB_OUTPUT fi working-directory: "lib" -name: "create release PR" +name: "Prepare Patch Release PR" "on": push: branches: diff --git a/.github/workflows/promtail-windows-test.yml b/.github/workflows/promtail-windows-test.yml new file mode 100644 index 0000000000000..cb47ae2831612 --- /dev/null +++ b/.github/workflows/promtail-windows-test.yml @@ -0,0 +1,24 @@ +name: Promtail Windows Test +on: + pull_request: + branches: ["main", "k*", "release-[0-9]+.[0-9]+.x"] + push: + tags: ['v[0-9].[0-9]+.[0-9]+'] + branches: ["main", "k*", "release-[0-9]+.[0-9]+.x"] +jobs: + build: + runs-on: windows-latest + strategy: + matrix: + go-version: [ '1.21.9', '1.22.2' ] + steps: + - uses: actions/checkout@v4 + - name: Setup Go 
${{ matrix.go-version }} + uses: actions/setup-go@v4 + with: + go-version: ${{ matrix.go-version }} + # You can test your matrix by printing the current Go version + - name: Display Go version + run: go version + - name: Run promtail tests + run: go test .\clients\pkg\promtail\targets\windows\... -v \ No newline at end of file diff --git a/.github/workflows/publish-technical-documentation-next.yml b/.github/workflows/publish-technical-documentation-next.yml index 052ec1b07158c..b4cf557cc75c9 100644 --- a/.github/workflows/publish-technical-documentation-next.yml +++ b/.github/workflows/publish-technical-documentation-next.yml @@ -8,24 +8,11 @@ on: - "docs/sources/**" workflow_dispatch: jobs: - test: - runs-on: "ubuntu-latest" - steps: - - name: "Check out code" - uses: "actions/checkout@v3" - - name: "Build website" - # -e HUGO_REFLINKSERRORLEVEL=ERROR prevents merging broken refs with the downside - # that no refs to external content can be used as these refs will not resolve in the - # docs-base image. - run: | - docker run -v ${PWD}/docs/sources:/hugo/content/docs/loki/next -e HUGO_REFLINKSERRORLEVEL=ERROR --rm grafana/docs-base:latest /bin/bash -c 'make hugo' - sync: runs-on: "ubuntu-latest" - needs: "test" steps: - name: "Check out code" - uses: "actions/checkout@v3" + uses: "actions/checkout@v4" - name: "Clone website-sync Action" # WEBSITE_SYNC_TOKEN is a fine-grained GitHub Personal Access Token that expires. 
diff --git a/.github/workflows/publish-technical-documentation-release.yml b/.github/workflows/publish-technical-documentation-release.yml index 02e13270b924f..d8f17f4d457f6 100644 --- a/.github/workflows/publish-technical-documentation-release.yml +++ b/.github/workflows/publish-technical-documentation-release.yml @@ -10,30 +10,16 @@ on: - "docs/sources/**" workflow_dispatch: jobs: - test: - runs-on: "ubuntu-latest" - steps: - - name: "Check out code" - uses: "actions/checkout@v3" - - name: - "Build website" - # -e HUGO_REFLINKSERRORLEVEL=ERROR prevents merging broken refs with the downside - # that no refs to external content can be used as these refs will not resolve in the - # docs-base image. - run: | - docker run -v ${PWD}/docs/sources:/hugo/content/docs/loki/release -e HUGO_REFLINKSERRORLEVEL=ERROR --rm grafana/docs-base:latest /bin/bash -c 'make hugo' - sync: runs-on: "ubuntu-latest" - needs: "test" steps: - name: "Checkout code and tags" - uses: "actions/checkout@v3" + uses: "actions/checkout@v4" with: fetch-depth: 0 - name: "Checkout Actions library" - uses: "actions/checkout@v3" + uses: "actions/checkout@v4" with: repository: "grafana/grafana-github-actions" path: "./actions" diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 64970d1bd7192..cecbee6513b75 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -1,14 +1,22 @@ concurrency: group: "create-release-${{ github.sha }}" env: + BUILD_ARTIFACTS_BUCKET: "loki-build-artifacts" IMAGE_PREFIX: "grafana" + PUBLISH_TO_GCS: false + RELEASE_LIB_REF: "main" RELEASE_REPO: "grafana/loki" + USE_GITHUB_APP_TOKEN: false jobs: createRelease: if: "${{ fromJSON(needs.shouldRelease.outputs.shouldRelease) }}" needs: - "shouldRelease" outputs: + draft: "${{ steps.check_release.outputs.draft }}" + exists: "${{ steps.check_release.outputs.exists }}" + isLatest: "${{ needs.shouldRelease.outputs.isLatest }}" + name: "${{ needs.shouldRelease.outputs.name }}" sha: "${{ 
needs.shouldRelease.outputs.sha }}" runs-on: "ubuntu-latest" steps: @@ -21,6 +29,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "setup node" uses: "actions/setup-node@v4" @@ -34,30 +43,73 @@ jobs: uses: "google-github-actions/setup-gcloud@v2" with: version: ">= 452.0.0" + - id: "get_github_app_token" + if: "${{ fromJSON(env.USE_GITHUB_APP_TOKEN) }}" + name: "get github app token" + uses: "actions/github-app-token@v1" + with: + app-id: "${{ secrets.APP_ID }}" + owner: "${{ github.repository_owner }}" + private-key: "${{ secrets.APP_PRIVATE_KEY }}" + - id: "github_app_token" + name: "set github token" + run: | + if [[ "${USE_GITHUB_APP_TOKEN}" == "true" ]]; then + echo "token=${{ steps.get_github_app_token.outputs.token }}" >> $GITHUB_OUTPUT + else + echo "token=${{ secrets.GH_TOKEN }}" >> $GITHUB_OUTPUT + fi - name: "download binaries" run: | echo "downloading binaries to $(pwd)/dist" - gsutil cp -r gs://loki-build-artifacts/${{ needs.shouldRelease.outputs.sha }}/dist . + gsutil cp -r gs://${BUILD_ARTIFACTS_BUCKET}/${{ needs.shouldRelease.outputs.sha }}/dist . 
+ working-directory: "release" + - env: + GH_TOKEN: "${{ steps.github_app_token.outputs.token }}" + id: "check_release" + name: "check if release exists" + run: | + set +e + isDraft="$(gh release view --json="isDraft" --jq=".isDraft" ${{ needs.shouldRelease.outputs.name }} 2>&1)" + set -e + if [[ "$isDraft" == "release not found" ]]; then + echo "exists=false" >> $GITHUB_OUTPUT + else + echo "exists=true" >> $GITHUB_OUTPUT + fi + + if [[ "$isDraft" == "true" ]]; then + echo "draft=true" >> $GITHUB_OUTPUT + fi working-directory: "release" - id: "release" + if: "${{ !fromJSON(steps.check_release.outputs.exists) }}" name: "create release" run: | npm install npm exec -- release-please github-release \ --draft \ --release-type simple \ - --repo-url="${{ env.RELEASE_REPO }}" \ + --repo-url "${{ env.RELEASE_REPO }}" \ --target-branch "${{ needs.shouldRelease.outputs.branch }}" \ - --token="${{ secrets.GH_TOKEN }}" + --token "${{ steps.github_app_token.outputs.token }}" \ + --shas-to-tag "${{ needs.shouldRelease.outputs.prNumber }}:${{ needs.shouldRelease.outputs.sha }}" working-directory: "lib" - env: - GH_TOKEN: "${{ secrets.GH_TOKEN }}" + GH_TOKEN: "${{ steps.github_app_token.outputs.token }}" id: "upload" name: "upload artifacts" run: | - gh release upload ${{ needs.shouldRelease.outputs.name }} dist/* - gh release edit ${{ needs.shouldRelease.outputs.name }} --draft=false + gh release upload --clobber ${{ needs.shouldRelease.outputs.name }} dist/* working-directory: "release" + - if: "${{ fromJSON(env.PUBLISH_TO_GCS) }}" + name: "release artifacts" + uses: "google-github-actions/upload-cloud-storage@v2" + with: + destination: "${{ env.PUBLISH_BUCKET }}" + parent: false + path: "release/dist" + process_gcloudignore: false publishImages: needs: - "createRelease" @@ -67,6 +119,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - name: "auth gcs" uses: "google-github-actions/auth@v2" @@ -91,10 
+144,46 @@ jobs: with: imageDir: "images" imagePrefix: "${{ env.IMAGE_PREFIX }}" + publishRelease: + needs: + - "createRelease" + - "publishImages" + runs-on: "ubuntu-latest" + steps: + - name: "pull code to release" + uses: "actions/checkout@v4" + with: + path: "release" + repository: "${{ env.RELEASE_REPO }}" + - id: "get_github_app_token" + if: "${{ fromJSON(env.USE_GITHUB_APP_TOKEN) }}" + name: "get github app token" + uses: "actions/github-app-token@v1" + with: + app-id: "${{ secrets.APP_ID }}" + owner: "${{ github.repository_owner }}" + private-key: "${{ secrets.APP_PRIVATE_KEY }}" + - id: "github_app_token" + name: "set github token" + run: | + if [[ "${USE_GITHUB_APP_TOKEN}" == "true" ]]; then + echo "token=${{ steps.get_github_app_token.outputs.token }}" >> $GITHUB_OUTPUT + else + echo "token=${{ secrets.GH_TOKEN }}" >> $GITHUB_OUTPUT + fi + - env: + GH_TOKEN: "${{ steps.github_app_token.outputs.token }}" + if: "${{ !fromJSON(needs.createRelease.outputs.exists) || (needs.createRelease.outputs.draft && fromJSON(needs.createRelease.outputs.draft)) }}" + name: "publish release" + run: | + gh release edit ${{ needs.createRelease.outputs.name }} --draft=false --latest=${{ needs.createRelease.outputs.isLatest }} + working-directory: "release" shouldRelease: outputs: branch: "${{ steps.extract_branch.outputs.branch }}" + isLatest: "${{ steps.should_release.outputs.isLatest }}" name: "${{ steps.should_release.outputs.name }}" + prNumber: "${{ steps.should_release.outputs.prNumber }}" sha: "${{ steps.should_release.outputs.sha }}" shouldRelease: "${{ steps.should_release.outputs.shouldRelease }}" runs-on: "ubuntu-latest" @@ -108,6 +197,7 @@ jobs: uses: "actions/checkout@v4" with: path: "lib" + ref: "${{ env.RELEASE_LIB_REF }}" repository: "grafana/loki-release" - id: "extract_branch" name: "extract branch name" @@ -125,6 +215,7 @@ name: "create release" branches: - "release-[0-9]+.[0-9]+.x" - "k[0-9]+" + - "main" permissions: contents: "write" id-token: "write" 
diff --git a/.github/workflows/snyk-pr-comment.yml b/.github/workflows/snyk-pr-comment.yml deleted file mode 100644 index c54e9c55c3b58..0000000000000 --- a/.github/workflows/snyk-pr-comment.yml +++ /dev/null @@ -1,51 +0,0 @@ -name: PR Vulnerability Scan -on: pull_request - -permissions: - pull-requests: write - issues: write - -jobs: - snyk: - name: Snyk Scan - runs-on: ubuntu-latest - if: ${{ !github.event.pull_request.head.repo.fork }} - steps: - - name: Checkout code - uses: actions/checkout@master - - name: Run Snyk to check for vulnerabilities - uses: snyk/actions/golang@master - continue-on-error: true # To make sure that PR comment is made - env: - SNYK_TOKEN: ${{ secrets.SNYK_TOKEN }} - with: - command: test - args: --severity-threshold=high --json-file-output=snyk.json - - - name: Prepare Snyk message - run: | - echo "Snyk scan found the following vulnerabilities:" > snyk.txt - - - name: Format Snyk Message - uses: sergeysova/jq-action@v2 - continue-on-error: true - with: - cmd: jq -r '.vulnerabilities[] | "* **\(.severity)** - [\(.identifiers.CVE[0])] \(.title) in `\(.moduleName)` v\(.version). Fixed in \(.fixedIn)"' snyk.json >> snyk.txt - - - name: Determine whether to comment - continue-on-error: true - id: should-comment - run: | - if [[ $(wc -l < snyk.txt) -gt 1 ]]; then - echo "\nTo see more details on these vulnerabilities, and how/where to fix them, please run `make scan-vulnerabilities` on your branch. If these were not introduced by your PR, please considering fixing them in `main` via a subsequent PR. Thanks!" 
>> snyk.txt - exit 0; - fi - - exit 1 - - - name: Comment on PR with Snyk scan results - uses: mshick/add-pr-comment@v2 - if: ${{ steps.should-comment.outcome == 'success' }} - with: - message-id: snyk-${{ github.event.number }} - message-path: snyk.txt diff --git a/.github/workflows/snyk.yml b/.github/workflows/snyk.yml index 1c4c8a3616789..2239756cb67c8 100644 --- a/.github/workflows/snyk.yml +++ b/.github/workflows/snyk.yml @@ -1,11 +1,12 @@ name: Snyk Monitor Scanning on: release: - types: [published] + types: [published] push: branches: - - 'main' + - 'main' - 'master' + - 'release-[0-9]+.[0-9]+.x' workflow_dispatch: jobs: diff --git a/.github/workflows/trivy-pr-comment.yml b/.github/workflows/trivy-pr-comment.yml deleted file mode 100644 index ca69cb1b3ba7d..0000000000000 --- a/.github/workflows/trivy-pr-comment.yml +++ /dev/null @@ -1,67 +0,0 @@ -name: PR Vulnerability Scan -on: pull_request_target - -permissions: - pull-requests: write - issues: write - -jobs: - trivy: - name: Trivy Scan - runs-on: ubuntu-20.04 - steps: - - name: Checkout code - uses: actions/checkout@v3 - - - name: Build Loki Image - run: | - IMAGE_TAG="$(./tools/image-tag)" - make loki-image - echo "IMAGE_TAG=${IMAGE_TAG}" >> $GITHUB_ENV - - - name: Run Trivy image scanner - uses: aquasecurity/trivy-action@master - with: - image-ref: "docker.io/grafana/loki:${{ env.IMAGE_TAG }}" - format: "json" - output: "trivy-image.json" - severity: "CRITICAL,HIGH" - - - name: Run Trivy fs scanner - uses: aquasecurity/trivy-action@master - with: - scan-type: "fs" - scan-ref: "go.mod" - format: "json" - output: "trivy-fs.json" - severity: "CRITICAL,HIGH" - - - name: Prepare Trivy Message - run: | - echo "Trivy scan found the following vulnerabilities:" > trivy.txt - - - name: Format Trivy Message - uses: sergeysova/jq-action@v2 - continue-on-error: true - with: - cmd: | - jq -r '.Results[] | select(.Vulnerabilities != null) | .Target as $target | .Type as $type | .Vulnerabilities[] | "* 
**\(.Severity)**, Target: \($target), Type: \($type) [\(.Title)](\(.PrimaryURL)) in `\(.PkgName)` v\(.InstalledVersion). Fixed in v\(.FixedVersion)"' trivy-image.json >> trivy.txt - jq -r '.Results[] | select(.Vulnerabilities != null) | .Target as $target | .Type as $type | .Vulnerabilities[] | "* **\(.Severity)**, Target: \($target), Type: \($type) [\(.Title)](\(.PrimaryURL)) in `\(.PkgName)` v\(.InstalledVersion). Fixed in v\(.FixedVersion)"' trivy-fs.json >> trivy.text - - - name: Determine whether to comment - continue-on-error: true - id: should-comment - run: | - if [[ $(wc -l < trivy.txt) -gt 1 ]]; then - echo "\nTo see more details on these vulnerabilities, and how/where to fix them, please run `make scan-vulnerabilities` on your branch. If these were not introduced by your PR, please considering fixing them in `main` via a subsequent PR. Thanks!" >> trivy.txt - exit 0; - fi - - exit 1 - - - name: Comment on PR with Trivy scan results - uses: mshick/add-pr-comment@v2 - if: ${{ steps.should-comment.outcome == 'success' }} - with: - message-id: trivy-${{ github.event.number }} - message-path: trivy.txt diff --git a/.github/workflows/verify-drone.yml b/.github/workflows/verify-drone.yml new file mode 100644 index 0000000000000..cfc5721d2d402 --- /dev/null +++ b/.github/workflows/verify-drone.yml @@ -0,0 +1,52 @@ +name: Verify drone updates +on: [pull_request] +jobs: + check-drone-changes: + runs-on: ubuntu-latest + steps: + - uses: actions/checkout@v3 + - name: Get changed files + # we need continue on error because the git diff | grep pipe can return a non-zero error code if no result is found + continue-on-error: true + id: changed-files + run: | + echo "changed_files=$(git diff --name-only main -- .drone/ | xargs)" >> $GITHUB_OUTPUT + git diff main .drone/ | grep "+hmac " + echo "sha_updated=$?" >> $GITHUB_OUTPUT + - name: Check that drone was updated properly + if: always() + run: | + jsonnetChanged=false + yamlChanged=false + echo "sha updated? 
${{ steps.changed-files.outputs.sha_updated }}" + + # check whether the drone jsonnet and yaml files were updated + for file in ${{ steps.changed-files.outputs.changed_files }}; do + if [ "$file" == ".drone/drone.jsonnet" ]; then + echo "$file was changed" + jsonnetChanged=true + fi + if [ "$file" == ".drone/drone.yml" ]; then + echo "$file was changed" + yamlChanged=true + fi + done + + # if niether file was changed we're okay + if { [ "$yamlChanged" = false ] && [ "$jsonnetChanged" = false ]; } then + echo "neither file was changed" + exit 0 + fi + # if both files were changed then we should ensure that the sha in the yaml was also updated + if { [ "$yamlChanged" = true ] && [ "$jsonnetChanged" = true ]; } then + # annoyingly, the return value is a string + if [ "${{ steps.changed-files.outputs.sha_updated }}" = "0" ]; then + echo "both files were changed and sha was updated" + exit 0 + fi + echo "both drone yaml and jsonnet were updated but the sha in the yaml file was not updated" + exit 1 + fi + # only one of the two files was updated + echo "if one of the drone files (yaml or jsonnet) was changed then bothy files must be updated" + exit 1 \ No newline at end of file diff --git a/.gitignore b/.gitignore index 83ab9c808d348..3121fffbe44c1 100644 --- a/.gitignore +++ b/.gitignore @@ -12,6 +12,7 @@ cmd/loki/loki cmd/logcli/logcli clients/cmd/promtail/promtail cmd/loki/loki-debug +cmd/lokitool/lokitool clients/cmd/promtail/promtail-debug clients/cmd/docker-driver/docker-driver cmd/loki-canary/loki-canary @@ -51,6 +52,7 @@ pkg/loki/wal # nix nix/result +/result # snyk .dccache diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 928eee2e123e3..468b13f62ac3e 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,3 +1,4 @@ { - ".": "2.9.4" + ".": "3.0.0", + "operator": "0.6.0" } diff --git a/CHANGELOG.md b/CHANGELOG.md index 2c19c74a065a1..d6ee3e8112651 100644 --- a/CHANGELOG.md +++ b/CHANGELOG.md @@ -1,100 
+1,131 @@ -## Main/Unreleased +# Changelog -### All Changes +## [3.0.0](https://github.com/grafana/loki/compare/v2.9.6...v3.0.0) (2024-04-08) -#### Loki +Starting with the 3.0 release we began using [conventional commits](https://www.conventionalcommits.org/en/v1.0.0/) and [release-please](https://github.com/googleapis/release-please) to generate the changelog. As a result the format has changed slightly from previous releases. -##### Enhancements +### Features -* [11840](https://github.com/grafana/loki/pull/11840) **jeschkies**: Allow custom usage trackers for ingested and discarded bytes metric. -* [11814](https://github.com/grafana/loki/pull/11814) **kavirajk**: feat: Support split align and caching for instant metric query results -* [11851](https://github.com/grafana/loki/pull/11851) **elcomtik**: Helm: Allow the definition of resources for GrafanaAgent pods. -* [11819](https://github.com/grafana/loki/pull/11819) **jburnham**: Ruler: Add the ability to disable the `X-Scope-OrgId` tenant identification header in remote write requests. -* [11633](https://github.com/grafana/loki/pull/11633) **cyriltovena**: Add profiling integrations to tracing instrumentation. -* [11571](https://github.com/grafana/loki/pull/11571) **MichelHollands**: Add a metrics.go log line for requests from querier to ingester -* [11477](https://github.com/grafana/loki/pull/11477) **MichelHollands**: support GET for /ingester/shutdown -* [11363](https://github.com/grafana/loki/pull/11363) **kavirajk**: bugfix(memcached): Make memcached batch fetch truely context aware. -* [11319](https://github.com/grafana/loki/pull/11319) **someStrangerFromTheAbyss**: Helm: Add extraContainers to the write pods. -* [11243](https://github.com/grafana/loki/pull/11243) **kavirajk**: Inflight-logging: Add extra metadata to inflight requests logging. -* [11110](https://github.com/grafana/loki/pull/11003) **MichelHollands**: Change the default of the `metrics-namespace` flag to 'loki'. 
-* [11086](https://github.com/grafana/loki/pull/11086) **kandrew5**: Helm: Allow topologySpreadConstraints -* [11003](https://github.com/grafana/loki/pull/11003) **MichelHollands**: Add the `metrics-namespace` flag to change the namespace of metrics currently using cortex as namespace. -* [10096](https://github.com/grafana/loki/pull/10096) **aschleck**: Storage: Allow setting a constant prefix for all created keys -* [11038](https://github.com/grafana/loki/pull/11038) **kavirajk**: Remove already deprecated `store.max-look-back-period`. -* [10906](https://github.com/grafana/loki/pull/10906) **kavirajk**: Support Loki ruler to notify WAL writes to remote storage. -* [10613](https://github.com/grafana/loki/pull/10613) **ngc4579**: Helm: allow GrafanaAgent tolerations -* [10295](https://github.com/grafana/loki/pull/10295) **changhyuni**: Storage: remove signatureversionv2 from s3. -* [10140](https://github.com/grafana/loki/pull/10140) **dannykopping**: Dynamic client-side throttling to avoid object storage rate-limits (GCS only) -* [10302](https://github.com/grafana/loki/pull/10302) **ashwanthgoli**: Removes already deprecated `-querier.engine.timeout` CLI flag and corresponding YAML setting as well as the `querier.query_timeout` YAML setting. -* [10308](https://github.com/grafana/loki/pull/10308) **bboreham** Tracing: elide small traces for Stats call. -* [10373](https://github.com/grafana/loki/pull/10373) **jeschkies** Shard `avg_over_time` range aggregations. -* [10377](https://github.com/grafana/loki/pull/10377) **shantanualsi** Remove deprecated config `-s3.sse-encryption` in favor or `-s3.sse.*` settings. 
-* [10378](https://github.com/grafana/loki/pull/10378) **shantanualsi** Remove deprecated `ruler.wal-cleaer.period` -* [10380](https://github.com/grafana/loki/pull/10380) **shantanualsi** Remove `experimental.ruler.enable-api` in favour of `ruler.enable-api` -* [10395](https://github.com/grafana/loki/pull/10395/) **shantanualshi** Remove deprecated `split_queries_by_interval` and `forward_headers_list` configuration options in the `query_range` section -* [10456](https://github.com/grafana/loki/pull/10456) **dannykopping** Add `loki_distributor_ingester_append_timeouts_total` metric, remove `loki_distributor_ingester_append_failures_total` metric -* [10534](https://github.com/grafana/loki/pull/10534) **chaudum** Remove configuration `use_boltdb_shipper_as_backup` -* [10620](https://github.com/grafana/loki/pull/10620) **ashwanthgoli** Enable embedded cache if no other cache is explicitly enabled. -* [10655](https://github.com/grafana/loki/pull/10655) **chaudum** Remove legacy ingester shutdown handler `/ingester/flush_shutdown`. -* [10709](https://github.com/grafana/loki/pull/10709) **chaudum**/**salvacorts** Remove `ingester.max-transfer-retries` configuration option in favor of using the WAL. -* [10736](https://github.com/grafana/loki/pull/10736) **ashwanthgoli** Deprecate write dedupe cache as this is not required by the newer single store indexes (tsdb and boltdb-shipper). -* [10693](https://github.com/grafana/loki/pull/10693) **ashwanthgoli** Embedded cache: Updates the metric prefix from `querier_cache_` to `loki_embeddedcache_` and removes duplicate metrics. -* [10840](https://github.com/grafana/loki/pull/10840) **ashwanthgoli** Removes `shared_store` and `shared_store_key_prefix` from tsdb, boltdb shipper and compactor configs and their corresponding CLI flags. -* [10793](https://github.com/grafana/loki/pull/10793) **ashwanthgoli** Config: Better configuration defaults to provide a better experience for users out of the box. 
-* [10785](https://github.com/grafana/loki/pull/10785) **ashwanthgoli** Config: Removes `querier.worker-parallelism` and updates default value of `querier.max-concurrent` to 4. -* [10733](https://github.com/grafana/loki/pull/10733) **shantanualsi** Add support for case-insensitive logql funtions -* [10727](https://github.com/grafana/loki/pull/10727) **sandeepsukhani** Native otlp ingestion support -* [11051](https://github.com/grafana/loki/pull/11051) Refactor to not use global logger in modules -* [10956](https://github.com/grafana/loki/pull/10956) **jeschkies** do not wrap requests but send pure Protobuf from frontend v2 via scheduler to querier when `-frontend.encoding=protobuf`. -* [10417](https://github.com/grafana/loki/pull/10417) **jeschkies** shard `quantile_over_time` range queries using probabilistic data structures. -* [11284](https://github.com/grafana/loki/pull/11284) **ashwanthgoli** Config: Adds `frontend.max-query-capacity` to tune per-tenant query capacity. -* [11539](https://github.com/grafana/loki/pull/11539) **kaviraj,ashwanthgoli** Support caching /series and /labels query results -* [11545](https://github.com/grafana/loki/pull/11545) **dannykopping** Force correct memcached timeout when fetching chunks. -* [11589](https://github.com/grafana/loki/pull/11589) **ashwanthgoli** Results Cache: Adds `query_length_served` cache stat to measure the length of the query served from cache. -* [11535](https://github.com/grafana/loki/pull/11535) **dannykopping** Query Frontend: Allow customisable splitting of queries which overlap the `query_ingester_within` window to reduce query pressure on ingesters. -* [11654](https://github.com/grafana/loki/pull/11654) **dannykopping** Cache: atomically check background cache size limit correctly. -* [11682](https://github.com/grafana/loki/pull/11682) **ashwanthgoli** Metadata cache: Adds `frontend.max-metadata-cache-freshness` to configure the time window for which metadata results are not cached. 
This helps avoid returning inaccurate results by not caching recent results. -* [11679](https://github.com/grafana/loki/pull/11679) **dannykopping** Cache: extending #11535 to align custom ingester query split with cache keys for correct caching of results. -* [11143](https://github.com/grafana/loki/pull/11143) **sandeepsukhani** otel: Add support for per tenant configuration for mapping otlp data to loki format -* [11499](https://github.com/grafana/loki/pull/11284) **jmichalek132** Config: Adds `frontend.log-query-request-headers` to enable logging of request headers in query logs. -* [11817](https://github.com/grafana/loki/pull/11817) **ashwanthgoli** Ruler: Add support for filtering results of `/prometheus/api/v1/rules` endpoint by rule_name, rule_group, file and type. -* [11897](https://github.com/grafana/loki/pull/11897) **ashwanthgoli** Metadata: Introduces a separate split interval of `split_recent_metadata_queries_by_interval` for `recent_metadata_query_window` to help with caching recent metadata query results. -* [11970](https://github.com/grafana/loki/pull/11897) **masslessparticle** Ksonnet: Introduces memory limits to the compactor configuration to avoid unbounded memory usage. +* **helm:** configurable API version for PodLog CRD ([#10812](https://github.com/grafana/loki/issues/10812)) ([d1dee91](https://github.com/grafana/loki/commit/d1dee9150b0e69941b2bd3ce4b23afead174ea29)) +* **lambda/promtail:** support dropping labels ([#10755](https://github.com/grafana/loki/issues/10755)) ([ec54c72](https://github.com/grafana/loki/commit/ec54c723ebbeeda88000dde188d539ecfe05dad8)) +* **logstash:** clients logstash output structured metadata support ([#10899](https://github.com/grafana/loki/issues/10899)) ([32f1ec2](https://github.com/grafana/loki/commit/32f1ec2fda5057732a2b20b98942aafec112c4ba)) +* **loki**: Allow custom usage trackers for ingested and discarded bytes metric. 
[11840](https://github.com/grafana/loki/pull/11840) +* **loki**: feat: Support split align and caching for instant metric query results [11814](https://github.com/grafana/loki/pull/11814) +* **loki**: Helm: Allow the definition of resources for GrafanaAgent pods. [11851](https://github.com/grafana/loki/pull/11851) +* **loki**: Ruler: Add the ability to disable the `X-Scope-OrgId` tenant identification header in remote write requests. [11819](https://github.com/grafana/loki/pull/11819) +* **loki**: Add profiling integrations to tracing instrumentation. [11633](https://github.com/grafana/loki/pull/11633) +* **loki**: Add a metrics.go log line for requests from querier to ingester [11571](https://github.com/grafana/loki/pull/11571) +* **loki**: support GET for /ingester/shutdown [11477](https://github.com/grafana/loki/pull/11477) +* **loki**: bugfix(memcached): Make memcached batch fetch truly context aware. [11363](https://github.com/grafana/loki/pull/11363) +* **loki**: Helm: Add extraContainers to the write pods. [11319](https://github.com/grafana/loki/pull/11319) +* **loki**: Inflight-logging: Add extra metadata to inflight requests logging. [11243](https://github.com/grafana/loki/pull/11243) +* **loki**: Use metrics namespace for more metrics. [11025](https://github.com/grafana/loki/pull/11025). +* **loki**: Change default of metrics.namespace. [11110](https://github.com/grafana/loki/pull/11110). +* **loki**: Helm: Allow topologySpreadConstraints [11086](https://github.com/grafana/loki/pull/11086) +* **loki**: Storage: Allow setting a constant prefix for all created keys [10096](https://github.com/grafana/loki/pull/10096) +* **loki**: Remove already deprecated `store.max-look-back-period`. [11038](https://github.com/grafana/loki/pull/11038) +* **loki**: Support Loki ruler to notify WAL writes to remote storage. 
[10906](https://github.com/grafana/loki/pull/10906) +* **loki**: Helm: allow GrafanaAgent tolerations [10613](https://github.com/grafana/loki/pull/10613) +* **loki**: Storage: remove signatureversionv2 from s3. [10295](https://github.com/grafana/loki/pull/10295) +* **loki**: Dynamic client-side throttling to avoid object storage rate-limits (GCS only) [10140](https://github.com/grafana/loki/pull/10140) +* **loki**: Removes already deprecated `-querier.engine.timeout` CLI flag and corresponding YAML setting as well as the `querier.query_timeout` YAML setting. [10302](https://github.com/grafana/loki/pull/10302) +* **loki** Tracing: elide small traces for Stats call. [10308](https://github.com/grafana/loki/pull/10308) +* **loki** Shard `avg_over_time` range aggregations. [10373](https://github.com/grafana/loki/pull/10373) +* **loki** Remove deprecated config `-s3.sse-encryption` in favor or `-s3.sse.*` settings. [10377](https://github.com/grafana/loki/pull/10377) +* **loki** Remove deprecated `ruler.wal-cleaer.period` [10378](https://github.com/grafana/loki/pull/10378) +* **loki** Remove `experimental.ruler.enable-api` in favour of `ruler.enable-api` [10380](https://github.com/grafana/loki/pull/10380) +* **loki** Remove deprecated `split_queries_by_interval` and `forward_headers_list` configuration options in the `query_range` section [10395](https://github.com/grafana/loki/pull/10395/) +* **loki** Add `loki_distributor_ingester_append_timeouts_total` metric, remove `loki_distributor_ingester_append_failures_total` metric [10456](https://github.com/grafana/loki/pull/10456) +* **loki** Remove configuration `use_boltdb_shipper_as_backup` [10534](https://github.com/grafana/loki/pull/10534) +* **loki** Enable embedded cache if no other cache is explicitly enabled. [10620](https://github.com/grafana/loki/pull/10620) +* **loki** Remove legacy ingester shutdown handler `/ingester/flush_shutdown`. 
[10655](https://github.com/grafana/loki/pull/10655) +* **loki** Remove `ingester.max-transfer-retries` configuration option in favor of using the WAL. [10709](https://github.com/grafana/loki/pull/10709) +* **loki** Deprecate write dedupe cache as this is not required by the newer single store indexes (tsdb and boltdb-shipper). [10736](https://github.com/grafana/loki/pull/10736) +* **loki** Embedded cache: Updates the metric prefix from `querier_cache_` to `loki_embeddedcache_` and removes duplicate metrics. [10693](https://github.com/grafana/loki/pull/10693) +* **loki** Removes `shared_store` and `shared_store_key_prefix` from tsdb, boltdb shipper and compactor configs and their corresponding CLI flags. [10840](https://github.com/grafana/loki/pull/10840) +* **loki** Config: Better configuration defaults to provide a better experience for users out of the box. [10793](https://github.com/grafana/loki/pull/10793) +* **loki** Config: Removes `querier.worker-parallelism` and updates default value of `querier.max-concurrent` to 4. [10785](https://github.com/grafana/loki/pull/10785) +* **loki** Add support for case-insensitive logql functions [10733](https://github.com/grafana/loki/pull/10733) +* **loki** Native otlp ingestion support [10727](https://github.com/grafana/loki/pull/10727) +* Refactor to not use global logger in modules [11051](https://github.com/grafana/loki/pull/11051) +* **loki** do not wrap requests but send pure Protobuf from frontend v2 via scheduler to querier when `-frontend.encoding=protobuf`. [10956](https://github.com/grafana/loki/pull/10956) +* **loki** shard `quantile_over_time` range queries using probabilistic data structures. [10417](https://github.com/grafana/loki/pull/10417) +* **loki** Config: Adds `frontend.max-query-capacity` to tune per-tenant query capacity. 
[11284](https://github.com/grafana/loki/pull/11284) +* **kaviraj,ashwanthgoli** Support caching /series and /labels query results [11539](https://github.com/grafana/loki/pull/11539) +* **loki** Force correct memcached timeout when fetching chunks. [11545](https://github.com/grafana/loki/pull/11545) +* **loki** Results Cache: Adds `query_length_served` cache stat to measure the length of the query served from cache. [11589](https://github.com/grafana/loki/pull/11589) +* **loki** Query Frontend: Allow customisable splitting of queries which overlap the `query_ingester_within` window to reduce query pressure on ingesters. [11535](https://github.com/grafana/loki/pull/11535) +* **loki** Cache: atomically check background cache size limit correctly. [11654](https://github.com/grafana/loki/pull/11654) +* **loki** Metadata cache: Adds `frontend.max-metadata-cache-freshness` to configure the time window for which metadata results are not cached. This helps avoid returning inaccurate results by not caching recent results. [11682](https://github.com/grafana/loki/pull/11682) +* **loki** Cache: extending #11535 to align custom ingester query split with cache keys for correct caching of results. [11679](https://github.com/grafana/loki/pull/11679) +* **loki** otel: Add support for per tenant configuration for mapping otlp data to loki format [11143](https://github.com/grafana/loki/pull/11143) +* **loki** Config: Adds `frontend.log-query-request-headers` to enable logging of request headers in query logs. [11499](https://github.com/grafana/loki/pull/11284) +* **loki** Ruler: Add support for filtering results of `/prometheus/api/v1/rules` endpoint by rule_name, rule_group, file and type. [11817](https://github.com/grafana/loki/pull/11817) +* **loki** Metadata: Introduces a separate split interval of `split_recent_metadata_queries_by_interval` for `recent_metadata_query_window` to help with caching recent metadata query results. 
[11897](https://github.com/grafana/loki/pull/11897) +* **loki** Ksonnet: Introduces memory limits to the compactor configuration to avoid unbounded memory usage. [11970](https://github.com/grafana/loki/pull/11897) +* **loki** Memcached: Add mTLS support. [12318](https://github.com/grafana/loki/pull/12318) +* **loki** Detect name of service emitting logs and add it as a label. [12392](https://github.com/grafana/loki/pull/12392) +* **loki** LogQL: Introduces pattern match filter operators. [12398](https://github.com/grafana/loki/pull/12398) +* **loki**: Helm: Use `/ingester/shutdown` for `preStop` hook in write pods. [11490](https://github.com/grafana/loki/pull/11490) +* **loki** Upgrade thanos objstore, dskit and other modules [10366](https://github.com/grafana/loki/pull/10366) +* **loki** Upgrade thanos `objstore` [10451](https://github.com/grafana/loki/pull/10451) +* **loki** Upgrade prometheus to v0.47.1 and dskit [10814](https://github.com/grafana/loki/pull/10814) +* **loki** introduce a backoff wait on subquery retries. [10959](https://github.com/grafana/loki/pull/10959) +* **loki** Ensure all lifecycler cfgs ref a valid IPv6 addr and port combination [11121](https://github.com/grafana/loki/pull/11121) +* **loki** Ensure the frontend uses a valid IPv6 addr and port combination [10650](https://github.com/grafana/loki/pull/10650) +* **loki** Deprecate and flip `-legacy-read-mode` flag to `false` by default. [11665](https://github.com/grafana/loki/pull/11665) +* **loki** BREAKING CHANGE: refactor how we do defaults for runtime overrides [12448](https://github.com/grafana/loki/pull/12448/files) +* **promtail**: structured_metadata: enable structured_metadata convert labels [10752](https://github.com/grafana/loki/pull/10752) +* **promtail**: chore(promtail): Improve default configuration that is shipped with rpm/deb packages to avoid possible high CPU utilisation if there are lots of files inside `/var/log`. 
[11511](https://github.com/grafana/loki/pull/11511) +* **promtail**: Lambda-Promtail: Add support for WAF logs in S3 [10416](https://github.com/grafana/loki/pull/10416) +* **promtail**: users can now define `additional_fields` in cloudflare configuration. [10301](https://github.com/grafana/loki/pull/10301) +* **promtail**: Lambda-Promtail: Add support for dropping labels passed via env var [10755](https://github.com/grafana/loki/pull/10755) -##### Fixes -* [11074](https://github.com/grafana/loki/pull/11074) **hainenber** Fix panic in lambda-promtail due to mishandling of empty DROP_LABELS env var. -* [11195](https://github.com/grafana/loki/pull/11195) **canuteson** Generate tsdb_shipper storage_config even if using_boltdb_shipper is false -* [9831](https://github.com/grafana/loki/pull/9831) **sijmenhuizenga**: Fix Promtail excludepath not evaluated on newly added files. -* [11551](https://github.com/grafana/loki/pull/11551) **dannykopping** Do not reflect label names in request metrics' "route" label. -* [11563](https://github.com/grafana/loki/pull/11563) **ptqa** Fix duplicate logs from docker containers. -* [11601](https://github.com/grafana/loki/pull/11601) **dannykopping** Ruler: Fixed a panic that can be caused by concurrent read-write access of tenant configs when there are a large amount of rules. -* [11606](https://github.com/grafana/loki/pull/11606) **dannykopping** Fixed regression adding newlines to HTTP error response bodies which may break client integrations. -* [11657](https://github.com/grafana/loki/pull/11657) **ashwanthgoli** Log results cache: compose empty response based on the request being served to avoid returning incorrect limit or direction. -* [11587](https://github.com/grafana/loki/pull/11587) **trevorwhitney** Fix semantics of label parsing logic of metrics and logs queries. Both only parse the first label if multiple extractions into the same label are requested. 
-* [11776](https://github.com/grafana/loki/pull/11776) **ashwanthgoli** Background Cache: Fixes a bug that is causing the background queue size to be incremented twice for each enqueued item. -* [11921](https://github.com/grafana/loki/pull/11921) **paul1r**: Parsing: String array elements were not being parsed correctly in JSON processing +### Bug Fixes -##### Changes +* All lifecycler cfgs ref a valid IPv6 addr and port combination ([#11121](https://github.com/grafana/loki/issues/11121)) ([6385b19](https://github.com/grafana/loki/commit/6385b195739bd7d4e9706faddd0de663d8e5331a)) +* **deps:** update github.com/c2h5oh/datasize digest to 859f65c (main) ([#10820](https://github.com/grafana/loki/issues/10820)) ([c66ffd1](https://github.com/grafana/loki/commit/c66ffd125cd89f5845a75a1751186fa46d003f70)) +* **deps:** update github.com/docker/go-plugins-helpers digest to 6eecb7b (main) ([#10826](https://github.com/grafana/loki/issues/10826)) ([fb9c496](https://github.com/grafana/loki/commit/fb9c496b21be62f56866ae0f92440085e7860a2a)) +* **deps:** update github.com/grafana/gomemcache digest to 6947259 (main) ([#10836](https://github.com/grafana/loki/issues/10836)) ([2327789](https://github.com/grafana/loki/commit/2327789b5506d0ccc00d931195da17a2d47bf236)) +* **deps:** update github.com/grafana/loki/pkg/push digest to 583aa28 (main) ([#10842](https://github.com/grafana/loki/issues/10842)) ([02d9418](https://github.com/grafana/loki/commit/02d9418270f4e615c1f78b0def635da7c0572ca4)) +* **deps:** update github.com/grafana/loki/pkg/push digest to cfc4f0e (main) ([#10946](https://github.com/grafana/loki/issues/10946)) ([d27c4d2](https://github.com/grafana/loki/commit/d27c4d297dc6cce93ada98f16b962380ec933c6a)) +* **deps:** update github.com/grafana/loki/pkg/push digest to e523809 (main) ([#11107](https://github.com/grafana/loki/issues/11107)) ([09cb9ae](https://github.com/grafana/loki/commit/09cb9ae76f4aef7dea477961c0c5424d7243bf2a)) +* **deps:** update github.com/joncrlsn/dque 
digest to c2ef48c (main) ([#10947](https://github.com/grafana/loki/issues/10947)) ([1fe4885](https://github.com/grafana/loki/commit/1fe48858ae15b33646eedb85b05d6773a8bc5020)) +* **deps:** update module google.golang.org/grpc [security] (main) ([#11031](https://github.com/grafana/loki/issues/11031)) ([0695424](https://github.com/grafana/loki/commit/0695424f7dd62435df3a9981276b40f3c5ef5641)) +* **helm:** bump nginx-unprivilege to fix CVE ([#10754](https://github.com/grafana/loki/issues/10754)) ([dbf7dd4](https://github.com/grafana/loki/commit/dbf7dd4bac112a538a59907a8c6092504e7f4a91)) +* **promtail:** correctly parse list of drop stage sources from YAML ([#10848](https://github.com/grafana/loki/issues/10848)) ([f51ee84](https://github.com/grafana/loki/commit/f51ee849b03c5f6b79f3e93cb7fd7811636bede2)) +* **promtail:** prevent panic due to duplicate metric registration after reloaded ([#10798](https://github.com/grafana/loki/issues/10798)) ([47e2c58](https://github.com/grafana/loki/commit/47e2c5884f443667e64764f3fc3948f8f11abbb8)) +* **loki:** respect query matcher in ingester when getting label values ([#10375](https://github.com/grafana/loki/issues/10375)) ([85e2e52](https://github.com/grafana/loki/commit/85e2e52279ecac6dc111d5c113c54d6054d2c922)) +* **helm:** Sidecar configuration for Backend ([#10603](https://github.com/grafana/loki/issues/10603)) ([c29ba97](https://github.com/grafana/loki/commit/c29ba973a0b5b7b59613d210b741d5a547ea0e83)) +* **tools/lambda-promtail:** Do not evaluate empty string for drop_labels ([#11074](https://github.com/grafana/loki/issues/11074)) ([94169a0](https://github.com/grafana/loki/commit/94169a0e6b5bf96426ad21e40f9583b721f35d6c)) +* **lambda-promtail** Fix panic in lambda-promtail due to mishandling of empty DROP_LABELS env var. 
[11074](https://github.com/grafana/loki/pull/11074) +* **loki** Generate tsdb_shipper storage_config even if using_boltdb_shipper is false [11195](https://github.com/grafana/loki/pull/11195) +* **promtail**: Fix Promtail excludepath not evaluated on newly added files. [9831](https://github.com/grafana/loki/pull/9831) +* **loki** Do not reflect label names in request metrics' "route" label. [11551](https://github.com/grafana/loki/pull/11551) +* **loki** Fix duplicate logs from docker containers. [11563](https://github.com/grafana/loki/pull/11563) +* **loki** Ruler: Fixed a panic that can be caused by concurrent read-write access of tenant configs when there are a large amount of rules. [11601](https://github.com/grafana/loki/pull/11601) +* **loki** Fixed regression adding newlines to HTTP error response bodies which may break client integrations. [11606](https://github.com/grafana/loki/pull/11606) +* **loki** Log results cache: compose empty response based on the request being served to avoid returning incorrect limit or direction. [11657](https://github.com/grafana/loki/pull/11657) +* **loki** Fix semantics of label parsing logic of metrics and logs queries. Both only parse the first label if multiple extractions into the same label are requested. [11587](https://github.com/grafana/loki/pull/11587) +* **loki** Background Cache: Fixes a bug that is causing the background queue size to be incremented twice for each enqueued item. [11776](https://github.com/grafana/loki/pull/11776) +* **loki**: Parsing: String array elements were not being parsed correctly in JSON processing [11921](https://github.com/grafana/loki/pull/11921) + +## [2.9.7](https://github.com/grafana/loki/compare/v2.9.6...v2.9.7) (2024-04-10) -* [11490](https://github.com/grafana/loki/pull/11490) **andresperezl**: Helm: Use `/ingester/shutdown` for `preStop` hook in write pods. 
-* [10366](https://github.com/grafana/loki/pull/10366) **shantanualsi** Upgrade thanos objstore, dskit and other modules -* [10451](https://github.com/grafana/loki/pull/10451) **shantanualsi** Upgrade thanos `objstore` -* [10814](https://github.com/grafana/loki/pull/10814) **shantanualsi,kaviraj** Upgrade prometheus to v0.47.1 and dskit -* [10959](https://github.com/grafana/loki/pull/10959) **slim-bean** introduce a backoff wait on subquery retries. -* [11121](https://github.com/grafana/loki/pull/11121) **periklis** Ensure all lifecycler cfgs ref a valid IPv6 addr and port combination -* [10650](https://github.com/grafana/loki/pull/10650) **matthewpi** Ensure the frontend uses a valid IPv6 addr and port combination -* [11665](https://github.com/grafana/loki/pull/11665) **salvacorts** Deprecate and flip `-legacy-read-mode` flag to `false` by default. -#### Promtail +### Bug Fixes -* [10752](https://github.com/grafana/loki/pull/10752) **gonzalesraul**: structured_metadata: enable structured_metadata convert labels -* [11511](https://github.com/grafana/loki/pull/11511) **kavirajk**: chore(promtail): Improve default configuration that is shipped with rpm/deb packages to avoid possible high CPU utilisation if there are lots of files inside `/var/log`. +* Bump go to 1.21.9 and build image to 0.33.1 ([#12542](https://github.com/grafana/loki/issues/12542)) ([efc4d2f](https://github.com/grafana/loki/commit/efc4d2f009e04ecb1db58a637b89b33aa234de34)) -##### Enhancements +## [2.9.6](https://github.com/grafana/loki/compare/v2.9.5...v2.9.6) (2024-03-21) -* [10416](https://github.com/grafana/loki/pull/10416) **lpugoy**: Lambda-Promtail: Add support for WAF logs in S3 -* [10301](https://github.com/grafana/loki/pull/10301) **wildum**: users can now define `additional_fields` in cloudflare configuration. 
-* [10755](https://github.com/grafana/loki/pull/10755) **hainenber**: Lambda-Promtail: Add support for dropping labels passed via env var +### Bug Fixes + +* promtail failures connecting to local loki installation [release-2.9.x] ([#12184](https://github.com/grafana/loki/issues/12184)) ([8585e35](https://github.com/grafana/loki/commit/8585e3537375c0deb11462d7256f5da23228f5e1)) +* **release-2.9.x:** frontend: Use `net.JoinHostPort` to support IPv6 addresses ([#10650](https://github.com/grafana/loki/issues/10650)) ([#11870](https://github.com/grafana/loki/issues/11870)) ([7def3b4](https://github.com/grafana/loki/commit/7def3b4e774252e13ba154ca13f72816a84da7dd)) +* update google.golang.org/protobuf to v1.33.0 ([#12269](https://github.com/grafana/loki/issues/12269)) ([#12287](https://github.com/grafana/loki/issues/12287)) ([3186520](https://github.com/grafana/loki/commit/318652035059fdaa40405f263fc9e37b4d38b157)) + +## [2.9.5](https://github.com/grafana/loki/compare/v2.9.4...v2.9.5) (2024-02-28) ##### Changes @@ -131,8 +162,13 @@ * [10542](https://github.com/grafana/loki/pull/10542) **chaudum**: Remove legacy deployment mode for ingester (Deployment, without WAL) and instead always run them as StatefulSet. 
-## [2.8.10](https://github.com/grafana/loki/compare/v2.8.9...v2.8.10) (2024-02-28) +## [2.8.11](https://github.com/grafana/loki/compare/v2.8.10...v2.8.11) (2024-03-22) +### Bug Fixes + +* update google.golang.org/protobuf to v1.33.0 ([#12276](https://github.com/grafana/loki/issues/12276)) ([3c05724](https://github.com/grafana/loki/commit/3c05724ac9d7ea9b6048c6e67cd13dc55fa72782)) + +## [2.8.10](https://github.com/grafana/loki/compare/v2.8.9...v2.8.10) (2024-02-28) ### Bug Fixes @@ -140,7 +176,6 @@ ## [2.8.9](https://github.com/grafana/loki/compare/v2.8.8...v2.8.9) (2024-02-23) - ### Bug Fixes * bump alpine base image and go to fix CVEs ([#12026](https://github.com/grafana/loki/issues/12026)) ([196650e](https://github.com/grafana/loki/commit/196650e4c119249016df85a50a2cced521cbe9be)) @@ -169,7 +204,6 @@ * [10585](https://github.com/grafana/loki/pull/10585) **ashwanthgoli** / **chaudum**: Fix bug in index object client that could result in not showing all ingested logs in query results. * [10314](https://github.com/grafana/loki/pull/10314) **bboreham**: Fix race conditions in indexshipper. - ## 2.9.0 (2023-09-06) ### All Changes @@ -295,6 +329,7 @@ #### FluentD ##### Enhancements + * [LOG-4012](https://issues.redhat.com/browse/LOG-4012) **jcantril**: fluent-plugin-grafana-loki: Add config to support tls: ciphers, min_version #### Jsonnet @@ -605,6 +640,7 @@ Check the history of the branch `release-2.7.x`. #### Loki ##### Enhancements + * [7436](https://github.com/grafana/loki/pull/7436) **periklis**: Expose ring and memberlist handlers through internal server listener * [7227](https://github.com/grafana/loki/pull/7227) **Red-GV**: Add ability to configure tls minimum version and cipher suites * [7179](https://github.com/grafana/loki/pull/7179) **vlad-diachenko**: Add ability to use Azure Service Principals credentials to authenticate to Azure Blob Storage. @@ -625,6 +661,7 @@ Check the history of the branch `release-2.7.x`.
* [6952](https://github.com/grafana/loki/pull/6952) **DylanGuedes**: Experimental: Introduce a new feature named stream sharding. ##### Fixes + * [7426](https://github.com/grafana/loki/pull/7426) **periklis**: Add missing compactor delete client tls client config * [7238](https://github.com/grafana/loki/pull/7328) **periklis**: Fix internal server bootstrap for query frontend * [7288](https://github.com/grafana/loki/pull/7288) **ssncferreira**: Fix query mapping in AST mapper `rangemapper` to support the new `VectorExpr` expression. @@ -637,6 +674,7 @@ Check the history of the branch `release-2.7.x`. * [6372](https://github.com/grafana/loki/pull/6372) **splitice**: Add support for numbers in JSON fields. ##### Changes + * [6726](https://github.com/grafana/loki/pull/6726) **kavirajk**: upgrades go from 1.17.9 -> 1.18.4 * [6415](https://github.com/grafana/loki/pull/6415) **salvacorts**: Evenly spread queriers across kubernetes nodes. * [6349](https://github.com/grafana/loki/pull/6349) **simonswine**: Update the default HTTP listen port from 80 to 3100. Make sure to configure the port explicitly if you are using port 80. @@ -649,9 +687,11 @@ Check the history of the branch `release-2.7.x`. * [5400](https://github.com/grafana/loki/pull/5400) **BenoitKnecht**: promtail/server: Disable profiling by default #### Promtail + * [7470](https://github.com/grafana/loki/pull/7470) **Jack-King**: Add configuration for adding custom HTTP headers to push requests ##### Enhancements + * [7593](https://github.com/grafana/loki/pull/7593) **chodges15**: Promtail: Add tenant label to client drop metrics and logs * [7101](https://github.com/grafana/loki/pull/7101) **liguozhong**: Promtail: Add support for max stream limit. * [7247](https://github.com/grafana/loki/pull/7247) **liguozhong**: Add config reload endpoint / signal to promtail. @@ -663,27 +703,30 @@ Check the history of the branch `release-2.7.x`. 
* [7414](https://github.com/grafana/loki/pull/7414) **thepalbi**: Add basic tracing support ##### Fixes -* [7394](https://github.com/grafana/loki/pull/7394) **liguozhong**: Fix issue with the Cloudflare target that caused it to stop working after it received an error in the logpull request as explained in issue https://github.com/grafana/loki/issues/6150 -* [6766](https://github.com/grafana/loki/pull/6766) **kavirajk**: fix(logql): Make `LabelSampleExtractor` ignore processing the line if it doesn't contain that specific label. Fixes unwrap behavior explained in the issue https://github.com/grafana/loki/issues/6713 + +* [7394](https://github.com/grafana/loki/pull/7394) **liguozhong**: Fix issue with the Cloudflare target that caused it to stop working after it received an error in the logpull request as explained in issue +* [6766](https://github.com/grafana/loki/pull/6766) **kavirajk**: fix(logql): Make `LabelSampleExtractor` ignore processing the line if it doesn't contain that specific label. Fixes unwrap behavior explained in the issue * [7016](https://github.com/grafana/loki/pull/7016) **chodges15**: Fix issue with dropping logs when a file based SD target's labels are updated ##### Changes -* **quodlibetor**: Change Docker target discovery log level from `Error` to `Info` +* **quodlibetor**: Change Docker target discovery log level from `Error` to `Info` #### Logcli + * [7325](https://github.com/grafana/loki/pull/7325) **dbirks**: Document setting up command completion * [8518](https://github.com/grafana/loki/pull/8518) **SN9NV**: Add parallel flags #### Fluent Bit #### Loki Canary + * [7398](https://github.com/grafana/loki/pull/7398) **verejoel**: Allow insecure TLS connections #### Jsonnet -* [6189](https://github.com/grafana/loki/pull/6189) **irizzant**: Add creation of a `ServiceMonitor` object for Prometheus scraping through configuration parameter `create_service_monitor`. 
Simplify mixin usage by adding (https://github.com/prometheus-operator/kube-prometheus) library. -* [6662](https://github.com/grafana/loki/pull/6662) **Whyeasy**: Fixes memberlist error when using a stateful ruler. +* [6189](https://github.com/grafana/loki/pull/6189) **irizzant**: Add creation of a `ServiceMonitor` object for Prometheus scraping through configuration parameter `create_service_monitor`. Simplify mixin usage by adding (https://github.com/prometheus-operator/kube-prometheus) library. +* [6662](https://github.com/grafana/loki/pull/6662) **Whyeasy**: Fixes memberlist error when using a stateful ruler. ### Notes @@ -706,11 +749,13 @@ Check the history of the branch `release-2.7.x`. # 2.6.0 (2022/07/08) ### All Changes + Here is the list with the changes that were produced since the previous release. #### Loki ##### Enhancements + * [5662](https://github.com/grafana/loki/pull/5662) **ssncferreira** **chaudum** Improve performance of instant queries by splitting range into multiple subqueries that are executed in parallel. * [5848](https://github.com/grafana/loki/pull/5848) **arcosx**: Add Baidu AI Cloud as a storage backend choice. * [6410](https://github.com/grafana/loki/pull/6410) **MichelHollands**: Add support for per tenant delete API access enabling. @@ -721,12 +766,14 @@ Here is the list with the changes that were produced since the previous release. * [6163](https://github.com/grafana/loki/pull/6163) **jburnham**: LogQL: Add a `default` sprig template function in LogQL label/line formatter. ##### Fixes + * [6152](https://github.com/grafana/loki/pull/6152) **slim-bean**: Fixes unbounded ingester memory growth when live tailing under specific circumstances. * [5685](https://github.com/grafana/loki/pull/5685) **chaudum**: Fix bug in push request parser that allowed users to send arbitrary non-string data as "log line". * [5799](https://github.com/grafana/loki/pull/5799) **cyriltovena** Fix deduping issues when multiple entries with the same timestamp exist.
* [5888](https://github.com/grafana/loki/pull/5888) **Papawy** Fix common configuration block net interface name when overwritten by ring common configuration. ##### Changes + * [6361](https://github.com/grafana/loki/pull/6361) **chaudum**: Sum values in unwrapped rate aggregation instead of treating them as counter. * [6412](https://github.com/grafana/loki/pull/6412) **chaudum**: Add new unwrapped range aggregation `rate_counter()` to LogQL * [6042](https://github.com/grafana/loki/pull/6042) **slim-bean**: Add a new configuration to allow fudging of ingested timestamps to guarantee sort order of duplicate timestamps at query time. @@ -740,6 +787,7 @@ Here is the list with the changes that were produced since the previous release. #### Promtail ##### Enhancements + * [6105](https://github.com/grafana/loki/pull/6105) **rutgerke** Export metrics for the Promtail journal target. * [5943](https://github.com/grafana/loki/pull/5943) **tpaschalis**: Add configuration support for excluding configuration files when instantiating Promtail. * [5790](https://github.com/grafana/loki/pull/5790) **chaudum**: Add UDP support for Promtail's syslog target. @@ -749,17 +797,24 @@ Here is the list with the changes that were produced since the previous release. * [6395](https://github.com/grafana/loki/pull/6395) **DylanGuedes**: Add encoding support ##### Fixes + * [6034](https://github.com/grafana/loki/pull/6034) **DylanGuedes**: Promtail: Fix symlink tailing behavior.
+ ##### Changes + * [6371](https://github.com/grafana/loki/pull/6371) **witalisoft**: BREAKING: Support more complex match based on multiple extracted data fields in drop stage * [5686](https://github.com/grafana/loki/pull/5686) **ssncferreira**: Move promtail StreamLagLabels config to upper level config.Config * [5839](https://github.com/grafana/loki/pull/5839) **marctc**: Add ActiveTargets method to promtail * [5661](https://github.com/grafana/loki/pull/5661) **masslessparticle**: Invalidate caches on deletes + #### Fluent Bit + * [5711](https://github.com/grafana/loki/pull/5711) **MichelHollands**: Update fluent-bit output name #### Loki Canary + * [6310](https://github.com/grafana/loki/pull/6310) **chodges15**: Add support for client-side TLS certs in loki-canary for Loki connection + ### Notes This release was created from a branch starting at commit `1794a766134f07b54386b1a431b58e1d44e6d7f7` but it may also contain backported changes from main. @@ -783,6 +838,7 @@ to include only the most relevant. #### Loki ##### Enhancements + * [5542](https://github.com/grafana/loki/pull/5542) **bboreham**: regexp filter: use modified package with optimisations * [5318](https://github.com/grafana/loki/pull/5318) **jeschkies**: Speed up `EntrySortIterator` by 20%. * [5317](https://github.com/grafana/loki/pull/5317) **owen-d**: Logql/parallel binop @@ -799,9 +855,11 @@ to include only the most relevant. 
* [5013](https://github.com/grafana/loki/pull/5013) **liguozhong**: [new feature] logql: extrapolate unwrapped rate function * [4947](https://github.com/grafana/loki/pull/4947) **siavashs**: Support Redis Cluster Configuration Endpoint * [4938](https://github.com/grafana/loki/pull/4938) **DylanGuedes**: Add distributor ring page -* [4879](https://github.com/grafana/loki/pull/4879) **cyriltovena**: LogQL: add __line__ function to | line_format template +* [4879](https://github.com/grafana/loki/pull/4879) **cyriltovena**: LogQL: add `__line__` function to | line_format template * [4858](https://github.com/grafana/loki/pull/4858) **sandy2008**: feat(): add ManagedIdentity in Azure Blob Storage + ## Main + * [5789](https://github.com/grafana/loki/pull/5789) **bboreham**: Production config: add dot to some DNS address to reduce lookups. * [5780](https://github.com/grafana/loki/pull/5780) **simonswine**: Update alpine image to 3.15.4. * [5715](https://github.com/grafana/loki/pull/5715) **chaudum** Add option to push RFC5424 syslog messages from Promtail in syslog scrape target. @@ -840,7 +898,7 @@ to include only the most relevant. * [5144](https://github.com/grafana/loki/pull/5144) **dannykopping** Ruler: fix remote write basic auth credentials. * [5091](https://github.com/grafana/loki/pull/5091) **owen-d**: Changes `ingester.concurrent-flushes` default to 32 * [5031](https://github.com/grafana/loki/pull/5031) **liguozhong**: Promtail: Add global read rate limiting. -* [4879](https://github.com/grafana/loki/pull/4879) **cyriltovena**: LogQL: add __line__ function to | line_format template. +* [4879](https://github.com/grafana/loki/pull/4879) **cyriltovena**: LogQL: add `__line__` function to | line_format template.
* [5081](https://github.com/grafana/loki/pull/5081) **SasSwart**: Add the option to configure memory ballast for Loki * [5085](https://github.com/grafana/loki/pull/5085) **aknuds1**: Upgrade Cortex to [e0807c4eb487](https://github.com/cortexproject/cortex/compare/4e9fc3a2b5ab..e0807c4eb487) and Prometheus to [692a54649ed7](https://github.com/prometheus/prometheus/compare/2a3d62ac8456..692a54649ed7) * [5067](https://github.com/grafana/loki/pull/5057) **cstyan**: Add a metric to Azure Blob Storage client to track total egress bytes @@ -863,7 +921,6 @@ to include only the most relevant. * [4731](https://github.com/grafana/loki/pull/4731) **cyriltovena**: Improve heap iterators. * [4394](https://github.com/grafana/loki/pull/4394) **cyriltovena**: Improve case insensitive search to avoid allocations. - ##### Fixes * [5768](https://github.com/grafana/loki/pull/5768) **slim-bean**: Loki: Increase flush_op_timeout default from 10s to 10m @@ -904,6 +961,7 @@ to include only the most relevant. * [4741](https://github.com/grafana/loki/pull/4741) **sandeepsukhani**: index cleanup fixes while applying retention ##### Changes + * [5544](https://github.com/grafana/loki/pull/5544) **ssncferreira**: Update vectorAggEvaluator to fail for expressions without grouping * [5543](https://github.com/grafana/loki/pull/5543) **cyriltovena**: update loki go version to 1.17.8 * [5450](https://github.com/grafana/loki/pull/5450) **BenoitKnecht**: pkg/ruler/base: Add external_labels option @@ -935,10 +993,10 @@ to include only the most relevant. 
* [4736](https://github.com/grafana/loki/pull/4736) **sandeepsukhani**: allow applying retention at different interval than compaction with a config * [4656](https://github.com/grafana/loki/pull/4656) **ssncferreira**: Fix dskit/ring metric with 'cortex_' prefix - #### Promtail ##### Enhancements + * [5359](https://github.com/grafana/loki/pull/5359) **JBSchami**: Lambda-promtail: Enhance lambda-promtail to support adding extra labels from an environment variable value * [5290](https://github.com/grafana/loki/pull/5290) **ssncferreira**: Update promtail to support duration string formats * [5051](https://github.com/grafana/loki/pull/5051) **liguozhong**: [new] promtail pipeline: Promtail Rate Limit stage #5048 @@ -949,6 +1007,7 @@ to include only the most relevant. * [4663](https://github.com/grafana/loki/pull/4663) **taisho6339**: Add SASL&mTLS authentication support for Kafka in Promtail ##### Fixes + * [5497](https://github.com/grafana/loki/pull/5497) **MasslessParticle**: Fix orphaned metrics in the file tailer * [5409](https://github.com/grafana/loki/pull/5409) **ldb**: promtail/targets/syslog: Enable best effort parsing for Syslog messages * [5246](https://github.com/grafana/loki/pull/5246) **rsteneteg**: Promtail: skip glob search if filetarget path is an existing file and not a directory @@ -959,24 +1018,29 @@ to include only the most relevant. * [5698](https://github.com/grafana/loki/pull/5698) **paullryan**: Promtail: Fix retry/stop when erroring for out of cloudflare retention range (e.g. over 168 hours old) ##### Changes + * [5377](https://github.com/grafana/loki/pull/5377) **slim-bean**: Promtail: Remove promtail_log_entries_bytes_bucket histogram * [5266](https://github.com/grafana/loki/pull/5266) **jeschkies**: Write Promtail position file atomically. 
* [4794](https://github.com/grafana/loki/pull/4794) **taisho6339**: Aggregate inotify watcher to file target manager * [4745](https://github.com/grafana/loki/pull/4745) **taisho6339**: Expose Kafka message key in labels #### Logcli + * [5477](https://github.com/grafana/loki/pull/5477) **atomic77**: logcli: Remove port from TLS server name when provided in --addr * [4667](https://github.com/grafana/loki/pull/4667) **jeschkies**: Package logcli as rpm and deb. * [4606](https://github.com/grafana/loki/pull/4606) **kavirajk**: Execute Loki queries on raw log data piped to stdin #### Lambda-Promtail + * [5065](https://github.com/grafana/loki/pull/5065) **AndreZiviani**: lambda-promtail: Add ability to ingest logs from S3 * [7632](https://github.com/grafana/loki/pull/7632) **changhyuni**: lambda-promtail: Add kinesis data stream to use in terraform #### Fluent Bit + * [5223](https://github.com/grafana/loki/pull/5223) **cyriltovena**: fluent-bit: Attempt to unmarshal nested json. #### FluentD + * [6240](https://github.com/grafana/loki/pull/6240) **taharah**: Add the feature flag `include_thread_label` to allow the `fluentd_thread` label included when using multiple threads for flushing to be configurable * [5107](https://github.com/grafana/loki/pull/5107) **chaudum**: fluentd: Fix bug that caused lines to be dropped when containing non utf-8 characters * [5163](https://github.com/grafana/loki/pull/5163) **chaudum**: Fix encoding error in fluentd client @@ -1145,6 +1209,7 @@ Here is a list of all changes included in 2.4.0. * [4071](https://github.com/grafana/loki/pull/4071) **jeschkies**: Support frontend V2 with query scheduler. 
#### Promtail + * [4599](https://github.com/grafana/loki/pull/4599) **rsteneteg**: [Promtail] resolve issue with promtail not scraping target if only path changed in a simpler way that don't need mutex to sync threads * [4588](https://github.com/grafana/loki/pull/4588) **owen-d**: regenerates assets from current vfsgen dependency * [4568](https://github.com/grafana/loki/pull/4568) **cyriltovena**: Promtail Kafka target @@ -1160,11 +1225,13 @@ Here is a list of all changes included in 2.4.0. * [3907](https://github.com/grafana/loki/pull/3907) **johanfleury**: promtail: add support for TLS/mTLS in syslog receiver #### Logcli + * [4303](https://github.com/grafana/loki/pull/4303) **cyriltovena**: Allow to run local boltdb queries with logcli. * [4242](https://github.com/grafana/loki/pull/4242) **chaudum**: cli: Register configuration option `store.max-look-back-period` as CLI argument * [4203](https://github.com/grafana/loki/pull/4203) **invidian**: cmd/logcli: add --follow flag as an alias for --tail #### Build + * [4639](https://github.com/grafana/loki/pull/4639) **slim-bean**: Build: simplify how protos are built * [4609](https://github.com/grafana/loki/pull/4609) **slim-bean**: Build: Update CODEOWNERS to put Karen back in charge of the docs * [4541](https://github.com/grafana/loki/pull/4541) **cstyan**: Fix drone ECR publish. @@ -1181,12 +1248,14 @@ Here is a list of all changes included in 2.4.0. * [4189](https://github.com/grafana/loki/pull/4189) **mathew-fleisch**: Makefile: Add darwin/arm64 build to release binaries #### Project + * [4535](https://github.com/grafana/loki/pull/4535) **carlpett**: Fix branch reference in PR template * [4604](https://github.com/grafana/loki/pull/4604) **kavirajk**: Update PR template to include `changelog` update in the checklist * [4494](https://github.com/grafana/loki/pull/4494) **cstyan**: Add a parameter to keep/drop the stream label from cloudwatch.
* [4315](https://github.com/grafana/loki/pull/4315) **cstyan**: Rewrite lambda-promtail to use subscription filters. #### Dashboards + * [4634](https://github.com/grafana/loki/pull/4634) **cyriltovena**: Fixes the operational dashboard using an old metric. * [4618](https://github.com/grafana/loki/pull/4618) **cstyan**: loki-mixin: fix label selectors + logs dashboard * [4575](https://github.com/grafana/loki/pull/4575) **dannykopping**: Adding recording rules dashboard @@ -1195,15 +1264,17 @@ Here is a list of all changes included in 2.4.0. * [4423](https://github.com/grafana/loki/pull/4423) **cstyan**: Add tag/link fix to operational dashboard and promtail mixin dashboard. * [4401](https://github.com/grafana/loki/pull/4401) **cstyan**: Minor dashboard fixes - #### Docker-driver + * [4396](https://github.com/grafana/loki/pull/4396) **owen-d**: Removes docker driver empty log line message * [4190](https://github.com/grafana/loki/pull/4190) **jeschkies**: Document known Docker driver issues. #### FluentD + * [4261](https://github.com/grafana/loki/pull/4261) **MrWong99**: FluentD output plugin: Remove an unused variable when processing chunks #### Docs + * [4646](https://github.com/grafana/loki/pull/4646) **KMiller-Grafana**: Docs: revise modes of operation section * [4631](https://github.com/grafana/loki/pull/4631) **kavirajk**: Add changelog and upgrade guide for #4556 * [4616](https://github.com/grafana/loki/pull/4616) **owen-d**: index-gw sts doc fix. closes #4583 @@ -1272,6 +1343,7 @@ Here is a list of all changes included in 2.4.0. 
* [3880](https://github.com/grafana/loki/pull/3880) **timothydlister**: Update fluent-plugin-loki documentation URLs #### Jsonnet + * [4629](https://github.com/grafana/loki/pull/4629) **owen-d**: Default wal to enabled in jsonnet lib * [4624](https://github.com/grafana/loki/pull/4624) **chaudum**: Disable chunk transfers in jsonnet lib * [4530](https://github.com/grafana/loki/pull/4530) **owen-d**: Jsonnet/overrides exporter @@ -1286,7 +1358,6 @@ Here is a list of all changes included in 2.4.0. * [4154](https://github.com/grafana/loki/pull/4154) **owen-d**: updates scheduler libsonnet * [4102](https://github.com/grafana/loki/pull/4102) **jeschkies**: Define ksonnet lib for query scheduler. - ### Notes This release was created from a branch starting at commit e95d193acf1633a6ec33a328b8a4a3d844e8e5f9 but it may also contain backported changes from main. @@ -1305,6 +1376,7 @@ Release notes for 2.3.0 can be found on the [release notes page](https://grafana ### All Changes #### Loki + * [4048](https://github.com/grafana/loki/pull/4048) **dannykopping**: Ruler: implementing write relabelling on recording rule samples * [4091](https://github.com/grafana/loki/pull/4091) **cyriltovena**: Fixes instant queries in the frontend. * [4087](https://github.com/grafana/loki/pull/4087) **cyriltovena**: Fixes unaligned shards between ingesters and storage. 
@@ -1424,6 +1496,7 @@ Release notes for 2.3.0 can be found on the [release notes page](https://grafana * [3050](https://github.com/grafana/loki/pull/3050) **cyriltovena**: first_over_time and last_over_time #### Docs + * [4031](https://github.com/grafana/loki/pull/4031) **KMiller-Grafana**: Docs: add weights to YAML metadata to order the LogQL subsections * [4029](https://github.com/grafana/loki/pull/4029) **bearice**: Docs: Update S3 permissions list * [4026](https://github.com/grafana/loki/pull/4026) **KMiller-Grafana**: Docs: correct fluentbit config value for DqueSync @@ -1503,6 +1576,7 @@ Release notes for 2.3.0 can be found on the [release notes page](https://grafana * [3430](https://github.com/grafana/loki/pull/3430) **kavirajk**: doc(gcplog): Add note on scraping multiple GCP projects #### Promtail + * [4011](https://github.com/grafana/loki/pull/4011) **dannykopping**: Promtail: adding pipeline stage inspector * [4006](https://github.com/grafana/loki/pull/4006) **dannykopping**: Promtail: output timestamp with nanosecond precision in dry-run mode * [3971](https://github.com/grafana/loki/pull/3971) **cyriltovena**: Fixes negative gauge in Promtail. 
@@ -1515,15 +1589,15 @@ Release notes for 2.3.0 can be found on the [release notes page](https://grafana * [3457](https://github.com/grafana/loki/pull/3457) **nmiculinic**: Promtail: Added path information to deleted tailed file * [3400](https://github.com/grafana/loki/pull/3400) **adityacs**: support max_message_length configuration for syslog parser - #### Logcli + * [3879](https://github.com/grafana/loki/pull/3879) **vyzigold**: logcli: Add retries to unsuccessful log queries * [3749](https://github.com/grafana/loki/pull/3749) **dbluxo**: logcli: add support for bearer token authentication * [3739](https://github.com/grafana/loki/pull/3739) **rsteneteg**: correct logcli instant query timestamp param name * [3678](https://github.com/grafana/loki/pull/3678) **cyriltovena**: Add the ability to wrap the roundtripper of the logcli client. - #### Build + * [4034](https://github.com/grafana/loki/pull/4034) **aknuds1**: loki-build-image: Fix building * [4028](https://github.com/grafana/loki/pull/4028) **aknuds1**: loki-build-image: Upgrade golangci-lint and Go * [4007](https://github.com/grafana/loki/pull/4007) **dannykopping**: Adding @grafana/loki-team as default CODEOWNERS @@ -1541,8 +1615,8 @@ Release notes for 2.3.0 can be found on the [release notes page](https://grafana * [3615](https://github.com/grafana/loki/pull/3615) **slim-bean**: Remove codecov * [3481](https://github.com/grafana/loki/pull/3481) **slim-bean**: Update Go and Alpine versions - #### Jsonnet + * [4030](https://github.com/grafana/loki/pull/4030) **cyriltovena**: Improve the sweep lag panel in the retention dashboard. 
* [3917](https://github.com/grafana/loki/pull/3917) **jvrplmlmn**: refactor(production/ksonnet): Remove kausal from the root element * [3893](https://github.com/grafana/loki/pull/3893) **sandeepsukhani**: update uid of loki-deletion dashboard @@ -1557,6 +1631,7 @@ Release notes for 2.3.0 can be found on the [release notes page](https://grafana * [3584](https://github.com/grafana/loki/pull/3584) **sandeepsukhani**: add loki resource usage dashboard for read and write path #### Project + * [3963](https://github.com/grafana/loki/pull/3963) **rfratto**: Remove Robert Fratto from list of team members * [3926](https://github.com/grafana/loki/pull/3926) **cyriltovena**: Add Danny Kopping to the Loki Team. * [3732](https://github.com/grafana/loki/pull/3732) **dannykopping**: Issue Templates: Improve wording and add warnings @@ -1567,6 +1642,7 @@ Release notes for 2.3.0 can be found on the [release notes page](https://grafana * [3630](https://github.com/grafana/loki/pull/3630) **slim-bean**: Re-license to AGPLv3 #### Docker-driver + * [3814](https://github.com/grafana/loki/pull/3814) **kavirajk**: Update the docker-driver doc about default labels * [3727](https://github.com/grafana/loki/pull/3727) **3Xpl0it3r**: docker-driver: remove duplicated code * [3709](https://github.com/grafana/loki/pull/3709) **cyriltovena**: Fixes docker driver that would panic when closed. @@ -1665,7 +1741,7 @@ TL;DR Loki 2.2 changes the internal chunk format which limits what versions you * [3237](https://github.com/grafana/loki/pull/3237) **cyriltovena**: Fixes unmarshalling of tailing responses. * [3236](https://github.com/grafana/loki/pull/3236) **slim-bean**: Loki: Log a crude lag metric for how far behind a client is. * [3234](https://github.com/grafana/loki/pull/3234) **cyriltovena**: Fixes previous commit not using the new sized body. -* [3233](https://github.com/grafana/loki/pull/3233) **cyriltovena**: Re-introduce https://github.com/grafana/loki/pull/3178. 
+* [3233](https://github.com/grafana/loki/pull/3233) **cyriltovena**: Re-introduce <https://github.com/grafana/loki/pull/3178>. * [3228](https://github.com/grafana/loki/pull/3228) **MichelHollands**: Add config endpoint * [3218](https://github.com/grafana/loki/pull/3218) **owen-d**: WAL backpressure * [3217](https://github.com/grafana/loki/pull/3217) **cyriltovena**: Rename checkpoint proto package to avoid conflict with cortex. @@ -1745,7 +1821,6 @@ TL;DR Loki 2.2 changes the internal chunk format which limits what versions you * [3270](https://github.com/grafana/loki/pull/3270) **chancez**: logcli: Fix handling of logcli query using --since/--from and --tail * [3229](https://github.com/grafana/loki/pull/3229) **dethi**: logcli: support --include-label when not using --tail - #### Jsonnet * [3447](https://github.com/grafana/loki/pull/3447) **owen-d**: Use better memory metric on operational dashboard @@ -1773,13 +1848,13 @@ TL;DR Loki 2.2 changes the internal chunk format which limits what versions you * [3240](https://github.com/grafana/loki/pull/3240) **sbaier1**: fix fluent-bit output plugin generating invalid JSON - #### Docker Logging Driver * [3331](https://github.com/grafana/loki/pull/3331) **cyriltovena**: Add pprof endpoint to docker-driver.
* [3225](https://github.com/grafana/loki/pull/3225) **Le0tk0k**: (fix: cmd/docker-driver): Insert a space in the error message #### Docs + * [5934](https://github.com/grafana/loki/pull/5934) **johgsc**: Docs: revise modes of operation section * [3437](https://github.com/grafana/loki/pull/3437) **caleb15**: docs: add note about regex * [3421](https://github.com/grafana/loki/pull/3421) **kavirajk**: doc(gcplog): Advanced log export filter example @@ -1842,13 +1917,11 @@ TL;DR Loki 2.2 changes the internal chunk format which limits what versions you * [3031](https://github.com/grafana/loki/pull/3031) **AdamKorcz**: Testing: Introduced continuous fuzzing * [3006](https://github.com/grafana/loki/pull/3006) **huikang**: Fix the docker image version in compose deployment - #### Tooling * [3377](https://github.com/grafana/loki/pull/3377) **slim-bean**: Tooling: Update chunks-inspect to understand the new chunk format as well as new compression algorithms * [3151](https://github.com/grafana/loki/pull/3151) **slim-bean**: Loki migrate-tool - ### Notes This release was created from revision 8012362674568379a3871ff8c4a2bfd1ddba7ad1 (Which was PR 3460) @@ -1858,7 +1931,6 @@ This release was created from revision 8012362674568379a3871ff8c4a2bfd1ddba7ad1 * Go Version: 1.15.3 * Cortex Version: 7dac81171c665be071bd167becd1f55528a9db32 - ## 2.1.0 (2020/12/23) Happy Holidays from the Loki team! Please enjoy a new Loki release to welcome in the New Year! @@ -1867,15 +1939,15 @@ Happy Holidays from the Loki team! Please enjoy a new Loki release to welcome in ### Notable changes -#### Helm users read this! +#### Helm users read this The Helm charts have moved! 
* [2720](https://github.com/grafana/loki/pull/2720) **torstenwalter**: Deprecate Charts as they have been moved -This was done to consolidate Grafana's helm charts for all Grafana projects in one place: https://github.com/grafana/helm-charts/ +This was done to consolidate Grafana's helm charts for all Grafana projects in one place: <https://github.com/grafana/helm-charts/> -**From now moving forward, please use the new Helm repo url: https://grafana.github.io/helm-charts** +**From now moving forward, please use the new Helm repo url: <https://grafana.github.io/helm-charts>** The charts in the Loki repo will soon be removed so please update your Helm repo to the new URL and submit your PR's over there as well @@ -1883,7 +1955,7 @@ Special thanks to @torstenwalter, @unguiculus, and @scottrigby for their initiat Also go check out the microservices helm chart contributed by @unguiculus in the new repo! -#### Fluent bit plugin users read this! +#### Fluent bit plugin users read this Fluent bit officially supports Loki as an output plugin now! WoooHOOO! @@ -1937,7 +2009,6 @@ A number of performance and resource improvements have been made as well! * [2959](https://github.com/grafana/loki/pull/2959) **cyriltovena**: Improve tailer matcher function. * [2876](https://github.com/grafana/loki/pull/2876) **jkellerer**: LogQL: Add unwrap bytes() conversion function - #### Notable mentions Thanks to @timbyr for adding an often requested feature, the ability to support environment variable expansion in config files!
@@ -1951,6 +2022,7 @@ Thanks to @huikang for adding a new docker-compose file for running Loki as micr ### All Changes #### Loki + * [2988](https://github.com/grafana/loki/pull/2988) **slim-bean**: Loki: handle faults when opening boltdb files * [2984](https://github.com/grafana/loki/pull/2984) **owen-d**: adds the ability to read chunkFormatV3 while writing v2 * [2983](https://github.com/grafana/loki/pull/2983) **slim-bean**: Loki: recover from panic opening boltdb files @@ -1977,6 +2049,7 @@ Thanks to @huikang for adding a new docker-compose file for running Loki as micr * [2751](https://github.com/grafana/loki/pull/2751) **jeschkies**: Logging: Log throughput and total bytes human readable. #### Helm + * [2986](https://github.com/grafana/loki/pull/2986) **cyriltovena**: Move CI to helm3. * [2967](https://github.com/grafana/loki/pull/2967) **czunker**: Remove `helm init` * [2965](https://github.com/grafana/loki/pull/2965) **czunker**: [Helm Chart Loki] Add needed k8s objects for alerting config @@ -1986,6 +2059,7 @@ Thanks to @huikang for adding a new docker-compose file for running Loki as micr * [2651](https://github.com/grafana/loki/pull/2651) **scottrigby**: helm chart: Fix broken logo #### Jsonnet + * [2976](https://github.com/grafana/loki/pull/2976) **beorn7**: Improve promtail alerts to retain the namespace label * [2961](https://github.com/grafana/loki/pull/2961) **sandeepsukhani**: add missing ingester query routes in loki reads and operational dashboard * [2899](https://github.com/grafana/loki/pull/2899) **halcyondude**: gateway: fix regression in tanka jsonnet @@ -1994,8 +2068,8 @@ Thanks to @huikang for adding a new docker-compose file for running Loki as micr * [2820](https://github.com/grafana/loki/pull/2820) **owen-d**: fixes promtail libsonnet tag. 
closes #2818 * [2718](https://github.com/grafana/loki/pull/2718) **halcyondude**: parameterize PVC storage class (ingester, querier, compactor) - #### Docs + * [2969](https://github.com/grafana/loki/pull/2969) **simonswine**: Add community forum to README.md * [2968](https://github.com/grafana/loki/pull/2968) **yuichi10**: logcli: Fix logcli logql document URL * [2942](https://github.com/grafana/loki/pull/2942) **hedss**: Docs: Corrects Fluent Bit documentation link to build the plugin. @@ -2016,27 +2090,30 @@ Thanks to @huikang for adding a new docker-compose file for running Loki as micr * [2636](https://github.com/grafana/loki/pull/2636) **LTek-online**: promtail documentation: changing the headers of the configuration docu to reflect configuration code #### Promtail + * [2957](https://github.com/grafana/loki/pull/2957) **slim-bean**: Promtail: Update debian image and use a newer libsystemd * [2928](https://github.com/grafana/loki/pull/2928) **cyriltovena**: Skip journald bad message. * [2914](https://github.com/grafana/loki/pull/2914) **chancez**: promtail: Add support for using syslog message timestamp * [2910](https://github.com/grafana/loki/pull/2910) **rfratto**: Expose underlying promtail client - #### Logcli + * [2948](https://github.com/grafana/loki/pull/2948) **tomwilkie**: Add a few more instructions to logcli --help. 
#### Build + * [2877](https://github.com/grafana/loki/pull/2877) **cyriltovena**: Update to go 1.15 * [2814](https://github.com/grafana/loki/pull/2814) **torkelo**: Stats: Adding metrics collector GitHub action #### Fluentd + * [2825](https://github.com/grafana/loki/pull/2825) **cyriltovena**: Bump fluentd plugin * [2434](https://github.com/grafana/loki/pull/2434) **andsens**: fluent-plugin: Improve escaping in key_value format - ### Notes This release was created from revision ae9c4b82ec4a5d21267da50d6a1a8170e0ef82ff (Which was PR 2960) and the following PR's were cherry-picked + * [2984](https://github.com/grafana/loki/pull/2984) **owen-d**: adds the ability to read chunkFormatV3 while writing v2 * [2974](https://github.com/grafana/loki/pull/2974) **hedss**: fluent-bit: Rename Fluent Bit plugin output name. @@ -2077,7 +2154,7 @@ Thanks again for the many incredible contributions and improvements from the won Check the [upgrade guide](https://github.com/grafana/loki/blob/master/docs/sources/setup/upgrade/_index.md#200) for detailed information on all these changes. -### 2.0!!!! +### 2.0 There are too many PR's to list individually for the major improvements which we thought justified a 2.0 but here is the high level: @@ -2104,9 +2181,10 @@ Thank you @dlemel8 for this PR! Now you can start Loki with `-verify-config` to ### All Changes #### Loki + * [2804](https://github.com/grafana/loki/pull/2804) **slim-bean**: Loki: log any chunk fetch failure * [2803](https://github.com/grafana/loki/pull/2803) **slim-bean**: Update local and docker default config files to use boltdb-shipper with a few other config changes -* [2796](https://github.com/grafana/loki/pull/2796) **cyriltovena**: Fixes a bug that would add __error__ label incorrectly. +* [2796](https://github.com/grafana/loki/pull/2796) **cyriltovena**: Fixes a bug that would add `__error__` label incorrectly.
* [2793](https://github.com/grafana/loki/pull/2793) **cyriltovena**: Improve the way we reverse iterator for backward queries. * [2790](https://github.com/grafana/loki/pull/2790) **sandeepsukhani**: Boltdb shipper metrics changes * [2788](https://github.com/grafana/loki/pull/2788) **sandeepsukhani**: add a metric in compactor to record timestamp of last successful run @@ -2170,6 +2248,7 @@ Thank you @dlemel8 for this PR! Now you can start Loki with `-verify-config` to * [2487](https://github.com/grafana/loki/pull/2487) **sandeepsukhani**: upload boltdb files from shipper only when they are not expected to be modified or during shutdown #### Docs + * [2797](https://github.com/grafana/loki/pull/2797) **cyriltovena**: Logqlv2 docs * [2772](https://github.com/grafana/loki/pull/2772) **DesistDaydream**: reapir Retention Example Configuration * [2762](https://github.com/grafana/loki/pull/2762) **PabloCastellano**: fix: typo in upgrade.md @@ -2211,18 +2290,22 @@ Thank you @dlemel8 for this PR! 
Now you can start Loki with `-verify-config` to * [2500](https://github.com/grafana/loki/pull/2500) **oddlittlebird**: Docs: Update README.md #### Helm + * [2746](https://github.com/grafana/loki/pull/2746) **marcosartori**: helm/fluentbit K8S-Logging.Exclude & and Mem_Buf_Limit toggle * [2742](https://github.com/grafana/loki/pull/2742) **steven-sheehy**: Fix linting errors and use of deprecated repositories * [2659](https://github.com/grafana/loki/pull/2659) **rskrishnar**: [Promtail] enables configuring psp in helm chart * [2554](https://github.com/grafana/loki/pull/2554) **alexandre-allard-scality**: production/helm: add support for PV selector in Loki statefulset #### FluentD + * [2739](https://github.com/grafana/loki/pull/2739) **jgehrcke**: FluentD loki plugin: add support for bearer_token_file parameter #### Fluent Bit + * [2568](https://github.com/grafana/loki/pull/2568) **zjj2wry**: fluent-bit plugin support TLS #### Promtail + * [2723](https://github.com/grafana/loki/pull/2723) **carlpett**: Promtail: Add counter promtail_batch_retries_total * [2717](https://github.com/grafana/loki/pull/2717) **slim-bean**: Promtail: Fix deadlock on tailer shutdown. * [2710](https://github.com/grafana/loki/pull/2710) **slim-bean**: Promtail: (and also fluent-bit) change the max batch size to 1MB @@ -2239,6 +2322,7 @@ Thank you @dlemel8 for this PR! Now you can start Loki with `-verify-config` to * [2532](https://github.com/grafana/loki/pull/2532) **slim-bean**: Promtail: Restart the tailer if we fail to read and upate current position #### Ksonnet + * [2719](https://github.com/grafana/loki/pull/2719) **halcyondude**: nit: fix formatting for ksonnet/loki * [2677](https://github.com/grafana/loki/pull/2677) **sandeepsukhani**: fix jsonnet for memcached-writes when using boltdb-shipper * [2617](https://github.com/grafana/loki/pull/2617) **periklis**: Add config options for loki dashboards @@ -2253,15 +2337,18 @@ Thank you @dlemel8 for this PR! 
Now you can start Loki with `-verify-config` to * [2494](https://github.com/grafana/loki/pull/2494) **primeroz**: Jsonnet Promtail: Change function for mounting configmap in promtail daemonset #### Logstash + * [2607](https://github.com/grafana/loki/pull/2607) **adityacs**: Logstash cpu usage fix #### Build + * [2602](https://github.com/grafana/loki/pull/2602) **sandeepsukhani**: add support for building querytee * [2561](https://github.com/grafana/loki/pull/2561) **tharun208**: Added logcli docker image * [2549](https://github.com/grafana/loki/pull/2549) **simnv**: Ignore .exe files build for Windows * [2527](https://github.com/grafana/loki/pull/2527) **owen-d**: Update docker-compose.yaml to use 1.6.0 #### Docker Logging Driver + * [2459](https://github.com/grafana/loki/pull/2459) **RaitoBezarius**: Docker logging driver: Add a keymod for the extra attributes from the Docker logging driver ### Dependencies @@ -2355,7 +2442,6 @@ If you are using the query-frontend: * [2336](https://github.com/grafana/loki/pull/2336) provides two new flags that will print the entire Loki config object at startup. Be warned there are a lot of config options, and many won’t apply to your setup (such as storage configs you aren’t using), but this can be a really useful tool when troubleshooting. Sticking with the theme of best for last, * [2224](https://github.com/grafana/loki/pull/2224) and [2288](https://github.com/grafana/loki/pull/2288) improve support for running Loki with a shared Ring using memberlist while not requiring Consul or Etcd. We need to follow up soon with some better documentation or a blog post on this! 
- ### Dependencies * Go Version: 1.14.2 @@ -2364,6 +2450,7 @@ If you are using the query-frontend: ### All Changes #### Loki + * [2484](https://github.com/grafana/loki/pull/2484) **slim-bean**: Loki: fix batch iterator error when all chunks overlap and chunk time ranges are greater than query time range * [2483](https://github.com/grafana/loki/pull/2483) **sandeepsukhani**: download boltdb files parallelly during reads * [2472](https://github.com/grafana/loki/pull/2472) **owen-d**: series endpoint uses normal splits @@ -2419,6 +2506,7 @@ If you are using the query-frontend: * [2032](https://github.com/grafana/loki/pull/2032) **tivvit**: Added support for tail to query frontend #### Promtail + * [2496](https://github.com/grafana/loki/pull/2496) **slim-bean**: Promtail: Drop stage * [2475](https://github.com/grafana/loki/pull/2475) **slim-bean**: Promtail: force the log level on any Loki Push API target servers to match Promtail's log level. * [2474](https://github.com/grafana/loki/pull/2474) **slim-bean**: Promtail: use --client.external-labels for all clients @@ -2443,6 +2531,7 @@ If you are using the query-frontend: * [2087](https://github.com/grafana/loki/pull/2087) **adityacs**: Set JournalTarget Priority value to keyword #### Logcli + * [2497](https://github.com/grafana/loki/pull/2497) **slim-bean**: logcli: adds --analyize-labels to logcli series command and changes how labels are provided to the command * [2482](https://github.com/grafana/loki/pull/2482) **slim-bean**: Logcli: automatically batch requests * [2470](https://github.com/grafana/loki/pull/2470) **adityacs**: colored labels output for logcli @@ -2451,10 +2540,11 @@ If you are using the query-frontend: * [2083](https://github.com/grafana/loki/pull/2083) **adityacs**: Support querying labels on time range in logcli #### Docs + * [2473](https://github.com/grafana/loki/pull/2473) **owen-d**: fixes lambda-promtail relative doc link * [2454](https://github.com/grafana/loki/pull/2454) **oddlittlebird**: 
Create CODEOWNERS * [2439](https://github.com/grafana/loki/pull/2439) **till**: Docs: updated "Upgrading" for docker driver -* [2437](https://github.com/grafana/loki/pull/2437) **wardbekker**: DOCS: clarified globbing behaviour of __path__ of the doublestar library +* [2437](https://github.com/grafana/loki/pull/2437) **wardbekker**: DOCS: clarified globbing behaviour of `__path__` of the doublestar library * [2431](https://github.com/grafana/loki/pull/2431) **endu**: fix dead link * [2425](https://github.com/grafana/loki/pull/2425) **RichiH**: Change conduct contact email address * [2420](https://github.com/grafana/loki/pull/2420) **petuhovskiy**: Fix docker driver doc @@ -2501,9 +2591,11 @@ If you are using the query-frontend: * [2092](https://github.com/grafana/loki/pull/2092) **i-takizawa**: docs: make visible #### Build + * [2467](https://github.com/grafana/loki/pull/2467) **slim-bean**: Update Loki build image #### Ksonnet + * [2460](https://github.com/grafana/loki/pull/2460) **Duologic**: refactor: use $.core.v1.envVar * [2452](https://github.com/grafana/loki/pull/2452) **slim-bean**: ksonnet: Reduce querier parallelism to a more sane default value and remove the default setting for storage_backend * [2377](https://github.com/grafana/loki/pull/2377) **Duologic**: refactor: moved jaeger-agent-mixin @@ -2513,6 +2605,7 @@ If you are using the query-frontend: * [2091](https://github.com/grafana/loki/pull/2091) **beorn7**: Keep scrape config in line with the new Prometheus scrape config #### Docker logging driver + * [2435](https://github.com/grafana/loki/pull/2435) **cyriltovena**: Add more precisions on the docker driver installed on the daemon. * [2343](https://github.com/grafana/loki/pull/2343) **jdfalk**: loki-docker-driver: Change "ignoring empty line" to debug logging * [2295](https://github.com/grafana/loki/pull/2295) **cyriltovena**: Remove mount in the docker driver.
@@ -2520,11 +2613,13 @@ If you are using the query-frontend: * [2116](https://github.com/grafana/loki/pull/2116) **cyriltovena**: Allows to change the log driver mode and buffer size. #### Logstash output plugin + * [2415](https://github.com/grafana/loki/pull/2415) **cyriltovena**: Set service values via --set for logstash. * [2410](https://github.com/grafana/loki/pull/2410) **adityacs**: logstash code refactor and doc improvements * [1822](https://github.com/grafana/loki/pull/1822) **adityacs**: Loki Logstash Plugin #### Loki canary + * [2413](https://github.com/grafana/loki/pull/2413) **slim-bean**: Loki-Canary: Backoff retries on query failures, add histograms for query performance. * [2369](https://github.com/grafana/loki/pull/2369) **slim-bean**: Loki Canary: One more round of improvements to query for missing websocket entries up to max-wait * [2350](https://github.com/grafana/loki/pull/2350) **slim-bean**: Canary tweaks @@ -2532,12 +2627,14 @@ If you are using the query-frontend: * [2259](https://github.com/grafana/loki/pull/2259) **ombre8**: Canary: make stream configurable #### Fluentd + * [2407](https://github.com/grafana/loki/pull/2407) **cyriltovena**: bump fluentd version to release a new gem. * [2399](https://github.com/grafana/loki/pull/2399) **tarokkk**: fluentd: Make fluentd version requirements permissive * [2179](https://github.com/grafana/loki/pull/2179) **takanabe**: Improve fluentd plugin development experience * [2171](https://github.com/grafana/loki/pull/2171) **takanabe**: Add server TLS certificate verification #### Fluent Bit + * [2375](https://github.com/grafana/loki/pull/2375) **cyriltovena**: Fixes the fluentbit batchwait backward compatiblity. 
* [2367](https://github.com/grafana/loki/pull/2367) **dojci**: fluent-bit: Add more loki client configuration options * [2365](https://github.com/grafana/loki/pull/2365) **dojci**: fluent-bit: Fix fluent-bit exit callback when buffering is enabled @@ -2547,6 +2644,7 @@ If you are using the query-frontend: * [2089](https://github.com/grafana/loki/pull/2089) **FrederikNS**: Allow configuring more options for output configuration #### Helm + * [2406](https://github.com/grafana/loki/pull/2406) **steven-sheehy**: Helm: Fix regression in chart name * [2379](https://github.com/grafana/loki/pull/2379) **StevenReitsma**: production/helm: Add emptyDir volume type to promtail PSP * [2366](https://github.com/grafana/loki/pull/2366) **StevenReitsma**: production/helm: Add projected and downwardAPI volume types to PodSecurityPolicy (#2355) @@ -2561,6 +2659,7 @@ If you are using the query-frontend: * [2091](https://github.com/grafana/loki/pull/2091) **beorn7**: Keep scrape config in line with the new Prometheus scrape config #### Build + * [2371](https://github.com/grafana/loki/pull/2371) **cyriltovena**: Fixes helm publish that needs now to add repo. * [2341](https://github.com/grafana/loki/pull/2341) **slim-bean**: Build: Fix CI helm test * [2309](https://github.com/grafana/loki/pull/2309) **cyriltovena**: Test again arm32 on internal ci. @@ -2568,7 +2667,6 @@ If you are using the query-frontend: * [2287](https://github.com/grafana/loki/pull/2287) **wardbekker**: Change the Grafana image to latest * [2212](https://github.com/grafana/loki/pull/2212) **roidelapluie**: Remove unhelpful/problematic term in circleci.yml - ## 1.5.0 (2020-05-20) It's been a busy month and a half since 1.4.0 was released, and a lot of new improvements have been added to Loki since! @@ -2726,6 +2824,7 @@ We now GPG sign helm packages! * [1706](https://github.com/grafana/loki/pull/1706) **cyriltovena**: Non-root user docker image for Loki. 
#### Logcli + * [2027](https://github.com/grafana/loki/pull/2027) **pstibrany**: logcli: Query needs to be stored into url.RawQuery, and not url.Path * [2000](https://github.com/grafana/loki/pull/2000) **cyriltovena**: Improve URL building in the logcli to strip trailing /. * [1922](https://github.com/grafana/loki/pull/1922) **bavarianbidi**: logcli: org-id/tls-skip-verify set via env var @@ -2735,6 +2834,7 @@ We now GPG sign helm packages! * [1712](https://github.com/grafana/loki/pull/1712) **rfratto**: clarify logcli commands and output #### Promtail + * [2069](https://github.com/grafana/loki/pull/2069) **slim-bean**: Promtail: log at debug level when nothing matches the specified path for a file target * [2066](https://github.com/grafana/loki/pull/2066) **slim-bean**: Promtail: metrics stage can also count line bytes * [2049](https://github.com/grafana/loki/pull/2049) **adityacs**: Fix promtail client default values @@ -2749,12 +2849,14 @@ We now GPG sign helm packages! * [1627](https://github.com/grafana/loki/pull/1627) **rfratto**: Proposal: Promtail Push API #### Docker Driver + * [2076](https://github.com/grafana/loki/pull/2076) **cyriltovena**: Allows to pass inlined pipeline stages to the docker driver. * [2054](https://github.com/grafana/loki/pull/2054) **bkmit**: Docker driver: Allow to provision external pipeline files to plugin * [1906](https://github.com/grafana/loki/pull/1906) **cyriltovena**: Add no-file and keep-file log option for docker driver. * [1903](https://github.com/grafana/loki/pull/1903) **cyriltovena**: Log docker driver config map. #### Fluentd + * [2074](https://github.com/grafana/loki/pull/2074) **osela**: fluentd plugin: support placeholders in tenant field * [2006](https://github.com/grafana/loki/pull/2006) **Skeen**: fluent-plugin-loki: Restructuring and CI * [1909](https://github.com/grafana/loki/pull/1909) **jgehrcke**: fluentd loki plugin README: add note about labels @@ -2762,15 +2864,18 @@ We now GPG sign helm packages! 
* [1811](https://github.com/grafana/loki/pull/1811) **JamesJJ**: Error handling: Show data stream at "debug" level, not "warn" #### Fluent Bit + * [2040](https://github.com/grafana/loki/pull/2040) **avii-ridge**: Add extraOutputs variable to support multiple outputs for fluent-bit * [1915](https://github.com/grafana/loki/pull/1915) **DirtyCajunRice**: Fix fluent-bit metrics * [1890](https://github.com/grafana/loki/pull/1890) **dottedmag**: fluentbit: JSON encoding: avoid base64 encoding of []byte inside other slices * [1791](https://github.com/grafana/loki/pull/1791) **cyriltovena**: Improve fluentbit logfmt. #### Ksonnet + * [1980](https://github.com/grafana/loki/pull/1980) **cyriltovena**: Log slow query from the frontend by default in ksonnet. ##### Mixins + * [2080](https://github.com/grafana/loki/pull/2080) **beorn7**: mixin: Accept suffixes to pod name in instance labels * [2044](https://github.com/grafana/loki/pull/2044) **slim-bean**: Dashboards: fixes the cpu usage graphs * [2043](https://github.com/grafana/loki/pull/2043) **joe-elliott**: Swapped to container restarts over terminated reasons @@ -2779,6 +2884,7 @@ We now GPG sign helm packages! * [1913](https://github.com/grafana/loki/pull/1913) **tomwilkie**: s/dashboards/grafanaDashboards. #### Helm + * [2038](https://github.com/grafana/loki/pull/2038) **oke-py**: Docs: update Loki Helm Chart document to support Helm 3 * [2015](https://github.com/grafana/loki/pull/2015) **etashsingh**: Change image tag from 1.4.1 to 1.4.0 in Helm chart * [1981](https://github.com/grafana/loki/pull/1981) **sshah90**: added extraCommandlineArgs in values file @@ -2790,9 +2896,11 @@ We now GPG sign helm packages! 
* [1817](https://github.com/grafana/loki/pull/1817) **bclermont**: Helm chart: Prevent prometheus to scrape both services #### Loki Canary + * [1891](https://github.com/grafana/loki/pull/1891) **joe-elliott**: Addition of a `/suspend` endpoint to Loki Canary #### Docs + * [2056](https://github.com/grafana/loki/pull/2056) **cyriltovena**: Update api.md * [2014](https://github.com/grafana/loki/pull/2014) **jsoref**: Spelling * [1999](https://github.com/grafana/loki/pull/1999) **oddlittlebird**: Docs: Added labels content @@ -2813,6 +2921,7 @@ We now GPG sign helm packages! * [1843](https://github.com/grafana/loki/pull/1843) **vishesh92**: Docs: Update configuration docs for redis #### Build + * [2042](https://github.com/grafana/loki/pull/2042) **rfratto**: Fix drone * [2009](https://github.com/grafana/loki/pull/2009) **cyriltovena**: Adds :delegated flags to speed up build experience on MacOS. * [1942](https://github.com/grafana/loki/pull/1942) **owen-d**: delete tag script filters by prefix instead of substring @@ -2820,7 +2929,6 @@ We now GPG sign helm packages! * [1911](https://github.com/grafana/loki/pull/1911) **slim-bean**: build: push images for `k` branches * [1849](https://github.com/grafana/loki/pull/1849) **cyriltovena**: Pin helm version in circle-ci helm testing workflow. - ## 1.4.1 (2020-04-06) We realized after the release last week that piping data into promtail was not working on Linux or Windows, this should fix this issue for both platforms: @@ -2919,6 +3027,7 @@ The second place would be the log file itself. At some point, most log files rol There are many other important fixes and improvements to Loki, way too many to call out in individual detail, so take a look! #### Loki + * [1810](https://github.com/grafana/loki/pull/1810) **cyriltovena**: Optimize empty filter queries. 
* [1809](https://github.com/grafana/loki/pull/1809) **cyriltovena**: Test stats memchunk * [1807](https://github.com/grafana/loki/pull/1807) **pracucci**: Enable global limits by default in production mixin @@ -2966,6 +3075,7 @@ There are many other important fixes and improvements to Loki, way too many to c * [1484](https://github.com/grafana/loki/pull/1484) **pstibrany**: loki: use new runtimeconfig package from Cortex #### Promtail + * [1840](https://github.com/grafana/loki/pull/1840) **slim-bean**: promtail: Retry 429 rate limit errors from Loki, increase default retry limits * [1775](https://github.com/grafana/loki/pull/1775) **slim-bean**: promtail: remove the read lines counter when the log file stops being tailed * [1770](https://github.com/grafana/loki/pull/1770) **adityacs**: Fix single job with multiple service discovery elements @@ -2982,6 +3092,7 @@ There are many other important fixes and improvements to Loki, way too many to c * [1602](https://github.com/grafana/loki/pull/1602) **slim-bean**: Improve promtail configuration docs #### Helm + * [1731](https://github.com/grafana/loki/pull/1731) **billimek**: [promtail helm chart] - Expand promtail syslog svc to support values * [1688](https://github.com/grafana/loki/pull/1688) **fredgate**: Loki stack helm chart can deploy datasources without Grafana * [1632](https://github.com/grafana/loki/pull/1632) **lukipro**: Added support for imagePullSecrets in Loki Helm chart @@ -2994,6 +3105,7 @@ There are many other important fixes and improvements to Loki, way too many to c * [1529](https://github.com/grafana/loki/pull/1529) **tourea**: Promtail Helm Chart: Add support for passing environment variables #### Jsonnet + * [1776](https://github.com/grafana/loki/pull/1776) **Eraac**: fix typo: Not a binary operator: = * [1767](https://github.com/grafana/loki/pull/1767) **joe-elliott**: Dashboard Cleanup * [1766](https://github.com/grafana/loki/pull/1766) **joe-elliott**: Move dashboards out into their own json 
files @@ -3010,10 +3122,12 @@ There are many other important fixes and improvements to Loki, way too many to c * [1613](https://github.com/grafana/loki/pull/1613) **cyriltovena**: Fixes config change in the result cache #### Fluent Bit + * [1791](https://github.com/grafana/loki/pull/1791) **cyriltovena**: Improve fluentbit logfmt. * [1717](https://github.com/grafana/loki/pull/1717) **adityacs**: Fluent-bit: Fix panic error when AutoKubernetesLabels is true #### Fluentd + * [1811](https://github.com/grafana/loki/pull/1811) **JamesJJ**: Error handling: Show data stream at "debug" level, not "warn" * [1728](https://github.com/grafana/loki/pull/1728) **irake99**: docs: fix outdated link to fluentd * [1703](https://github.com/grafana/loki/pull/1703) **Skeen**: fluent-plugin-grafana-loki: Update fluentd base image to current images (edge) @@ -3022,6 +3136,7 @@ There are many other important fixes and improvements to Loki, way too many to c * [1603](https://github.com/grafana/loki/pull/1603) **tarokkk**: fluentd-plugin: add URI validation #### Docs + * [1781](https://github.com/grafana/loki/pull/1781) **candlerb**: Docs: Recommended schema is now v11 * [1771](https://github.com/grafana/loki/pull/1771) **rfratto**: change slack url to slack.grafana.com and use https * [1738](https://github.com/grafana/loki/pull/1738) **jgehrcke**: docs: observability.md: clarify lines vs. entries @@ -3044,6 +3159,7 @@ There are many other important fixes and improvements to Loki, way too many to c * [1504](https://github.com/grafana/loki/pull/1504) **hsraju**: Updated configuration.md #### Logcli + * [1808](https://github.com/grafana/loki/pull/1808) **slim-bean**: logcli: log the full stats and send to stderr instead of stdout * [1682](https://github.com/grafana/loki/pull/1682) **adityacs**: BugFix: Fix logcli --quiet parameter parsing issue * [1644](https://github.com/grafana/loki/pull/1644) **cyriltovena**: This improves the log output for statistics in the logcli. 
@@ -3051,10 +3167,12 @@ There are many other important fixes and improvements to Loki, way too many to c * [1573](https://github.com/grafana/loki/pull/1573) **cyriltovena**: Improve logql query statistics collection. #### Loki Canary + * [1653](https://github.com/grafana/loki/pull/1653) **slim-bean**: Canary needs its logo * [1581](https://github.com/grafana/loki/pull/1581) **slim-bean**: Add sleep to canary reconnect on error #### Build + * [1780](https://github.com/grafana/loki/pull/1780) **slim-bean**: build: Update the CD deploy task name * [1762](https://github.com/grafana/loki/pull/1762) **dgzlopes**: Bump testify to 1.5.1 * [1742](https://github.com/grafana/loki/pull/1742) **slim-bean**: build: fix deploy on tagged build @@ -3072,6 +3190,7 @@ There are many other important fixes and improvements to Loki, way too many to c * [1600](https://github.com/grafana/loki/pull/1600) **mattmendick**: Codecov circleci test [WIP] #### Tooling + * [1577](https://github.com/grafana/loki/pull/1577) **pstibrany**: Move chunks-inspect tool to Loki repo ## 1.3.0 (2020-01-16) @@ -3116,7 +3235,7 @@ And last but not least on the notable changes list is a new feature for Promtail With this change Promtail can receive syslogs via TCP! Thanks to @bastjan for all the hard work on this submission! -### Important things to note: +### Important things to note * [1519](https://github.com/grafana/loki/pull/1519) Changes a core behavior in Loki regarding logs with duplicate content AND duplicate timestamps, previously Loki would store logs with duplicate timestamps and content, moving forward logs with duplicate content AND timestamps will be silently ignored. Mainly this change is to prevent duplicates that appear when a batch is retried (the first entry in the list would be inserted again, now it will be ignored). Logs with the same timestamp and different content will still be accepted. 
* [1486](https://github.com/grafana/loki/pull/1486) Deprecated `-distributor.limiter-reload-period` flag / distributor's `limiter_reload_period` config option. @@ -3126,6 +3245,7 @@ With this change Promtail can receive syslogs via TCP! Thanks to @bastjan for a Once again we can't thank our community and contributors enough for the significant work that everyone is adding to Loki, the entire list of changes is long!! #### Loki + * [1526](https://github.com/grafana/loki/pull/1526) **codesome**: Support for aggregation * [1522](https://github.com/grafana/loki/pull/1522) **cyriltovena**: Adds support for the old query string regexp in the frontend. * [1519](https://github.com/grafana/loki/pull/1519) **rfratto**: pkg/chunkenc: ignore duplicate lines pushed to a stream @@ -3163,6 +3283,7 @@ Once again we can't thank our community and contributors enough for the signific * [1541](https://github.com/grafana/loki/pull/1541) **owen-d**: legacy endpoint 400s metric queries #### Promtail + * [1515](https://github.com/grafana/loki/pull/1515) **slim-bean**: Promtail: Improve position and size metrics * [1485](https://github.com/grafana/loki/pull/1485) **p37ruh4**: Fileglob parsing fixes * [1472](https://github.com/grafana/loki/pull/1472) **owen-d**: positions.ignore-corruptions @@ -3173,21 +3294,26 @@ Once again we can't thank our community and contributors enough for the signific * [1275](https://github.com/grafana/loki/pull/1275) **bastjan**: pkg/promtail: IETF Syslog (RFC5424) Support #### Fluent Bit + * [1455](https://github.com/grafana/loki/pull/1455) **JensErat**: fluent-bit-plugin: re-enable failing JSON marshaller tests; pass error instead of logging and ignoring * [1294](https://github.com/grafana/loki/pull/1294) **JensErat**: fluent-bit: multi-instance support * [1514](https://github.com/grafana/loki/pull/1514) **shane-axiom**: fluent-plugin-grafana-loki: Add `fluentd_thread` label when `flush_thread_count` > 1 #### Fluentd + * 
[1500](https://github.com/grafana/loki/pull/1500) **cyriltovena**: Bump fluentd plugin to 1.2.6. * [1475](https://github.com/grafana/loki/pull/1475) **Horkyze**: fluentd-plugin: call gsub for strings only #### Docker Driver + * [1414](https://github.com/grafana/loki/pull/1414) **cyriltovena**: Adds tenant-id for docker driver. #### Logcli -* [1492](https://github.com/grafana/loki/pull/1492) **sandlis**: logcli: replaced GRAFANA_* with LOKI_* in logcli env vars, set default server url for logcli to localhost + +* [1492](https://github.com/grafana/loki/pull/1492) **sandlis**: logcli: replaced GRAFANA_*with LOKI_* in logcli env vars, set default server url for logcli to localhost #### Helm + * [1534](https://github.com/grafana/loki/pull/1534) **olivierboudet**: helm : fix fluent-bit parser configuration syntax * [1506](https://github.com/grafana/loki/pull/1506) **terjesannum**: helm: add podsecuritypolicy for fluent-bit * [1431](https://github.com/grafana/loki/pull/1431) **eugene100**: Helm: fix issue with config.clients @@ -3197,6 +3323,7 @@ Once again we can't thank our community and contributors enough for the signific * [1530](https://github.com/grafana/loki/pull/1530) **WeiBanjo**: Allow extra command line args for external labels like hostname #### Jsonnet + * [1518](https://github.com/grafana/loki/pull/1518) **benjaminhuo**: Fix error 'Field does not exist: jaeger_mixin' in tk show * [1501](https://github.com/grafana/loki/pull/1501) **anarcher**: jsonnet: fix common/defaultPorts parameters * [1497](https://github.com/grafana/loki/pull/1497) **cyriltovena**: Update Loki mixin to include frontend QPS and latency. 
@@ -3204,6 +3331,7 @@ Once again we can't thank our community and contributors enough for the signific * [1543](https://github.com/grafana/loki/pull/1543) **sh0rez**: fix(ksonnet): use apps/v1 #### Docs + * [1531](https://github.com/grafana/loki/pull/1531) **fitzoh**: Documentation: Add note on using Loki with Amazon ECS * [1521](https://github.com/grafana/loki/pull/1521) **rfratto**: docs: Document timestamp ordering rules * [1516](https://github.com/grafana/loki/pull/1516) **rfratto**: Link to release docs in README.md, not master docs @@ -3224,12 +3352,14 @@ Once again we can't thank our community and contributors enough for the signific * [1539](https://github.com/grafana/loki/pull/1539) **j18e**: docs: fix syntax error in pipeline example #### Build + * [1494](https://github.com/grafana/loki/pull/1494) **pracucci**: Fixed TOUCH_PROTOS in all DroneCI pipelines * [1479](https://github.com/grafana/loki/pull/1479) **owen-d**: TOUCH_PROTOS build arg for dockerfile * [1476](https://github.com/grafana/loki/pull/1476) **owen-d**: initiates docker daemon for circle windows builds * [1469](https://github.com/grafana/loki/pull/1469) **rfratto**: Makefile: re-enable journal scraping on ARM -#### New Members! +#### New Members + * [1415](https://github.com/grafana/loki/pull/1415) **cyriltovena**: Add Joe as member of the team. 
# 1.2.0 (2019-12-09) @@ -3279,6 +3409,7 @@ Some might call this a **breaking change**, we are instead calling it a bug fix **But please be aware if you are using the `/loki/api/v1/label` or `/loki/api/v1/label//values` the JSON result will be different in 1.1.0** Old result: + ```json { "values": [ @@ -3288,6 +3419,7 @@ Old result: ] } ``` + New result: ```json @@ -3332,7 +3464,6 @@ Binaries will now be zipped instead of gzipped as many people voiced their opini [1357](https://github.com/grafana/loki/pull/1357) **cyriltovena**: Supports same duration format in LogQL as Prometheus - ## Everything Else :heart: All PR's are important to us, thanks everyone for continuing to help support and improve Loki! :heart: @@ -3354,7 +3485,7 @@ Binaries will now be zipped instead of gzipped as many people voiced their opini * [1311](https://github.com/grafana/loki/pull/1311) **pstibrany**: Include positions filename in the error when YAML unmarshal fails. * [1310](https://github.com/grafana/loki/pull/1310) **JensErat**: fluent-bit: sorted JSON and properly convert []byte to string * [1304](https://github.com/grafana/loki/pull/1304) **pstibrany**: promtail: write positions to new file first, move to target location afterwards -* [1303](https://github.com/grafana/loki/pull/1303) **zhangjianweibj**: https://github.com/grafana/loki/issues/1302 +* [1303](https://github.com/grafana/loki/pull/1303) **zhangjianweibj**: * [1298](https://github.com/grafana/loki/pull/1298) **rfratto**: pkg/promtail: remove journal target forced path * [1279](https://github.com/grafana/loki/pull/1279) **rfratto**: Fix loki_discarded_samples_total metric * [1278](https://github.com/grafana/loki/pull/1278) **rfratto**: docs: update limits_config to new structure from #948 @@ -3368,8 +3499,6 @@ Binaries will now be zipped instead of gzipped as many people voiced their opini * [1223](https://github.com/grafana/loki/pull/1223) **jgehrcke**: authentication.md: replace "user" with "tenant" * 
[1204](https://github.com/grafana/loki/pull/1204) **allanhung**: fluent-bit-plugin: Auto add Kubernetes labels to Loki labels - - # 1.0.0 (2019-11-19) :tada: Nearly a year since Loki was announced at KubeCon in Seattle 2018 we are very excited to announce the 1.0.0 release of Loki! :tada: @@ -3447,7 +3576,6 @@ A **huge** thanks to the **36 contributors** who submitted **148 PR's** since 0. * PR [1062](https://github.com/grafana/loki/pull/1062) and [1089](https://github.com/grafana/loki/pull/1089) have moved Loki from Dep to Go Modules and to Go 1.13 - ## Loki ### Features/Improvements/Changes @@ -3484,6 +3612,7 @@ A **huge** thanks to the **36 contributors** who submitted **148 PR's** since 0. * **Loki** [654](https://github.com/grafana/loki/pull/654) **cyriltovena**: LogQL: Vector and Range Vector Aggregation. ### Bug Fixes + * **Loki** [1114](https://github.com/grafana/loki/pull/1114) **rfratto**: pkg/ingester: prevent shutdowns from processing during joining handoff * **Loki** [1097](https://github.com/grafana/loki/pull/1097) **joe-elliott**: Reverted cloud.google.com/go to 0.44.1 * **Loki** [986](https://github.com/grafana/loki/pull/986) **pracucci**: Fix panic in tailer due to race condition between send() and close() @@ -3527,7 +3656,7 @@ A **huge** thanks to the **36 contributors** who submitted **148 PR's** since 0. * **Docs** [1094](https://github.com/grafana/loki/pull/1094) **rfratto**: docs: update stages README with the docker and cri stages * **Docs** [1091](https://github.com/grafana/loki/pull/1091) **daixiang0**: docs(stage): add docker and cri * **Docs** [1077](https://github.com/grafana/loki/pull/1077) **daixiang0**: doc(fluent-bit): add missing namespace -* **Docs** [1073](https://github.com/grafana/loki/pull/1073) **flouthoc**: Re Fix Docs: PR https://github.com/grafana/loki/pull/1053 got erased due to force push. +* **Docs** [1073](https://github.com/grafana/loki/pull/1073) **flouthoc**: Re Fix Docs: PR got erased due to force push. 
* **Docs** [1069](https://github.com/grafana/loki/pull/1069) **daixiang0**: doc: unify GOPATH * **Docs** [1068](https://github.com/grafana/loki/pull/1068) **daixiang0**: doc: skip jb init when using Tanka * **Docs** [1067](https://github.com/grafana/loki/pull/1067) **rfratto**: Fix broken links to docs in README.md @@ -3637,21 +3766,10 @@ Loki is now using a Bot to help keep issues and PR's pruned based on age/relevan * **Github** [965](https://github.com/grafana/loki/pull/965) **rfratto**: Change label used to keep issues from being marked as stale to keepalive * **Github** [964](https://github.com/grafana/loki/pull/964) **rfratto**: Add probot-stale configuration to close stale issues. - - - - - - - - - - # 0.3.0 (2019-08-16) ### Features/Enhancements - * **Loki** [877](https://github.com/grafana/loki/pull/877) **pracucci**: loki: Improve Tailer loop * **Loki** [870](https://github.com/grafana/loki/pull/870) **sandlis**: bigtable-backup: update docker image for bigtable-backup tool * **Loki** [862](https://github.com/grafana/loki/pull/862) **sandlis**: live-tailing: preload all the historic entries before query context is cancelled @@ -3684,7 +3802,6 @@ Loki is now using a Bot to help keep issues and PR's pruned based on age/relevan > 857 POSSIBLY BREAKING: If you relied on a custom pod label to overwrite one of the labels configured by the other sections of the scrape config: `job`, `namespace`, `instance`, `container_name` and/or `__path__`, this will no longer happen, the custom pod labels are now loaded first and will be overwritten by any of these listed labels. 
- ### Fixes * **Loki** [897](https://github.com/grafana/loki/pull/897) **pracucci**: Fix panic in tailer when an ingester is removed from the ring while tailing @@ -3702,7 +3819,6 @@ Loki is now using a Bot to help keep issues and PR's pruned based on age/relevan * **Logcli** [863](https://github.com/grafana/loki/pull/863) **adityacs**: Fix Nolabels parse metrics - # 0.2.0 (2019-08-02) There were over 100 PR's merged since 0.1.0 was released, here's a highlight: @@ -3744,7 +3860,6 @@ There were many fixes, here are a few of the most important: * **Fluent-Plugin**: [667](https://github.com/grafana/loki/pull/667) Rename fluent plugin. * **Docker-Plugin**: [813](https://github.com/grafana/loki/pull/813) Fix panic for newer docker version (18.09.7+). - # 0.1.0 (2019-06-03) First (beta) Release! diff --git a/CONTRIBUTING.md b/CONTRIBUTING.md index 94d664954f6c5..798485cf0d18e 100644 --- a/CONTRIBUTING.md +++ b/CONTRIBUTING.md @@ -16,19 +16,13 @@ LIDs must be created as a pull request using [this template](docs/sources/commun **NOTE:** The Loki team has adopted the use of [Conventional Commits](https://www.conventionalcommits.org/en/v1.0.0/) for commit messages. -1. Your PR title is in the form `: Your change`. - 1. It does not end the title with punctuation. It will be added in the changelog. - 1. It starts with an imperative verb. Example: Fix the latency between System A and System B. - 1. It uses Sentence case, not Title Case. +1. Your PR title is in the conventional commits form `: Your change`. 1. It uses a complete phrase or sentence. The PR title will appear in a changelog, so help other people understand what your change will be. - 1. It has a clear description saying what it does and why. Your PR description will be present in the project' commit log, so be gentle to it. -1. Your PR is well sync'ed with main -1. Your PR is correctly documenting appropriate changes under the CHANGELOG. 
You should document your changes there if: - * It adds an important feature - * It fixes an issue present in a previous release - * It causes a change in operation that would be useful for an operator of Loki to know - * You can skip this step for documentation changes, build related changes and simple bug fixes or enhancements. Rationale being we are attempting to curate the CHANGELOG entries with the most relevant and important changes that end users of Loki care about. -1. Your PR documents upgrading steps under `docs/sources/setup/upgrade/_index.md` if it changes: + 1. It starts with an imperative verb. Example: Fix the latency between System A and System B. + 2. It uses Sentence case, not Title Case. +2. It has a clear description saying what it does and why. Your PR description is a reviewers first impression of your changes. +3. Your PR branch is sync'ed with main +4. Your PR documents upgrading steps under `docs/sources/setup/upgrade/_index.md` if it changes: * Default configuration values * Metric names or label names * Changes existing log lines that may be used in dashboard or alerts. e.g: logs lines in any `metrics.go` files might be used in building dashboards or alerts. 
diff --git a/Makefile b/Makefile index 4789bf7e319d0..82389e3835ac2 100644 --- a/Makefile +++ b/Makefile @@ -37,7 +37,7 @@ DOCKER_IMAGE_DIRS := $(patsubst %/Dockerfile,%,$(DOCKERFILES)) BUILD_IN_CONTAINER ?= true # ensure you run `make drone` after changing this -BUILD_IMAGE_VERSION ?= 0.33.0 +BUILD_IMAGE_VERSION ?= 0.33.1 # Docker image info IMAGE_PREFIX ?= grafana @@ -55,7 +55,7 @@ GIT_BRANCH := $(shell git rev-parse --abbrev-ref HEAD) DONT_FIND := -name tools -prune -o -name vendor -prune -o -name operator -prune -o -name .git -prune -o -name .cache -prune -o -name .pkg -prune -o # Build flags -VPREFIX := github.com/grafana/loki/pkg/util/build +VPREFIX := github.com/grafana/loki/v3/pkg/util/build GO_LDFLAGS := -X $(VPREFIX).Branch=$(GIT_BRANCH) -X $(VPREFIX).Version=$(IMAGE_TAG) -X $(VPREFIX).Revision=$(GIT_REVISION) -X $(VPREFIX).BuildUser=$(shell whoami)@$(shell hostname) -X $(VPREFIX).BuildDate=$(shell date -u +"%Y-%m-%dT%H:%M:%SZ") GO_FLAGS := -ldflags "-extldflags \"-static\" -s -w $(GO_LDFLAGS)" -tags netgo DYN_GO_FLAGS := -ldflags "-s -w $(GO_LDFLAGS)" -tags netgo @@ -85,10 +85,11 @@ PROMTAIL_UI_FILES := $(shell find ./clients/pkg/promtail/server/ui -type f -name # Documentation source path DOC_SOURCES_PATH := docs/sources +DOC_TEMPLATE_PATH := docs/templates # Configuration flags documentation -DOC_FLAGS_TEMPLATE := $(DOC_SOURCES_PATH)/configure/index.template -DOC_FLAGS := $(DOC_SOURCES_PATH)/configure/_index.md +DOC_FLAGS_TEMPLATE := $(DOC_TEMPLATE_PATH)/configuration.template +DOC_FLAGS := $(DOC_SOURCES_PATH)/shared/configuration.md ########## # Docker # @@ -191,6 +192,9 @@ production/helm/loki/src/helm-test/helm-test: helm-lint: ## run helm linter $(MAKE) -BC production/helm/loki lint +helm-docs: + helm-docs -c production/helm/loki -g production/helm/loki + ################# # Loki-QueryTee # ################# @@ -200,6 +204,15 @@ loki-querytee: cmd/querytee/querytee ## build loki-querytee executable cmd/querytee/querytee: CGO_ENABLED=0 go build 
$(GO_FLAGS) -o $@ ./$(@D) +############ +# lokitool # +############ +.PHONY: cmd/lokitool/lokitool +lokitool: cmd/lokitool/lokitool ## build lokitool executable + +cmd/lokitool/lokitool: + CGO_ENABLED=0 go build $(GO_FLAGS) -o $@ ./cmd/lokitool + ############ # Promtail # ############ @@ -315,10 +328,17 @@ publish: packages # To run this efficiently on your workstation, run this from the root dir: # docker run --rm --tty -i -v $(pwd)/.cache:/go/cache -v $(pwd)/.pkg:/go/pkg -v $(pwd):/src/loki grafana/loki-build-image:0.24.1 lint lint: ## run linters +ifeq ($(BUILD_IN_CONTAINER),true) + $(SUDO) docker run $(RM) $(TTY) -i \ + -v $(shell go env GOPATH)/pkg:/go/pkg$(MOUNT_FLAGS) \ + -v $(shell pwd):/src/loki$(MOUNT_FLAGS) \ + $(IMAGE_PREFIX)/loki-build-image:$(BUILD_IMAGE_VERSION) $@; +else go version golangci-lint version GO111MODULE=on golangci-lint run -v --timeout 15m faillint -paths "sync/atomic=go.uber.org/atomic" ./... +endif ######## # Test # @@ -794,7 +814,15 @@ check-format: format # Documentation related commands doc: ## Generates the config file documentation +ifeq ($(BUILD_IN_CONTAINER),true) + $(SUDO) docker run $(RM) $(TTY) -i \ + -v $(shell pwd):/src/loki$(MOUNT_FLAGS) \ + $(IMAGE_PREFIX)/loki-build-image:$(BUILD_IMAGE_VERSION) $@; +else go run ./tools/doc-generator $(DOC_FLAGS_TEMPLATE) > $(DOC_FLAGS) +endif + +docs: doc check-doc: ## Check the documentation files are up to date check-doc: doc @@ -870,4 +898,5 @@ scan-vulnerabilities: trivy snyk .PHONY: release-workflows release-workflows: + pushd $(CURDIR)/.github && jb update && popd jsonnet -SJ .github/vendor -m .github/workflows .github/release-workflows.jsonnet diff --git a/clients/cmd/docker-driver/Dockerfile b/clients/cmd/docker-driver/Dockerfile index 2b5baab318ade..9dd2df5abde36 100644 --- a/clients/cmd/docker-driver/Dockerfile +++ b/clients/cmd/docker-driver/Dockerfile @@ -1,4 +1,4 @@ -ARG BUILD_IMAGE=grafana/loki-build-image:0.33.0 +ARG BUILD_IMAGE=grafana/loki-build-image:0.33.1 # 
Directories in this file are referenced from the root of the project not this folder # This file is intended to be called from the root like so: # docker build -t grafana/loki -f cmd/loki/Dockerfile . diff --git a/clients/cmd/docker-driver/config.go b/clients/cmd/docker-driver/config.go index 95dd07a6d8e81..d53117ca4872b 100644 --- a/clients/cmd/docker-driver/config.go +++ b/clients/cmd/docker-driver/config.go @@ -19,11 +19,11 @@ import ( "github.com/prometheus/prometheus/model/relabel" "gopkg.in/yaml.v2" - "github.com/grafana/loki/clients/pkg/logentry/stages" - "github.com/grafana/loki/clients/pkg/promtail/client" - "github.com/grafana/loki/clients/pkg/promtail/targets/file" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail/client" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/file" - "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/v3/pkg/util" ) const ( diff --git a/clients/cmd/docker-driver/config_test.go b/clients/cmd/docker-driver/config_test.go index a3920778b622b..f83c560e39391 100644 --- a/clients/cmd/docker-driver/config_test.go +++ b/clients/cmd/docker-driver/config_test.go @@ -11,9 +11,9 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) var jobRename = ` diff --git a/clients/cmd/docker-driver/loki.go b/clients/cmd/docker-driver/loki.go index cc15e71bda2ee..77bc4e5e439b7 100644 --- a/clients/cmd/docker-driver/loki.go +++ b/clients/cmd/docker-driver/loki.go @@ -10,11 +10,11 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" - "github.com/grafana/loki/clients/pkg/logentry/stages" - "github.com/grafana/loki/clients/pkg/promtail/api" - 
"github.com/grafana/loki/clients/pkg/promtail/client" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/client" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) var jobName = "docker" diff --git a/clients/cmd/docker-driver/loki_test.go b/clients/cmd/docker-driver/loki_test.go index 0edc7b0c76931..4e61f37cd05c0 100644 --- a/clients/cmd/docker-driver/loki_test.go +++ b/clients/cmd/docker-driver/loki_test.go @@ -7,7 +7,7 @@ import ( "github.com/docker/docker/daemon/logger" "github.com/stretchr/testify/require" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) func Test_loki_LogWhenClosed(t *testing.T) { diff --git a/clients/cmd/docker-driver/main.go b/clients/cmd/docker-driver/main.go index 5aba041f6b5fe..06d90b81bda56 100644 --- a/clients/cmd/docker-driver/main.go +++ b/clients/cmd/docker-driver/main.go @@ -12,8 +12,8 @@ import ( dslog "github.com/grafana/dskit/log" "github.com/prometheus/common/version" - _ "github.com/grafana/loki/pkg/util/build" - util_log "github.com/grafana/loki/pkg/util/log" + _ "github.com/grafana/loki/v3/pkg/util/build" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) const socketAddress = "/run/docker/plugins/loki.sock" diff --git a/clients/cmd/fluent-bit/Dockerfile b/clients/cmd/fluent-bit/Dockerfile index f0dfbc90c36a3..19d0721d540f4 100644 --- a/clients/cmd/fluent-bit/Dockerfile +++ b/clients/cmd/fluent-bit/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.22.0-bullseye AS builder +FROM golang:1.22.2-bullseye AS builder COPY . 
/src diff --git a/clients/cmd/fluent-bit/buffer.go b/clients/cmd/fluent-bit/buffer.go index a168ccfc142c5..28e9529abff48 100644 --- a/clients/cmd/fluent-bit/buffer.go +++ b/clients/cmd/fluent-bit/buffer.go @@ -5,7 +5,7 @@ import ( "github.com/go-kit/log" - "github.com/grafana/loki/clients/pkg/promtail/client" + "github.com/grafana/loki/v3/clients/pkg/promtail/client" ) type bufferConfig struct { diff --git a/clients/cmd/fluent-bit/client.go b/clients/cmd/fluent-bit/client.go index 11c2fa1d0386b..828d013d85ae1 100644 --- a/clients/cmd/fluent-bit/client.go +++ b/clients/cmd/fluent-bit/client.go @@ -3,7 +3,7 @@ package main import ( "github.com/go-kit/log" - "github.com/grafana/loki/clients/pkg/promtail/client" + "github.com/grafana/loki/v3/clients/pkg/promtail/client" ) // NewClient creates a new client based on the fluentbit configuration. diff --git a/clients/cmd/fluent-bit/config.go b/clients/cmd/fluent-bit/config.go index 469e18d495d74..84838d03f20f8 100644 --- a/clients/cmd/fluent-bit/config.go +++ b/clients/cmd/fluent-bit/config.go @@ -12,10 +12,10 @@ import ( "github.com/grafana/dskit/log" "github.com/prometheus/common/model" - "github.com/grafana/loki/clients/pkg/logentry/logql" - "github.com/grafana/loki/clients/pkg/promtail/client" + "github.com/grafana/loki/v3/clients/pkg/logentry/logql" + "github.com/grafana/loki/v3/clients/pkg/promtail/client" - lokiflag "github.com/grafana/loki/pkg/util/flagext" + lokiflag "github.com/grafana/loki/v3/pkg/util/flagext" ) var defaultClientCfg = client.Config{} diff --git a/clients/cmd/fluent-bit/config_test.go b/clients/cmd/fluent-bit/config_test.go index 0d5ec6d592b0e..f52ea18bc96db 100644 --- a/clients/cmd/fluent-bit/config_test.go +++ b/clients/cmd/fluent-bit/config_test.go @@ -12,9 +12,9 @@ import ( "github.com/grafana/dskit/log" "github.com/prometheus/common/model" - "github.com/grafana/loki/clients/pkg/promtail/client" + "github.com/grafana/loki/v3/clients/pkg/promtail/client" - lokiflag 
"github.com/grafana/loki/pkg/util/flagext" + lokiflag "github.com/grafana/loki/v3/pkg/util/flagext" ) type fakeConfig map[string]string diff --git a/clients/cmd/fluent-bit/dque.go b/clients/cmd/fluent-bit/dque.go index f7091de893f59..6e5746033254b 100644 --- a/clients/cmd/fluent-bit/dque.go +++ b/clients/cmd/fluent-bit/dque.go @@ -12,10 +12,10 @@ import ( "github.com/joncrlsn/dque" "github.com/prometheus/common/model" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/client" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/client" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) type dqueConfig struct { diff --git a/clients/cmd/fluent-bit/loki.go b/clients/cmd/fluent-bit/loki.go index ea3de0261f407..6749af1ebf881 100644 --- a/clients/cmd/fluent-bit/loki.go +++ b/clients/cmd/fluent-bit/loki.go @@ -17,10 +17,10 @@ import ( jsoniter "github.com/json-iterator/go" "github.com/prometheus/common/model" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/client" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/client" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) var ( diff --git a/clients/cmd/fluent-bit/loki_test.go b/clients/cmd/fluent-bit/loki_test.go index 1bfd21d22ce02..477f6abe1757c 100644 --- a/clients/cmd/fluent-bit/loki_test.go +++ b/clients/cmd/fluent-bit/loki_test.go @@ -11,10 +11,10 @@ import ( jsoniter "github.com/json-iterator/go" "github.com/prometheus/common/model" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" - "github.com/grafana/loki/pkg/logproto" + 
"github.com/grafana/loki/v3/pkg/logproto" ) var now = time.Now() diff --git a/clients/cmd/fluent-bit/out_grafana_loki.go b/clients/cmd/fluent-bit/out_grafana_loki.go index d396fddfc8da2..70a58e65b9350 100644 --- a/clients/cmd/fluent-bit/out_grafana_loki.go +++ b/clients/cmd/fluent-bit/out_grafana_loki.go @@ -13,12 +13,12 @@ import ( dslog "github.com/grafana/dskit/log" "github.com/prometheus/common/version" - _ "github.com/grafana/loki/pkg/util/build" + _ "github.com/grafana/loki/v3/pkg/util/build" ) import ( "github.com/prometheus/client_golang/prometheus" - "github.com/grafana/loki/clients/pkg/promtail/client" + "github.com/grafana/loki/v3/clients/pkg/promtail/client" ) var ( diff --git a/clients/cmd/promtail/Dockerfile b/clients/cmd/promtail/Dockerfile index 901fc4050f1c8..bb951765411ab 100644 --- a/clients/cmd/promtail/Dockerfile +++ b/clients/cmd/promtail/Dockerfile @@ -1,21 +1,15 @@ -FROM golang:1.21.3-bullseye as build +FROM golang:1.21.9-bookworm as build COPY . /src/loki WORKDIR /src/loki -# Backports repo required to get a libsystemd version 246 or newer which is required to handle journal +ZSTD compression -RUN echo "deb http://deb.debian.org/debian bullseye-backports main" >> /etc/apt/sources.list -RUN apt-get update && apt-get install -t bullseye-backports -qy libsystemd-dev +RUN apt-get update && apt-get install -qy libsystemd-dev RUN make clean && make BUILD_IN_CONTAINER=false PROMTAIL_JOURNAL_ENABLED=true promtail # Promtail requires debian as the base image to support systemd journal reading -FROM debian:bullseye-slim +FROM debian:12.5-slim # tzdata required for the timestamp stage to work -# Backports repo required to get a libsystemd version 246 or newer which is required to handle journal +ZSTD compression -RUN echo "deb http://deb.debian.org/debian bullseye-backports main" >> /etc/apt/sources.list RUN apt-get update && \ - apt-get install -qy \ - tzdata ca-certificates -RUN apt-get install -t bullseye-backports -qy libsystemd-dev && \ + apt-get 
install -qy tzdata ca-certificates libsystemd-dev && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* COPY --from=build /src/loki/clients/cmd/promtail/promtail /usr/bin/promtail COPY clients/cmd/promtail/promtail-docker-config.yaml /etc/promtail/config.yml diff --git a/clients/cmd/promtail/Dockerfile.arm32 b/clients/cmd/promtail/Dockerfile.arm32 index bb5d89baec764..bb0019332b234 100644 --- a/clients/cmd/promtail/Dockerfile.arm32 +++ b/clients/cmd/promtail/Dockerfile.arm32 @@ -1,21 +1,15 @@ -FROM golang:1.21.2-bullseye as build +FROM golang:1.21.9-bookworm as build COPY . /src/loki WORKDIR /src/loki -# Backports repo required to get a libsystemd version 246 or newer which is required to handle journal +ZSTD compression -RUN echo "deb http://deb.debian.org/debian bullseye-backports main" >> /etc/apt/sources.list -RUN apt-get update && apt-get install -t bullseye-backports -qy libsystemd-dev +RUN apt-get update && apt-get install -qy libsystemd-dev RUN make clean && make BUILD_IN_CONTAINER=false PROMTAIL_JOURNAL_ENABLED=true promtail # Promtail requires debian as the base image to support systemd journal reading -FROM debian:bullseye-slim +FROM debian:12.5-slim # tzdata required for the timestamp stage to work -# Backports repo required to get a libsystemd version 246 or newer which is required to handle journal +ZSTD compression -RUN echo "deb http://deb.debian.org/debian bullseye-backports main" >> /etc/apt/sources.list RUN apt-get update && \ - apt-get install -qy \ - tzdata ca-certificates -RUN apt-get install -t bullseye-backports -qy libsystemd-dev && \ + apt-get install -qy tzdata ca-certificates libsystemd-dev && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* COPY --from=build /src/loki/clients/cmd/promtail/promtail /usr/bin/promtail COPY clients/cmd/promtail/promtail-local-config.yaml /etc/promtail/local-config.yaml diff --git a/clients/cmd/promtail/Dockerfile.cross b/clients/cmd/promtail/Dockerfile.cross index 084186e1a3d5a..c19c7f4d3f490 100644 --- 
a/clients/cmd/promtail/Dockerfile.cross +++ b/clients/cmd/promtail/Dockerfile.cross @@ -1,8 +1,8 @@ -ARG BUILD_IMAGE=grafana/loki-build-image:0.33.0 +ARG BUILD_IMAGE=grafana/loki-build-image:0.33.1 # Directories in this file are referenced from the root of the project not this folder # This file is intended to be called from the root like so: # docker build -t grafana/promtail -f clients/cmd/promtail/Dockerfile . -FROM golang:1.21.2-alpine as goenv +FROM golang:1.21.9-alpine as goenv RUN go env GOARCH > /goarch && \ go env GOARM > /goarm @@ -13,11 +13,10 @@ WORKDIR /src/loki RUN make clean && GOARCH=$(cat /goarch) GOARM=$(cat /goarm) make BUILD_IN_CONTAINER=false PROMTAIL_JOURNAL_ENABLED=true promtail # Promtail requires debian as the base image to support systemd journal reading -FROM debian:stretch-slim +FROM debian:12.5-slim # tzdata required for the timestamp stage to work RUN apt-get update && \ - apt-get install -qy \ - tzdata ca-certificates libsystemd-dev && \ + apt-get install -qy tzdata ca-certificates libsystemd-dev && \ rm -rf /var/lib/apt/lists/* /tmp/* /var/tmp/* COPY --from=build /src/loki/clients/cmd/promtail/promtail /usr/bin/promtail COPY clients/cmd/promtail/promtail-local-config.yaml /etc/promtail/local-config.yaml diff --git a/clients/cmd/promtail/Dockerfile.debug b/clients/cmd/promtail/Dockerfile.debug index 97e191cf4982c..99f245be0529b 100644 --- a/clients/cmd/promtail/Dockerfile.debug +++ b/clients/cmd/promtail/Dockerfile.debug @@ -2,7 +2,7 @@ # This file is intended to be called from the root like so: # docker build -t grafana/promtail -f clients/cmd/promtail/Dockerfile.debug . -FROM grafana/loki-build-image:0.33.0 as build +FROM grafana/loki-build-image:0.33.1 as build ARG GOARCH="amd64" COPY . 
/src/loki WORKDIR /src/loki diff --git a/clients/cmd/promtail/main.go b/clients/cmd/promtail/main.go index 257c5eef01bc7..7e00e7ff35db3 100644 --- a/clients/cmd/promtail/main.go +++ b/clients/cmd/promtail/main.go @@ -17,22 +17,23 @@ import ( "github.com/grafana/dskit/log" "github.com/grafana/dskit/tracing" "github.com/prometheus/client_golang/prometheus" + collectors_version "github.com/prometheus/client_golang/prometheus/collectors/version" "github.com/prometheus/common/version" - "github.com/grafana/loki/clients/pkg/logentry/stages" - "github.com/grafana/loki/clients/pkg/promtail" - "github.com/grafana/loki/clients/pkg/promtail/client" - promtail_config "github.com/grafana/loki/clients/pkg/promtail/config" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail" + "github.com/grafana/loki/v3/clients/pkg/promtail/client" + promtail_config "github.com/grafana/loki/v3/clients/pkg/promtail/config" - "github.com/grafana/loki/pkg/util" - "github.com/grafana/loki/pkg/util/cfg" + "github.com/grafana/loki/v3/pkg/util" + "github.com/grafana/loki/v3/pkg/util/cfg" - _ "github.com/grafana/loki/pkg/util/build" - util_log "github.com/grafana/loki/pkg/util/log" + _ "github.com/grafana/loki/v3/pkg/util/build" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) func init() { - prometheus.MustRegister(version.NewCollector("promtail")) + prometheus.MustRegister(collectors_version.NewCollector("promtail")) } var mtx sync.Mutex diff --git a/clients/pkg/logentry/logql/parser.go b/clients/pkg/logentry/logql/parser.go index d567f6fce4c8b..924ec1b7bdeab 100644 --- a/clients/pkg/logentry/logql/parser.go +++ b/clients/pkg/logentry/logql/parser.go @@ -8,7 +8,7 @@ import ( "github.com/prometheus/prometheus/model/labels" - "github.com/grafana/loki/pkg/logqlmodel" + "github.com/grafana/loki/v3/pkg/logqlmodel" ) func init() { diff --git a/clients/pkg/logentry/metric/metricvec.go b/clients/pkg/logentry/metric/metricvec.go index 
07f73c20873d3..f004db760f8f6 100644 --- a/clients/pkg/logentry/metric/metricvec.go +++ b/clients/pkg/logentry/metric/metricvec.go @@ -5,7 +5,7 @@ import ( "sync" "time" - "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/v3/pkg/util" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" diff --git a/clients/pkg/logentry/stages/decolorize.go b/clients/pkg/logentry/stages/decolorize.go index bac7274b6bad3..a86e6cdeafb2e 100644 --- a/clients/pkg/logentry/stages/decolorize.go +++ b/clients/pkg/logentry/stages/decolorize.go @@ -1,7 +1,7 @@ package stages import ( - "github.com/grafana/loki/pkg/logql/log" + "github.com/grafana/loki/v3/pkg/logql/log" ) type decolorizeStage struct{} diff --git a/clients/pkg/logentry/stages/decolorize_test.go b/clients/pkg/logentry/stages/decolorize_test.go index 5e7cead0a5275..029cd74c1c1e3 100644 --- a/clients/pkg/logentry/stages/decolorize_test.go +++ b/clients/pkg/logentry/stages/decolorize_test.go @@ -7,7 +7,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/assert" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) var testDecolorizePipeline = ` diff --git a/clients/pkg/logentry/stages/drop.go b/clients/pkg/logentry/stages/drop.go index 19a2e6c378075..462d6c34f6350 100644 --- a/clients/pkg/logentry/stages/drop.go +++ b/clients/pkg/logentry/stages/drop.go @@ -13,7 +13,7 @@ import ( "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" - "github.com/grafana/loki/pkg/util/flagext" + "github.com/grafana/loki/v3/pkg/util/flagext" ) const ( diff --git a/clients/pkg/logentry/stages/drop_test.go b/clients/pkg/logentry/stages/drop_test.go index a7e5ffcb5665f..220bb68314df3 100644 --- a/clients/pkg/logentry/stages/drop_test.go +++ b/clients/pkg/logentry/stages/drop_test.go @@ -12,7 +12,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - util_log 
"github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) // Not all these are tested but are here to make sure the different types marshal without error diff --git a/clients/pkg/logentry/stages/eventlogmessage_test.go b/clients/pkg/logentry/stages/eventlogmessage_test.go index 4729d5a08f0e6..ed4bedccfc70c 100644 --- a/clients/pkg/logentry/stages/eventlogmessage_test.go +++ b/clients/pkg/logentry/stages/eventlogmessage_test.go @@ -11,7 +11,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/assert" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) var testEvtLogMsgYamlDefaults = ` diff --git a/clients/pkg/logentry/stages/extensions.go b/clients/pkg/logentry/stages/extensions.go index f25ffe02e8403..2e49d6bd224b3 100644 --- a/clients/pkg/logentry/stages/extensions.go +++ b/clients/pkg/logentry/stages/extensions.go @@ -10,7 +10,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" - "github.com/grafana/loki/pkg/util/flagext" + "github.com/grafana/loki/v3/pkg/util/flagext" ) const ( @@ -128,7 +128,7 @@ type CriConfig struct { MaxPartialLineSizeTruncate bool `mapstructure:"max_partial_line_size_truncate"` } -// validateDropConfig validates the DropConfig for the dropStage +// validateCriConfig validates the CriConfig for the cri stage func validateCriConfig(cfg *CriConfig) error { if cfg.MaxPartialLines == 0 { cfg.MaxPartialLines = MaxPartialLinesSize diff --git a/clients/pkg/logentry/stages/extensions_test.go b/clients/pkg/logentry/stages/extensions_test.go index 9e2a3f62a56f7..0d03acd3fe3dd 100644 --- a/clients/pkg/logentry/stages/extensions_test.go +++ b/clients/pkg/logentry/stages/extensions_test.go @@ -9,7 +9,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - util_log "github.com/grafana/loki/pkg/util/log" + util_log 
"github.com/grafana/loki/v3/pkg/util/log" ) var ( diff --git a/clients/pkg/logentry/stages/json_test.go b/clients/pkg/logentry/stages/json_test.go index 31a0c0219e5af..1764387253fb1 100644 --- a/clients/pkg/logentry/stages/json_test.go +++ b/clients/pkg/logentry/stages/json_test.go @@ -10,7 +10,7 @@ import ( "github.com/stretchr/testify/assert" "gopkg.in/yaml.v2" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) var testJSONYamlSingleStageWithoutSource = ` diff --git a/clients/pkg/logentry/stages/labelallow_test.go b/clients/pkg/logentry/stages/labelallow_test.go index a5cbcd8e3ce6b..ebcf451487ef8 100644 --- a/clients/pkg/logentry/stages/labelallow_test.go +++ b/clients/pkg/logentry/stages/labelallow_test.go @@ -9,7 +9,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) func Test_addLabelStage_Process(t *testing.T) { diff --git a/clients/pkg/logentry/stages/labeldrop_test.go b/clients/pkg/logentry/stages/labeldrop_test.go index 215a7888f8c31..70912c7ed1c84 100644 --- a/clients/pkg/logentry/stages/labeldrop_test.go +++ b/clients/pkg/logentry/stages/labeldrop_test.go @@ -9,7 +9,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) func Test_dropLabelStage_Process(t *testing.T) { diff --git a/clients/pkg/logentry/stages/labels_test.go b/clients/pkg/logentry/stages/labels_test.go index 175359606a2f4..27747d8032edd 100644 --- a/clients/pkg/logentry/stages/labels_test.go +++ b/clients/pkg/logentry/stages/labels_test.go @@ -13,7 +13,7 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/assert" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) var testLabelsYaml 
= ` diff --git a/clients/pkg/logentry/stages/limit.go b/clients/pkg/logentry/stages/limit.go index d5489221e6ac0..49d32cbf04029 100644 --- a/clients/pkg/logentry/stages/limit.go +++ b/clients/pkg/logentry/stages/limit.go @@ -8,7 +8,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" - "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/v3/pkg/util" "github.com/go-kit/log" "github.com/mitchellh/mapstructure" diff --git a/clients/pkg/logentry/stages/limit_test.go b/clients/pkg/logentry/stages/limit_test.go index b439db4908b2f..0d3519e8c9b4b 100644 --- a/clients/pkg/logentry/stages/limit_test.go +++ b/clients/pkg/logentry/stages/limit_test.go @@ -9,7 +9,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) // Not all these are tested but are here to make sure the different types marshal without error @@ -60,7 +60,7 @@ var testNonAppLogLine = ` var plName = "testPipeline" -// TestLimitPipeline is used to verify we properly parse the yaml config and create a working pipeline +// TestLimitWaitPipeline is used to verify we properly parse the yaml config and create a working pipeline func TestLimitWaitPipeline(t *testing.T) { registry := prometheus.NewRegistry() pl, err := NewPipeline(util_log.Logger, loadConfig(testLimitWaitYaml), &plName, registry) @@ -78,7 +78,7 @@ func TestLimitWaitPipeline(t *testing.T) { assert.Equal(t, out[0].Line, testMatchLogLineApp1) } -// TestLimitPipeline is used to verify we properly parse the yaml config and create a working pipeline +// TestLimitDropPipeline is used to verify we properly parse the yaml config and create a working pipeline func TestLimitDropPipeline(t *testing.T) { registry := prometheus.NewRegistry() pl, err := NewPipeline(util_log.Logger, loadConfig(testLimitDropYaml), &plName, registry) diff --git 
a/clients/pkg/logentry/stages/logfmt_test.go b/clients/pkg/logentry/stages/logfmt_test.go index 8258eeece501b..ed60d8770d014 100644 --- a/clients/pkg/logentry/stages/logfmt_test.go +++ b/clients/pkg/logentry/stages/logfmt_test.go @@ -9,7 +9,7 @@ import ( "github.com/stretchr/testify/assert" "gopkg.in/yaml.v2" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) var testLogfmtYamlSingleStageWithoutSource = ` diff --git a/clients/pkg/logentry/stages/match.go b/clients/pkg/logentry/stages/match.go index 3b4addbb0de12..4007e45da4ecb 100644 --- a/clients/pkg/logentry/stages/match.go +++ b/clients/pkg/logentry/stages/match.go @@ -8,7 +8,7 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" - "github.com/grafana/loki/clients/pkg/logentry/logql" + "github.com/grafana/loki/v3/clients/pkg/logentry/logql" ) const ( diff --git a/clients/pkg/logentry/stages/match_test.go b/clients/pkg/logentry/stages/match_test.go index 558407320c57d..05d65f0bcaff5 100644 --- a/clients/pkg/logentry/stages/match_test.go +++ b/clients/pkg/logentry/stages/match_test.go @@ -8,7 +8,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/assert" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) var testMatchYaml = ` diff --git a/clients/pkg/logentry/stages/metrics.go b/clients/pkg/logentry/stages/metrics.go index 14386e3b43a40..827f0cf313a47 100644 --- a/clients/pkg/logentry/stages/metrics.go +++ b/clients/pkg/logentry/stages/metrics.go @@ -15,7 +15,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" - "github.com/grafana/loki/clients/pkg/logentry/metric" + "github.com/grafana/loki/v3/clients/pkg/logentry/metric" ) const ( diff --git a/clients/pkg/logentry/stages/metrics_test.go b/clients/pkg/logentry/stages/metrics_test.go index 6a14e6c80c1ee..f46ea6839919f 100644 --- 
a/clients/pkg/logentry/stages/metrics_test.go +++ b/clients/pkg/logentry/stages/metrics_test.go @@ -14,9 +14,9 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/logentry/metric" + "github.com/grafana/loki/v3/clients/pkg/logentry/metric" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) var testMetricYaml = ` diff --git a/clients/pkg/logentry/stages/multiline.go b/clients/pkg/logentry/stages/multiline.go index 199ff438a9390..2f94a2e1822f3 100644 --- a/clients/pkg/logentry/stages/multiline.go +++ b/clients/pkg/logentry/stages/multiline.go @@ -13,9 +13,9 @@ import ( "github.com/pkg/errors" "github.com/prometheus/common/model" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) const ( diff --git a/clients/pkg/logentry/stages/multiline_test.go b/clients/pkg/logentry/stages/multiline_test.go index 33b71c8f5f023..b175f89845dea 100644 --- a/clients/pkg/logentry/stages/multiline_test.go +++ b/clients/pkg/logentry/stages/multiline_test.go @@ -10,10 +10,10 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/logproto" - util_log "github.com/grafana/loki/pkg/util/log" + "github.com/grafana/loki/v3/pkg/logproto" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) func Test_multilineStage_Process(t *testing.T) { diff --git a/clients/pkg/logentry/stages/output_test.go b/clients/pkg/logentry/stages/output_test.go index a7b02714faf74..dc6aac54f0b93 100644 --- a/clients/pkg/logentry/stages/output_test.go +++ b/clients/pkg/logentry/stages/output_test.go @@ -11,7 +11,7 @@ import ( "github.com/prometheus/client_golang/prometheus" 
"github.com/stretchr/testify/assert" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) var testOutputYaml = ` diff --git a/clients/pkg/logentry/stages/pack.go b/clients/pkg/logentry/stages/pack.go index 737fa8d36b796..881650d8c6aa1 100644 --- a/clients/pkg/logentry/stages/pack.go +++ b/clients/pkg/logentry/stages/pack.go @@ -15,7 +15,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" - "github.com/grafana/loki/pkg/logqlmodel" + "github.com/grafana/loki/v3/pkg/logqlmodel" ) var ( diff --git a/clients/pkg/logentry/stages/pack_test.go b/clients/pkg/logentry/stages/pack_test.go index b767f90a76063..44935051a9523 100644 --- a/clients/pkg/logentry/stages/pack_test.go +++ b/clients/pkg/logentry/stages/pack_test.go @@ -12,11 +12,11 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/logproto" - "github.com/grafana/loki/pkg/logqlmodel" - util_log "github.com/grafana/loki/pkg/util/log" + "github.com/grafana/loki/v3/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logqlmodel" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) // Not all these are tested but are here to make sure the different types marshal without error diff --git a/clients/pkg/logentry/stages/pipeline.go b/clients/pkg/logentry/stages/pipeline.go index c20a7784c511c..1c4d2ba8e5ab4 100644 --- a/clients/pkg/logentry/stages/pipeline.go +++ b/clients/pkg/logentry/stages/pipeline.go @@ -9,7 +9,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "golang.org/x/time/rate" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" ) // PipelineStages contains configuration for each stage within a pipeline diff --git a/clients/pkg/logentry/stages/pipeline_test.go 
b/clients/pkg/logentry/stages/pipeline_test.go index 51fe66e30c36b..2649de6a83441 100644 --- a/clients/pkg/logentry/stages/pipeline_test.go +++ b/clients/pkg/logentry/stages/pipeline_test.go @@ -14,11 +14,11 @@ import ( "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" - "github.com/grafana/loki/pkg/logproto" - util_log "github.com/grafana/loki/pkg/util/log" + "github.com/grafana/loki/v3/pkg/logproto" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) var ( diff --git a/clients/pkg/logentry/stages/regex_test.go b/clients/pkg/logentry/stages/regex_test.go index dc3402e6e7a45..f7fa5390a1959 100644 --- a/clients/pkg/logentry/stages/regex_test.go +++ b/clients/pkg/logentry/stages/regex_test.go @@ -14,7 +14,7 @@ import ( "github.com/stretchr/testify/assert" "gopkg.in/yaml.v2" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) var testRegexYamlSingleStageWithoutSource = ` diff --git a/clients/pkg/logentry/stages/replace_test.go b/clients/pkg/logentry/stages/replace_test.go index f8feef3c898df..87bb3eecb898a 100644 --- a/clients/pkg/logentry/stages/replace_test.go +++ b/clients/pkg/logentry/stages/replace_test.go @@ -10,7 +10,7 @@ import ( "github.com/stretchr/testify/assert" "gopkg.in/yaml.v2" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) var testReplaceYamlSingleStageWithoutSource = ` diff --git a/clients/pkg/logentry/stages/sampling_test.go b/clients/pkg/logentry/stages/sampling_test.go index 171277e961d66..9b56eec5c0c5a 100644 --- a/clients/pkg/logentry/stages/sampling_test.go +++ b/clients/pkg/logentry/stages/sampling_test.go @@ -9,7 +9,7 @@ import ( "github.com/stretchr/testify/assert" 
"github.com/stretchr/testify/require" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) var testSampingYaml = ` diff --git a/clients/pkg/logentry/stages/stage.go b/clients/pkg/logentry/stages/stage.go index 1c19face4044d..9de1d4e0a5904 100644 --- a/clients/pkg/logentry/stages/stage.go +++ b/clients/pkg/logentry/stages/stage.go @@ -12,7 +12,7 @@ import ( "github.com/prometheus/common/model" "gopkg.in/yaml.v2" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" ) const ( diff --git a/clients/pkg/logentry/stages/static_labels_test.go b/clients/pkg/logentry/stages/static_labels_test.go index 9643d3da7aa51..bad2ec68f4a97 100644 --- a/clients/pkg/logentry/stages/static_labels_test.go +++ b/clients/pkg/logentry/stages/static_labels_test.go @@ -7,7 +7,7 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/assert" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) func Test_staticLabelStage_Process(t *testing.T) { diff --git a/clients/pkg/logentry/stages/structuredmetadata.go b/clients/pkg/logentry/stages/structuredmetadata.go index cdab88a956c7f..cdf70c01d4fa7 100644 --- a/clients/pkg/logentry/stages/structuredmetadata.go +++ b/clients/pkg/logentry/stages/structuredmetadata.go @@ -5,7 +5,7 @@ import ( "github.com/mitchellh/mapstructure" "github.com/prometheus/common/model" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) func newStructuredMetadataStage(params StageCreationParams) (Stage, error) { diff --git a/clients/pkg/logentry/stages/structuredmetadata_test.go b/clients/pkg/logentry/stages/structuredmetadata_test.go index d9a70300b8b0b..2b48c641ef81d 100644 --- a/clients/pkg/logentry/stages/structuredmetadata_test.go +++ b/clients/pkg/logentry/stages/structuredmetadata_test.go @@ -9,7 +9,8 @@ import ( "github.com/stretchr/testify/require" 
"github.com/grafana/loki/pkg/push" - util_log "github.com/grafana/loki/pkg/util/log" + + util_log "github.com/grafana/loki/v3/pkg/util/log" ) var pipelineStagesStructuredMetadataUsingMatch = ` diff --git a/clients/pkg/logentry/stages/template_test.go b/clients/pkg/logentry/stages/template_test.go index 96e7f1b06a2df..7977c87ffee66 100644 --- a/clients/pkg/logentry/stages/template_test.go +++ b/clients/pkg/logentry/stages/template_test.go @@ -12,7 +12,7 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/assert" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) var testTemplateYaml = ` diff --git a/clients/pkg/logentry/stages/tenant.go b/clients/pkg/logentry/stages/tenant.go index 13717ccb29bf6..ed730fbc0c121 100644 --- a/clients/pkg/logentry/stages/tenant.go +++ b/clients/pkg/logentry/stages/tenant.go @@ -10,7 +10,7 @@ import ( "github.com/pkg/errors" "github.com/prometheus/common/model" - "github.com/grafana/loki/clients/pkg/promtail/client" + "github.com/grafana/loki/v3/clients/pkg/promtail/client" ) const ( diff --git a/clients/pkg/logentry/stages/tenant_test.go b/clients/pkg/logentry/stages/tenant_test.go index eb02b0bda9db8..8eee783d47ddf 100644 --- a/clients/pkg/logentry/stages/tenant_test.go +++ b/clients/pkg/logentry/stages/tenant_test.go @@ -12,10 +12,10 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/promtail/client" + "github.com/grafana/loki/v3/clients/pkg/promtail/client" - lokiutil "github.com/grafana/loki/pkg/util" - util_log "github.com/grafana/loki/pkg/util/log" + lokiutil "github.com/grafana/loki/v3/pkg/util" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) var testTenantYamlExtractedData = ` diff --git a/clients/pkg/logentry/stages/timestamp.go b/clients/pkg/logentry/stages/timestamp.go index 592ae13565643..fb1fb8a27c3b5 100644 --- a/clients/pkg/logentry/stages/timestamp.go +++ 
b/clients/pkg/logentry/stages/timestamp.go @@ -12,7 +12,7 @@ import ( "github.com/mitchellh/mapstructure" "github.com/prometheus/common/model" - "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/v3/pkg/util" ) const ( diff --git a/clients/pkg/logentry/stages/timestamp_test.go b/clients/pkg/logentry/stages/timestamp_test.go index 73e4fb196b5b5..f3f23dcfcebab 100644 --- a/clients/pkg/logentry/stages/timestamp_test.go +++ b/clients/pkg/logentry/stages/timestamp_test.go @@ -14,8 +14,8 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - lokiutil "github.com/grafana/loki/pkg/util" - util_log "github.com/grafana/loki/pkg/util/log" + lokiutil "github.com/grafana/loki/v3/pkg/util" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) var testTimestampYaml = ` diff --git a/clients/pkg/logentry/stages/util_test.go b/clients/pkg/logentry/stages/util_test.go index b58490cc56fc6..5ce0ae9a7f93a 100644 --- a/clients/pkg/logentry/stages/util_test.go +++ b/clients/pkg/logentry/stages/util_test.go @@ -8,9 +8,9 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/assert" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) func newEntry(ex map[string]interface{}, lbs model.LabelSet, line string, ts time.Time) Entry { diff --git a/clients/pkg/promtail/api/types.go b/clients/pkg/promtail/api/types.go index 2bb2482da4628..36f9cc484160c 100644 --- a/clients/pkg/promtail/api/types.go +++ b/clients/pkg/promtail/api/types.go @@ -6,7 +6,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) // Entry is a log entry with labels. 
diff --git a/clients/pkg/promtail/client/batch.go b/clients/pkg/promtail/client/batch.go index 8681b67bd13f1..a6e7b45dd984b 100644 --- a/clients/pkg/promtail/client/batch.go +++ b/clients/pkg/promtail/client/batch.go @@ -12,9 +12,9 @@ import ( "github.com/prometheus/common/model" "golang.org/x/exp/slices" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) const ( diff --git a/clients/pkg/promtail/client/batch_test.go b/clients/pkg/promtail/client/batch_test.go index 56dc9477e8b6a..ec92fbc1c4225 100644 --- a/clients/pkg/promtail/client/batch_test.go +++ b/clients/pkg/promtail/client/batch_test.go @@ -9,9 +9,9 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) func TestBatch_MaxStreams(t *testing.T) { diff --git a/clients/pkg/promtail/client/client.go b/clients/pkg/promtail/client/client.go index 4dfd11363a824..ea93a604d32fb 100644 --- a/clients/pkg/promtail/client/client.go +++ b/clients/pkg/promtail/client/client.go @@ -20,10 +20,10 @@ import ( "github.com/prometheus/common/config" "github.com/prometheus/common/model" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" - lokiutil "github.com/grafana/loki/pkg/util" - "github.com/grafana/loki/pkg/util/build" + lokiutil "github.com/grafana/loki/v3/pkg/util" + "github.com/grafana/loki/v3/pkg/util/build" ) const ( diff --git a/clients/pkg/promtail/client/client_test.go b/clients/pkg/promtail/client/client_test.go index 01cbb87cc1116..ea3039879605b 100644 --- a/clients/pkg/promtail/client/client_test.go +++ b/clients/pkg/promtail/client/client_test.go @@ -19,12 +19,13 @@ import ( 
"github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/utils" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/utils" - "github.com/grafana/loki/pkg/logproto" "github.com/grafana/loki/pkg/push" - lokiflag "github.com/grafana/loki/pkg/util/flagext" + + "github.com/grafana/loki/v3/pkg/logproto" + lokiflag "github.com/grafana/loki/v3/pkg/util/flagext" ) var logEntries = []api.Entry{ diff --git a/clients/pkg/promtail/client/client_writeto.go b/clients/pkg/promtail/client/client_writeto.go index 6fa549dfa2721..bd5ecfc424689 100644 --- a/clients/pkg/promtail/client/client_writeto.go +++ b/clients/pkg/promtail/client/client_writeto.go @@ -10,10 +10,10 @@ import ( "github.com/prometheus/prometheus/tsdb/chunks" "github.com/prometheus/prometheus/tsdb/record" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/ingester/wal" - "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/v3/pkg/ingester/wal" + "github.com/grafana/loki/v3/pkg/util" ) // clientWriteTo implements a wal.WriteTo that re-builds entries with the stored series, and the received entries. 
After, diff --git a/clients/pkg/promtail/client/client_writeto_test.go b/clients/pkg/promtail/client/client_writeto_test.go index 2254fbb073658..4044d1641fb12 100644 --- a/clients/pkg/promtail/client/client_writeto_test.go +++ b/clients/pkg/promtail/client/client_writeto_test.go @@ -18,10 +18,10 @@ import ( "github.com/prometheus/prometheus/tsdb/record" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/ingester/wal" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/ingester/wal" + "github.com/grafana/loki/v3/pkg/logproto" ) func TestClientWriter_LogEntriesAreReconstructedAndForwardedCorrectly(t *testing.T) { @@ -29,11 +29,14 @@ func TestClientWriter_LogEntriesAreReconstructedAndForwardedCorrectly(t *testing ch := make(chan api.Entry) defer close(ch) + var mu sync.Mutex var receivedEntries []api.Entry go func() { for e := range ch { + mu.Lock() receivedEntries = append(receivedEntries, e) + mu.Unlock() } }() @@ -72,12 +75,16 @@ func TestClientWriter_LogEntriesAreReconstructedAndForwardedCorrectly(t *testing } require.Eventually(t, func() bool { + mu.Lock() + defer mu.Unlock() return len(receivedEntries) == len(lines) }, time.Second*10, time.Second) + mu.Lock() for _, receivedEntry := range receivedEntries { require.Contains(t, lines, receivedEntry.Line, "entry line was not expected") require.Equal(t, model.LabelValue("test"), receivedEntry.Labels["app"]) } + mu.Unlock() } func TestClientWriter_LogEntriesWithoutMatchingSeriesAreIgnored(t *testing.T) { diff --git a/clients/pkg/promtail/client/config.go b/clients/pkg/promtail/client/config.go index ab36353ba4903..eab0eb8863e65 100644 --- a/clients/pkg/promtail/client/config.go +++ b/clients/pkg/promtail/client/config.go @@ -8,7 +8,7 @@ import ( "github.com/grafana/dskit/flagext" "github.com/prometheus/common/config" - lokiflag "github.com/grafana/loki/pkg/util/flagext" 
+ lokiflag "github.com/grafana/loki/v3/pkg/util/flagext" ) // NOTE the helm chart for promtail and fluent-bit also have defaults for these values, please update to match if you make changes here. diff --git a/clients/pkg/promtail/client/fake/client.go b/clients/pkg/promtail/client/fake/client.go index 33e886c30980c..03257135a585d 100644 --- a/clients/pkg/promtail/client/fake/client.go +++ b/clients/pkg/promtail/client/fake/client.go @@ -3,7 +3,7 @@ package fake import ( "sync" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" ) // Client is a fake client used for testing. diff --git a/clients/pkg/promtail/client/logger.go b/clients/pkg/promtail/client/logger.go index 890d51177c26c..ba20055a0d94b 100644 --- a/clients/pkg/promtail/client/logger.go +++ b/clients/pkg/promtail/client/logger.go @@ -12,9 +12,9 @@ import ( "github.com/prometheus/client_golang/prometheus" "gopkg.in/yaml.v2" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/limit" - "github.com/grafana/loki/clients/pkg/promtail/wal" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/limit" + "github.com/grafana/loki/v3/clients/pkg/promtail/wal" ) var ( diff --git a/clients/pkg/promtail/client/logger_test.go b/clients/pkg/promtail/client/logger_test.go index 43c710d69088c..c19f39ac75784 100644 --- a/clients/pkg/promtail/client/logger_test.go +++ b/clients/pkg/promtail/client/logger_test.go @@ -9,10 +9,10 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/logproto" - util_log "github.com/grafana/loki/pkg/util/log" + "github.com/grafana/loki/v3/pkg/logproto" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) func TestNewLogger(t *testing.T) { diff --git 
a/clients/pkg/promtail/client/manager.go b/clients/pkg/promtail/client/manager.go index 84dc48de350d5..ef08d2c04f528 100644 --- a/clients/pkg/promtail/client/manager.go +++ b/clients/pkg/promtail/client/manager.go @@ -9,9 +9,9 @@ import ( "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/limit" - "github.com/grafana/loki/clients/pkg/promtail/wal" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/limit" + "github.com/grafana/loki/v3/clients/pkg/promtail/wal" ) // WriterEventsNotifier implements a notifier that's received by the Manager, to which wal.Watcher can subscribe for diff --git a/clients/pkg/promtail/client/manager_test.go b/clients/pkg/promtail/client/manager_test.go index 14165ea503f2b..2105e6a90e3d9 100644 --- a/clients/pkg/promtail/client/manager_test.go +++ b/clients/pkg/promtail/client/manager_test.go @@ -6,6 +6,7 @@ import ( "net/http" "net/url" "os" + "sync" "testing" "time" @@ -16,14 +17,14 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" - "github.com/grafana/loki/clients/pkg/promtail/limit" - "github.com/grafana/loki/clients/pkg/promtail/utils" - "github.com/grafana/loki/clients/pkg/promtail/wal" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" + "github.com/grafana/loki/v3/clients/pkg/promtail/limit" + "github.com/grafana/loki/v3/clients/pkg/promtail/utils" + "github.com/grafana/loki/v3/clients/pkg/promtail/wal" - "github.com/grafana/loki/pkg/logproto" - lokiflag "github.com/grafana/loki/pkg/util/flagext" + "github.com/grafana/loki/v3/pkg/logproto" + lokiflag "github.com/grafana/loki/v3/pkg/util/flagext" ) var testLimitsConfig = limit.Config{ 
@@ -127,10 +128,13 @@ func TestManager_WALEnabled(t *testing.T) { require.NoError(t, err) require.Equal(t, "wal:test-client", manager.Name()) + var mu sync.Mutex receivedRequests := []utils.RemoteWriteRequest{} go func() { for req := range rwReceivedReqs { + mu.Lock() receivedRequests = append(receivedRequests, req) + mu.Unlock() } }() @@ -155,17 +159,21 @@ func TestManager_WALEnabled(t *testing.T) { } require.Eventually(t, func() bool { + mu.Lock() + defer mu.Unlock() return len(receivedRequests) == totalLines }, 5*time.Second, time.Second, "timed out waiting for requests to be received") var seenEntries = map[string]struct{}{} // assert over rw client received entries + mu.Lock() for _, req := range receivedRequests { require.Len(t, req.Request.Streams, 1, "expected 1 stream requests to be received") require.Len(t, req.Request.Streams[0].Entries, 1, "expected 1 entry in the only stream received per request") require.Equal(t, `{wal_enabled="true"}`, req.Request.Streams[0].Labels) seenEntries[req.Request.Streams[0].Entries[0].Line] = struct{}{} } + mu.Unlock() require.Len(t, seenEntries, totalLines) } @@ -182,10 +190,13 @@ func TestManager_WALDisabled(t *testing.T) { require.NoError(t, err) require.Equal(t, "multi:test-client", manager.Name()) + var mu sync.Mutex receivedRequests := []utils.RemoteWriteRequest{} go func() { for req := range rwReceivedReqs { + mu.Lock() receivedRequests = append(receivedRequests, req) + mu.Unlock() } }() @@ -209,17 +220,21 @@ func TestManager_WALDisabled(t *testing.T) { } require.Eventually(t, func() bool { + mu.Lock() + defer mu.Unlock() return len(receivedRequests) == totalLines }, 5*time.Second, time.Second, "timed out waiting for requests to be received") var seenEntries = map[string]struct{}{} // assert over rw client received entries + mu.Lock() for _, req := range receivedRequests { require.Len(t, req.Request.Streams, 1, "expected 1 stream requests to be received") require.Len(t, req.Request.Streams[0].Entries, 1, "expected 1 
entry in the only stream received per request") require.Equal(t, `{pizza-flavour="fugazzeta"}`, req.Request.Streams[0].Labels) seenEntries[req.Request.Streams[0].Entries[0].Line] = struct{}{} } + mu.Unlock() require.Len(t, seenEntries, totalLines) } @@ -250,15 +265,20 @@ func TestManager_WALDisabled_MultipleConfigs(t *testing.T) { require.NoError(t, err) require.Equal(t, "multi:test-client,test-client-2", manager.Name()) + var mu sync.Mutex receivedRequests := []utils.RemoteWriteRequest{} ctx, cancel := context.WithCancel(context.Background()) go func(ctx context.Context) { for { select { case req := <-rwReceivedReqs: + mu.Lock() receivedRequests = append(receivedRequests, req) + mu.Unlock() case req := <-rwReceivedReqs2: + mu.Lock() receivedRequests = append(receivedRequests, req) + mu.Unlock() case <-ctx.Done(): return } @@ -289,16 +309,20 @@ func TestManager_WALDisabled_MultipleConfigs(t *testing.T) { // times 2 due to clients being run expectedTotalLines := totalLines * 2 require.Eventually(t, func() bool { + mu.Lock() + defer mu.Unlock() return len(receivedRequests) == expectedTotalLines }, 5*time.Second, time.Second, "timed out waiting for requests to be received") var seenEntries = map[string]struct{}{} // assert over rw client received entries + mu.Lock() for _, req := range receivedRequests { require.Len(t, req.Request.Streams, 1, "expected 1 stream requests to be received") require.Len(t, req.Request.Streams[0].Entries, 1, "expected 1 entry in the only stream received per request") seenEntries[fmt.Sprintf("%s-%s", req.Request.Streams[0].Labels, req.Request.Streams[0].Entries[0].Line)] = struct{}{} } + mu.Unlock() require.Len(t, seenEntries, expectedTotalLines) } diff --git a/clients/pkg/promtail/config/config.go b/clients/pkg/promtail/config/config.go index 4a720a2dd28f3..615b8e9abaad5 100644 --- a/clients/pkg/promtail/config/config.go +++ b/clients/pkg/promtail/config/config.go @@ -8,16 +8,16 @@ import ( "github.com/go-kit/log/level" "gopkg.in/yaml.v2" - 
"github.com/grafana/loki/clients/pkg/promtail/client" - "github.com/grafana/loki/clients/pkg/promtail/limit" - "github.com/grafana/loki/clients/pkg/promtail/positions" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/server" - "github.com/grafana/loki/clients/pkg/promtail/targets/file" - "github.com/grafana/loki/clients/pkg/promtail/wal" + "github.com/grafana/loki/v3/clients/pkg/promtail/client" + "github.com/grafana/loki/v3/clients/pkg/promtail/limit" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/server" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/file" + "github.com/grafana/loki/v3/clients/pkg/promtail/wal" - "github.com/grafana/loki/pkg/tracing" - "github.com/grafana/loki/pkg/util/flagext" + "github.com/grafana/loki/v3/pkg/tracing" + "github.com/grafana/loki/v3/pkg/util/flagext" ) // Options contains cross-cutting promtail configurations diff --git a/clients/pkg/promtail/config/config_test.go b/clients/pkg/promtail/config/config_test.go index 04cd09f56dfc1..32bab70501e39 100644 --- a/clients/pkg/promtail/config/config_test.go +++ b/clients/pkg/promtail/config/config_test.go @@ -11,9 +11,9 @@ import ( "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" - "github.com/grafana/loki/clients/pkg/promtail/client" + "github.com/grafana/loki/v3/clients/pkg/promtail/client" - "github.com/grafana/loki/pkg/util/flagext" + "github.com/grafana/loki/v3/pkg/util/flagext" ) const testFile = ` diff --git a/clients/pkg/promtail/discovery/consulagent/consul.go b/clients/pkg/promtail/discovery/consulagent/consul.go index f38bc6e3dfe08..2a08498efea69 100644 --- a/clients/pkg/promtail/discovery/consulagent/consul.go +++ b/clients/pkg/promtail/discovery/consulagent/consul.go @@ -8,6 +8,7 @@ package consulagent import ( "context" "encoding/json" + "fmt" "net" "net/http" 
"strconv" @@ -62,26 +63,6 @@ const ( ) var ( - rpcFailuresCount = prometheus.NewCounter( - prometheus.CounterOpts{ - Namespace: namespace, - Name: "sd_consulagent_rpc_failures_total", - Help: "The number of Consul Agent RPC call failures.", - }) - rpcDuration = prometheus.NewSummaryVec( - prometheus.SummaryOpts{ - Namespace: namespace, - Name: "sd_consulagent_rpc_duration_seconds", - Help: "The duration of a Consul Agent RPC call in seconds.", - Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, - }, - []string{"endpoint", "call"}, - ) - - // Initialize metric vectors. - servicesRPCDuration = rpcDuration.WithLabelValues("agent", "services") - serviceRPCDuration = rpcDuration.WithLabelValues("agent", "service") - // DefaultSDConfig is the default Consul SD configuration. DefaultSDConfig = SDConfig{ TagSeparator: ",", @@ -94,8 +75,6 @@ var ( func init() { discovery.RegisterConfig(&SDConfig{}) - prometheus.MustRegister(rpcFailuresCount) - prometheus.MustRegister(rpcDuration) } // SDConfig is the configuration for Consul service discovery. @@ -129,12 +108,17 @@ type SDConfig struct { TLSConfig config.TLSConfig `yaml:"tls_config,omitempty"` } +// NewDiscovererMetrics implements discovery.Config. +func (c *SDConfig) NewDiscovererMetrics(reg prometheus.Registerer, rmi discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics { + return newDiscovererMetrics(reg, rmi) +} + // Name returns the name of the Config. func (*SDConfig) Name() string { return "consulagent" } // NewDiscoverer returns a Discoverer for the Config. func (c *SDConfig) NewDiscoverer(opts discovery.DiscovererOptions) (discovery.Discoverer, error) { - return NewDiscovery(c, opts.Logger) + return NewDiscovery(c, opts.Logger, opts.Metrics) } // SetDirectory joins any relative file paths with dir. 
@@ -169,10 +153,16 @@ type Discovery struct { refreshInterval time.Duration finalizer func() logger log.Logger + metrics *consulMetrics } // NewDiscovery returns a new Discovery for the given config. -func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { +func NewDiscovery(conf *SDConfig, logger log.Logger, metrics discovery.DiscovererMetrics) (*Discovery, error) { + m, ok := metrics.(*consulMetrics) + if !ok { + return nil, fmt.Errorf("invalid discovery metrics type") + } + if logger == nil { logger = log.NewNopLogger() } @@ -220,6 +210,7 @@ func NewDiscovery(conf *SDConfig, logger log.Logger) (*Discovery, error) { clientDatacenter: conf.Datacenter, finalizer: transport.CloseIdleConnections, logger: logger, + metrics: m, } return cd, nil } @@ -275,7 +266,7 @@ func (d *Discovery) getDatacenter() error { info, err := d.client.Agent().Self() if err != nil { level.Error(d.logger).Log("msg", "Error retrieving datacenter name", "err", err) - rpcFailuresCount.Inc() + d.metrics.rpcFailuresCount.Inc() return err } @@ -356,7 +347,7 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup. t0 := time.Now() srvs, err := agent.Services() elapsed := time.Since(t0) - servicesRPCDuration.Observe(elapsed.Seconds()) + d.metrics.servicesRPCDuration.Observe(elapsed.Seconds()) // Check the context before in order to exit early. select { @@ -367,7 +358,7 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup. if err != nil { level.Error(d.logger).Log("msg", "Error refreshing service list", "err", err) - rpcFailuresCount.Inc() + d.metrics.rpcFailuresCount.Inc() time.Sleep(retryInterval) return } @@ -423,13 +414,15 @@ func (d *Discovery) watchServices(ctx context.Context, ch chan<- []*targetgroup. // consulService contains data belonging to the same service. 
type consulService struct { - name string - tags []string - labels model.LabelSet - discovery *Discovery - client *consul.Client - tagSeparator string - logger log.Logger + name string + tags []string + labels model.LabelSet + discovery *Discovery + client *consul.Client + tagSeparator string + logger log.Logger + rpcFailuresCount prometheus.Counter + serviceRPCDuration prometheus.Observer } // Start watching a service. @@ -443,8 +436,10 @@ func (d *Discovery) watchService(ctx context.Context, ch chan<- []*targetgroup.G serviceLabel: model.LabelValue(name), datacenterLabel: model.LabelValue(d.clientDatacenter), }, - tagSeparator: d.tagSeparator, - logger: d.logger, + tagSeparator: d.tagSeparator, + logger: d.logger, + rpcFailuresCount: d.metrics.rpcFailuresCount, + serviceRPCDuration: d.metrics.serviceRPCDuration, } go func() { @@ -474,7 +469,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr t0 := time.Now() aggregatedStatus, serviceChecks, err := agent.AgentHealthServiceByName(srv.name) elapsed := time.Since(t0) - serviceRPCDuration.Observe(elapsed.Seconds()) + srv.serviceRPCDuration.Observe(elapsed.Seconds()) // Check the context before in order to exit early. 
select { @@ -486,7 +481,7 @@ func (srv *consulService) watch(ctx context.Context, ch chan<- []*targetgroup.Gr if err != nil { level.Error(srv.logger).Log("msg", "Error refreshing service", "service", srv.name, "tags", strings.Join(srv.tags, ","), "err", err) - rpcFailuresCount.Inc() + srv.rpcFailuresCount.Inc() time.Sleep(retryInterval) return } diff --git a/clients/pkg/promtail/discovery/consulagent/consul_test.go b/clients/pkg/promtail/discovery/consulagent/consul_test.go index 0d36ae9549a8e..a0593370ee3a8 100644 --- a/clients/pkg/promtail/discovery/consulagent/consul_test.go +++ b/clients/pkg/promtail/discovery/consulagent/consul_test.go @@ -22,10 +22,12 @@ import ( "time" "github.com/go-kit/log" + "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "go.uber.org/goleak" + "github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" ) @@ -34,10 +36,24 @@ func TestMain(m *testing.M) { goleak.VerifyTestMain(m) } +// TODO: Add ability to unregister metrics? 
+func NewTestMetrics(t *testing.T, conf discovery.Config, reg prometheus.Registerer) discovery.DiscovererMetrics { + refreshMetrics := discovery.NewRefreshMetrics(reg) + require.NoError(t, refreshMetrics.Register()) + + metrics := conf.NewDiscovererMetrics(prometheus.NewRegistry(), refreshMetrics) + require.NoError(t, metrics.Register()) + + return metrics +} + func TestConfiguredService(t *testing.T) { conf := &SDConfig{ Services: []string{"configuredServiceName"}} - consulDiscovery, err := NewDiscovery(conf, nil) + + metrics := NewTestMetrics(t, conf, prometheus.NewRegistry()) + + consulDiscovery, err := NewDiscovery(conf, nil, metrics) if err != nil { t.Errorf("Unexpected error when initializing discovery %v", err) @@ -55,7 +71,10 @@ func TestConfiguredServiceWithTag(t *testing.T) { Services: []string{"configuredServiceName"}, ServiceTags: []string{"http"}, } - consulDiscovery, err := NewDiscovery(conf, nil) + + metrics := NewTestMetrics(t, conf, prometheus.NewRegistry()) + + consulDiscovery, err := NewDiscovery(conf, nil, metrics) if err != nil { t.Errorf("Unexpected error when initializing discovery %v", err) @@ -151,7 +170,9 @@ func TestConfiguredServiceWithTags(t *testing.T) { } for _, tc := range cases { - consulDiscovery, err := NewDiscovery(tc.conf, nil) + metrics := NewTestMetrics(t, tc.conf, prometheus.NewRegistry()) + + consulDiscovery, err := NewDiscovery(tc.conf, nil, metrics) if err != nil { t.Errorf("Unexpected error when initializing discovery %v", err) @@ -166,7 +187,10 @@ func TestConfiguredServiceWithTags(t *testing.T) { func TestNonConfiguredService(t *testing.T) { conf := &SDConfig{} - consulDiscovery, err := NewDiscovery(conf, nil) + + metrics := NewTestMetrics(t, conf, prometheus.NewRegistry()) + + consulDiscovery, err := NewDiscovery(conf, nil, metrics) if err != nil { t.Errorf("Unexpected error when initializing discovery %v", err) @@ -341,7 +365,8 @@ func newServer(t *testing.T) (*httptest.Server, *SDConfig) { func newDiscovery(t 
*testing.T, config *SDConfig) *Discovery { logger := log.NewNopLogger() - d, err := NewDiscovery(config, logger) + metrics := NewTestMetrics(t, config, prometheus.NewRegistry()) + d, err := NewDiscovery(config, logger, metrics) require.NoError(t, err) return d } diff --git a/clients/pkg/promtail/discovery/consulagent/metrics.go b/clients/pkg/promtail/discovery/consulagent/metrics.go new file mode 100644 index 0000000000000..692f17f7969b9 --- /dev/null +++ b/clients/pkg/promtail/discovery/consulagent/metrics.go @@ -0,0 +1,65 @@ +// This code was adapted from the consul service discovery +// package in prometheus: https://github.com/prometheus/prometheus/blob/main/discovery/consul/metrics.go +// which is copyrighted: 2015 The Prometheus Authors +// and licensed under the Apache License, Version 2.0 (the "License"); + +package consulagent + +import ( + "github.com/prometheus/client_golang/prometheus" + + "github.com/prometheus/prometheus/discovery" +) + +var _ discovery.DiscovererMetrics = (*consulMetrics)(nil) + +type consulMetrics struct { + rpcFailuresCount prometheus.Counter + rpcDuration *prometheus.SummaryVec + + servicesRPCDuration prometheus.Observer + serviceRPCDuration prometheus.Observer + + metricRegisterer discovery.MetricRegisterer +} + +func newDiscovererMetrics(reg prometheus.Registerer, _ discovery.RefreshMetricsInstantiator) discovery.DiscovererMetrics { + m := &consulMetrics{ + rpcFailuresCount: prometheus.NewCounter( + prometheus.CounterOpts{ + Namespace: namespace, + Name: "sd_consulagent_rpc_failures_total", + Help: "The number of Consul Agent RPC call failures.", + }), + rpcDuration: prometheus.NewSummaryVec( + prometheus.SummaryOpts{ + Namespace: namespace, + Name: "sd_consulagent_rpc_duration_seconds", + Help: "The duration of a Consul Agent RPC call in seconds.", + Objectives: map[float64]float64{0.5: 0.05, 0.9: 0.01, 0.99: 0.001}, + }, + []string{"endpoint", "call"}, + ), + } + + m.metricRegisterer = discovery.NewMetricRegisterer(reg, 
[]prometheus.Collector{ + m.rpcFailuresCount, + m.rpcDuration, + }) + + // Initialize metric vectors. + m.servicesRPCDuration = m.rpcDuration.WithLabelValues("agent", "services") + m.serviceRPCDuration = m.rpcDuration.WithLabelValues("agent", "service") + + return m +} + +// Register implements discovery.DiscovererMetrics. +func (m *consulMetrics) Register() error { + return m.metricRegisterer.RegisterMetrics() +} + +// Unregister implements discovery.DiscovererMetrics. +func (m *consulMetrics) Unregister() { + m.metricRegisterer.UnregisterMetrics() +} diff --git a/clients/pkg/promtail/limit/config.go b/clients/pkg/promtail/limit/config.go index 02589afd86b89..aed6f23c0b041 100644 --- a/clients/pkg/promtail/limit/config.go +++ b/clients/pkg/promtail/limit/config.go @@ -3,7 +3,7 @@ package limit import ( "flag" - "github.com/grafana/loki/pkg/util/flagext" + "github.com/grafana/loki/v3/pkg/util/flagext" ) type Config struct { diff --git a/clients/pkg/promtail/positions/positions_test.go b/clients/pkg/promtail/positions/positions_test.go index 1dce97b08ec7e..6a7044a5a868d 100644 --- a/clients/pkg/promtail/positions/positions_test.go +++ b/clients/pkg/promtail/positions/positions_test.go @@ -9,7 +9,7 @@ import ( "github.com/go-kit/log" "github.com/stretchr/testify/require" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) func tempFilename(t *testing.T) string { diff --git a/clients/pkg/promtail/promtail.go b/clients/pkg/promtail/promtail.go index 1ef3368a697e5..ffe774a405be8 100644 --- a/clients/pkg/promtail/promtail.go +++ b/clients/pkg/promtail/promtail.go @@ -14,17 +14,17 @@ import ( "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" - "github.com/grafana/loki/clients/pkg/logentry/stages" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/client" - "github.com/grafana/loki/clients/pkg/promtail/config" - 
"github.com/grafana/loki/clients/pkg/promtail/server" - "github.com/grafana/loki/clients/pkg/promtail/targets" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" - "github.com/grafana/loki/clients/pkg/promtail/utils" - "github.com/grafana/loki/clients/pkg/promtail/wal" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/client" + "github.com/grafana/loki/v3/clients/pkg/promtail/config" + "github.com/grafana/loki/v3/clients/pkg/promtail/server" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/promtail/utils" + "github.com/grafana/loki/v3/clients/pkg/promtail/wal" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) const ( diff --git a/clients/pkg/promtail/promtail_test.go b/clients/pkg/promtail/promtail_test.go index 25b9dbaa6dcbe..695f3faeb0f5f 100644 --- a/clients/pkg/promtail/promtail_test.go +++ b/clients/pkg/promtail/promtail_test.go @@ -30,19 +30,19 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/logentry/stages" - "github.com/grafana/loki/clients/pkg/promtail/client" - "github.com/grafana/loki/clients/pkg/promtail/config" - "github.com/grafana/loki/clients/pkg/promtail/positions" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/server" - pserver "github.com/grafana/loki/clients/pkg/promtail/server" - file2 "github.com/grafana/loki/clients/pkg/promtail/targets/file" - "github.com/grafana/loki/clients/pkg/promtail/targets/testutils" - - "github.com/grafana/loki/pkg/logproto" - "github.com/grafana/loki/pkg/util" - util_log "github.com/grafana/loki/pkg/util/log" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + 
"github.com/grafana/loki/v3/clients/pkg/promtail/client" + "github.com/grafana/loki/v3/clients/pkg/promtail/config" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/server" + pserver "github.com/grafana/loki/v3/clients/pkg/promtail/server" + file2 "github.com/grafana/loki/v3/clients/pkg/promtail/targets/file" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/testutils" + + "github.com/grafana/loki/v3/pkg/logproto" + "github.com/grafana/loki/v3/pkg/util" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) var clientMetrics = client.NewMetrics(prometheus.DefaultRegisterer) @@ -522,7 +522,7 @@ func getPromMetrics(t *testing.T, httpListenAddr net.Addr) ([]byte, string) { func parsePromMetrics(t *testing.T, bytes []byte, contentType string, metricName string, label string) map[string]float64 { rb := map[string]float64{} - pr, err := textparse.New(bytes, contentType, false) + pr, err := textparse.New(bytes, contentType, false, nil) require.NoError(t, err) for { et, err := pr.Next() diff --git a/clients/pkg/promtail/promtail_wal_test.go b/clients/pkg/promtail/promtail_wal_test.go index dc00c398e91c8..b4027ed2d9091 100644 --- a/clients/pkg/promtail/promtail_wal_test.go +++ b/clients/pkg/promtail/promtail_wal_test.go @@ -20,15 +20,16 @@ import ( "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/logentry/stages" - "github.com/grafana/loki/clients/pkg/promtail/client" - "github.com/grafana/loki/clients/pkg/promtail/config" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/utils" - "github.com/grafana/loki/clients/pkg/promtail/wal" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail/client" + 
"github.com/grafana/loki/v3/clients/pkg/promtail/config" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/utils" + "github.com/grafana/loki/v3/clients/pkg/promtail/wal" "github.com/grafana/loki/pkg/push" - util_log "github.com/grafana/loki/pkg/util/log" + + util_log "github.com/grafana/loki/v3/pkg/util/log" ) const ( @@ -58,19 +59,25 @@ func TestPromtailWithWAL_SingleTenant(t *testing.T) { // create receive channel and start a collect routine receivedCh := make(chan utils.RemoteWriteRequest) received := map[string][]push.Entry{} + var mu sync.Mutex + // Create a channel for log messages + logCh := make(chan string, 100) // Buffered channel to avoid blocking + wg.Add(1) go func() { defer wg.Done() for req := range receivedCh { + mu.Lock() // Add some observability to the requests received in the remote write endpoint var counts []string for _, str := range req.Request.Streams { counts = append(counts, fmt.Sprint(len(str.Entries))) } - t.Logf("received request: %s", counts) + logCh <- fmt.Sprintf("received request: %s", counts) for _, stream := range req.Request.Streams { received[stream.Labels] = append(received[stream.Labels], stream.Entries...) } + mu.Unlock() } }() @@ -119,14 +126,23 @@ func TestPromtailWithWAL_SingleTenant(t *testing.T) { for i := 0; i < entriesToWrite; i++ { _, err = logsFile.WriteString(fmt.Sprintf("log line # %d\n", i)) if err != nil { - t.Logf("error writing to log file. Err: %s", err.Error()) + logCh <- fmt.Sprintf("error writing to log file. 
Err: %s", err.Error()) } // not overkill log file time.Sleep(1 * time.Millisecond) } }() + // Goroutine to handle log messages + go func() { + for msg := range logCh { + t.Log(msg) + } + }() + require.Eventually(t, func() bool { + mu.Lock() + defer mu.Unlock() if seen, ok := received[expectedLabelSet]; ok { return len(seen) == entriesToWrite } @@ -157,11 +173,13 @@ func TestPromtailWithWAL_MultipleTenants(t *testing.T) { receivedCh := make(chan utils.RemoteWriteRequest) // received is a mapping from tenant, string-formatted label set to received entries received := map[string]map[string][]push.Entry{} + var mu sync.Mutex var totalReceived = 0 wg.Add(1) go func() { defer wg.Done() for req := range receivedCh { + mu.Lock() // start received label entries map if first time tenant is seen if _, ok := received[req.TenantID]; !ok { received[req.TenantID] = map[string][]push.Entry{} @@ -172,6 +190,7 @@ func TestPromtailWithWAL_MultipleTenants(t *testing.T) { // increment total count totalReceived += len(stream.Entries) } + mu.Unlock() } }() @@ -249,15 +268,19 @@ func TestPromtailWithWAL_MultipleTenants(t *testing.T) { // wait for all entries to be remote written require.Eventually(t, func() bool { + mu.Lock() + defer mu.Unlock() return totalReceived == entriesToWrite }, time.Second*20, time.Second, "timed out waiting for entries to be remote written") // assert over received entries require.Len(t, received, expectedTenantCounts, "not expected tenant count") + mu.Lock() for tenantID := 0; tenantID < expectedTenantCounts; tenantID++ { // we should've received at least entriesToWrite / expectedTenantCounts require.GreaterOrEqual(t, len(received[fmt.Sprint(tenantID)][expectedLabelSet]), entriesToWrite/expectedTenantCounts) } + mu.Unlock() pr.Shutdown() close(receivedCh) diff --git a/clients/pkg/promtail/scrapeconfig/scrapeconfig.go b/clients/pkg/promtail/scrapeconfig/scrapeconfig.go index 262b4b925d25d..a261a9a08a383 100644 --- 
a/clients/pkg/promtail/scrapeconfig/scrapeconfig.go +++ b/clients/pkg/promtail/scrapeconfig/scrapeconfig.go @@ -27,8 +27,8 @@ import ( "github.com/prometheus/prometheus/discovery/zookeeper" "github.com/prometheus/prometheus/model/relabel" - "github.com/grafana/loki/clients/pkg/logentry/stages" - "github.com/grafana/loki/clients/pkg/promtail/discovery/consulagent" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail/discovery/consulagent" ) // Config describes a job to scrape. diff --git a/clients/pkg/promtail/server/server.go b/clients/pkg/promtail/server/server.go index 1b47247630e05..2e7752812c93c 100644 --- a/clients/pkg/promtail/server/server.go +++ b/clients/pkg/promtail/server/server.go @@ -23,9 +23,9 @@ import ( "github.com/prometheus/client_golang/prometheus/promhttp" "github.com/prometheus/common/version" - "github.com/grafana/loki/clients/pkg/promtail/server/ui" - "github.com/grafana/loki/clients/pkg/promtail/targets" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/promtail/server/ui" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) var ( diff --git a/clients/pkg/promtail/server/template.go b/clients/pkg/promtail/server/template.go index 1ed7fde54547d..53013bc485d46 100644 --- a/clients/pkg/promtail/server/template.go +++ b/clients/pkg/promtail/server/template.go @@ -13,7 +13,7 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/prometheus/template" - "github.com/grafana/loki/clients/pkg/promtail/server/ui" + "github.com/grafana/loki/v3/clients/pkg/promtail/server/ui" ) // templateOptions is a set of options to render a template. 
diff --git a/clients/pkg/promtail/server/ui/assets_generate.go b/clients/pkg/promtail/server/ui/assets_generate.go index 984a1f9c99e4d..0165b2031f60c 100644 --- a/clients/pkg/promtail/server/ui/assets_generate.go +++ b/clients/pkg/promtail/server/ui/assets_generate.go @@ -10,7 +10,7 @@ import ( "github.com/prometheus/alertmanager/pkg/modtimevfs" "github.com/shurcooL/vfsgen" - "github.com/grafana/loki/clients/pkg/promtail/server/ui" + "github.com/grafana/loki/v3/clients/pkg/promtail/server/ui" ) func main() { diff --git a/clients/pkg/promtail/targets/azureeventhubs/parser.go b/clients/pkg/promtail/targets/azureeventhubs/parser.go index cd2ddc7145cbb..0001dc525019e 100644 --- a/clients/pkg/promtail/targets/azureeventhubs/parser.go +++ b/clients/pkg/promtail/targets/azureeventhubs/parser.go @@ -12,9 +12,9 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) type azureMonitorResourceLogs struct { diff --git a/clients/pkg/promtail/targets/azureeventhubs/target_syncer.go b/clients/pkg/promtail/targets/azureeventhubs/target_syncer.go index e16d3b4914a10..bc2175768f460 100644 --- a/clients/pkg/promtail/targets/azureeventhubs/target_syncer.go +++ b/clients/pkg/promtail/targets/azureeventhubs/target_syncer.go @@ -10,10 +10,10 @@ import ( "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" - "github.com/grafana/loki/clients/pkg/logentry/stages" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/kafka" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + 
"github.com/grafana/loki/v3/clients/pkg/promtail/targets/kafka" ) func NewSyncer( diff --git a/clients/pkg/promtail/targets/azureeventhubs/target_syncer_test.go b/clients/pkg/promtail/targets/azureeventhubs/target_syncer_test.go index 2113afffd4e2e..1874453cf364b 100644 --- a/clients/pkg/promtail/targets/azureeventhubs/target_syncer_test.go +++ b/clients/pkg/promtail/targets/azureeventhubs/target_syncer_test.go @@ -10,7 +10,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/stretchr/testify/assert" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" ) func Test_validateConfig(t *testing.T) { diff --git a/clients/pkg/promtail/targets/azureeventhubs/targetmanager.go b/clients/pkg/promtail/targets/azureeventhubs/targetmanager.go index 2651233942ba2..48f3a3fe8b8e6 100644 --- a/clients/pkg/promtail/targets/azureeventhubs/targetmanager.go +++ b/clients/pkg/promtail/targets/azureeventhubs/targetmanager.go @@ -6,10 +6,10 @@ import ( "github.com/prometheus/client_golang/prometheus" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/kafka" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/kafka" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) // TargetManager manages a series of kafka targets. 
diff --git a/clients/pkg/promtail/targets/cloudflare/target.go b/clients/pkg/promtail/targets/cloudflare/target.go index 19d1f18758273..bede17bc45327 100644 --- a/clients/pkg/promtail/targets/cloudflare/target.go +++ b/clients/pkg/promtail/targets/cloudflare/target.go @@ -18,12 +18,12 @@ import ( "github.com/prometheus/common/model" "go.uber.org/atomic" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/positions" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) // The minimun window size is 1 minute. diff --git a/clients/pkg/promtail/targets/cloudflare/target_test.go b/clients/pkg/promtail/targets/cloudflare/target_test.go index d275a7e845f10..64cb6c4492e5e 100644 --- a/clients/pkg/promtail/targets/cloudflare/target_test.go +++ b/clients/pkg/promtail/targets/cloudflare/target_test.go @@ -15,9 +15,9 @@ import ( "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" - "github.com/grafana/loki/clients/pkg/promtail/positions" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" ) func Test_CloudflareTarget(t *testing.T) { diff --git a/clients/pkg/promtail/targets/cloudflare/targetmanager.go b/clients/pkg/promtail/targets/cloudflare/targetmanager.go index c60fd6577a5f3..31a05fe0b75d4 100644 --- 
a/clients/pkg/promtail/targets/cloudflare/targetmanager.go +++ b/clients/pkg/promtail/targets/cloudflare/targetmanager.go @@ -3,11 +3,11 @@ package cloudflare import ( "github.com/go-kit/log" - "github.com/grafana/loki/clients/pkg/logentry/stages" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/positions" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) // TargetManager manages a series of cloudflare targets. diff --git a/clients/pkg/promtail/targets/cloudflare/util_test.go b/clients/pkg/promtail/targets/cloudflare/util_test.go index 18efefee5cb55..a702bb90f5ddf 100644 --- a/clients/pkg/promtail/targets/cloudflare/util_test.go +++ b/clients/pkg/promtail/targets/cloudflare/util_test.go @@ -3,6 +3,7 @@ package cloudflare import ( "context" "errors" + "sync" "time" "github.com/grafana/cloudflare-go" @@ -13,10 +14,13 @@ var ErrorLogpullReceived = errors.New("error logpull received") type fakeCloudflareClient struct { mock.Mock + mu sync.Mutex } func (f *fakeCloudflareClient) CallCount() int { var actualCalls int + f.mu.Lock() + defer f.mu.Unlock() for _, call := range f.Calls { if call.Method == "LogpullReceived" { actualCalls++ @@ -59,7 +63,9 @@ func newFakeCloudflareClient() *fakeCloudflareClient { } func (f *fakeCloudflareClient) LogpullReceived(ctx context.Context, start, end time.Time) (cloudflare.LogpullReceivedIterator, error) { + f.mu.Lock() r := f.Called(ctx, start, end) + f.mu.Unlock() if r.Get(0) != nil { it := r.Get(0).(cloudflare.LogpullReceivedIterator) if it.Err() == ErrorLogpullReceived { diff --git 
a/clients/pkg/promtail/targets/docker/target.go b/clients/pkg/promtail/targets/docker/target.go index bb26391ab199b..3ec9d02a022c6 100644 --- a/clients/pkg/promtail/targets/docker/target.go +++ b/clients/pkg/promtail/targets/docker/target.go @@ -10,7 +10,7 @@ import ( "sync" "time" - docker_types "github.com/docker/docker/api/types" + "github.com/docker/docker/api/types/container" "github.com/docker/docker/client" "github.com/docker/docker/pkg/stdcopy" "github.com/go-kit/log" @@ -20,11 +20,11 @@ import ( "github.com/prometheus/prometheus/model/relabel" "go.uber.org/atomic" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/positions" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) type Target struct { @@ -88,7 +88,7 @@ func (t *Target) processLoop(ctx context.Context) { t.wg.Add(1) defer t.wg.Done() - opts := docker_types.ContainerLogsOptions{ + opts := container.LogsOptions{ ShowStdout: true, ShowStderr: true, Follow: true, diff --git a/clients/pkg/promtail/targets/docker/target_group.go b/clients/pkg/promtail/targets/docker/target_group.go index 0b0ea9eef6f56..b9fd8940824d0 100644 --- a/clients/pkg/promtail/targets/docker/target_group.go +++ b/clients/pkg/promtail/targets/docker/target_group.go @@ -15,11 +15,11 @@ import ( "github.com/prometheus/prometheus/discovery/targetgroup" "github.com/prometheus/prometheus/model/relabel" - "github.com/grafana/loki/pkg/util/build" + "github.com/grafana/loki/v3/pkg/util/build" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/positions" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + 
"github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) const DockerSource = "Docker" diff --git a/clients/pkg/promtail/targets/docker/target_test.go b/clients/pkg/promtail/targets/docker/target_test.go index 27a22871e4541..9bb5c9bfacd57 100644 --- a/clients/pkg/promtail/targets/docker/target_test.go +++ b/clients/pkg/promtail/targets/docker/target_test.go @@ -19,8 +19,8 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" - "github.com/grafana/loki/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" ) func Test_DockerTarget(t *testing.T) { diff --git a/clients/pkg/promtail/targets/docker/targetmanager.go b/clients/pkg/promtail/targets/docker/targetmanager.go index a35a2aa79f196..6321705b8f142 100644 --- a/clients/pkg/promtail/targets/docker/targetmanager.go +++ b/clients/pkg/promtail/targets/docker/targetmanager.go @@ -9,13 +9,13 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery" - "github.com/grafana/loki/clients/pkg/logentry/stages" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/positions" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" - "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/v3/pkg/util" ) const ( @@ -44,14 +44,25 @@ func NewTargetManager( 
pushClient api.EntryHandler, scrapeConfigs []scrapeconfig.Config, ) (*TargetManager, error) { + noopRegistry := util.NoopRegistry{} + noopSdMetrics, err := discovery.CreateAndRegisterSDMetrics(noopRegistry) + if err != nil { + return nil, err + } + ctx, cancel := context.WithCancel(context.Background()) tm := &TargetManager{ - metrics: metrics, - logger: logger, - cancel: cancel, - done: make(chan struct{}), - positions: positions, - manager: discovery.NewManager(ctx, log.With(logger, "component", "docker_discovery")), + metrics: metrics, + logger: logger, + cancel: cancel, + done: make(chan struct{}), + positions: positions, + manager: discovery.NewManager( + ctx, + log.With(logger, "component", "docker_discovery"), + noopRegistry, + noopSdMetrics, + ), pushClient: pushClient, groups: make(map[string]*targetGroup), } diff --git a/clients/pkg/promtail/targets/docker/targetmanager_test.go b/clients/pkg/promtail/targets/docker/targetmanager_test.go index 23bca7a923216..224e58d5a8930 100644 --- a/clients/pkg/promtail/targets/docker/targetmanager_test.go +++ b/clients/pkg/promtail/targets/docker/targetmanager_test.go @@ -19,9 +19,9 @@ import ( "github.com/prometheus/prometheus/discovery/moby" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" - "github.com/grafana/loki/clients/pkg/promtail/positions" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" ) func Test_TargetManager(t *testing.T) { diff --git a/clients/pkg/promtail/targets/file/decompresser.go b/clients/pkg/promtail/targets/file/decompresser.go index 3beb35415fb6b..34d2434d8b04e 100644 --- a/clients/pkg/promtail/targets/file/decompresser.go +++ b/clients/pkg/promtail/targets/file/decompresser.go @@ -23,11 +23,11 @@ import ( "golang.org/x/text/encoding/ianaindex" 
"golang.org/x/text/transform" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/positions" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" ) func supportedCompressedFormats() map[string]struct{} { diff --git a/clients/pkg/promtail/targets/file/decompresser_test.go b/clients/pkg/promtail/targets/file/decompresser_test.go index 443f14a4a8443..a575922ec6e5c 100644 --- a/clients/pkg/promtail/targets/file/decompresser_test.go +++ b/clients/pkg/promtail/targets/file/decompresser_test.go @@ -11,9 +11,9 @@ import ( "github.com/stretchr/testify/require" "go.uber.org/atomic" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" ) type noopClient struct { diff --git a/clients/pkg/promtail/targets/file/filetarget.go b/clients/pkg/promtail/targets/file/filetarget.go index 2c52cbead922f..0ade51902b492 100644 --- a/clients/pkg/promtail/targets/file/filetarget.go +++ b/clients/pkg/promtail/targets/file/filetarget.go @@ -4,6 +4,7 @@ import ( "flag" "os" "path/filepath" + "sync" "time" "github.com/bmatcuk/doublestar" @@ -14,10 +15,10 @@ import ( "github.com/pkg/errors" "github.com/prometheus/common/model" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/positions" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + 
"github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) const ( @@ -92,12 +93,14 @@ type FileTarget struct { fileEventWatcher chan fsnotify.Event targetEventHandler chan fileTargetEvent watches map[string]struct{} + watchesMutex sync.Mutex path string pathExclude string quit chan struct{} done chan struct{} - readers map[string]Reader + readers map[string]Reader + readersMutex sync.Mutex targetConfig *Config watchConfig WatchConfig @@ -150,7 +153,7 @@ func NewFileTarget( // Ready if at least one file is being tailed func (t *FileTarget) Ready() bool { - return len(t.readers) > 0 + return t.getReadersLen() > 0 } // Stop the target. @@ -178,17 +181,21 @@ func (t *FileTarget) Labels() model.LabelSet { // Details implements a Target func (t *FileTarget) Details() interface{} { files := map[string]int64{} + t.readersMutex.Lock() for fileName := range t.readers { files[fileName], _ = t.positions.Get(fileName) } + t.readersMutex.Unlock() return files } func (t *FileTarget) run() { defer func() { + t.readersMutex.Lock() for _, v := range t.readers { v.Stop() } + t.readersMutex.Unlock() level.Info(t.logger).Log("msg", "filetarget: watcher closed, tailer stopped, positions saved", "path", t.path) close(t.done) }() @@ -281,15 +288,22 @@ func (t *FileTarget) sync() error { } // Add any directories which are not already being watched. + t.watchesMutex.Lock() toStartWatching := missing(t.watches, dirs) + t.watchesMutex.Unlock() t.startWatching(toStartWatching) // Remove any directories which no longer need watching. + t.watchesMutex.Lock() toStopWatching := missing(dirs, t.watches) + t.watchesMutex.Unlock() + t.stopWatching(toStopWatching) // fsnotify.Watcher doesn't allow us to see what is currently being watched so we have to track it ourselves. 
+ t.watchesMutex.Lock() t.watches = dirs + t.watchesMutex.Unlock() // Check if any running tailers have stopped because of errors and remove them from the running list // (They will be restarted in startTailing) @@ -299,7 +313,9 @@ func (t *FileTarget) sync() error { t.startTailing(matches) // Stop tailing any files which no longer exist + t.readersMutex.Lock() toStopTailing := toStopTailing(matches, t.readers) + t.readersMutex.Unlock() t.stopTailingAndRemovePosition(toStopTailing) return nil @@ -307,9 +323,10 @@ func (t *FileTarget) sync() error { func (t *FileTarget) startWatching(dirs map[string]struct{}) { for dir := range dirs { - if _, ok := t.watches[dir]; ok { + if _, ok := t.getWatch(dir); ok { continue } + level.Info(t.logger).Log("msg", "watching new directory", "directory", dir) t.targetEventHandler <- fileTargetEvent{ path: dir, @@ -320,9 +337,10 @@ func (t *FileTarget) startWatching(dirs map[string]struct{}) { func (t *FileTarget) stopWatching(dirs map[string]struct{}) { for dir := range dirs { - if _, ok := t.watches[dir]; !ok { + if _, ok := t.getWatch(dir); !ok { continue } + level.Info(t.logger).Log("msg", "removing directory from watcher", "directory", dir) t.targetEventHandler <- fileTargetEvent{ path: dir, @@ -333,7 +351,7 @@ func (t *FileTarget) stopWatching(dirs map[string]struct{}) { func (t *FileTarget) startTailing(ps []string) { for _, p := range ps { - if _, ok := t.readers[p]; ok { + if _, ok := t.getReader(p); ok { continue } @@ -387,7 +405,7 @@ func (t *FileTarget) startTailing(ps []string) { } reader = tailer } - t.readers[p] = reader + t.setReader(p, reader) } } @@ -395,10 +413,10 @@ func (t *FileTarget) startTailing(ps []string) { // Call this when a file no longer exists and you want to remove all traces of it. 
func (t *FileTarget) stopTailingAndRemovePosition(ps []string) { for _, p := range ps { - if reader, ok := t.readers[p]; ok { + if reader, ok := t.getReader(p); ok { reader.Stop() t.positions.Remove(reader.Path()) - delete(t.readers, p) + t.removeReader(p) } } } @@ -406,6 +424,7 @@ func (t *FileTarget) stopTailingAndRemovePosition(ps []string) { // pruneStoppedTailers removes any tailers which have stopped running from // the list of active tailers. This allows them to be restarted if there were errors. func (t *FileTarget) pruneStoppedTailers() { + t.readersMutex.Lock() toRemove := make([]string, 0, len(t.readers)) for k, t := range t.readers { if !t.IsRunning() { @@ -415,6 +434,45 @@ func (t *FileTarget) pruneStoppedTailers() { for _, tr := range toRemove { delete(t.readers, tr) } + t.readersMutex.Unlock() +} + +func (t *FileTarget) getReadersLen() int { + t.readersMutex.Lock() + defer t.readersMutex.Unlock() + return len(t.readers) +} + +func (t *FileTarget) getReader(val string) (Reader, bool) { + t.readersMutex.Lock() + defer t.readersMutex.Unlock() + reader, ok := t.readers[val] + return reader, ok +} + +func (t *FileTarget) setReader(val string, reader Reader) { + t.readersMutex.Lock() + defer t.readersMutex.Unlock() + t.readers[val] = reader +} + +func (t *FileTarget) getWatch(val string) (struct{}, bool) { + t.watchesMutex.Lock() + defer t.watchesMutex.Unlock() + fileTarget, ok := t.watches[val] + return fileTarget, ok +} + +func (t *FileTarget) removeReader(val string) { + t.readersMutex.Lock() + defer t.readersMutex.Unlock() + delete(t.readers, val) +} + +func (t *FileTarget) getWatchesLen() int { + t.watchesMutex.Lock() + defer t.watchesMutex.Unlock() + return len(t.watches) } func toStopTailing(nt []string, et map[string]Reader) []string { @@ -442,7 +500,7 @@ func toStopTailing(nt []string, et map[string]Reader) []string { func (t *FileTarget) reportSize(ms []string) { for _, m := range ms { // Ask the tailer to update the size if a tailer exists, this 
keeps position and size metrics in sync - if reader, ok := t.readers[m]; ok { + if reader, ok := t.getReader(m); ok { err := reader.MarkPositionAndSize() if err != nil { level.Warn(t.logger).Log("msg", "failed to get file size from tailer, ", "file", m, "error", err) @@ -459,7 +517,6 @@ func (t *FileTarget) reportSize(ms []string) { } t.metrics.totalBytes.WithLabelValues(m).Set(float64(fi.Size())) } - } } diff --git a/clients/pkg/promtail/targets/file/filetarget_test.go b/clients/pkg/promtail/targets/file/filetarget_test.go index f3cde7bf819a4..579ea19e2e56e 100644 --- a/clients/pkg/promtail/targets/file/filetarget_test.go +++ b/clients/pkg/promtail/targets/file/filetarget_test.go @@ -20,8 +20,8 @@ import ( "github.com/stretchr/testify/require" "go.uber.org/atomic" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" - "github.com/grafana/loki/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" ) func TestFileTargetSync(t *testing.T) { @@ -76,10 +76,10 @@ func TestFileTargetSync(t *testing.T) { assert.NoError(t, err) // Start with nothing watched. 
- if len(target.watches) != 0 { + if target.getWatchesLen() != 0 { t.Fatal("Expected watches to be 0 at this point in the test...") } - if len(target.readers) != 0 { + if target.getReadersLen() != 0 { t.Fatal("Expected tails to be 0 at this point in the test...") } @@ -90,10 +90,10 @@ func TestFileTargetSync(t *testing.T) { err = target.sync() assert.NoError(t, err) - if len(target.watches) != 0 { + if target.getWatchesLen() != 0 { t.Fatal("Expected watches to be 0 at this point in the test...") } - if len(target.readers) != 0 { + if target.getReadersLen() != 0 { t.Fatal("Expected tails to be 0 at this point in the test...") } @@ -106,10 +106,10 @@ func TestFileTargetSync(t *testing.T) { err = target.sync() assert.NoError(t, err) - assert.Equal(t, 1, len(target.watches), + assert.Equal(t, 1, target.getWatchesLen(), "Expected watches to be 1 at this point in the test...", ) - assert.Equal(t, 1, len(target.readers), + assert.Equal(t, 1, target.getReadersLen(), "Expected tails to be 1 at this point in the test...", ) @@ -124,10 +124,10 @@ func TestFileTargetSync(t *testing.T) { err = target.sync() assert.NoError(t, err) - assert.Equal(t, 1, len(target.watches), + assert.Equal(t, 1, target.getWatchesLen(), "Expected watches to be 1 at this point in the test...", ) - assert.Equal(t, 2, len(target.readers), + assert.Equal(t, 2, target.getReadersLen(), "Expected tails to be 2 at this point in the test...", ) @@ -138,10 +138,10 @@ func TestFileTargetSync(t *testing.T) { err = target.sync() assert.NoError(t, err) - assert.Equal(t, 1, len(target.watches), + assert.Equal(t, 1, target.getWatchesLen(), "Expected watches to be 1 at this point in the test...", ) - assert.Equal(t, 1, len(target.readers), + assert.Equal(t, 1, target.getReadersLen(), "Expected tails to be 1 at this point in the test...", ) @@ -152,10 +152,10 @@ func TestFileTargetSync(t *testing.T) { err = target.sync() assert.NoError(t, err) - assert.Equal(t, 0, len(target.watches), + assert.Equal(t, 0, 
target.getWatchesLen(), "Expected watches to be 0 at this point in the test...", ) - assert.Equal(t, 0, len(target.readers), + assert.Equal(t, 0, target.getReadersLen(), "Expected tails to be 0 at this point in the test...", ) requireEventually(t, func() bool { @@ -198,7 +198,7 @@ func TestFileTarget_StopsTailersCleanly(t *testing.T) { assert.NoError(t, err) requireEventually(t, func() bool { - return len(target.readers) == 1 + return target.getReadersLen() == 1 }, "expected 1 tailer to be created") require.NoError(t, testutil.GatherAndCompare(registry, bytes.NewBufferString(` @@ -208,12 +208,19 @@ func TestFileTarget_StopsTailersCleanly(t *testing.T) { `), "promtail_files_active_total")) // Inject an error to tailer - initailTailer := target.readers[logFile].(*tailer) + + initialReader, _ := target.getReader(logFile) + initailTailer := initialReader.(*tailer) _ = initailTailer.tail.Tomb.Killf("test: network file systems can be unreliable") // Tailer will be replaced by a new one requireEventually(t, func() bool { - return len(target.readers) == 1 && target.readers[logFile].(*tailer) != initailTailer + currentReader, _ := target.getReader(logFile) + var currentTailer *tailer + if currentReader != nil { + currentTailer = currentReader.(*tailer) + } + return target.getReadersLen() == 1 && currentTailer != initailTailer }, "expected dead tailer to be replaced by a new one") // The old tailer should be stopped: @@ -389,10 +396,10 @@ func TestFileTargetPathExclusion(t *testing.T) { assert.NoError(t, err) // Start with nothing watched. 
- if len(target.watches) != 0 { + if target.getWatchesLen() != 0 { t.Fatal("Expected watches to be 0 at this point in the test...") } - if len(target.readers) != 0 { + if target.getReadersLen() != 0 { t.Fatal("Expected tails to be 0 at this point in the test...") } @@ -407,10 +414,10 @@ func TestFileTargetPathExclusion(t *testing.T) { err = target.sync() assert.NoError(t, err) - if len(target.watches) != 0 { + if target.getWatchesLen() != 0 { t.Fatal("Expected watches to be 0 at this point in the test...") } - if len(target.readers) != 0 { + if target.getReadersLen() != 0 { t.Fatal("Expected tails to be 0 at this point in the test...") } @@ -425,10 +432,10 @@ func TestFileTargetPathExclusion(t *testing.T) { err = target.sync() assert.NoError(t, err) - assert.Equal(t, 2, len(target.watches), + assert.Equal(t, 2, target.getWatchesLen(), "Expected watches to be 2 at this point in the test...", ) - assert.Equal(t, 3, len(target.readers), + assert.Equal(t, 3, target.getReadersLen(), "Expected tails to be 3 at this point in the test...", ) requireEventually(t, func() bool { @@ -446,10 +453,10 @@ func TestFileTargetPathExclusion(t *testing.T) { err = target.sync() assert.NoError(t, err) - assert.Equal(t, 1, len(target.watches), + assert.Equal(t, 1, target.getWatchesLen(), "Expected watches to be 1 at this point in the test...", ) - assert.Equal(t, 1, len(target.readers), + assert.Equal(t, 1, target.getReadersLen(), "Expected tails to be 1 at this point in the test...", ) requireEventually(t, func() bool { @@ -538,7 +545,7 @@ func TestHandleFileCreationEvent(t *testing.T) { Op: fsnotify.Create, } requireEventually(t, func() bool { - return len(target.readers) == 1 + return target.getReadersLen() == 1 }, "Expected tails to be 1 at this point in the test...") } diff --git a/clients/pkg/promtail/targets/file/filetargetmanager.go b/clients/pkg/promtail/targets/file/filetargetmanager.go index 15f28691741b4..a02d0295d2bda 100644 --- 
a/clients/pkg/promtail/targets/file/filetargetmanager.go +++ b/clients/pkg/promtail/targets/file/filetargetmanager.go @@ -20,13 +20,13 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" - "github.com/grafana/loki/clients/pkg/logentry/stages" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/positions" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" - "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/v3/pkg/util" ) const ( @@ -65,6 +65,12 @@ func NewFileTargetManager( reg = prometheus.DefaultRegisterer } + noopRegistry := util.NoopRegistry{} + noopSdMetrics, err := discovery.CreateAndRegisterSDMetrics(noopRegistry) + if err != nil { + return nil, err + } + watcher, err := fsnotify.NewWatcher() if err != nil { return nil, err @@ -76,7 +82,12 @@ func NewFileTargetManager( watcher: watcher, targetEventHandler: make(chan fileTargetEvent), syncers: map[string]*targetSyncer{}, - manager: discovery.NewManager(ctx, log.With(logger, "component", "discovery")), + manager: discovery.NewManager( + ctx, + log.With(logger, "component", "discovery"), + noopRegistry, + noopSdMetrics, + ), } hostname, err := hostname() diff --git a/clients/pkg/promtail/targets/file/filetargetmanager_test.go b/clients/pkg/promtail/targets/file/filetargetmanager_test.go index 62e41635f1111..d27cd43106fe2 100644 --- a/clients/pkg/promtail/targets/file/filetargetmanager_test.go +++ b/clients/pkg/promtail/targets/file/filetargetmanager_test.go @@ -16,10 +16,10 @@ import ( 
"github.com/prometheus/prometheus/discovery" "github.com/prometheus/prometheus/discovery/targetgroup" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" - "github.com/grafana/loki/clients/pkg/promtail/positions" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" ) func newTestLogDirectories(t *testing.T) string { diff --git a/clients/pkg/promtail/targets/file/tailer.go b/clients/pkg/promtail/targets/file/tailer.go index 387978b6a7707..1e72e35306490 100644 --- a/clients/pkg/promtail/targets/file/tailer.go +++ b/clients/pkg/promtail/targets/file/tailer.go @@ -17,11 +17,11 @@ import ( "golang.org/x/text/encoding/ianaindex" "golang.org/x/text/transform" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" - "github.com/grafana/loki/pkg/logproto" - "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/v3/pkg/logproto" + "github.com/grafana/loki/v3/pkg/util" ) type tailer struct { diff --git a/clients/pkg/promtail/targets/gcplog/formatter.go b/clients/pkg/promtail/targets/gcplog/formatter.go index 9c175a7750f42..9435ec4775958 100644 --- a/clients/pkg/promtail/targets/gcplog/formatter.go +++ b/clients/pkg/promtail/targets/gcplog/formatter.go @@ -10,9 +10,9 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) // GCPLogEntry that 
will be written to the pubsub topic. diff --git a/clients/pkg/promtail/targets/gcplog/formatter_test.go b/clients/pkg/promtail/targets/gcplog/formatter_test.go index f70fa1d79d122..d5703890197d2 100644 --- a/clients/pkg/promtail/targets/gcplog/formatter_test.go +++ b/clients/pkg/promtail/targets/gcplog/formatter_test.go @@ -10,9 +10,9 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) func TestFormat(t *testing.T) { diff --git a/clients/pkg/promtail/targets/gcplog/pull_target.go b/clients/pkg/promtail/targets/gcplog/pull_target.go index 38db550bdf730..671b160c6f4ca 100644 --- a/clients/pkg/promtail/targets/gcplog/pull_target.go +++ b/clients/pkg/promtail/targets/gcplog/pull_target.go @@ -14,9 +14,9 @@ import ( "github.com/prometheus/prometheus/model/relabel" "google.golang.org/api/option" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) var defaultBackoff = backoff.Config{ diff --git a/clients/pkg/promtail/targets/gcplog/pull_target_test.go b/clients/pkg/promtail/targets/gcplog/pull_target_test.go index e81ee20a6938c..81a16e6872bdd 100644 --- a/clients/pkg/promtail/targets/gcplog/pull_target_test.go +++ b/clients/pkg/promtail/targets/gcplog/pull_target_test.go @@ -16,9 +16,9 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - 
"github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) func TestPullTarget_RunStop(t *testing.T) { diff --git a/clients/pkg/promtail/targets/gcplog/push_target.go b/clients/pkg/promtail/targets/gcplog/push_target.go index 95260fb914a8a..4d0a2d2b8407c 100644 --- a/clients/pkg/promtail/targets/gcplog/push_target.go +++ b/clients/pkg/promtail/targets/gcplog/push_target.go @@ -13,12 +13,12 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/relabel" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/serverutils" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/serverutils" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) type pushTarget struct { diff --git a/clients/pkg/promtail/targets/gcplog/push_target_test.go b/clients/pkg/promtail/targets/gcplog/push_target_test.go index 50075820fa497..d6e9ce3f75e0a 100644 --- a/clients/pkg/promtail/targets/gcplog/push_target_test.go +++ b/clients/pkg/promtail/targets/gcplog/push_target_test.go @@ -9,7 +9,7 @@ import ( "testing" "time" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" @@ -17,10 +17,10 @@ import ( "github.com/prometheus/prometheus/model/relabel" "github.com/stretchr/testify/require" - lokiClient 
"github.com/grafana/loki/clients/pkg/promtail/client" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/gcplog" + lokiClient "github.com/grafana/loki/v3/clients/pkg/promtail/client" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/gcplog" ) const localhost = "127.0.0.1" diff --git a/clients/pkg/promtail/targets/gcplog/push_translation.go b/clients/pkg/promtail/targets/gcplog/push_translation.go index f96f7171f2185..aae5ee4fb25d6 100644 --- a/clients/pkg/promtail/targets/gcplog/push_translation.go +++ b/clients/pkg/promtail/targets/gcplog/push_translation.go @@ -9,10 +9,10 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" - "github.com/grafana/loki/clients/pkg/promtail/api" - lokiClient "github.com/grafana/loki/clients/pkg/promtail/client" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + lokiClient "github.com/grafana/loki/v3/clients/pkg/promtail/client" - "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/v3/pkg/util" ) // PushMessage is the POST body format sent by GCP PubSub push subscriptions. 
diff --git a/clients/pkg/promtail/targets/gcplog/target.go b/clients/pkg/promtail/targets/gcplog/target.go index b122fb24112f7..1c60e8a3eb2ca 100644 --- a/clients/pkg/promtail/targets/gcplog/target.go +++ b/clients/pkg/promtail/targets/gcplog/target.go @@ -7,9 +7,9 @@ import ( "github.com/prometheus/prometheus/model/relabel" "google.golang.org/api/option" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) // Target is a common interface implemented by both GCPLog targets. diff --git a/clients/pkg/promtail/targets/gcplog/target_test.go b/clients/pkg/promtail/targets/gcplog/target_test.go index 1a7cec47131f6..96bf7606985ed 100644 --- a/clients/pkg/promtail/targets/gcplog/target_test.go +++ b/clients/pkg/promtail/targets/gcplog/target_test.go @@ -17,9 +17,9 @@ import ( "github.com/grafana/dskit/server" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" ) func TestNewGCPLogTarget(t *testing.T) { diff --git a/clients/pkg/promtail/targets/gcplog/targetmanager.go b/clients/pkg/promtail/targets/gcplog/targetmanager.go index 71f3b5130a2fc..cf731c6c9f995 100644 --- a/clients/pkg/promtail/targets/gcplog/targetmanager.go +++ b/clients/pkg/promtail/targets/gcplog/targetmanager.go @@ -6,10 +6,10 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/grafana/loki/clients/pkg/logentry/stages" - 
"github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) // nolint:revive diff --git a/clients/pkg/promtail/targets/gelf/gelftarget.go b/clients/pkg/promtail/targets/gelf/gelftarget.go index baaf8e3911fd9..42298b7588cca 100644 --- a/clients/pkg/promtail/targets/gelf/gelftarget.go +++ b/clients/pkg/promtail/targets/gelf/gelftarget.go @@ -14,11 +14,11 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) // SeverityLevels maps severity levels to severity string levels. 
diff --git a/clients/pkg/promtail/targets/gelf/gelftarget_test.go b/clients/pkg/promtail/targets/gelf/gelftarget_test.go index 86a304ef9a7a0..8bdc5470e28b5 100644 --- a/clients/pkg/promtail/targets/gelf/gelftarget_test.go +++ b/clients/pkg/promtail/targets/gelf/gelftarget_test.go @@ -15,8 +15,8 @@ import ( "github.com/prometheus/prometheus/model/relabel" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" ) func Test_Gelf(t *testing.T) { diff --git a/clients/pkg/promtail/targets/gelf/gelftargetmanager.go b/clients/pkg/promtail/targets/gelf/gelftargetmanager.go index f9824d3152f07..f6b7048287ce0 100644 --- a/clients/pkg/promtail/targets/gelf/gelftargetmanager.go +++ b/clients/pkg/promtail/targets/gelf/gelftargetmanager.go @@ -4,10 +4,10 @@ import ( "github.com/go-kit/log" "github.com/prometheus/client_golang/prometheus" - "github.com/grafana/loki/clients/pkg/logentry/stages" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) // TargetManager manages a series of Gelf Targets. 
diff --git a/clients/pkg/promtail/targets/heroku/target.go b/clients/pkg/promtail/targets/heroku/target.go index 9ab2fdaacfac4..83aceda6b7921 100644 --- a/clients/pkg/promtail/targets/heroku/target.go +++ b/clients/pkg/promtail/targets/heroku/target.go @@ -14,14 +14,14 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" - "github.com/grafana/loki/clients/pkg/promtail/api" - lokiClient "github.com/grafana/loki/clients/pkg/promtail/client" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/serverutils" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" - - "github.com/grafana/loki/pkg/logproto" - util_log "github.com/grafana/loki/pkg/util/log" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + lokiClient "github.com/grafana/loki/v3/clients/pkg/promtail/client" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/serverutils" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" + + "github.com/grafana/loki/v3/pkg/logproto" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) type Target struct { diff --git a/clients/pkg/promtail/targets/heroku/target_test.go b/clients/pkg/promtail/targets/heroku/target_test.go index c584bedba471d..42657d83ff1b4 100644 --- a/clients/pkg/promtail/targets/heroku/target_test.go +++ b/clients/pkg/promtail/targets/heroku/target_test.go @@ -19,9 +19,9 @@ import ( "github.com/prometheus/prometheus/model/relabel" "github.com/stretchr/testify/require" - lokiClient "github.com/grafana/loki/clients/pkg/promtail/client" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" + lokiClient "github.com/grafana/loki/v3/clients/pkg/promtail/client" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" + 
"github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" ) const localhost = "127.0.0.1" diff --git a/clients/pkg/promtail/targets/heroku/targetmanager.go b/clients/pkg/promtail/targets/heroku/targetmanager.go index 5d046ca3d45cf..3ad94bc5a79f0 100644 --- a/clients/pkg/promtail/targets/heroku/targetmanager.go +++ b/clients/pkg/promtail/targets/heroku/targetmanager.go @@ -5,10 +5,10 @@ import ( "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" - "github.com/grafana/loki/clients/pkg/logentry/stages" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) type TargetManager struct { diff --git a/clients/pkg/promtail/targets/journal/journaltarget.go b/clients/pkg/promtail/targets/journal/journaltarget.go index 6630b827e7c14..fa04ac50c3db4 100644 --- a/clients/pkg/promtail/targets/journal/journaltarget.go +++ b/clients/pkg/promtail/targets/journal/journaltarget.go @@ -19,12 +19,12 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/positions" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) const ( diff --git 
a/clients/pkg/promtail/targets/journal/journaltarget_test.go b/clients/pkg/promtail/targets/journal/journaltarget_test.go index b0186d1504f47..768cb7f5c1510 100644 --- a/clients/pkg/promtail/targets/journal/journaltarget_test.go +++ b/clients/pkg/promtail/targets/journal/journaltarget_test.go @@ -20,10 +20,10 @@ import ( "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" - "github.com/grafana/loki/clients/pkg/promtail/positions" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/testutils" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/testutils" ) type mockJournalReader struct { diff --git a/clients/pkg/promtail/targets/journal/journaltargetmanager.go b/clients/pkg/promtail/targets/journal/journaltargetmanager.go index 9e55c37d9268c..f0d99f091db17 100644 --- a/clients/pkg/promtail/targets/journal/journaltargetmanager.go +++ b/clients/pkg/promtail/targets/journal/journaltargetmanager.go @@ -7,10 +7,10 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/positions" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) // JournalTargetManager manages a series of JournalTargets. 
diff --git a/clients/pkg/promtail/targets/journal/journaltargetmanager_linux.go b/clients/pkg/promtail/targets/journal/journaltargetmanager_linux.go index 6940d57ead5d7..52d216e58a090 100644 --- a/clients/pkg/promtail/targets/journal/journaltargetmanager_linux.go +++ b/clients/pkg/promtail/targets/journal/journaltargetmanager_linux.go @@ -7,11 +7,11 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/grafana/loki/clients/pkg/logentry/stages" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/positions" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) // JournalTargetManager manages a series of JournalTargets. 
diff --git a/clients/pkg/promtail/targets/kafka/consumer.go b/clients/pkg/promtail/targets/kafka/consumer.go index 34cb61da00e7e..f4b8a4d260cf2 100644 --- a/clients/pkg/promtail/targets/kafka/consumer.go +++ b/clients/pkg/promtail/targets/kafka/consumer.go @@ -11,7 +11,7 @@ import ( "github.com/go-kit/log/level" "github.com/grafana/dskit/backoff" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) var defaultBackOff = backoff.Config{ diff --git a/clients/pkg/promtail/targets/kafka/consumer_test.go b/clients/pkg/promtail/targets/kafka/consumer_test.go index 63ab60b1a64f5..a4d87e7c3c71e 100644 --- a/clients/pkg/promtail/targets/kafka/consumer_test.go +++ b/clients/pkg/promtail/targets/kafka/consumer_test.go @@ -3,6 +3,7 @@ package kafka import ( "context" "errors" + "sync" "testing" "time" @@ -11,7 +12,7 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) type DiscovererFn func(sarama.ConsumerGroupSession, sarama.ConsumerGroupClaim) (RunnableTarget, error) @@ -34,7 +35,7 @@ func (f *fakeTarget) Details() interface{} { return nil } func Test_ComsumerConsume(t *testing.T) { var ( - group = &testConsumerGroupHandler{} + group = &testConsumerGroupHandler{mu: &sync.Mutex{}} session = &testSession{} ctx, cancel = context.WithCancel(context.Background()) c = &consumer{ @@ -86,6 +87,7 @@ func Test_ComsumerConsume(t *testing.T) { func Test_ComsumerRetry(_ *testing.T) { var ( group = &testConsumerGroupHandler{ + mu: &sync.Mutex{}, returnErr: errors.New("foo"), } ctx, cancel = context.WithCancel(context.Background()) diff --git a/clients/pkg/promtail/targets/kafka/formatter.go b/clients/pkg/promtail/targets/kafka/formatter.go index b0f61e4332e3b..f800dbe20b9dd 100644 --- a/clients/pkg/promtail/targets/kafka/formatter.go +++ 
b/clients/pkg/promtail/targets/kafka/formatter.go @@ -7,7 +7,7 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" - "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/v3/pkg/util" ) func format(lbs labels.Labels, cfg []*relabel.Config) model.LabelSet { diff --git a/clients/pkg/promtail/targets/kafka/parser.go b/clients/pkg/promtail/targets/kafka/parser.go index 872ea0e45bc05..9ad3b7f8271c0 100644 --- a/clients/pkg/promtail/targets/kafka/parser.go +++ b/clients/pkg/promtail/targets/kafka/parser.go @@ -5,9 +5,9 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/relabel" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) // messageParser implements MessageParser. It doesn't modify the content of the original `message.Value`. diff --git a/clients/pkg/promtail/targets/kafka/target.go b/clients/pkg/promtail/targets/kafka/target.go index 519af759b66c7..707cc01ca1947 100644 --- a/clients/pkg/promtail/targets/kafka/target.go +++ b/clients/pkg/promtail/targets/kafka/target.go @@ -11,8 +11,8 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) type runnableDroppedTarget struct { diff --git a/clients/pkg/promtail/targets/kafka/target_syncer.go b/clients/pkg/promtail/targets/kafka/target_syncer.go index 187f4749ce2df..6afcd24ad7832 100644 --- a/clients/pkg/promtail/targets/kafka/target_syncer.go +++ b/clients/pkg/promtail/targets/kafka/target_syncer.go @@ -16,12 +16,12 @@ import ( "github.com/prometheus/prometheus/model/labels" 
"github.com/prometheus/prometheus/model/relabel" - "github.com/grafana/loki/clients/pkg/logentry/stages" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" - "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/v3/pkg/util" ) var TopicPollInterval = 30 * time.Second diff --git a/clients/pkg/promtail/targets/kafka/target_syncer_test.go b/clients/pkg/promtail/targets/kafka/target_syncer_test.go index cc1161c63dcb8..6514afeefcb01 100644 --- a/clients/pkg/promtail/targets/kafka/target_syncer_test.go +++ b/clients/pkg/promtail/targets/kafka/target_syncer_test.go @@ -4,6 +4,7 @@ import ( "context" "fmt" "reflect" + "sync" "testing" "time" @@ -17,14 +18,14 @@ import ( "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/logentry/stages" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" ) func Test_TopicDiscovery(t *testing.T) { ctx, cancel := context.WithCancel(context.Background()) - group := &testConsumerGroupHandler{} + group := &testConsumerGroupHandler{mu: &sync.Mutex{}} TopicPollInterval = time.Microsecond var closed bool client := &mockKafkaClient{ @@ -52,21 +53,28 @@ func Test_TopicDiscovery(t *testing.T) { } ts.loop() + tmpTopics := []string{} require.Eventually(t, func() bool { if !group.consuming.Load() { return false } + group.mu.Lock() + defer group.mu.Unlock() + 
tmpTopics = group.topics return reflect.DeepEqual([]string{"topic1"}, group.topics) - }, 200*time.Millisecond, time.Millisecond, "expected topics: %v, got: %v", []string{"topic1"}, group.topics) + }, 200*time.Millisecond, time.Millisecond, "expected topics: %v, got: %v", []string{"topic1"}, tmpTopics) + client.mu.Lock() client.topics = []string{"topic1", "topic2"} // introduce new topics + client.mu.Unlock() require.Eventually(t, func() bool { if !group.consuming.Load() { return false } + tmpTopics = group.topics return reflect.DeepEqual([]string{"topic1", "topic2"}, group.topics) - }, 200*time.Millisecond, time.Millisecond, "expected topics: %v, got: %v", []string{"topic1", "topic2"}, group.topics) + }, 200*time.Millisecond, time.Millisecond, "expected topics: %v, got: %v", []string{"topic1", "topic2"}, tmpTopics) require.NoError(t, ts.Stop()) require.True(t, closed) diff --git a/clients/pkg/promtail/targets/kafka/target_test.go b/clients/pkg/promtail/targets/kafka/target_test.go index a62488b04a7cb..3ffe4ac69f16b 100644 --- a/clients/pkg/promtail/targets/kafka/target_test.go +++ b/clients/pkg/promtail/targets/kafka/target_test.go @@ -13,7 +13,7 @@ import ( "github.com/stretchr/testify/require" "go.uber.org/atomic" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" ) // Consumergroup handler @@ -21,6 +21,7 @@ type testConsumerGroupHandler struct { handler sarama.ConsumerGroupHandler ctx context.Context topics []string + mu *sync.Mutex returnErr error @@ -32,7 +33,9 @@ func (c *testConsumerGroupHandler) Consume(ctx context.Context, topics []string, return c.returnErr } c.ctx = ctx + c.mu.Lock() c.topics = topics + c.mu.Unlock() c.handler = handler c.consuming.Store(true) <-ctx.Done() diff --git a/clients/pkg/promtail/targets/kafka/targetmanager.go b/clients/pkg/promtail/targets/kafka/targetmanager.go index f16606aefda75..c9cc382503704 100644 --- 
a/clients/pkg/promtail/targets/kafka/targetmanager.go +++ b/clients/pkg/promtail/targets/kafka/targetmanager.go @@ -5,9 +5,9 @@ import ( "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) // TargetManager manages a series of kafka targets. diff --git a/clients/pkg/promtail/targets/kafka/topics_test.go b/clients/pkg/promtail/targets/kafka/topics_test.go index e24d8fd1eb604..447a8a0a65afc 100644 --- a/clients/pkg/promtail/targets/kafka/topics_test.go +++ b/clients/pkg/promtail/targets/kafka/topics_test.go @@ -3,12 +3,14 @@ package kafka import ( "errors" "strings" + "sync" "testing" "github.com/stretchr/testify/require" ) type mockKafkaClient struct { + mu sync.Mutex topics []string err error } @@ -18,6 +20,8 @@ func (m *mockKafkaClient) RefreshMetadata(_ ...string) error { } func (m *mockKafkaClient) Topics() ([]string, error) { + m.mu.Lock() + defer m.mu.Unlock() return m.topics, m.err } diff --git a/clients/pkg/promtail/targets/lokipush/pushtarget.go b/clients/pkg/promtail/targets/lokipush/pushtarget.go index 88c7859bd36e5..63630c6e5ac2d 100644 --- a/clients/pkg/promtail/targets/lokipush/pushtarget.go +++ b/clients/pkg/promtail/targets/lokipush/pushtarget.go @@ -20,14 +20,14 @@ import ( "github.com/grafana/dskit/tenant" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/serverutils" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" - - "github.com/grafana/loki/pkg/loghttp/push" - "github.com/grafana/loki/pkg/logproto" - util_log 
"github.com/grafana/loki/pkg/util/log" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/serverutils" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" + + "github.com/grafana/loki/v3/pkg/loghttp/push" + "github.com/grafana/loki/v3/pkg/logproto" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) type PushTarget struct { diff --git a/clients/pkg/promtail/targets/lokipush/pushtarget_test.go b/clients/pkg/promtail/targets/lokipush/pushtarget_test.go index 147994fb2df1a..3fe48b599a5e4 100644 --- a/clients/pkg/promtail/targets/lokipush/pushtarget_test.go +++ b/clients/pkg/promtail/targets/lokipush/pushtarget_test.go @@ -20,12 +20,12 @@ import ( "github.com/prometheus/prometheus/model/relabel" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/client" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/client" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) const localhost = "127.0.0.1" diff --git a/clients/pkg/promtail/targets/lokipush/pushtargetmanager.go b/clients/pkg/promtail/targets/lokipush/pushtargetmanager.go index be29037544726..e924647c2c073 100644 --- a/clients/pkg/promtail/targets/lokipush/pushtargetmanager.go +++ b/clients/pkg/promtail/targets/lokipush/pushtargetmanager.go @@ -9,10 +9,10 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/prometheus/util/strutil" - "github.com/grafana/loki/clients/pkg/logentry/stages" - 
"github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) // PushTargetManager manages a series of PushTargets. diff --git a/clients/pkg/promtail/targets/lokipush/pushtargetmanager_test.go b/clients/pkg/promtail/targets/lokipush/pushtargetmanager_test.go index 40621d18f5ba0..08730042bf841 100644 --- a/clients/pkg/promtail/targets/lokipush/pushtargetmanager_test.go +++ b/clients/pkg/promtail/targets/lokipush/pushtargetmanager_test.go @@ -5,7 +5,7 @@ import ( "github.com/grafana/dskit/server" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" ) func Test_validateJobName(t *testing.T) { diff --git a/clients/pkg/promtail/targets/manager.go b/clients/pkg/promtail/targets/manager.go index 2e7801bee43a8..241dd25aaa5cc 100644 --- a/clients/pkg/promtail/targets/manager.go +++ b/clients/pkg/promtail/targets/manager.go @@ -8,23 +8,23 @@ import ( "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/positions" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/azureeventhubs" - "github.com/grafana/loki/clients/pkg/promtail/targets/cloudflare" - "github.com/grafana/loki/clients/pkg/promtail/targets/docker" - "github.com/grafana/loki/clients/pkg/promtail/targets/file" - "github.com/grafana/loki/clients/pkg/promtail/targets/gcplog" - "github.com/grafana/loki/clients/pkg/promtail/targets/gelf" - "github.com/grafana/loki/clients/pkg/promtail/targets/heroku" - 
"github.com/grafana/loki/clients/pkg/promtail/targets/journal" - "github.com/grafana/loki/clients/pkg/promtail/targets/kafka" - "github.com/grafana/loki/clients/pkg/promtail/targets/lokipush" - "github.com/grafana/loki/clients/pkg/promtail/targets/stdin" - "github.com/grafana/loki/clients/pkg/promtail/targets/syslog" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" - "github.com/grafana/loki/clients/pkg/promtail/targets/windows" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/positions" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/azureeventhubs" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/cloudflare" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/docker" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/file" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/gcplog" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/gelf" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/heroku" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/journal" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/kafka" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/lokipush" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/stdin" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/syslog" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/windows" ) const ( @@ -37,7 +37,6 @@ const ( KafkaConfigs = "kafkaConfigs" GelfConfigs = "gelfConfigs" CloudflareConfigs = "cloudflareConfigs" - DockerConfigs = "dockerConfigs" DockerSDConfigs = "dockerSDConfigs" HerokuDrainConfigs = "herokuDrainConfigs" AzureEventHubsScrapeConfigs = "azureeventhubsScrapeConfigs" @@ -150,7 +149,7 @@ func NewTargetManagers( if len(targetScrapeConfigs[CloudflareConfigs]) > 0 && 
cloudflareMetrics == nil { cloudflareMetrics = cloudflare.NewMetrics(reg) } - if (len(targetScrapeConfigs[DockerConfigs]) > 0 || len(targetScrapeConfigs[DockerSDConfigs]) > 0) && dockerMetrics == nil { + if (len(targetScrapeConfigs[DockerSDConfigs]) > 0) && dockerMetrics == nil { dockerMetrics = docker.NewMetrics(reg) } if len(targetScrapeConfigs[JournalScrapeConfigs]) > 0 && journalMetrics == nil { @@ -269,16 +268,6 @@ func NewTargetManagers( return nil, errors.Wrap(err, "failed to make cloudflare target manager") } targetManagers = append(targetManagers, cfTargetManager) - case DockerConfigs: - pos, err := getPositionFile() - if err != nil { - return nil, err - } - cfTargetManager, err := docker.NewTargetManager(dockerMetrics, logger, pos, client, scrapeConfigs) - if err != nil { - return nil, errors.Wrap(err, "failed to make Docker target manager") - } - targetManagers = append(targetManagers, cfTargetManager) case DockerSDConfigs: pos, err := getPositionFile() if err != nil { diff --git a/clients/pkg/promtail/targets/stdin/stdin_target_manager.go b/clients/pkg/promtail/targets/stdin/stdin_target_manager.go index 065d6bd93feb0..bcc441950e3a9 100644 --- a/clients/pkg/promtail/targets/stdin/stdin_target_manager.go +++ b/clients/pkg/promtail/targets/stdin/stdin_target_manager.go @@ -15,12 +15,12 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/prometheus/discovery/targetgroup" - "github.com/grafana/loki/clients/pkg/logentry/stages" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) // 
bufferSize is the size of the buffered reader diff --git a/clients/pkg/promtail/targets/stdin/stdin_target_manager_test.go b/clients/pkg/promtail/targets/stdin/stdin_target_manager_test.go index 58abae3c802b8..8f2135f3aff32 100644 --- a/clients/pkg/promtail/targets/stdin/stdin_target_manager_test.go +++ b/clients/pkg/promtail/targets/stdin/stdin_target_manager_test.go @@ -12,13 +12,13 @@ import ( "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" - "github.com/grafana/loki/clients/pkg/logentry/stages" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/pkg/logproto" - util_log "github.com/grafana/loki/pkg/util/log" + "github.com/grafana/loki/v3/pkg/logproto" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) func Test_newReaderTarget(t *testing.T) { diff --git a/clients/pkg/promtail/targets/syslog/syslogparser/syslogparser_test.go b/clients/pkg/promtail/targets/syslog/syslogparser/syslogparser_test.go index 89d17c4645f55..f99742de48a24 100644 --- a/clients/pkg/promtail/targets/syslog/syslogparser/syslogparser_test.go +++ b/clients/pkg/promtail/targets/syslog/syslogparser/syslogparser_test.go @@ -9,7 +9,7 @@ import ( "github.com/influxdata/go-syslog/v3/rfc5424" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/promtail/targets/syslog/syslogparser" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/syslog/syslogparser" ) var ( diff --git a/clients/pkg/promtail/targets/syslog/syslogtarget.go b/clients/pkg/promtail/targets/syslog/syslogtarget.go index 54befebc38931..35ba4d8cf297f 100644 --- 
a/clients/pkg/promtail/targets/syslog/syslogtarget.go +++ b/clients/pkg/promtail/targets/syslog/syslogtarget.go @@ -15,11 +15,11 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/prometheus/prometheus/model/relabel" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) var ( diff --git a/clients/pkg/promtail/targets/syslog/syslogtarget_test.go b/clients/pkg/promtail/targets/syslog/syslogtarget_test.go index 62b5924626f14..2f06e04321ece 100644 --- a/clients/pkg/promtail/targets/syslog/syslogtarget_test.go +++ b/clients/pkg/promtail/targets/syslog/syslogtarget_test.go @@ -19,9 +19,9 @@ import ( "github.com/stretchr/testify/require" "gopkg.in/yaml.v2" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/syslog/syslogparser" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/syslog/syslogparser" ) var ( diff --git a/clients/pkg/promtail/targets/syslog/syslogtargetmanager.go b/clients/pkg/promtail/targets/syslog/syslogtargetmanager.go index 8a7246a28a464..ffda8b3de4203 100644 --- a/clients/pkg/promtail/targets/syslog/syslogtargetmanager.go +++ b/clients/pkg/promtail/targets/syslog/syslogtargetmanager.go @@ -5,10 +5,10 @@ import ( "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" - "github.com/grafana/loki/clients/pkg/logentry/stages" - 
"github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) // SyslogTargetManager manages a series of SyslogTargets. diff --git a/clients/pkg/promtail/targets/syslog/transport.go b/clients/pkg/promtail/targets/syslog/transport.go index 67a78136e311b..6b1bdfeb91c11 100644 --- a/clients/pkg/promtail/targets/syslog/transport.go +++ b/clients/pkg/promtail/targets/syslog/transport.go @@ -20,8 +20,8 @@ import ( "github.com/influxdata/go-syslog/v3" "github.com/prometheus/prometheus/model/labels" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/syslog/syslogparser" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/syslog/syslogparser" ) var ( diff --git a/clients/pkg/promtail/targets/windows/bookmark.go b/clients/pkg/promtail/targets/windows/bookmark.go index b7a4a7698cde4..55ad7f3040526 100644 --- a/clients/pkg/promtail/targets/windows/bookmark.go +++ b/clients/pkg/promtail/targets/windows/bookmark.go @@ -9,7 +9,7 @@ import ( "github.com/spf13/afero" - "github.com/grafana/loki/clients/pkg/promtail/targets/windows/win_eventlog" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/windows/win_eventlog" ) type bookMark struct { diff --git a/clients/pkg/promtail/targets/windows/format.go b/clients/pkg/promtail/targets/windows/format.go index 9fc44cc62a8ba..821aa4ecf0d9b 100644 --- a/clients/pkg/promtail/targets/windows/format.go +++ b/clients/pkg/promtail/targets/windows/format.go @@ -9,8 +9,8 @@ import ( jsoniter "github.com/json-iterator/go" - 
"github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/windows/win_eventlog" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/windows/win_eventlog" ) type Event struct { diff --git a/clients/pkg/promtail/targets/windows/target.go b/clients/pkg/promtail/targets/windows/target.go index c4e1806724a54..42cb298f0995c 100644 --- a/clients/pkg/promtail/targets/windows/target.go +++ b/clients/pkg/promtail/targets/windows/target.go @@ -18,10 +18,10 @@ import ( "github.com/prometheus/prometheus/model/labels" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" - "github.com/grafana/loki/clients/pkg/promtail/targets/windows/win_eventlog" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/windows/win_eventlog" ) var fs = afero.NewOsFs() diff --git a/clients/pkg/promtail/targets/windows/target_test.go b/clients/pkg/promtail/targets/windows/target_test.go index a9a692b21ecfc..c766cb869cd87 100644 --- a/clients/pkg/promtail/targets/windows/target_test.go +++ b/clients/pkg/promtail/targets/windows/target_test.go @@ -14,13 +14,13 @@ import ( "github.com/stretchr/testify/require" "golang.org/x/sys/windows/svc/eventlog" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/client/fake" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/windows/win_eventlog" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" + 
"github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/windows/win_eventlog" - "github.com/grafana/loki/pkg/logproto" - util_log "github.com/grafana/loki/pkg/util/log" + "github.com/grafana/loki/v3/pkg/logproto" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) func init() { diff --git a/clients/pkg/promtail/targets/windows/targetmanager.go b/clients/pkg/promtail/targets/windows/targetmanager.go index 78e98880ca2bf..9bb12ebc15a9c 100644 --- a/clients/pkg/promtail/targets/windows/targetmanager.go +++ b/clients/pkg/promtail/targets/windows/targetmanager.go @@ -8,9 +8,9 @@ import ( "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) // TargetManager manages a series of windows event targets. 
diff --git a/clients/pkg/promtail/targets/windows/targetmanager_windows.go b/clients/pkg/promtail/targets/windows/targetmanager_windows.go index 78793b26c730a..4bc53bcc42153 100644 --- a/clients/pkg/promtail/targets/windows/targetmanager_windows.go +++ b/clients/pkg/promtail/targets/windows/targetmanager_windows.go @@ -8,10 +8,10 @@ import ( "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" - "github.com/grafana/loki/clients/pkg/logentry/stages" - "github.com/grafana/loki/clients/pkg/promtail/api" - "github.com/grafana/loki/clients/pkg/promtail/scrapeconfig" - "github.com/grafana/loki/clients/pkg/promtail/targets/target" + "github.com/grafana/loki/v3/clients/pkg/logentry/stages" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/scrapeconfig" + "github.com/grafana/loki/v3/clients/pkg/promtail/targets/target" ) // TargetManager manages a series of windows event targets. diff --git a/clients/pkg/promtail/targets/windows/win_eventlog/win_eventlog.go b/clients/pkg/promtail/targets/windows/win_eventlog/win_eventlog.go index f2411698d4b14..71ff148de58ff 100644 --- a/clients/pkg/promtail/targets/windows/win_eventlog/win_eventlog.go +++ b/clients/pkg/promtail/targets/windows/win_eventlog/win_eventlog.go @@ -39,7 +39,7 @@ import ( "github.com/influxdata/telegraf/plugins/inputs" "golang.org/x/sys/windows" - util_log "github.com/grafana/loki/pkg/util/log" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) var sampleConfig = ` diff --git a/clients/pkg/promtail/utils/entries.go b/clients/pkg/promtail/utils/entries.go index 214422b674bad..10204e8fb37f4 100644 --- a/clients/pkg/promtail/utils/entries.go +++ b/clients/pkg/promtail/utils/entries.go @@ -5,7 +5,7 @@ import ( "sync" "time" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" ) // FanoutEntryHandler implements api.EntryHandler, fanning out received entries to one or multiple 
channels. diff --git a/clients/pkg/promtail/utils/entries_test.go b/clients/pkg/promtail/utils/entries_test.go index 8029e895c2a62..0164794a89d2d 100644 --- a/clients/pkg/promtail/utils/entries_test.go +++ b/clients/pkg/promtail/utils/entries_test.go @@ -8,9 +8,9 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) func TestFanoutEntryHandler_SuccessfulFanout(t *testing.T) { @@ -43,7 +43,14 @@ func TestFanoutEntryHandler_SuccessfulFanout(t *testing.T) { } require.Eventually(t, func() bool { - return len(eh1.Received) == len(expectedLines) && len(eh2.Received) == len(expectedLines) + eh1.mu.Lock() + len1 := len(eh1.Received) + eh1.mu.Unlock() + eh2.mu.Lock() + len2 := len(eh2.Received) + eh2.mu.Unlock() + + return len1 == len(expectedLines) && len2 == len(expectedLines) }, time.Second*10, time.Second, "expected entries to be received by fanned out channels") } @@ -77,6 +84,8 @@ func TestFanoutEntryHandler_TimeoutWaitingForEntriesToBeSent(t *testing.T) { }() require.Eventually(t, func() bool { + controlEH.mu.Lock() + defer controlEH.mu.Unlock() return len(controlEH.Received) == 1 }, time.Second*5, time.Second, "expected control entry handler to receive an entry") @@ -89,6 +98,7 @@ type savingEntryHandler struct { entries chan api.Entry Received []api.Entry wg sync.WaitGroup + mu sync.Mutex } func newSavingEntryHandler() *savingEntryHandler { @@ -99,7 +109,9 @@ func newSavingEntryHandler() *savingEntryHandler { eh.wg.Add(1) go func() { for e := range eh.entries { + eh.mu.Lock() eh.Received = append(eh.Received, e) + eh.mu.Unlock() } eh.wg.Done() }() diff --git a/clients/pkg/promtail/utils/remotewrite_server.go b/clients/pkg/promtail/utils/remotewrite_server.go index 089f4a79a01bb..837d3a8581f10 100644 --- 
a/clients/pkg/promtail/utils/remotewrite_server.go +++ b/clients/pkg/promtail/utils/remotewrite_server.go @@ -5,8 +5,8 @@ import ( "net/http" "net/http/httptest" - "github.com/grafana/loki/pkg/logproto" - "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/v3/pkg/logproto" + "github.com/grafana/loki/v3/pkg/util" ) // RemoteWriteRequest wraps the received logs remote write request that is received. diff --git a/clients/pkg/promtail/wal/reader.go b/clients/pkg/promtail/wal/reader.go index b19b2bbecc10b..769c566efbee9 100644 --- a/clients/pkg/promtail/wal/reader.go +++ b/clients/pkg/promtail/wal/reader.go @@ -5,11 +5,11 @@ import ( "github.com/prometheus/common/model" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/ingester/wal" - "github.com/grafana/loki/pkg/util" - walUtils "github.com/grafana/loki/pkg/util/wal" + "github.com/grafana/loki/v3/pkg/ingester/wal" + "github.com/grafana/loki/v3/pkg/util" + walUtils "github.com/grafana/loki/v3/pkg/util/wal" ) // ReadWAL will read all entries in the WAL located under dir. 
Mainly used for testing diff --git a/clients/pkg/promtail/wal/wal.go b/clients/pkg/promtail/wal/wal.go index af1fa7e3d5098..8e747530470c7 100644 --- a/clients/pkg/promtail/wal/wal.go +++ b/clients/pkg/promtail/wal/wal.go @@ -9,7 +9,7 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/prometheus/tsdb/wlog" - "github.com/grafana/loki/pkg/ingester/wal" + "github.com/grafana/loki/v3/pkg/ingester/wal" ) var ( diff --git a/clients/pkg/promtail/wal/watcher.go b/clients/pkg/promtail/wal/watcher.go index 3e8719a235812..926c93c01bcfc 100644 --- a/clients/pkg/promtail/wal/watcher.go +++ b/clients/pkg/promtail/wal/watcher.go @@ -14,7 +14,7 @@ import ( "github.com/prometheus/prometheus/tsdb/record" "github.com/prometheus/prometheus/tsdb/wlog" - "github.com/grafana/loki/pkg/ingester/wal" + "github.com/grafana/loki/v3/pkg/ingester/wal" ) const ( diff --git a/clients/pkg/promtail/wal/watcher_test.go b/clients/pkg/promtail/wal/watcher_test.go index d9a5e04cb0b89..adf6dbef32de0 100644 --- a/clients/pkg/promtail/wal/watcher_test.go +++ b/clients/pkg/promtail/wal/watcher_test.go @@ -3,6 +3,7 @@ package wal import ( "fmt" "os" + "sync" "testing" "time" @@ -13,11 +14,11 @@ import ( "github.com/prometheus/prometheus/tsdb/record" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/ingester/wal" - "github.com/grafana/loki/pkg/logproto" - "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/v3/pkg/ingester/wal" + "github.com/grafana/loki/v3/pkg/logproto" + "github.com/grafana/loki/v3/pkg/util" ) type testWriteTo struct { @@ -25,6 +26,7 @@ type testWriteTo struct { series map[uint64]model.LabelSet logger log.Logger ReceivedSeriesReset []int + mu sync.Mutex } func (t *testWriteTo) StoreSeries(series []record.RefSeries, _ int) { @@ -42,10 +44,12 @@ func (t *testWriteTo) AppendEntries(entries wal.RefEntries) error { var entry 
api.Entry if l, ok := t.series[uint64(entries.Ref)]; ok { entry.Labels = l + t.mu.Lock() for _, e := range entries.Entries { entry.Entry = e t.ReadEntries = append(t.ReadEntries, entry) } + t.mu.Unlock() } else { level.Debug(t.logger).Log("series for entry not found") } @@ -94,11 +98,15 @@ var cases = map[string]watcherTest{ res.notifyWrite() require.Eventually(t, func() bool { + res.writeTo.mu.Lock() + defer res.writeTo.mu.Unlock() return len(res.writeTo.ReadEntries) == 3 }, time.Second*10, time.Second, "expected watcher to catch up with written entries") + res.writeTo.mu.Lock() for _, readEntry := range res.writeTo.ReadEntries { require.Contains(t, lines, readEntry.Line, "not expected log line") } + res.writeTo.mu.Unlock() }, "read entries from WAL, just using backup timer to trigger reads": func(t *testing.T, res *watcherTestResources) { @@ -127,11 +135,15 @@ var cases = map[string]watcherTest{ // do not notify, let the backup timer trigger the watcher reads require.Eventually(t, func() bool { + res.writeTo.mu.Lock() + defer res.writeTo.mu.Unlock() return len(res.writeTo.ReadEntries) == 3 }, time.Second*10, time.Second, "expected watcher to catch up with written entries") + res.writeTo.mu.Lock() for _, readEntry := range res.writeTo.ReadEntries { require.Contains(t, lines, readEntry.Line, "not expected log line") } + res.writeTo.mu.Unlock() }, "continue reading entries in next segment after initial segment is closed": func(t *testing.T, res *watcherTestResources) { @@ -164,11 +176,15 @@ var cases = map[string]watcherTest{ res.notifyWrite() require.Eventually(t, func() bool { + res.writeTo.mu.Lock() + defer res.writeTo.mu.Unlock() return len(res.writeTo.ReadEntries) == 3 }, time.Second*10, time.Second, "expected watcher to catch up with written entries") + res.writeTo.mu.Lock() for _, readEntry := range res.writeTo.ReadEntries { require.Contains(t, lines, readEntry.Line, "not expected log line") } + res.writeTo.mu.Unlock() err := res.nextWALSegment() 
require.NoError(t, err, "expected no error when moving to next wal segment") @@ -186,12 +202,16 @@ var cases = map[string]watcherTest{ res.notifyWrite() require.Eventually(t, func() bool { + res.writeTo.mu.Lock() + defer res.writeTo.mu.Unlock() return len(res.writeTo.ReadEntries) == 6 }, time.Second*10, time.Second, "expected watcher to catch up after new wal segment is cut") // assert over second half of entries + res.writeTo.mu.Lock() for _, readEntry := range res.writeTo.ReadEntries[3:] { require.Contains(t, linesAfter, readEntry.Line, "not expected log line") } + res.writeTo.mu.Unlock() }, "start reading from last segment": func(t *testing.T, res *watcherTestResources) { @@ -234,12 +254,16 @@ var cases = map[string]watcherTest{ res.notifyWrite() require.Eventually(t, func() bool { + res.writeTo.mu.Lock() + defer res.writeTo.mu.Unlock() return len(res.writeTo.ReadEntries) == 3 }, time.Second*10, time.Second, "expected watcher to catch up after new wal segment is cut") // assert over second half of entries + res.writeTo.mu.Lock() for _, readEntry := range res.writeTo.ReadEntries[3:] { require.Contains(t, linesAfter, readEntry.Line, "not expected log line") } + res.writeTo.mu.Unlock() }, "watcher receives segments reclaimed notifications correctly": func(t *testing.T, res *watcherTestResources) { @@ -259,6 +283,8 @@ var cases = map[string]watcherTest{ require.NoError(t, res.syncWAL()) res.notifyWrite() require.Eventually(t, func() bool { + res.writeTo.mu.Lock() + defer res.writeTo.mu.Unlock() return len(res.writeTo.ReadEntries) == expectedReadEntries }, time.Second*10, time.Second, "expected watcher to catch up with written entries") } @@ -275,6 +301,8 @@ var cases = map[string]watcherTest{ // collecting segment 0 res.notifySegmentReclaimed(0) require.Eventually(t, func() bool { + res.writeTo.mu.Lock() + defer res.writeTo.mu.Unlock() return len(res.writeTo.ReceivedSeriesReset) == 1 && res.writeTo.ReceivedSeriesReset[0] == 0 }, time.Second*10, time.Second, "timed 
out waiting to receive series reset") @@ -290,6 +318,8 @@ var cases = map[string]watcherTest{ res.notifySegmentReclaimed(2) // Expect second SeriesReset call to have the highest numbered deleted segment, 2 require.Eventually(t, func() bool { + res.writeTo.mu.Lock() + defer res.writeTo.mu.Unlock() t.Logf("received series reset: %v", res.writeTo.ReceivedSeriesReset) return len(res.writeTo.ReceivedSeriesReset) == 2 && res.writeTo.ReceivedSeriesReset[1] == 2 }, time.Second*10, time.Second, "timed out waiting to receive series reset") diff --git a/clients/pkg/promtail/wal/writer.go b/clients/pkg/promtail/wal/writer.go index 8e754a01038f8..e9360645716d9 100644 --- a/clients/pkg/promtail/wal/writer.go +++ b/clients/pkg/promtail/wal/writer.go @@ -16,11 +16,11 @@ import ( "github.com/prometheus/prometheus/tsdb/chunks" "github.com/prometheus/prometheus/tsdb/record" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/ingester/wal" - "github.com/grafana/loki/pkg/logproto" - "github.com/grafana/loki/pkg/util" + "github.com/grafana/loki/v3/pkg/ingester/wal" + "github.com/grafana/loki/v3/pkg/logproto" + "github.com/grafana/loki/v3/pkg/util" ) const ( diff --git a/clients/pkg/promtail/wal/writer_test.go b/clients/pkg/promtail/wal/writer_test.go index fbce817f2a26e..4dae546044933 100644 --- a/clients/pkg/promtail/wal/writer_test.go +++ b/clients/pkg/promtail/wal/writer_test.go @@ -4,6 +4,7 @@ import ( "fmt" "os" "path/filepath" + "sync" "testing" "time" @@ -13,9 +14,9 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/require" - "github.com/grafana/loki/clients/pkg/promtail/api" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" - "github.com/grafana/loki/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logproto" ) func TestWriter_EntriesAreWrittenToWAL(t *testing.T) { @@ -77,6 +78,8 @@ func TestWriter_OldSegmentsAreCleanedUp(t *testing.T) { maxSegmentAge := 
time.Second * 2 + var mu1 sync.Mutex + var mu2 sync.Mutex subscriber1 := []int{} subscriber2 := []int{} @@ -92,10 +95,14 @@ func TestWriter_OldSegmentsAreCleanedUp(t *testing.T) { // add writer events subscriber. Add multiple to test fanout writer.SubscribeCleanup(notifySegmentsCleanedFunc(func(num int) { + mu1.Lock() subscriber1 = append(subscriber1, num) + mu1.Unlock() })) writer.SubscribeCleanup(notifySegmentsCleanedFunc(func(num int) { + mu2.Lock() subscriber2 = append(subscriber2, num) + mu2.Unlock() })) // write entries to wal and sync @@ -148,11 +155,15 @@ func TestWriter_OldSegmentsAreCleanedUp(t *testing.T) { require.ErrorIs(t, err, os.ErrNotExist, "expected file not exists error") // assert all subscribers were notified + mu1.Lock() require.Len(t, subscriber1, 1, "expected one segment reclaimed notification in subscriber1") require.Equal(t, 0, subscriber1[0]) + mu1.Unlock() + mu2.Lock() require.Len(t, subscriber2, 1, "expected one segment reclaimed notification in subscriber2") require.Equal(t, 0, subscriber2[0]) + mu2.Unlock() // Expect last, or "head" segment to still be alive _, err = os.Stat(filepath.Join(dir, "00000001")) diff --git a/cmd/logcli/Dockerfile b/cmd/logcli/Dockerfile index 3ec3428950896..a0c8e3a67cd26 100644 --- a/cmd/logcli/Dockerfile +++ b/cmd/logcli/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.21.3 as build +FROM golang:1.21.9 as build COPY . 
/src/loki WORKDIR /src/loki diff --git a/cmd/logcli/main.go b/cmd/logcli/main.go index 56a954cd5b44b..3d2aa85297b3f 100644 --- a/cmd/logcli/main.go +++ b/cmd/logcli/main.go @@ -15,15 +15,15 @@ import ( "github.com/prometheus/common/version" "gopkg.in/alecthomas/kingpin.v2" - "github.com/grafana/loki/pkg/logcli/client" - "github.com/grafana/loki/pkg/logcli/index" - "github.com/grafana/loki/pkg/logcli/labelquery" - "github.com/grafana/loki/pkg/logcli/output" - "github.com/grafana/loki/pkg/logcli/query" - "github.com/grafana/loki/pkg/logcli/seriesquery" - "github.com/grafana/loki/pkg/logcli/volume" - "github.com/grafana/loki/pkg/logql/syntax" - _ "github.com/grafana/loki/pkg/util/build" + "github.com/grafana/loki/v3/pkg/logcli/client" + "github.com/grafana/loki/v3/pkg/logcli/index" + "github.com/grafana/loki/v3/pkg/logcli/labelquery" + "github.com/grafana/loki/v3/pkg/logcli/output" + "github.com/grafana/loki/v3/pkg/logcli/query" + "github.com/grafana/loki/v3/pkg/logcli/seriesquery" + "github.com/grafana/loki/v3/pkg/logcli/volume" + "github.com/grafana/loki/v3/pkg/logql/syntax" + _ "github.com/grafana/loki/v3/pkg/util/build" ) var ( diff --git a/cmd/logql-analyzer/Dockerfile b/cmd/logql-analyzer/Dockerfile index 87b6893ae5a36..1413a33e6249f 100644 --- a/cmd/logql-analyzer/Dockerfile +++ b/cmd/logql-analyzer/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.21.3 as build +FROM golang:1.21.9 as build COPY . 
/src/loki WORKDIR /src/loki diff --git a/cmd/logql-analyzer/main.go b/cmd/logql-analyzer/main.go index 5031dbad7d894..beed1226709d4 100644 --- a/cmd/logql-analyzer/main.go +++ b/cmd/logql-analyzer/main.go @@ -10,9 +10,9 @@ import ( "github.com/grafana/dskit/server" "github.com/prometheus/client_golang/prometheus" - "github.com/grafana/loki/pkg/logqlanalyzer" - "github.com/grafana/loki/pkg/sizing" - util_log "github.com/grafana/loki/pkg/util/log" + "github.com/grafana/loki/v3/pkg/logqlanalyzer" + "github.com/grafana/loki/v3/pkg/sizing" + util_log "github.com/grafana/loki/v3/pkg/util/log" ) func main() { diff --git a/cmd/loki-canary-boringcrypto/Dockerfile b/cmd/loki-canary-boringcrypto/Dockerfile index 0c4086911632e..4814506641800 100644 --- a/cmd/loki-canary-boringcrypto/Dockerfile +++ b/cmd/loki-canary-boringcrypto/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.21.3 as build +FROM golang:1.21.9 as build COPY . /src/loki WORKDIR /src/loki diff --git a/cmd/loki-canary/Dockerfile b/cmd/loki-canary/Dockerfile index 7f44b73ab5e65..ec8889df25a52 100644 --- a/cmd/loki-canary/Dockerfile +++ b/cmd/loki-canary/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.21.3 as build +FROM golang:1.21.9 as build COPY . /src/loki WORKDIR /src/loki diff --git a/cmd/loki-canary/Dockerfile.cross b/cmd/loki-canary/Dockerfile.cross index 20077b196a8fa..60a5ff732f38c 100644 --- a/cmd/loki-canary/Dockerfile.cross +++ b/cmd/loki-canary/Dockerfile.cross @@ -1,8 +1,8 @@ -ARG BUILD_IMAGE=grafana/loki-build-image:0.33.0 +ARG BUILD_IMAGE=grafana/loki-build-image:0.33.1 # Directories in this file are referenced from the root of the project not this folder # This file is intended to be called from the root like so: # docker build -t grafana/promtail -f cmd/promtail/Dockerfile . 
-FROM golang:1.21.2-alpine as goenv +FROM golang:1.21.9-alpine as goenv RUN go env GOARCH > /goarch && \ go env GOARM > /goarm diff --git a/cmd/loki-canary/main.go b/cmd/loki-canary/main.go index 70aad7b8dfd80..061b98321047e 100644 --- a/cmd/loki-canary/main.go +++ b/cmd/loki-canary/main.go @@ -18,10 +18,10 @@ import ( "github.com/prometheus/common/config" "github.com/prometheus/common/version" - "github.com/grafana/loki/pkg/canary/comparator" - "github.com/grafana/loki/pkg/canary/reader" - "github.com/grafana/loki/pkg/canary/writer" - _ "github.com/grafana/loki/pkg/util/build" + "github.com/grafana/loki/v3/pkg/canary/comparator" + "github.com/grafana/loki/v3/pkg/canary/reader" + "github.com/grafana/loki/v3/pkg/canary/writer" + _ "github.com/grafana/loki/v3/pkg/util/build" ) const ( diff --git a/cmd/loki/Dockerfile b/cmd/loki/Dockerfile index 4f8fc3961c909..55c80ef612168 100644 --- a/cmd/loki/Dockerfile +++ b/cmd/loki/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.21.3 as build +FROM golang:1.21.9 as build COPY . /src/loki WORKDIR /src/loki diff --git a/cmd/loki/Dockerfile.cross b/cmd/loki/Dockerfile.cross index d7bd233100e6c..833f9b266ba11 100644 --- a/cmd/loki/Dockerfile.cross +++ b/cmd/loki/Dockerfile.cross @@ -1,8 +1,8 @@ -ARG BUILD_IMAGE=grafana/loki-build-image:0.33.0 +ARG BUILD_IMAGE=grafana/loki-build-image:0.33.1 # Directories in this file are referenced from the root of the project not this folder # This file is intended to be called from the root like so: # docker build -t grafana/loki -f cmd/loki/Dockerfile . 
-FROM golang:1.21.2-alpine as goenv +FROM golang:1.21.9-alpine as goenv RUN go env GOARCH > /goarch && \ go env GOARM > /goarm diff --git a/cmd/loki/Dockerfile.debug b/cmd/loki/Dockerfile.debug index fe64eb1208231..9b1f427a696ea 100644 --- a/cmd/loki/Dockerfile.debug +++ b/cmd/loki/Dockerfile.debug @@ -1,9 +1,9 @@ -ARG BUILD_IMAGE=grafana/loki-build-image:0.33.0 +ARG BUILD_IMAGE=grafana/loki-build-image:0.33.1 # Directories in this file are referenced from the root of the project not this folder # This file is intended to be called from the root like so: # docker build -t grafana/loki -f cmd/loki/Dockerfile.debug . -FROM golang:1.21.2-alpine as goenv +FROM golang:1.21.9-alpine as goenv RUN go env GOARCH > /goarch && \ go env GOARM > /goarm && \ go install github.com/go-delve/delve/cmd/dlv@latest diff --git a/cmd/loki/loki-docker-config.yaml b/cmd/loki/loki-docker-config.yaml index b9f80f910236c..c50c147b06f2f 100644 --- a/cmd/loki/loki-docker-config.yaml +++ b/cmd/loki/loki-docker-config.yaml @@ -20,7 +20,7 @@ schema_config: - from: 2020-10-24 store: tsdb object_store: filesystem - schema: v12 + schema: v13 index: prefix: index_ period: 24h diff --git a/cmd/loki/loki-local-config.yaml b/cmd/loki/loki-local-config.yaml index cbc04cb4413f3..03b579647753a 100644 --- a/cmd/loki/loki-local-config.yaml +++ b/cmd/loki/loki-local-config.yaml @@ -28,7 +28,7 @@ schema_config: - from: 2020-10-24 store: tsdb object_store: filesystem - schema: v12 + schema: v13 index: prefix: index_ period: 24h @@ -36,6 +36,9 @@ schema_config: ruler: alertmanager_url: http://localhost:9093 +frontend: + encoding: protobuf + # By default, Loki will send anonymous, but uniquely-identifiable usage and configuration # analytics to Grafana Labs. 
These statistics are sent to https://stats.grafana.org/ # diff --git a/cmd/loki/loki-local-with-memcached.yaml b/cmd/loki/loki-local-with-memcached.yaml index a2f4336cdd484..d69a983d6124a 100644 --- a/cmd/loki/loki-local-with-memcached.yaml +++ b/cmd/loki/loki-local-with-memcached.yaml @@ -16,6 +16,10 @@ common: kvstore: store: inmemory +limits_config: + split_instant_metric_queries_by_interval: '10m' + + query_range: align_queries_with_step: true cache_index_stats_results: true @@ -76,7 +80,7 @@ schema_config: - from: 2020-10-24 store: tsdb object_store: filesystem - schema: v12 + schema: v13 index: prefix: index_ period: 24h diff --git a/cmd/loki/main.go b/cmd/loki/main.go index 20a5925acbb4a..d9f4613977872 100644 --- a/cmd/loki/main.go +++ b/cmd/loki/main.go @@ -16,12 +16,13 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/version" - "github.com/grafana/loki/pkg/loki" - "github.com/grafana/loki/pkg/util" - _ "github.com/grafana/loki/pkg/util/build" - "github.com/grafana/loki/pkg/util/cfg" - util_log "github.com/grafana/loki/pkg/util/log" - "github.com/grafana/loki/pkg/validation" + "github.com/grafana/loki/v3/pkg/loki" + loki_runtime "github.com/grafana/loki/v3/pkg/runtime" + "github.com/grafana/loki/v3/pkg/util" + _ "github.com/grafana/loki/v3/pkg/util/build" + "github.com/grafana/loki/v3/pkg/util/cfg" + util_log "github.com/grafana/loki/v3/pkg/util/log" + "github.com/grafana/loki/v3/pkg/validation" ) func exit(code int) { @@ -49,6 +50,7 @@ func main() { // call it atleast once, the defaults are set to an empty struct. // We call it with the flag values so that the config file unmarshalling only overrides the values set in the config. 
validation.SetDefaultLimitsForYAMLUnmarshalling(config.LimitsConfig) + loki_runtime.SetDefaultLimitsForYAMLUnmarshalling(config.OperationalConfig) // Init the logger which will honor the log level set in config.Server if reflect.DeepEqual(&config.Server.LogLevel, &log.Level{}) { diff --git a/cmd/lokitool/main.go b/cmd/lokitool/main.go new file mode 100644 index 0000000000000..6b52fb0a3d657 --- /dev/null +++ b/cmd/lokitool/main.go @@ -0,0 +1,28 @@ +package main + +import ( + "fmt" + "os" + + "gopkg.in/alecthomas/kingpin.v2" + + "github.com/prometheus/common/version" + + "github.com/grafana/loki/v3/pkg/tool/commands" +) + +var ( + ruleCommand commands.RuleCommand +) + +func main() { + app := kingpin.New("lokitool", "A command-line tool to manage Loki.") + ruleCommand.Register(app) + + app.Command("version", "Get the version of the lokitool CLI").Action(func(k *kingpin.ParseContext) error { + fmt.Println(version.Print("loki")) + return nil + }) + + kingpin.MustParse(app.Parse(os.Args[1:])) +} diff --git a/cmd/migrate/Dockerfile b/cmd/migrate/Dockerfile index 44ffe26513df5..f83c26bf893d4 100644 --- a/cmd/migrate/Dockerfile +++ b/cmd/migrate/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.21.3 as build +FROM golang:1.21.9 as build COPY . 
/src/loki WORKDIR /src/loki RUN make clean && make BUILD_IN_CONTAINER=false migrate diff --git a/cmd/migrate/main.go b/cmd/migrate/main.go index d638adaaa812e..e42468e532b07 100644 --- a/cmd/migrate/main.go +++ b/cmd/migrate/main.go @@ -17,16 +17,16 @@ import ( "github.com/grafana/dskit/user" "github.com/prometheus/prometheus/model/labels" - "github.com/grafana/loki/pkg/logql/syntax" - "github.com/grafana/loki/pkg/loki" - "github.com/grafana/loki/pkg/storage" - "github.com/grafana/loki/pkg/storage/chunk" - "github.com/grafana/loki/pkg/storage/config" - "github.com/grafana/loki/pkg/storage/stores/shipper/indexshipper" - "github.com/grafana/loki/pkg/util/cfg" - "github.com/grafana/loki/pkg/util/constants" - util_log "github.com/grafana/loki/pkg/util/log" - "github.com/grafana/loki/pkg/validation" + "github.com/grafana/loki/v3/pkg/logql/syntax" + "github.com/grafana/loki/v3/pkg/loki" + "github.com/grafana/loki/v3/pkg/storage" + "github.com/grafana/loki/v3/pkg/storage/chunk" + "github.com/grafana/loki/v3/pkg/storage/config" + "github.com/grafana/loki/v3/pkg/storage/stores/shipper/indexshipper" + "github.com/grafana/loki/v3/pkg/util/cfg" + "github.com/grafana/loki/v3/pkg/util/constants" + util_log "github.com/grafana/loki/v3/pkg/util/log" + "github.com/grafana/loki/v3/pkg/validation" ) type syncRange struct { diff --git a/cmd/querytee/Dockerfile b/cmd/querytee/Dockerfile index 858a4d66b971e..c32752b0604f4 100644 --- a/cmd/querytee/Dockerfile +++ b/cmd/querytee/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.21.3 as build +FROM golang:1.21.9 as build COPY . 
/src/loki WORKDIR /src/loki diff --git a/cmd/querytee/Dockerfile.cross b/cmd/querytee/Dockerfile.cross index 94d2665c6a615..5f84f9ab410d1 100644 --- a/cmd/querytee/Dockerfile.cross +++ b/cmd/querytee/Dockerfile.cross @@ -1,8 +1,8 @@ -ARG BUILD_IMAGE=grafana/loki-build-image:0.33.0 +ARG BUILD_IMAGE=grafana/loki-build-image:0.33.1 # Directories in this file are referenced from the root of the project not this folder # This file is intended to be called from the root like so: # docker build -t grafana/promtail -f cmd/promtail/Dockerfile . -FROM golang:1.21.2-alpine as goenv +FROM golang:1.21.9-alpine as goenv RUN go env GOARCH > /goarch && \ go env GOARM > /goarm diff --git a/cmd/querytee/main.go b/cmd/querytee/main.go index 9007dd6a3e3f2..5acebfed85179 100644 --- a/cmd/querytee/main.go +++ b/cmd/querytee/main.go @@ -10,8 +10,8 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/collectors" - util_log "github.com/grafana/loki/pkg/util/log" - "github.com/grafana/loki/tools/querytee" + util_log "github.com/grafana/loki/v3/pkg/util/log" + "github.com/grafana/loki/v3/tools/querytee" ) type Config struct { diff --git a/docs/docs.mk b/docs/docs.mk index 88f2da7682056..a08830d907c1b 100644 --- a/docs/docs.mk +++ b/docs/docs.mk @@ -1,5 +1,5 @@ # The source of this file is https://raw.githubusercontent.com/grafana/writers-toolkit/main/docs/docs.mk. -# 3.0.0 (2023-05-18) +# A changelog is included in the head of the `make-docs` script. include variables.mk -include variables.mk.local @@ -34,11 +34,6 @@ endif # First project is considered the primary one used for doc-validator. PRIMARY_PROJECT := $(subst /,-,$(firstword $(subst :, ,$(firstword $(PROJECTS))))) -# Name for the container. -ifeq ($(origin DOCS_CONTAINER), undefined) -export DOCS_CONTAINER := $(PRIMARY_PROJECT)-docs -endif - # Host port to publish container port to. 
ifeq ($(origin DOCS_HOST_PORT), undefined) export DOCS_HOST_PORT := 3002 @@ -76,11 +71,11 @@ docs-rm: ## Remove the docs container. .PHONY: docs-pull docs-pull: ## Pull documentation base image. - $(PODMAN) pull $(DOCS_IMAGE) + $(PODMAN) pull -q $(DOCS_IMAGE) make-docs: ## Fetch the latest make-docs script. make-docs: - if [[ ! -f "$(PWD)/make-docs" ]]; then + if [[ ! -f "$(CURDIR)/make-docs" ]]; then echo 'WARN: No make-docs script found in the working directory. Run `make update` to download it.' >&2 exit 1 fi @@ -88,32 +83,27 @@ make-docs: .PHONY: docs docs: ## Serve documentation locally, which includes pulling the latest `DOCS_IMAGE` (default: `grafana/docs-base:latest`) container image. See also `docs-no-pull`. docs: docs-pull make-docs - $(PWD)/make-docs $(PROJECTS) + $(CURDIR)/make-docs $(PROJECTS) .PHONY: docs-no-pull docs-no-pull: ## Serve documentation locally without pulling the `DOCS_IMAGE` (default: `grafana/docs-base:latest`) container image. docs-no-pull: make-docs - $(PWD)/make-docs $(PROJECTS) + $(CURDIR)/make-docs $(PROJECTS) .PHONY: docs-debug docs-debug: ## Run Hugo web server with debugging enabled. TODO: support all SERVER_FLAGS defined in website Makefile. docs-debug: make-docs - WEBSITE_EXEC='hugo server --bind 0.0.0.0 --port 3002 --debug' $(PWD)/make-docs $(PROJECTS) + WEBSITE_EXEC='hugo server --bind 0.0.0.0 --port 3002 --debug' $(CURDIR)/make-docs $(PROJECTS) .PHONY: doc-validator doc-validator: ## Run doc-validator on the entire docs folder. doc-validator: make-docs - DOCS_IMAGE=$(DOC_VALIDATOR_IMAGE) $(PWD)/make-docs $(PROJECTS) - -.PHONY: doc-validator/% -doc-validator/%: ## Run doc-validator on a specific path. To lint the path /docs/sources/administration, run 'make doc-validator/administration'. 
-doc-validator/%: make-docs - DOCS_IMAGE=$(DOC_VALIDATOR_IMAGE) DOC_VALIDATOR_INCLUDE=$(subst doc-validator/,,$@) $(PWD)/make-docs $(PROJECTS) + DOCS_IMAGE=$(DOC_VALIDATOR_IMAGE) $(CURDIR)/make-docs $(PROJECTS) .PHONY: vale vale: ## Run vale on the entire docs folder. vale: make-docs - DOCS_IMAGE=$(VALE_IMAGE) $(PWD)/make-docs $(PROJECTS) + DOCS_IMAGE=$(VALE_IMAGE) $(CURDIR)/make-docs $(PROJECTS) .PHONY: update update: ## Fetch the latest version of this Makefile and the `make-docs` script from Writers' Toolkit. diff --git a/docs/make-docs b/docs/make-docs index 689abdaec36f2..43efdb5faad3a 100755 --- a/docs/make-docs +++ b/docs/make-docs @@ -1,10 +1,224 @@ #!/bin/sh # The source of this file is https://raw.githubusercontent.com/grafana/writers-toolkit/main/docs/make-docs. -# 3.0.0 (2023-05-18) +# # `make-docs` procedure changelog +# +# Updates should conform to the guidelines in https://keepachangelog.com/en/1.1.0/. +# [Semantic versioning](https://semver.org/) is used to help the reader identify the significance of changes. +# Changes are relevant to this script and the support docs.mk GNU Make interface. +# +# ## 6.0.1 (2024-02-28) +# +# ### Added +# +# - Suppress new errors relating to absent content introduced in https://github.com/grafana/website/pull/17561. +# +# ## 6.0.0 (2024-02-16) +# +# ### Changed +# +# - Require `jq` for human readable `make doc-validator` output. +# +# ## 5.4.0 (2024-02-12) +# +# ### Changed +# +# - Set `WEBSITE_MOUNTS=true` when a user includes the `website` project. +# +# Ensures consistent behavior across repositories. +# To disable website mounts, add `export WEBSITE_MOUNTS := false` to your `variables.mk` or `variables.mk.local` file. +# - Use website mounts and container volumes also when a user includes the `grafana-cloud` project. +# +# ## 5.3.0 (2024-02-08) +# +# ### Changed +# +# - Updated support for plugins monorepo now that multiple projects have been moved into it. 
+# - Use `printf` instead of `echo` for better portability of output. +# +# https://www.in-ulm.de/~mascheck/various/echo+printf/ +# +# ## 5.2.0 (2024-01-18) +# +# ### Changed +# +# - Updated `make vale` to use latest Vale style and configuration. +# - Updated `make vale` to use platform appropriate image. +# +# ## 5.1.2 (2023-11-08) +# +# ### Added +# +# - Hide manual_mount warning messages from non-debug output. +# Set the DEBUG environment variable to see all hidden messages. +# +# ## 5.1.1 (2023-10-30) +# +# ### Added +# +# - Support for Datadog and Oracle data source plugins repositories. +# +# ## 5.1.0 (2023-10-20) +# +# ### Added +# +# - Support for the plugins monorepo. +# +# ## 5.0.0 (2023-10-18) +# +# ### Added +# +# - Improved support for website repository. +# +# Mount more content and provide some feedback to users that the build can take time. +# +# - Ability to enter the `grafana/docs-base` container with a shell using the `ENTER` environment variable. +# +# ### Fixed +# +# - Correct key combination for interrupting the process. +# +# Keyboards use capital letters so this more accurately reflects the exact key combination users are expected to press. +# +# ### Removed +# +# - Imperfect implementation of container name. +# +# Facilitates running `make vale` and `make docs` at once. +# Container names are convenient for recognition in `docker ps` but the current implementation has more downsides than upsides. +# +# - Forced platform specification now that multiple architecture images exist. +# +# Significantly speeds up build times on larger repositories. +# +# ## 4.2.2 (2023-10-05) + +# - Added support for Jira data source and MongoDB data source plugins repositories. +# +# ## 4.2.1 (2023-09-13) + +# ## Fixed +# +# - Improved consistency of the webserver request loop by polling the Hugo port rather than the proxy port. 
+# +# ## 4.2.0 (2023-09-01) +# +# ### Added +# +# - Retry the initial webserver request up to ten times to allow for the process to start. +# If it is still failing after ten seconds, an error message is logged. +# +# ## 4.1.1 (2023-07-20) +# +# ### Fixed +# +# - Replaced use of `realpath` with POSIX compatible alternative to determine default value for REPOS_PATH. +# +# ## 4.1.0 (2023-06-16) +# +# ### Added +# +# - Mounts of `layouts` and `config` directories for the `website` project. +# Ensures that local changes to mounts or shortcodes are reflected in the development server. +# +# ### Fixed +# +# - Version inference for versioned docs pages. +# Pages in versioned projects now have the `versioned: true` front matter set to ensure that "version" in $.Page.Scratch is set on builds. +# +# ## 4.0.0 (2023-06-06) +# +# ### Removed +# +# - `doc-validator/%` target. +# The behavior of the target was not as described. +# Instead, to limit `doc-validator` to only specific files, refer to https://grafana.com/docs/writers-toolkit/writing-guide/tooling-and-workflows/validate-technical-documentation/#run-on-specific-files. +# +# ## 3.0.0 (2023-05-18) +# +# ### Fixed +# +# - Compatibility with the updated Make targets in the `website` repository. +# `docs` now runs this script itself, `server-docs` builds the site with the `docs` Hugo environment. +# +# ## 2.0.0 (2023-05-18) +# +# ### Added +# +# - Support for the grafana-cloud/frontend-observability/faro-web-sdk project. +# - Use of `doc-validator` v2.0.x which includes breaking changes to command line options. +# +# ### Fixed +# +# - Source grafana-cloud project from website repository. +# +# ### Added +# +# - Support for running the Vale linter with `make vale`. +# +# ## 1.2.1 (2023-05-05) +# +# ### Fixed +# +# - Use `latest` tag of `grafana/vale` image by default instead of hardcoded older version. 
+# - Fix mounting multiple projects broken by the changes in 1.0.1 +# +# ## 1.2.0 (2023-05-05) +# +# ### Added +# +# - Support for running the Vale linter with `make vale`. +# +# ### Fixed +# +# ## 1.1.0 (2023-05-05) +# +# ### Added +# +# - Rewrite error output so it can be followed by text editors. +# +# ### Fixed +# +# - Fix `docs-debug` container process port. +# +# ## 1.0.1 (2023-05-04) +# +# ### Fixed +# +# - Ensure complete section hierarchy so that all projects have a visible menu. +# +# ## 1.0.0 (2023-05-04) +# +# ### Added +# +# - Build multiple projects simultaneously if all projects are checked out locally. +# - Run [`doc-validator`](https://github.com/grafana/technical-documentation/tree/main/tools/cmd/doc-validator) over projects. +# - Redirect project root to mounted version. +# For example redirect `/docs/grafana/` to `/docs/grafana/latest/`. +# - Support for Podman or Docker containers with `PODMAN` environment variable. +# - Support for projects: +# - agent +# - enterprise-logs +# - enterprise-metrics +# - enterprise-traces +# - grafana +# - grafana-cloud +# - grafana-cloud/machine-learning +# - helm-charts/mimir-distributed +# - helm-charts/tempo-distributed +# - incident +# - loki +# - mimir +# - oncall +# - opentelemetry +# - phlare +# - plugins +# - slo +# - tempo +# - writers-toolkit + set -ef -readonly DOCS_CONTAINER="${DOCS_CONTAINER:-make-docs}" readonly DOCS_HOST_PORT="${DOCS_HOST_PORT:-3002}" readonly DOCS_IMAGE="${DOCS_IMAGE:-grafana/docs-base:latest}" @@ -14,11 +228,24 @@ readonly DOC_VALIDATOR_SKIP_CHECKS="${DOC_VALIDATOR_SKIP_CHECKS:-^image-}" readonly HUGO_REFLINKSERRORLEVEL="${HUGO_REFLINKSERRORLEVEL:-WARNING}" readonly VALE_MINALERTLEVEL="${VALE_MINALERTLEVEL:-error}" readonly WEBSITE_EXEC="${WEBSITE_EXEC:-make server-docs}" -# If set, the docs-base image will run a prebuild script that sets up Hugo mounts. 
-readonly WEBSITE_MOUNTS="${WEBSITE_MOUNTS:-}" PODMAN="$(if command -v podman >/dev/null 2>&1; then echo podman; else echo docker; fi)" +if ! command -v curl >/dev/null 2>&1; then + if ! command -v wget >/dev/null 2>&1; then + errr 'either `curl` or `wget` must be installed for this script to work.' + + exit 1 + fi +fi + +if ! command -v "${PODMAN}" >/dev/null 2>&1; then + errr 'either `podman` or `docker` must be installed for this script to work.' + + exit 1 +fi + + about() { cat <&2 @@ -63,21 +290,27 @@ SOURCES_as_code='as-code-docs' SOURCES_enterprise_metrics='backend-enterprise' SOURCES_enterprise_metrics_='backend-enterprise' SOURCES_grafana_cloud='website' +SOURCES_grafana_cloud_alerting_and_irm_machine_learning='machine-learning' +SOURCES_grafana_cloud_alerting_and_irm_slo='slo' SOURCES_grafana_cloud_k6='k6-docs' SOURCES_grafana_cloud_data_configuration_integrations='cloud-onboarding' SOURCES_grafana_cloud_frontend_observability_faro_web_sdk='faro-web-sdk' -SOURCES_grafana_cloud_machine_learning='machine-learning' SOURCES_helm_charts_mimir_distributed='mimir' SOURCES_helm_charts_tempo_distributed='tempo' SOURCES_opentelemetry='opentelemetry-docs' +SOURCES_plugins_grafana_datadog_datasource='datadog-datasource' +SOURCES_plugins_grafana_oracle_datasource='oracle-datasource' VERSIONS_as_code='UNVERSIONED' VERSIONS_grafana_cloud='UNVERSIONED' +VERSIONS_grafana_cloud_alerting_and_irm_machine_learning='UNVERSIONED' +VERSIONS_grafana_cloud_alerting_and_irm_slo='UNVERSIONED' VERSIONS_grafana_cloud_k6='UNVERSIONED' VERSIONS_grafana_cloud_data_configuration_integrations='UNVERSIONED' VERSIONS_grafana_cloud_frontend_observability_faro_web_sdk='UNVERSIONED' -VERSIONS_grafana_cloud_machine_learning='UNVERSIONED' VERSIONS_opentelemetry='UNVERSIONED' +VERSIONS_plugins_grafana_datadog_datasource='latest' +VERSIONS_plugins_grafana_oracle_datasource='latest' VERSIONS_technical_documentation='UNVERSIONED' VERSIONS_website='UNVERSIONED' VERSIONS_writers_toolkit='UNVERSIONED' 
@@ -86,8 +319,10 @@ PATHS_grafana_cloud='content/docs/grafana-cloud' PATHS_helm_charts_mimir_distributed='docs/sources/helm-charts/mimir-distributed' PATHS_helm_charts_tempo_distributed='docs/sources/helm-charts/tempo-distributed' PATHS_mimir='docs/sources/mimir' +PATHS_plugins_grafana_datadog_datasource='docs/sources' +PATHS_plugins_grafana_oracle_datasource='docs/sources' PATHS_tempo='docs/sources/tempo' -PATHS_website='content/docs' +PATHS_website='content' # identifier STR # Replace characters that are not valid in an identifier with underscores. @@ -102,6 +337,77 @@ aget() { eval echo '$'"$(identifier "$1")_$(identifier "$2")" } +# src returns the project source repository name for a project. +src() { + _project="$1" + + case "${_project}" in + plugins/*) + if [ -z "$(aget SOURCES "${_project}")" ]; then + echo plugins-private + else + aget SOURCES "${_project}" + fi + ;; + *) + if [ -z "$(aget SOURCES "${_project}")" ]; then + echo "${_project}" + else + aget SOURCES "${_project}" + fi + ;; + esac + + unset _project +} + +# path returns the relative path within the repository that contain the docs for a project. +path() { + _project="$1" + + case "${_project}" in + plugins/*) + if [ -z "$(aget PATHS "${_project}")" ]; then + echo "${_project}/docs/sources" + else + aget PATHS "${_project}" + fi + ;; + *) + if [ -z "$(aget PATHS "${_project}")" ]; then + echo "docs/sources" + else + aget PATHS "${_project}" + fi + esac + + unset _project +} + +# version returns the version for a project. Unversioned projects return the special value 'UNVERSIONED'. +version() { + _project="$1" + + case "${_project}" in + plugins/*) + if [ -z "$(aget VERSIONS "${_project}")" ]; then + echo "UNVERSIONED" + else + aget VERSIONS "${_project}" + fi + ;; + *) + if [ -z "$(aget VERSIONS "${_project}")" ]; then + echo latest + else + aget VERSIONS "${_project}" + fi + esac + + unset _project +} + + # new_proj populates a new project structure. 
new_proj() { _project="$1" @@ -112,31 +418,19 @@ new_proj() { # If version is not set, use the script mapping of project to default versions if it exists. # Fallback to 'latest'. if [ -z "${_version}" ]; then - if [ -z "$(aget VERSIONS "${_project}")" ]; then - _version=latest - else - _version="$(aget VERSIONS "${_project}")" - fi + _version="$(version "${_project}")" fi # If repo is not set, use the script mapping of project to repo name if it exists. # Fallback to using the project name. if [ -z "${_repo}" ]; then - if [ -z "$(aget SOURCES "${_project}")" ]; then - _repo="${_project}" - else - _repo="$(aget SOURCES "${_project}")" - fi + _repo="$(src "${_project}")" fi # If path is not set, use the script mapping of project to docs sources path if it exists. # Fallback to using 'docs/sources'. if [ -z "${_path}" ]; then - if [ -z "$(aget PATHS "${_project}")" ]; then - _path="docs/sources" - else - _path="$(aget PATHS "${_project}")" - fi + _path="$(path "${_project}")" fi echo "${_project}:${_version}:${_repo}:${_path}" @@ -150,7 +444,7 @@ proj_url() { $1 POSIX_HERESTRING - if [ "${_project}" = 'website' ]; then + if [ "${_project}" = website ]; then echo "http://localhost:${DOCS_HOST_PORT}/docs/" unset _project _version @@ -184,8 +478,8 @@ proj_dst() { $1 POSIX_HERESTRING - if [ "${_project}" = 'website' ]; then - echo '/hugo/content/docs' + if [ "${_project}" = website ]; then + echo '/hugo/content' unset _project _version return @@ -214,9 +508,10 @@ repo_path() { done unset IFS - echo "ERRR: could not find project '${_repo}' in any of the paths in REPOS_PATH '${REPOS_PATH}'." >&2 - echo "NOTE: you must have a checkout of the project '${_repo}' at '${REPOS_PATH##:*}/${_repo}'." >&2 - echo "NOTE: if you have cloned the repository into a directory with a different name, consider changing it to ${_repo}." >&2 + errr "could not find project '${_repo}' in any of the paths in REPOS_PATH '${REPOS_PATH}'." 
+ note "you must have a checkout of the project '${_repo}' at '${REPOS_PATH##:*}/${_repo}'." + note "if you have cloned the repository into a directory with a different name, consider changing it to ${_repo}." + unset _repo exit 1 } @@ -242,7 +537,7 @@ proj_canonical() { $1 POSIX_HERESTRING - if [ "${_project}" = 'website' ]; then + if [ "${_project}" = website ]; then echo '/docs' unset _project _version @@ -302,6 +597,59 @@ POSIX_HERESTRING unset _project _version _repo _path } +await_build() { + url="$1" + req="$(if command -v curl >/dev/null 2>&1; then echo 'curl -s -o /dev/null'; else echo 'wget -q'; fi)" + + i=1 + max=10 + while [ "${i}" -ne "${max}" ] + do + sleep 1 + debg "Retrying request to web server assuming the process is still starting up." + i=$((i + 1)) + + if ${req} "${url}"; then + printf '\r\nView documentation locally:\r\n' + for x in ${url_src_dst_vers}; do + IFS='^' read -r url _ _ <&2 + fi +} + +errr() { + printf 'ERRR: %s\r\n' "$1" >&2 +} + +note() { + printf 'NOTE: %s\r\n' "$1" >&2 +} + url_src_dst_vers="$(url_src_dst_vers "$@")" volumes="" @@ -311,9 +659,20 @@ for arg in "$@"; do IFS=: read -r _project _ _repo _ <&2 - echo "Is '${_src}' the correct source directory?" >&2 + errr "Index file '${_src}/_index.md' does not exist." + note "Is '${_src}' the correct source directory?" exit 1 fi fi - echo "DEBG: Mounting '${_src}' at container path '${_dst}'" >&2 + debg "Mounting '${_src}' at container path '${_dst}'" + if [ -z "${volumes}" ]; then volumes="--volume=${_src}:${_dst}" else @@ -354,53 +714,70 @@ POSIX_HERESTRING case "${image}" in 'grafana/doc-validator') + if ! command -v jq >/dev/null 2>&1; then + errr '`jq` must be installed for the `doc-validator` target to work.' 
+ note 'To install `jq`, refer to https://jqlang.github.io/jq/download/,' + + exit 1 + fi + proj="$(new_proj "$1")" - echo + printf '\r\n' "${PODMAN}" run \ - --init \ - --interactive \ - --name "${DOCS_CONTAINER}" \ - --platform linux/amd64 \ - --rm \ - --tty \ - ${volumes} \ - "${DOCS_IMAGE}" \ - --include="${DOC_VALIDATOR_INCLUDE}" \ - --skip-checks="${DOC_VALIDATOR_SKIP_CHECKS}" \ - /hugo/content/docs \ - "$(proj_canonical "${proj}")" | sed "s#$(proj_dst "${proj}")#sources#" + --init \ + --interactive \ + --platform linux/amd64 \ + --rm \ + --tty \ + ${volumes} \ + "${DOCS_IMAGE}" \ + "--include=${DOC_VALIDATOR_INCLUDE}" \ + "--skip-checks=${DOC_VALIDATOR_SKIP_CHECKS}" \ + "/hugo/content$(proj_canonical "${proj}")" \ + "$(proj_canonical "${proj}")" \ + | sed "s#$(proj_dst "${proj}")#sources#" \ + | jq -r '"ERROR: \(.location.path):\(.location.range.start.line // 1):\(.location.range.start.column // 1): \(.message)" + if .suggestions[0].text then "\nSuggestion: \(.suggestions[0].text)" else "" end' ;; 'grafana/vale') proj="$(new_proj "$1")" - echo + printf '\r\n' "${PODMAN}" run \ - --init \ - --interactive \ - --name "${DOCS_CONTAINER}" \ - --platform linux/amd64 \ - --rm \ - --tty \ - ${volumes} \ - "${DOCS_IMAGE}" \ - --minAlertLevel="${VALE_MINALERTLEVEL}" \ - --config=/etc/vale/.vale.ini \ - --output=line \ - /hugo/content/docs | sed "s#$(proj_dst "${proj}")#sources#" + --init \ + --interactive \ + --rm \ + --workdir /etc/vale \ + --tty \ + ${volumes} \ + "${DOCS_IMAGE}" \ + "--minAlertLevel=${VALE_MINALERTLEVEL}" \ + '--glob=*.md' \ + --output=/etc/vale/rdjsonl.tmpl \ + /hugo/content/docs | sed "s#$(proj_dst "${proj}")#sources#" ;; *) tempfile="$(mktemp -t make-docs.XXX)" cat <"${tempfile}" #!/usr/bin/env bash + +tc() { + set \${*,,} + echo \${*^} +} + for redirect in ${redirects}; do IFS='^' read -r path ver <<<"\${redirect}" - echo -e "---\\nredirectURL: \"\${path/\/hugo\/content/}\"\\ntype: redirect\\n---\\n" > "\${path/\${ver}/_index.md}" + echo -e 
"---\\nredirectURL: \"\${path/\/hugo\/content/}\"\\ntype: redirect\\nversioned: true\\n---\\n" > "\${path/\${ver}/_index.md}" done for x in "${url_src_dst_vers}"; do IFS='^' read -r _ _ dst _ <<<"\${x}" + title="\${dst%/*}" + title="\$(tc \${title##*/})" while [[ -n "\${dst}" ]]; do - touch "\${dst}/_index.md" + if [[ ! -f "\${dst}/_index.md" ]]; then + echo -e "---title: \${title}\\n---\\n\\n# \${title}\\n\\n{{< section >}}" > "\${dst}/_index.md" + fi dst="\${dst%/*}" done done @@ -412,36 +789,47 @@ fi ${WEBSITE_EXEC} EOF chmod +x "${tempfile}" - volumes="${volumes} --volume=$(realpath "${tempfile}"):/entrypoint" + volumes="${volumes} --volume=${tempfile}:/entrypoint" readonly volumes - echo - echo "Documentation will be served at the following URLs:" - for x in ${url_src_dst_vers}; do - IFS='^' read -r url _ _ <&1\ + | sed -u \ + -e '/Web Server is available at http:\/\/localhost:3003\/ (bind address 0.0.0.0)/ d' \ + -e '/^hugo server/ d' \ + -e '/fatal: not a git repository (or any parent up to mount point \/)/ d' \ + -e '/Stopping at filesystem boundary (GIT_DISCOVERY_ACROSS_FILESYSTEM not set)./ d' \ + -e "/Makefile:[0-9]*: warning: overriding recipe for target 'docs'/ d" \ + -e "/docs.mk:[0-9]*: warning: ignoring old recipe for target 'docs'/ d" \ + -e '/\/usr\/bin\/make -j 2 proxy hserver-docs HUGO_PORT=3003/ d' \ + -e '/website-proxy/ d' \ + -e '/rm -rf dist*/ d' \ + -e '/Press Ctrl+C to stop/ d' \ + -e '/make/ d' \ + -e '/WARNING: The manual_mount source directory/ d' \ + -e '/docs\/_index.md .* not found/ d' + fi ;; esac diff --git a/docs/sources/_index.md b/docs/sources/_index.md index 8c3871671c3a6..3428c662cd50f 100644 --- a/docs/sources/_index.md +++ b/docs/sources/_index.md @@ -16,4 +16,4 @@ Unlike other logging systems, Loki is built around the idea of only indexing met Log data itself is then compressed and stored in chunks in object stores such as Amazon Simple Storage Service (S3) or Google Cloud Storage (GCS), or even locally on the filesystem. 
A small index and highly compressed chunks simplifies the operation and significantly lowers the cost of Loki. -For more information, see the [Loki overview]({{< relref "./get-started/overview" >}}) +For more information, see the [Loki overview]({{< relref "./get-started/overview" >}}). diff --git a/docs/sources/alert/_index.md b/docs/sources/alert/_index.md index 81a2671c2101f..2d4b19477a90c 100644 --- a/docs/sources/alert/_index.md +++ b/docs/sources/alert/_index.md @@ -167,7 +167,7 @@ ruler: url: http://localhost:9090/api/v1/write ``` -Further configuration options can be found under [ruler]({{< relref "../configure#ruler" >}}). +Further configuration options can be found under [ruler](https://grafana.com/docs/loki//configure/#ruler). ### Operations @@ -360,7 +360,7 @@ ruler: The Ruler supports the following types of storage: `azure`, `gcs`, `s3`, `swift`, `cos` and `local`. Most kinds of storage work with the sharded Ruler configuration in an obvious way, that is, configure all Rulers to use the same backend. -The local implementation reads the rule files off of the local filesystem. This is a read-only backend that does not support the creation and deletion of rules through the [Ruler API]({{< relref "../reference/api#ruler" >}}). Despite the fact that it reads the local filesystem this method can still be used in a sharded Ruler configuration if the operator takes care to load the same rules to every Ruler. For instance, this could be accomplished by mounting a [Kubernetes ConfigMap](https://kubernetes.io/docs/concepts/configuration/configmap/) onto every Ruler pod. +The local implementation reads the rule files off of the local filesystem. This is a read-only backend that does not support the creation and deletion of rules through the [Ruler API](https://grafana.com/docs/loki//reference/loki-http-api#ruler). 
Despite the fact that it reads the local filesystem this method can still be used in a sharded Ruler configuration if the operator takes care to load the same rules to every Ruler. For instance, this could be accomplished by mounting a [Kubernetes ConfigMap](https://kubernetes.io/docs/concepts/configuration/configmap/) onto every Ruler pod. A typical local configuration might look something like: ``` diff --git a/docs/sources/community/design-documents/2020-02-Promtail-Push-API.md b/docs/sources/community/design-documents/2020-02-Promtail-Push-API.md index 77268ee5f55c5..5eceb8eeec2d9 100644 --- a/docs/sources/community/design-documents/2020-02-Promtail-Push-API.md +++ b/docs/sources/community/design-documents/2020-02-Promtail-Push-API.md @@ -66,7 +66,7 @@ rejected pushes. Users are recommended to do one of the following: ## Implementation As discussed in this document, this feature will be implemented by copying the -existing [Loki Push API](/docs/loki/latest/api/#post-lokiapiv1push) +existing [Loki Push API](/docs/loki//api/#post-lokiapiv1push) and exposing it via Promtail. ## Considered Alternatives diff --git a/docs/sources/community/maintaining/release/prepare-release-notes.md b/docs/sources/community/maintaining/release/prepare-release-notes.md index 13fb910ee9ecc..9b16671e0e9cf 100644 --- a/docs/sources/community/maintaining/release/prepare-release-notes.md +++ b/docs/sources/community/maintaining/release/prepare-release-notes.md @@ -22,6 +22,8 @@ Release notes are the few key highlights of the release. This is what appears on ## Steps +**This section is out of date now that we use conventional commits, to be updated (Trevor/Callum)** + 1. Discuss with Loki team to finalize what PRs should be part of release notes. 1. Go to the PR and add a label `add-to-release-notes`. Example [PR](https://github.com/grafana/loki/pull/10213) with label added. 
diff --git a/docs/sources/configure/_index.md b/docs/sources/configure/_index.md index a4cff42832af8..a8f7fef097d60 100644 --- a/docs/sources/configure/_index.md +++ b/docs/sources/configure/_index.md @@ -9,6056 +9,12 @@ weight: 400 # Grafana Loki configuration parameters - - Grafana Loki is configured in a YAML file (usually referred to as `loki.yaml` ) which contains information on the Loki server and its individual components, depending on which mode Loki is launched in. Configuration examples can be found in the [Configuration Examples]({{< relref "./examples/configuration-examples" >}}) document. -## Printing Loki config at runtime - -If you pass Loki the flag `-print-config-stderr` or `-log-config-reverse-order`, (or `-print-config-stderr=true`) -Loki will dump the entire config object it has created from the built-in defaults combined first with -overrides from config file, and second by overrides from flags. - -The result is the value for every config object in the Loki config struct, which is very large... - -Many values will not be relevant to your install such as storage configs which you are not using and which you did not define, -this is expected as every option has a default value if it is being used or not. - -This config is what Loki will use to run, it can be invaluable for debugging issues related to configuration and -is especially useful in making sure your config files and flags are being read and loaded properly. - -`-print-config-stderr` is nice when running Loki directly e.g. `./loki ` as you can get a quick output of the entire Loki config. - -`-log-config-reverse-order` is the flag we run Loki with in all our environments, the config entries are reversed so -that the order of configs reads correctly top to bottom when viewed in Grafana's Explore. - -## Reload at runtime - -Promtail can reload its configuration at runtime. If the new configuration -is not well-formed, the changes will not be applied. 
-A configuration reload is triggered by sending a `SIGHUP` to the Promtail process or -sending a HTTP POST request to the `/reload` endpoint (when the `--server.enable-runtime-reload` flag is enabled). - -## Configuration file reference - -To specify which configuration file to load, pass the `-config.file` flag at the -command line. The value can be a list of comma separated paths, then the first -file that exists will be used. -If no `-config.file` argument is specified, Loki will look up the `config.yaml` in the -current working directory and the `config/` subdirectory and try to use that. - -The file is written in [YAML -format](https://en.wikipedia.org/wiki/YAML), defined by the scheme below. -Brackets indicate that a parameter is optional. For non-list parameters the -value is set to the specified default. - -### Use environment variables in the configuration - -> **Note:** This feature is only available in Loki 2.1+. - -You can use environment variable references in the configuration file to set values that need to be configurable during deployment. -To do this, pass `-config.expand-env=true` and use: - -``` -${VAR} -``` - -Where VAR is the name of the environment variable. - -Each variable reference is replaced at startup by the value of the environment variable. -The replacement is case-sensitive and occurs before the YAML file is parsed. -References to undefined variables are replaced by empty strings unless you specify a default value or custom error text. - -To specify a default value, use: - -``` -${VAR:-default_value} -``` - -Where default_value is the value to use if the environment variable is undefined. - -Pass the `-config.expand-env` flag at the command line to enable this way of setting configs. 
- -### Generic placeholders - -- `` : a boolean that can take the values `true` or `false` -- `` : any integer matching the regular expression `[1-9]+[0-9]*` -- `` : a duration matching the regular expression `[0-9]+(ns|us|µs|ms|[smh])` -- `` : a string matching the regular expression `[a-zA-Z_][a-zA-Z0-9_]*` -- `` : a string of unicode characters -- `` : a valid path relative to current working directory or an absolute path. -- `` : a valid string consisting of a hostname or IP followed by an optional port number -- `` : a string -- `` : a string that represents a secret, such as a password - -### Supported contents and default values of `loki.yaml` - -```yaml -# A comma-separated list of components to run. The default value 'all' runs Loki -# in single binary mode. The value 'read' is an alias to run only read-path -# related components such as the querier and query-frontend, but all in the same -# process. The value 'write' is an alias to run only write-path related -# components such as the distributor and compactor, but all in the same process. -# Supported values: all, compactor, distributor, ingester, querier, -# query-scheduler, ingester-querier, query-frontend, index-gateway, ruler, -# table-manager, read, write. A full list of available targets can be printed -# when running Loki with the '-list-targets' command line flag. -# CLI flag: -target -[target: | default = "all"] - -# Enables authentication through the X-Scope-OrgID header, which must be present -# if true. If false, the OrgID will always be set to 'fake'. -# CLI flag: -auth.enabled -[auth_enabled: | default = true] - -# The amount of virtual memory in bytes to reserve as ballast in order to -# optimize garbage collection. Larger ballasts result in fewer garbage -# collection passes, reducing CPU overhead at the cost of heap size. The ballast -# will not consume physical memory, because it is never read from. It will, -# however, distort metrics, because it is counted as live memory. 
-# CLI flag: -config.ballast-bytes -[ballast_bytes: | default = 0] - -# Configures the server of the launched module(s). -[server: ] - -# Configures the distributor. -[distributor: ] - -# Configures the querier. Only appropriate when running all modules or just the -# querier. -[querier: ] - -# The query_scheduler block configures the Loki query scheduler. When configured -# it separates the tenant query queues from the query-frontend. -[query_scheduler: ] - -# The frontend block configures the Loki query-frontend. -[frontend: ] - -# The query_range block configures the query splitting and caching in the Loki -# query-frontend. -[query_range: ] - -# The ruler block configures the Loki ruler. -[ruler: ] - -# The ingester_client block configures how the distributor will connect to -# ingesters. Only appropriate when running all components, the distributor, or -# the querier. -[ingester_client: ] - -# The ingester block configures the ingester and how the ingester will register -# itself to a key value store. -[ingester: ] - -# The index_gateway block configures the Loki index gateway server, responsible -# for serving index queries without the need to constantly interact with the -# object store. -[index_gateway: ] - -# The bloom_compactor block configures the Loki bloom compactor server, -# responsible for compacting stream indexes into bloom filters and merging them -# as bloom blocks -[bloom_compactor: ] - -# The bloom_gateway block configures the Loki bloom gateway server, responsible -# for serving queries for filtering chunks based on filter expressions. -[bloom_gateway: ] - -# The storage_config block configures one of many possible stores for both the -# index and chunks. Which configuration to be picked should be defined in -# schema_config block. -[storage_config: ] - -# The chunk_store_config block configures how chunks will be cached and how long -# to wait before saving them to the backing store. 
-[chunk_store_config: ] - -# Configures the chunk index schema and where it is stored. -[schema_config: ] - -# The compactor block configures the compactor component, which compacts index -# shards for performance. -[compactor: ] - -# The limits_config block configures global and per-tenant limits in Loki. -[limits_config: ] - -# The frontend_worker configures the worker - running within the Loki querier - -# picking up and executing queries enqueued by the query-frontend. -[frontend_worker: ] - -# The table_manager block configures the table manager for retention. -[table_manager: ] - -# Configuration for memberlist client. Only applies if the selected kvstore is -# memberlist. -# -# When a memberlist config with atleast 1 join_members is defined, kvstore of -# type memberlist is automatically selected for all the components that require -# a ring unless otherwise specified in the component's configuration section. -[memberlist: ] - -# Configuration for 'runtime config' module, responsible for reloading runtime -# configuration file. -[runtime_config: ] - -# Configuration for tracing. -[tracing: ] - -# Configuration for analytics. -[analytics: ] - -# Common configuration to be shared between multiple modules. If a more specific -# configuration is given in other sections, the related configuration within -# this section will be ignored. -[common: ] - -# How long to wait between SIGTERM and shutdown. After receiving SIGTERM, Loki -# will report 503 Service Unavailable status via /ready endpoint. -# CLI flag: -shutdown-delay -[shutdown_delay: | default = 0s] - -# Namespace of the metrics that in previous releases had cortex as namespace. -# This setting is deprecated and will be removed in the next minor release. -# CLI flag: -metrics-namespace -[metrics_namespace: | default = "loki"] -``` - -### server - -Configures the `server` of the launched module(s). 
- -```yaml -# HTTP server listen network, default tcp -# CLI flag: -server.http-listen-network -[http_listen_network: | default = "tcp"] - -# HTTP server listen address. -# CLI flag: -server.http-listen-address -[http_listen_address: | default = ""] - -# HTTP server listen port. -# CLI flag: -server.http-listen-port -[http_listen_port: | default = 3100] - -# Maximum number of simultaneous http connections, <=0 to disable -# CLI flag: -server.http-conn-limit -[http_listen_conn_limit: | default = 0] - -# gRPC server listen network -# CLI flag: -server.grpc-listen-network -[grpc_listen_network: | default = "tcp"] - -# gRPC server listen address. -# CLI flag: -server.grpc-listen-address -[grpc_listen_address: | default = ""] - -# gRPC server listen port. -# CLI flag: -server.grpc-listen-port -[grpc_listen_port: | default = 9095] - -# Maximum number of simultaneous grpc connections, <=0 to disable -# CLI flag: -server.grpc-conn-limit -[grpc_listen_conn_limit: | default = 0] - -# Comma-separated list of cipher suites to use. If blank, the default Go cipher -# suites is used. -# CLI flag: -server.tls-cipher-suites -[tls_cipher_suites: | default = ""] - -# Minimum TLS version to use. Allowed values: VersionTLS10, VersionTLS11, -# VersionTLS12, VersionTLS13. If blank, the Go TLS minimum version is used. -# CLI flag: -server.tls-min-version -[tls_min_version: | default = ""] - -http_tls_config: - # Server TLS certificate. This configuration parameter is YAML only. - [cert: | default = ""] - - # Server TLS key. This configuration parameter is YAML only. - [key: | default = ""] - - # Root certificate authority used to verify client certificates. This - # configuration parameter is YAML only. - [client_ca: | default = ""] - - # HTTP server cert path. - # CLI flag: -server.http-tls-cert-path - [cert_file: | default = ""] - - # HTTP server key path. - # CLI flag: -server.http-tls-key-path - [key_file: | default = ""] - - # HTTP TLS Client Auth type. 
- # CLI flag: -server.http-tls-client-auth - [client_auth_type: | default = ""] - - # HTTP TLS Client CA path. - # CLI flag: -server.http-tls-ca-path - [client_ca_file: | default = ""] - -grpc_tls_config: - # Server TLS certificate. This configuration parameter is YAML only. - [cert: | default = ""] - - # Server TLS key. This configuration parameter is YAML only. - [key: | default = ""] - - # Root certificate authority used to verify client certificates. This - # configuration parameter is YAML only. - [client_ca: | default = ""] - - # GRPC TLS server cert path. - # CLI flag: -server.grpc-tls-cert-path - [cert_file: | default = ""] - - # GRPC TLS server key path. - # CLI flag: -server.grpc-tls-key-path - [key_file: | default = ""] - - # GRPC TLS Client Auth type. - # CLI flag: -server.grpc-tls-client-auth - [client_auth_type: | default = ""] - - # GRPC TLS Client CA path. - # CLI flag: -server.grpc-tls-ca-path - [client_ca_file: | default = ""] - -# Register the intrumentation handlers (/metrics etc). -# CLI flag: -server.register-instrumentation -[register_instrumentation: | default = true] - -# If set to true, gRPC statuses will be reported in instrumentation labels with -# their string representations. Otherwise, they will be reported as "error". -# CLI flag: -server.report-grpc-codes-in-instrumentation-label-enabled -[report_grpc_codes_in_instrumentation_label_enabled: | default = false] - -# Timeout for graceful shutdowns -# CLI flag: -server.graceful-shutdown-timeout -[graceful_shutdown_timeout: | default = 30s] - -# Read timeout for entire HTTP request, including headers and body. -# CLI flag: -server.http-read-timeout -[http_server_read_timeout: | default = 30s] - -# Read timeout for HTTP request headers. If set to 0, value of -# -server.http-read-timeout is used. 
-# CLI flag: -server.http-read-header-timeout -[http_server_read_header_timeout: | default = 0s] - -# Write timeout for HTTP server -# CLI flag: -server.http-write-timeout -[http_server_write_timeout: | default = 30s] - -# Idle timeout for HTTP server -# CLI flag: -server.http-idle-timeout -[http_server_idle_timeout: | default = 2m] - -# Log closed connections that did not receive any response, most likely because -# client didn't send any request within timeout. -# CLI flag: -server.http-log-closed-connections-without-response-enabled -[http_log_closed_connections_without_response_enabled: | default = false] - -# Limit on the size of a gRPC message this server can receive (bytes). -# CLI flag: -server.grpc-max-recv-msg-size-bytes -[grpc_server_max_recv_msg_size: | default = 4194304] - -# Limit on the size of a gRPC message this server can send (bytes). -# CLI flag: -server.grpc-max-send-msg-size-bytes -[grpc_server_max_send_msg_size: | default = 4194304] - -# Limit on the number of concurrent streams for gRPC calls per client connection -# (0 = unlimited) -# CLI flag: -server.grpc-max-concurrent-streams -[grpc_server_max_concurrent_streams: | default = 100] - -# The duration after which an idle connection should be closed. Default: -# infinity -# CLI flag: -server.grpc.keepalive.max-connection-idle -[grpc_server_max_connection_idle: | default = 2562047h47m16.854775807s] - -# The duration for the maximum amount of time a connection may exist before it -# will be closed. Default: infinity -# CLI flag: -server.grpc.keepalive.max-connection-age -[grpc_server_max_connection_age: | default = 2562047h47m16.854775807s] - -# An additive period after max-connection-age after which the connection will be -# forcibly closed. 
Default: infinity -# CLI flag: -server.grpc.keepalive.max-connection-age-grace -[grpc_server_max_connection_age_grace: | default = 2562047h47m16.854775807s] - -# Duration after which a keepalive probe is sent in case of no activity over the -# connection., Default: 2h -# CLI flag: -server.grpc.keepalive.time -[grpc_server_keepalive_time: | default = 2h] - -# After having pinged for keepalive check, the duration after which an idle -# connection should be closed, Default: 20s -# CLI flag: -server.grpc.keepalive.timeout -[grpc_server_keepalive_timeout: | default = 20s] - -# Minimum amount of time a client should wait before sending a keepalive ping. -# If client sends keepalive ping more often, server will send GOAWAY and close -# the connection. -# CLI flag: -server.grpc.keepalive.min-time-between-pings -[grpc_server_min_time_between_pings: | default = 10s] - -# If true, server allows keepalive pings even when there are no active -# streams(RPCs). If false, and client sends ping when there are no active -# streams, server will send GOAWAY and close the connection. -# CLI flag: -server.grpc.keepalive.ping-without-stream-allowed -[grpc_server_ping_without_stream_allowed: | default = true] - -# If non-zero, configures the amount of GRPC server workers used to serve the -# requests. -# CLI flag: -server.grpc.num-workers -[grpc_server_num_workers: | default = 0] - -# Output log messages in the given format. Valid formats: [logfmt, json] -# CLI flag: -log.format -[log_format: | default = "logfmt"] - -# Only log messages with the given severity or above. Valid levels: [debug, -# info, warn, error] -# CLI flag: -log.level -[log_level: | default = "info"] - -# Optionally log the source IPs. -# CLI flag: -server.log-source-ips-enabled -[log_source_ips_enabled: | default = false] - -# Header field storing the source IPs. Only used if -# server.log-source-ips-enabled is true. 
If not set the default Forwarded, -# X-Real-IP and X-Forwarded-For headers are used -# CLI flag: -server.log-source-ips-header -[log_source_ips_header: | default = ""] - -# Regex for matching the source IPs. Only used if server.log-source-ips-enabled -# is true. If not set the default Forwarded, X-Real-IP and X-Forwarded-For -# headers are used -# CLI flag: -server.log-source-ips-regex -[log_source_ips_regex: | default = ""] - -# Optionally log request headers. -# CLI flag: -server.log-request-headers -[log_request_headers: | default = false] - -# Optionally log requests at info level instead of debug level. Applies to -# request headers as well if server.log-request-headers is enabled. -# CLI flag: -server.log-request-at-info-level-enabled -[log_request_at_info_level_enabled: | default = false] - -# Comma separated list of headers to exclude from loggin. Only used if -# server.log-request-headers is true. -# CLI flag: -server.log-request-headers-exclude-list -[log_request_exclude_headers_list: | default = ""] - -# Base path to serve all API routes from (e.g. /v1/) -# CLI flag: -server.path-prefix -[http_path_prefix: | default = ""] -``` - -### distributor - -Configures the `distributor`. - -```yaml -ring: - kvstore: - # Backend storage to use for the ring. Supported values are: consul, etcd, - # inmemory, memberlist, multi. - # CLI flag: -distributor.ring.store - [store: | default = "consul"] - - # The prefix for the keys in the store. Should end with a /. - # CLI flag: -distributor.ring.prefix - [prefix: | default = "collectors/"] - - # Configuration for a Consul client. Only applies if the selected kvstore is - # consul. - # The CLI flags prefix for this block configuration is: distributor.ring - [consul: ] - - # Configuration for an ETCD v3 client. Only applies if the selected kvstore - # is etcd. - # The CLI flags prefix for this block configuration is: distributor.ring - [etcd: ] - - multi: - # Primary backend storage used by multi-client. 
- # CLI flag: -distributor.ring.multi.primary - [primary: | default = ""] - - # Secondary backend storage used by multi-client. - # CLI flag: -distributor.ring.multi.secondary - [secondary: | default = ""] - - # Mirror writes to secondary store. - # CLI flag: -distributor.ring.multi.mirror-enabled - [mirror_enabled: | default = false] - - # Timeout for storing value to secondary store. - # CLI flag: -distributor.ring.multi.mirror-timeout - [mirror_timeout: | default = 2s] - - # Period at which to heartbeat to the ring. 0 = disabled. - # CLI flag: -distributor.ring.heartbeat-period - [heartbeat_period: | default = 5s] - - # The heartbeat timeout after which distributors are considered unhealthy - # within the ring. 0 = never (timeout disabled). - # CLI flag: -distributor.ring.heartbeat-timeout - [heartbeat_timeout: | default = 1m] - - # Name of network interface to read address from. - # CLI flag: -distributor.ring.instance-interface-names - [instance_interface_names: | default = []] - -rate_store: - # The max number of concurrent requests to make to ingester stream apis - # CLI flag: -distributor.rate-store.max-request-parallelism - [max_request_parallelism: | default = 200] - - # The interval on which distributors will update current stream rates from - # ingesters - # CLI flag: -distributor.rate-store.stream-rate-update-interval - [stream_rate_update_interval: | default = 1s] - - # Timeout for communication between distributors and any given ingester when - # updating rates - # CLI flag: -distributor.rate-store.ingester-request-timeout - [ingester_request_timeout: | default = 500ms] - - # If enabled, detailed logs and spans will be emitted. - # CLI flag: -distributor.rate-store.debug - [debug: | default = false] - -# Experimental. Customize the logging of write failures. -write_failures_logging: - # Experimental and subject to change. Log volume allowed (per second). - # Default: 1KB. 
- # CLI flag: -distributor.write-failures-logging.rate - [rate: | default = 1KB] - - # Experimental and subject to change. Whether a insight=true key should be - # logged or not. Default: false. - # CLI flag: -distributor.write-failures-logging.add-insights-label - [add_insights_label: | default = false] - -otlp_config: - # List of default otlp resource attributes to be picked as index labels - # CLI flag: -distributor.otlp.default_resource_attributes_as_index_labels - [default_resource_attributes_as_index_labels: | default = [service.name service.namespace service.instance.id deployment.environment cloud.region cloud.availability_zone k8s.cluster.name k8s.namespace.name k8s.pod.name k8s.container.name container.name k8s.replicaset.name k8s.deployment.name k8s.statefulset.name k8s.daemonset.name k8s.cronjob.name k8s.job.name]] -``` - -### querier - -Configures the `querier`. Only appropriate when running all modules or just the querier. - -```yaml -# Maximum duration for which the live tailing requests are served. -# CLI flag: -querier.tail-max-duration -[tail_max_duration: | default = 1h] - -# Time to wait before sending more than the minimum successful query requests. -# CLI flag: -querier.extra-query-delay -[extra_query_delay: | default = 0s] - -# Maximum lookback beyond which queries are not sent to ingester. 0 means all -# queries are sent to ingester. -# CLI flag: -querier.query-ingesters-within -[query_ingesters_within: | default = 3h] - -engine: - # The maximum amount of time to look back for log lines. Used only for instant - # log queries. - # CLI flag: -querier.engine.max-lookback-period - [max_look_back_period: | default = 30s] - -# The maximum number of queries that can be simultaneously processed by the -# querier. -# CLI flag: -querier.max-concurrent -[max_concurrent: | default = 4] - -# Only query the store, and not attempt any ingesters. This is useful for -# running a standalone querier pool operating only against stored data. 
-# CLI flag: -querier.query-store-only -[query_store_only: | default = false] - -# When true, queriers only query the ingesters, and not stored data. This is -# useful when the object store is unavailable. -# CLI flag: -querier.query-ingester-only -[query_ingester_only: | default = false] - -# When true, allow queries to span multiple tenants. -# CLI flag: -querier.multi-tenant-queries-enabled -[multi_tenant_queries_enabled: | default = false] - -# When true, querier limits sent via a header are enforced. -# CLI flag: -querier.per-request-limits-enabled -[per_request_limits_enabled: | default = false] -``` - -### query_scheduler - -The `query_scheduler` block configures the Loki query scheduler. When configured it separates the tenant query queues from the query-frontend. - -```yaml -# Maximum number of outstanding requests per tenant per query-scheduler. -# In-flight requests above this limit will fail with HTTP response status code -# 429. -# CLI flag: -query-scheduler.max-outstanding-requests-per-tenant -[max_outstanding_requests_per_tenant: | default = 32000] - -# Maximum number of levels of nesting of hierarchical queues. 0 means that -# hierarchical queues are disabled. -# CLI flag: -query-scheduler.max-queue-hierarchy-levels -[max_queue_hierarchy_levels: | default = 3] - -# If a querier disconnects without sending notification about graceful shutdown, -# the query-scheduler will keep the querier in the tenant's shard until the -# forget delay has passed. This feature is useful to reduce the blast radius -# when shuffle-sharding is enabled. -# CLI flag: -query-scheduler.querier-forget-delay -[querier_forget_delay: | default = 0s] - -# This configures the gRPC client used to report errors back to the -# query-frontend. -# The CLI flags prefix for this block configuration is: -# query-scheduler.grpc-client-config -[grpc_client_config: ] - -# Set to true to have the query schedulers create and place themselves in a -# ring. 
If no frontend_address or scheduler_address are present anywhere else in -# the configuration, Loki will toggle this value to true. -# CLI flag: -query-scheduler.use-scheduler-ring -[use_scheduler_ring: | default = false] - -# The hash ring configuration. This option is required only if -# use_scheduler_ring is true. -scheduler_ring: - kvstore: - # Backend storage to use for the ring. Supported values are: consul, etcd, - # inmemory, memberlist, multi. - # CLI flag: -query-scheduler.ring.store - [store: | default = "consul"] - - # The prefix for the keys in the store. Should end with a /. - # CLI flag: -query-scheduler.ring.prefix - [prefix: | default = "collectors/"] - - # Configuration for a Consul client. Only applies if the selected kvstore is - # consul. - # The CLI flags prefix for this block configuration is: query-scheduler.ring - [consul: ] - - # Configuration for an ETCD v3 client. Only applies if the selected kvstore - # is etcd. - # The CLI flags prefix for this block configuration is: query-scheduler.ring - [etcd: ] - - multi: - # Primary backend storage used by multi-client. - # CLI flag: -query-scheduler.ring.multi.primary - [primary: | default = ""] - - # Secondary backend storage used by multi-client. - # CLI flag: -query-scheduler.ring.multi.secondary - [secondary: | default = ""] - - # Mirror writes to secondary store. - # CLI flag: -query-scheduler.ring.multi.mirror-enabled - [mirror_enabled: | default = false] - - # Timeout for storing value to secondary store. - # CLI flag: -query-scheduler.ring.multi.mirror-timeout - [mirror_timeout: | default = 2s] - - # Period at which to heartbeat to the ring. 0 = disabled. - # CLI flag: -query-scheduler.ring.heartbeat-period - [heartbeat_period: | default = 15s] - - # The heartbeat timeout after which compactors are considered unhealthy within - # the ring. 0 = never (timeout disabled). 
- # CLI flag: -query-scheduler.ring.heartbeat-timeout - [heartbeat_timeout: | default = 1m] - - # File path where tokens are stored. If empty, tokens are not stored at - # shutdown and restored at startup. - # CLI flag: -query-scheduler.ring.tokens-file-path - [tokens_file_path: | default = ""] - - # True to enable zone-awareness and replicate blocks across different - # availability zones. - # CLI flag: -query-scheduler.ring.zone-awareness-enabled - [zone_awareness_enabled: | default = false] - - # Instance ID to register in the ring. - # CLI flag: -query-scheduler.ring.instance-id - [instance_id: | default = ""] - - # Name of network interface to read address from. - # CLI flag: -query-scheduler.ring.instance-interface-names - [instance_interface_names: | default = []] - - # Port to advertise in the ring (defaults to server.grpc-listen-port). - # CLI flag: -query-scheduler.ring.instance-port - [instance_port: | default = 0] - - # IP address to advertise in the ring. - # CLI flag: -query-scheduler.ring.instance-addr - [instance_addr: | default = ""] - - # The availability zone where this instance is running. Required if - # zone-awareness is enabled. - # CLI flag: -query-scheduler.ring.instance-availability-zone - [instance_availability_zone: | default = ""] - - # Enable using a IPv6 instance address. - # CLI flag: -query-scheduler.ring.instance-enable-ipv6 - [instance_enable_ipv6: | default = false] -``` - -### frontend - -The `frontend` block configures the Loki query-frontend. - -```yaml -# Log queries that are slower than the specified duration. Set to 0 to disable. -# Set to < 0 to enable on all queries. -# CLI flag: -frontend.log-queries-longer-than -[log_queries_longer_than: | default = 0s] - -# Comma-separated list of request header names to include in query logs. Applies -# to both query stats and slow queries logs. -# CLI flag: -frontend.log-query-request-headers -[log_query_request_headers: | default = ""] - -# Max body size for downstream prometheus. 
-# CLI flag: -frontend.max-body-size -[max_body_size: | default = 10485760] - -# True to enable query statistics tracking. When enabled, a message with some -# statistics is logged for every query. -# CLI flag: -frontend.query-stats-enabled -[query_stats_enabled: | default = false] - -# Maximum number of outstanding requests per tenant per frontend; requests -# beyond this error with HTTP 429. -# CLI flag: -querier.max-outstanding-requests-per-tenant -[max_outstanding_per_tenant: | default = 2048] - -# In the event a tenant is repeatedly sending queries that lead the querier to -# crash or be killed due to an out-of-memory error, the crashed querier will be -# disconnected from the query frontend and a new querier will be immediately -# assigned to the tenant’s shard. This invalidates the assumption that shuffle -# sharding can be used to reduce the impact on tenants. This option mitigates -# the impact by configuring a delay between when a querier disconnects because -# of a crash and when the crashed querier is actually removed from the tenant's -# shard. -# CLI flag: -query-frontend.querier-forget-delay -[querier_forget_delay: | default = 0s] - -# DNS hostname used for finding query-schedulers. -# CLI flag: -frontend.scheduler-address -[scheduler_address: | default = ""] - -# How often to resolve the scheduler-address, in order to look for new -# query-scheduler instances. Also used to determine how often to poll the -# scheduler-ring for addresses if the scheduler-ring is configured. -# CLI flag: -frontend.scheduler-dns-lookup-period -[scheduler_dns_lookup_period: | default = 10s] - -# Number of concurrent workers forwarding queries to single query-scheduler. -# CLI flag: -frontend.scheduler-worker-concurrency -[scheduler_worker_concurrency: | default = 5] - -# The grpc_client block configures the gRPC client used to communicate between -# two Loki components. 
-# The CLI flags prefix for this block configuration is: -# frontend.grpc-client-config -[grpc_client_config: ] - -# Time to wait for inflight requests to finish before forcefully shutting down. -# This needs to be aligned with the query timeout and the graceful termination -# period of the process orchestrator. -# CLI flag: -frontend.graceful-shutdown-timeout -[graceful_shutdown_timeout: | default = 5m] - -# Name of network interface to read address from. This address is sent to -# query-scheduler and querier, which uses it to send the query response back to -# query-frontend. -# CLI flag: -frontend.instance-interface-names -[instance_interface_names: | default = []] - -# Defines the encoding for requests to and responses from the scheduler and -# querier. Can be 'json' or 'protobuf' (defaults to 'json'). -# CLI flag: -frontend.encoding -[encoding: | default = "json"] - -# Compress HTTP responses. -# CLI flag: -querier.compress-http-responses -[compress_responses: | default = true] - -# URL of downstream Loki. -# CLI flag: -frontend.downstream-url -[downstream_url: | default = ""] - -# URL of querier for tail proxy. -# CLI flag: -frontend.tail-proxy-url -[tail_proxy_url: | default = ""] - -# The TLS configuration. -[tail_tls_config: ] -``` - -### query_range - -The `query_range` block configures the query splitting and caching in the Loki query-frontend. - -```yaml -# Mutate incoming queries to align their start and end with their step. -# CLI flag: -querier.align-querier-with-step -[align_queries_with_step: | default = false] - -results_cache: - # The cache block configures the cache backend. - # The CLI flags prefix for this block configuration is: frontend - [cache: ] - - # Use compression in cache. The default is an empty value '', which disables - # compression. Supported values are: 'snappy' and ''. - # CLI flag: -frontend.compression - [compression: | default = ""] - -# Cache query results. 
-# CLI flag: -querier.cache-results -[cache_results: | default = false] - -# Maximum number of retries for a single request; beyond this, the downstream -# error is returned. -# CLI flag: -querier.max-retries-per-request -[max_retries: | default = 5] - -# Perform query parallelisations based on storage sharding configuration and -# query ASTs. This feature is supported only by the chunks storage engine. -# CLI flag: -querier.parallelise-shardable-queries -[parallelise_shardable_queries: | default = true] - -# A comma-separated list of LogQL vector and range aggregations that should be -# sharded -# CLI flag: -querier.shard-aggregations -[shard_aggregations: | default = ""] - -# Cache index stats query results. -# CLI flag: -querier.cache-index-stats-results -[cache_index_stats_results: | default = false] - -# If a cache config is not specified and cache_index_stats_results is true, the -# config for the results cache is used. -index_stats_results_cache: - # The cache block configures the cache backend. - # The CLI flags prefix for this block configuration is: - # frontend.index-stats-results-cache - [cache: ] - - # Use compression in cache. The default is an empty value '', which disables - # compression. Supported values are: 'snappy' and ''. - # CLI flag: -frontend.index-stats-results-cache.compression - [compression: | default = ""] - -# Cache volume query results. -# CLI flag: -querier.cache-volume-results -[cache_volume_results: | default = false] - -# If a cache config is not specified and cache_volume_results is true, the -# config for the results cache is used. -volume_results_cache: - # The cache block configures the cache backend. - # The CLI flags prefix for this block configuration is: - # frontend.volume-results-cache - [cache: ] - - # Use compression in cache. The default is an empty value '', which disables - # compression. Supported values are: 'snappy' and ''. 
- # CLI flag: -frontend.volume-results-cache.compression - [compression: | default = ""] - -# Cache instant metric query results. -# CLI flag: -querier.cache-instant-metric-results -[cache_instant_metric_results: | default = false] - -# If a cache config is not specified and cache_instant_metric_results is true, -# the config for the results cache is used. -instant_metric_results_cache: - # The cache block configures the cache backend. - # The CLI flags prefix for this block configuration is: - # frontend.instant-metric-results-cache - [cache: ] - - # Use compression in cache. The default is an empty value '', which disables - # compression. Supported values are: 'snappy' and ''. - # CLI flag: -frontend.instant-metric-results-cache.compression - [compression: | default = ""] - -# Whether to align the splits of instant metric query with splitByInterval and -# query's exec time. Useful when instant_metric_cache is enabled -# CLI flag: -querier.instant-metric-query-split-align -[instant_metric_query_split_align: | default = false] - -# Cache series query results. -# CLI flag: -querier.cache-series-results -[cache_series_results: | default = false] - -# If series_results_cache is not configured and cache_series_results is true, -# the config for the results cache is used. -series_results_cache: - # The cache block configures the cache backend. - # The CLI flags prefix for this block configuration is: - # frontend.series-results-cache - [cache: ] - - # Use compression in cache. The default is an empty value '', which disables - # compression. Supported values are: 'snappy' and ''. - # CLI flag: -frontend.series-results-cache.compression - [compression: | default = ""] - -# Cache label query results. -# CLI flag: -querier.cache-label-results -[cache_label_results: | default = false] - -# If label_results_cache is not configured and cache_label_results is true, the -# config for the results cache is used. 
-label_results_cache: - # The cache block configures the cache backend. - # The CLI flags prefix for this block configuration is: - # frontend.label-results-cache - [cache: ] - - # Use compression in cache. The default is an empty value '', which disables - # compression. Supported values are: 'snappy' and ''. - # CLI flag: -frontend.label-results-cache.compression - [compression: | default = ""] -``` - -### ruler - -The `ruler` block configures the Loki ruler. - -```yaml -# Base URL of the Grafana instance. -# CLI flag: -ruler.external.url -[external_url: ] - -# Datasource UID for the dashboard. -# CLI flag: -ruler.datasource-uid -[datasource_uid: | default = ""] - -# Labels to add to all alerts. -[external_labels: ] - -# The grpc_client block configures the gRPC client used to communicate between -# two Loki components. -# The CLI flags prefix for this block configuration is: ruler.client -[ruler_client: ] - -# How frequently to evaluate rules. -# CLI flag: -ruler.evaluation-interval -[evaluation_interval: | default = 1m] - -# How frequently to poll for rule changes. -# CLI flag: -ruler.poll-interval -[poll_interval: | default = 1m] - -# Deprecated: Use -ruler-storage. CLI flags and their respective YAML config -# options instead. -storage: - # Method to use for backend rule storage (configdb, azure, gcs, s3, swift, - # local, bos, cos) - # CLI flag: -ruler.storage.type - [type: | default = ""] - - # Configures backend rule storage for Azure. - # The CLI flags prefix for this block configuration is: ruler.storage - [azure: ] - - # Configures backend rule storage for AlibabaCloud Object Storage (OSS). - # The CLI flags prefix for this block configuration is: ruler - [alibabacloud: ] - - # Configures backend rule storage for GCS. - # The CLI flags prefix for this block configuration is: ruler.storage - [gcs: ] - - # Configures backend rule storage for S3. 
- # The CLI flags prefix for this block configuration is: ruler - [s3: ] - - # Configures backend rule storage for Baidu Object Storage (BOS). - # The CLI flags prefix for this block configuration is: ruler.storage - [bos: ] - - # Configures backend rule storage for Swift. - # The CLI flags prefix for this block configuration is: ruler.storage - [swift: ] - - # Configures backend rule storage for IBM Cloud Object Storage (COS). - # The CLI flags prefix for this block configuration is: ruler.storage - [cos: ] - - # Configures backend rule storage for a local file system directory. - local: - # Directory to scan for rules - # CLI flag: -ruler.storage.local.directory - [directory: | default = ""] - -# File path to store temporary rule files. -# CLI flag: -ruler.rule-path -[rule_path: | default = "/rules"] - -# Comma-separated list of Alertmanager URLs to send notifications to. Each -# Alertmanager URL is treated as a separate group in the configuration. Multiple -# Alertmanagers in HA per group can be supported by using DNS resolution via -# '-ruler.alertmanager-discovery'. -# CLI flag: -ruler.alertmanager-url -[alertmanager_url: | default = ""] - -# Use DNS SRV records to discover Alertmanager hosts. -# CLI flag: -ruler.alertmanager-discovery -[enable_alertmanager_discovery: | default = false] - -# How long to wait between refreshing DNS resolutions of Alertmanager hosts. -# CLI flag: -ruler.alertmanager-refresh-interval -[alertmanager_refresh_interval: | default = 1m] - -# If enabled requests to Alertmanager will utilize the V2 API. -# CLI flag: -ruler.alertmanager-use-v2 -[enable_alertmanager_v2: | default = false] - -# List of alert relabel configs. -[alert_relabel_configs: ] - -# Capacity of the queue for notifications to be sent to the Alertmanager. -# CLI flag: -ruler.notification-queue-capacity -[notification_queue_capacity: | default = 10000] - -# HTTP timeout duration when sending notifications to the Alertmanager. 
-# CLI flag: -ruler.notification-timeout -[notification_timeout: | default = 10s] - -alertmanager_client: - # Path to the client certificate, which will be used for authenticating with - # the server. Also requires the key path to be configured. - # CLI flag: -ruler.alertmanager-client.tls-cert-path - [tls_cert_path: | default = ""] - - # Path to the key for the client certificate. Also requires the client - # certificate to be configured. - # CLI flag: -ruler.alertmanager-client.tls-key-path - [tls_key_path: | default = ""] - - # Path to the CA certificates to validate server certificate against. If not - # set, the host's root CA certificates are used. - # CLI flag: -ruler.alertmanager-client.tls-ca-path - [tls_ca_path: | default = ""] - - # Override the expected name on the server certificate. - # CLI flag: -ruler.alertmanager-client.tls-server-name - [tls_server_name: | default = ""] - - # Skip validating server certificate. - # CLI flag: -ruler.alertmanager-client.tls-insecure-skip-verify - [tls_insecure_skip_verify: | default = false] - - # Override the default cipher suite list (separated by commas). 
Allowed - # values: - # - # Secure Ciphers: - # - TLS_RSA_WITH_AES_128_CBC_SHA - # - TLS_RSA_WITH_AES_256_CBC_SHA - # - TLS_RSA_WITH_AES_128_GCM_SHA256 - # - TLS_RSA_WITH_AES_256_GCM_SHA384 - # - TLS_AES_128_GCM_SHA256 - # - TLS_AES_256_GCM_SHA384 - # - TLS_CHACHA20_POLY1305_SHA256 - # - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA - # - TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA - # - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA - # - TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA - # - TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 - # - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 - # - TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 - # - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 - # - TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 - # - TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 - # - # Insecure Ciphers: - # - TLS_RSA_WITH_RC4_128_SHA - # - TLS_RSA_WITH_3DES_EDE_CBC_SHA - # - TLS_RSA_WITH_AES_128_CBC_SHA256 - # - TLS_ECDHE_ECDSA_WITH_RC4_128_SHA - # - TLS_ECDHE_RSA_WITH_RC4_128_SHA - # - TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA - # - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 - # - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 - # CLI flag: -ruler.alertmanager-client.tls-cipher-suites - [tls_cipher_suites: | default = ""] - - # Override the default minimum TLS version. Allowed values: VersionTLS10, - # VersionTLS11, VersionTLS12, VersionTLS13 - # CLI flag: -ruler.alertmanager-client.tls-min-version - [tls_min_version: | default = ""] - - # HTTP Basic authentication username. It overrides the username set in the URL - # (if any). - # CLI flag: -ruler.alertmanager-client.basic-auth-username - [basic_auth_username: | default = ""] - - # HTTP Basic authentication password. It overrides the password set in the URL - # (if any). - # CLI flag: -ruler.alertmanager-client.basic-auth-password - [basic_auth_password: | default = ""] - - # HTTP Header authorization type (default: Bearer). - # CLI flag: -ruler.alertmanager-client.type - [type: | default = "Bearer"] - - # HTTP Header authorization credentials. 
- # CLI flag: -ruler.alertmanager-client.credentials - [credentials: | default = ""] - - # HTTP Header authorization credentials file. - # CLI flag: -ruler.alertmanager-client.credentials-file - [credentials_file: | default = ""] - -# Max time to tolerate outage for restoring "for" state of alert. -# CLI flag: -ruler.for-outage-tolerance -[for_outage_tolerance: | default = 1h] - -# Minimum duration between alert and restored "for" state. This is maintained -# only for alerts with configured "for" time greater than the grace period. -# CLI flag: -ruler.for-grace-period -[for_grace_period: | default = 10m] - -# Minimum amount of time to wait before resending an alert to Alertmanager. -# CLI flag: -ruler.resend-delay -[resend_delay: | default = 1m] - -# Distribute rule evaluation using ring backend. -# CLI flag: -ruler.enable-sharding -[enable_sharding: | default = false] - -# The sharding strategy to use. Supported values are: default, shuffle-sharding. -# CLI flag: -ruler.sharding-strategy -[sharding_strategy: | default = "default"] - -# The sharding algorithm to use for deciding how rules & groups are sharded. -# Supported values are: by-group, by-rule. -# CLI flag: -ruler.sharding-algo -[sharding_algo: | default = "by-group"] - -# Time to spend searching for a pending ruler when shutting down. -# CLI flag: -ruler.search-pending-for -[search_pending_for: | default = 5m] - -# Ring used by Loki ruler. The CLI flags prefix for this block configuration is -# 'ruler.ring'. -ring: - kvstore: - # Backend storage to use for the ring. Supported values are: consul, etcd, - # inmemory, memberlist, multi. - # CLI flag: -ruler.ring.store - [store: | default = "consul"] - - # The prefix for the keys in the store. Should end with a /. - # CLI flag: -ruler.ring.prefix - [prefix: | default = "rulers/"] - - # Configuration for a Consul client. Only applies if the selected kvstore is - # consul. 
- # The CLI flags prefix for this block configuration is: ruler.ring - [consul: ] - - # Configuration for an ETCD v3 client. Only applies if the selected kvstore - # is etcd. - # The CLI flags prefix for this block configuration is: ruler.ring - [etcd: ] - - multi: - # Primary backend storage used by multi-client. - # CLI flag: -ruler.ring.multi.primary - [primary: | default = ""] - - # Secondary backend storage used by multi-client. - # CLI flag: -ruler.ring.multi.secondary - [secondary: | default = ""] - - # Mirror writes to secondary store. - # CLI flag: -ruler.ring.multi.mirror-enabled - [mirror_enabled: | default = false] - - # Timeout for storing value to secondary store. - # CLI flag: -ruler.ring.multi.mirror-timeout - [mirror_timeout: | default = 2s] - - # Interval between heartbeats sent to the ring. 0 = disabled. - # CLI flag: -ruler.ring.heartbeat-period - [heartbeat_period: | default = 5s] - - # The heartbeat timeout after which ruler ring members are considered - # unhealthy within the ring. 0 = never (timeout disabled). - # CLI flag: -ruler.ring.heartbeat-timeout - [heartbeat_timeout: | default = 1m] - - # Name of network interface to read addresses from. - # CLI flag: -ruler.ring.instance-interface-names - [instance_interface_names: | default = []] - - # The number of tokens the lifecycler will generate and put into the ring if - # it joined without transferring tokens from another lifecycler. - # CLI flag: -ruler.ring.num-tokens - [num_tokens: | default = 128] - -# Period with which to attempt to flush rule groups. -# CLI flag: -ruler.flush-period -[flush_period: | default = 1m] - -# Enable the ruler API. -# CLI flag: -ruler.enable-api -[enable_api: | default = true] - -# Comma separated list of tenants whose rules this ruler can evaluate. If -# specified, only these tenants will be handled by ruler, otherwise this ruler -# can process rules from all tenants. Subject to sharding. 
-# CLI flag: -ruler.enabled-tenants -[enabled_tenants: | default = ""] - -# Comma separated list of tenants whose rules this ruler cannot evaluate. If -# specified, a ruler that would normally pick the specified tenant(s) for -# processing will ignore them instead. Subject to sharding. -# CLI flag: -ruler.disabled-tenants -[disabled_tenants: | default = ""] - -# Report the wall time for ruler queries to complete as a per user metric and as -# an info level log message. -# CLI flag: -ruler.query-stats-enabled -[query_stats_enabled: | default = false] - -# Disable the rule_group label on exported metrics. -# CLI flag: -ruler.disable-rule-group-label -[disable_rule_group_label: | default = false] - -wal: - # The directory in which to write tenant WAL files. Each tenant will have its - # own directory one level below this directory. - # CLI flag: -ruler.wal.dir - [dir: | default = "ruler-wal"] - - # Frequency with which to run the WAL truncation process. - # CLI flag: -ruler.wal.truncate-frequency - [truncate_frequency: | default = 1h] - - # Minimum age that samples must exist in the WAL before being truncated. - # CLI flag: -ruler.wal.min-age - [min_age: | default = 5m] - - # Maximum age that samples must exist in the WAL before being truncated. - # CLI flag: -ruler.wal.max-age - [max_age: | default = 4h] - -wal_cleaner: - # The minimum age of a WAL to consider for cleaning. - # CLI flag: -ruler.wal-cleaner.min-age - [min_age: | default = 12h] - - # How often to run the WAL cleaner. 0 = disabled. - # CLI flag: -ruler.wal-cleaner.period - [period: | default = 0s] - -# Remote-write configuration to send rule samples to a Prometheus remote-write -# endpoint. -remote_write: - # Deprecated: Use 'clients' instead. Configure remote write client. - [client: ] - - # Configure remote write clients. A map with remote client id as key. - [clients: ] - - # Enable remote-write functionality. 
- # CLI flag: -ruler.remote-write.enabled - [enabled: | default = false] - - # Minimum period to wait between refreshing remote-write reconfigurations. - # This should be greater than or equivalent to - # -limits.per-user-override-period. - # CLI flag: -ruler.remote-write.config-refresh-period - [config_refresh_period: | default = 10s] - - # Add X-Scope-OrgID header in remote write requests. - # CLI flag: -ruler.remote-write.add-org-id-header - [add_org_id_header: | default = true] - -# Configuration for rule evaluation. -evaluation: - # The evaluation mode for the ruler. Can be either 'local' or 'remote'. If set - # to 'local', the ruler will evaluate rules locally. If set to 'remote', the - # ruler will evaluate rules remotely. If unset, the ruler will evaluate rules - # locally. - # CLI flag: -ruler.evaluation.mode - [mode: | default = "local"] - - # Upper bound of random duration to wait before rule evaluation to avoid - # contention during concurrent execution of rules. Jitter is calculated - # consistently for a given rule. Set 0 to disable (default). - # CLI flag: -ruler.evaluation.max-jitter - [max_jitter: | default = 0s] - - query_frontend: - # GRPC listen address of the query-frontend(s). Must be a DNS address - # (prefixed with dns:///) to enable client side load balancing. - # CLI flag: -ruler.evaluation.query-frontend.address - [address: | default = ""] - - # Set to true if query-frontend connection requires TLS. - # CLI flag: -ruler.evaluation.query-frontend.tls-enabled - [tls_enabled: | default = false] - - # Path to the client certificate, which will be used for authenticating with - # the server. Also requires the key path to be configured. - # CLI flag: -ruler.evaluation.query-frontend.tls-cert-path - [tls_cert_path: | default = ""] - - # Path to the key for the client certificate. Also requires the client - # certificate to be configured. 
- # CLI flag: -ruler.evaluation.query-frontend.tls-key-path - [tls_key_path: | default = ""] - - # Path to the CA certificates to validate server certificate against. If not - # set, the host's root CA certificates are used. - # CLI flag: -ruler.evaluation.query-frontend.tls-ca-path - [tls_ca_path: | default = ""] - - # Override the expected name on the server certificate. - # CLI flag: -ruler.evaluation.query-frontend.tls-server-name - [tls_server_name: | default = ""] - - # Skip validating server certificate. - # CLI flag: -ruler.evaluation.query-frontend.tls-insecure-skip-verify - [tls_insecure_skip_verify: | default = false] - - # Override the default cipher suite list (separated by commas). Allowed - # values: - # - # Secure Ciphers: - # - TLS_RSA_WITH_AES_128_CBC_SHA - # - TLS_RSA_WITH_AES_256_CBC_SHA - # - TLS_RSA_WITH_AES_128_GCM_SHA256 - # - TLS_RSA_WITH_AES_256_GCM_SHA384 - # - TLS_AES_128_GCM_SHA256 - # - TLS_AES_256_GCM_SHA384 - # - TLS_CHACHA20_POLY1305_SHA256 - # - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA - # - TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA - # - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA - # - TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA - # - TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 - # - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 - # - TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 - # - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 - # - TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 - # - TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 - # - # Insecure Ciphers: - # - TLS_RSA_WITH_RC4_128_SHA - # - TLS_RSA_WITH_3DES_EDE_CBC_SHA - # - TLS_RSA_WITH_AES_128_CBC_SHA256 - # - TLS_ECDHE_ECDSA_WITH_RC4_128_SHA - # - TLS_ECDHE_RSA_WITH_RC4_128_SHA - # - TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA - # - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 - # - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 - # CLI flag: -ruler.evaluation.query-frontend.tls-cipher-suites - [tls_cipher_suites: | default = ""] - - # Override the default minimum TLS version. 
Allowed values: VersionTLS10, - # VersionTLS11, VersionTLS12, VersionTLS13 - # CLI flag: -ruler.evaluation.query-frontend.tls-min-version - [tls_min_version: | default = ""] -``` - -### ingester_client - -The `ingester_client` block configures how the distributor will connect to ingesters. Only appropriate when running all components, the distributor, or the querier. - -```yaml -# Configures how connections are pooled. -pool_config: - # How frequently to clean up clients for ingesters that have gone away. - # CLI flag: -distributor.client-cleanup-period - [client_cleanup_period: | default = 15s] - - # Run a health check on each ingester client during periodic cleanup. - # CLI flag: -distributor.health-check-ingesters - [health_check_ingesters: | default = true] - - # How quickly a dead client will be removed after it has been detected to - # disappear. Set this to a value to allow time for a secondary health check to - # recover the missing client. - # CLI flag: -ingester.client.healthcheck-timeout - [remote_timeout: | default = 1s] - -# The remote request timeout on the client side. -# CLI flag: -ingester.client.timeout -[remote_timeout: | default = 5s] - -# Configures how the gRPC connection to ingesters work as a client. -# The CLI flags prefix for this block configuration is: ingester.client -[grpc_client_config: ] -``` - -### ingester - -The `ingester` block configures the ingester and how the ingester will register itself to a key value store. - -```yaml -# Configures how the lifecycle of the ingester will operate and where it will -# register for discovery. -lifecycler: - ring: - kvstore: - # Backend storage to use for the ring. Supported values are: consul, etcd, - # inmemory, memberlist, multi. - # CLI flag: -ring.store - [store: | default = "consul"] - - # The prefix for the keys in the store. Should end with a /. - # CLI flag: -ring.prefix - [prefix: | default = "collectors/"] - - # Configuration for a Consul client. 
Only applies if the selected kvstore - # is consul. - [consul: ] - - # Configuration for an ETCD v3 client. Only applies if the selected - # kvstore is etcd. - [etcd: ] - - multi: - # Primary backend storage used by multi-client. - # CLI flag: -multi.primary - [primary: | default = ""] - - # Secondary backend storage used by multi-client. - # CLI flag: -multi.secondary - [secondary: | default = ""] - - # Mirror writes to secondary store. - # CLI flag: -multi.mirror-enabled - [mirror_enabled: | default = false] - - # Timeout for storing value to secondary store. - # CLI flag: -multi.mirror-timeout - [mirror_timeout: | default = 2s] - - # The heartbeat timeout after which ingesters are skipped for reads/writes. - # 0 = never (timeout disabled). - # CLI flag: -ring.heartbeat-timeout - [heartbeat_timeout: | default = 1m] - - # The number of ingesters to write to and read from. - # CLI flag: -distributor.replication-factor - [replication_factor: | default = 3] - - # True to enable the zone-awareness and replicate ingested samples across - # different availability zones. - # CLI flag: -distributor.zone-awareness-enabled - [zone_awareness_enabled: | default = false] - - # Comma-separated list of zones to exclude from the ring. Instances in - # excluded zones will be filtered out from the ring. - # CLI flag: -distributor.excluded-zones - [excluded_zones: | default = ""] - - # Number of tokens for each ingester. - # CLI flag: -ingester.num-tokens - [num_tokens: | default = 128] - - # Period at which to heartbeat to consul. 0 = disabled. - # CLI flag: -ingester.heartbeat-period - [heartbeat_period: | default = 5s] - - # Heartbeat timeout after which instance is assumed to be unhealthy. 0 = - # disabled. - # CLI flag: -ingester.heartbeat-timeout - [heartbeat_timeout: | default = 1m] - - # Observe tokens after generating to resolve collisions. Useful when using - # gossiping ring. 
- # CLI flag: -ingester.observe-period - [observe_period: | default = 0s] - - # Period to wait for a claim from another member; will join automatically - # after this. - # CLI flag: -ingester.join-after - [join_after: | default = 0s] - - # Minimum duration to wait after the internal readiness checks have passed but - # before succeeding the readiness endpoint. This is used to slowdown - # deployment controllers (eg. Kubernetes) after an instance is ready and - # before they proceed with a rolling update, to give the rest of the cluster - # instances enough time to receive ring updates. - # CLI flag: -ingester.min-ready-duration - [min_ready_duration: | default = 15s] - - # Name of network interface to read address from. - # CLI flag: -ingester.lifecycler.interface - [interface_names: | default = []] - - # Enable IPv6 support. Required to make use of IP addresses from IPv6 - # interfaces. - # CLI flag: -ingester.enable-inet6 - [enable_inet6: | default = false] - - # Duration to sleep for before exiting, to ensure metrics are scraped. - # CLI flag: -ingester.final-sleep - [final_sleep: | default = 0s] - - # File path where tokens are stored. If empty, tokens are not stored at - # shutdown and restored at startup. - # CLI flag: -ingester.tokens-file-path - [tokens_file_path: | default = ""] - - # The availability zone where this instance is running. - # CLI flag: -ingester.availability-zone - [availability_zone: | default = ""] - - # Unregister from the ring upon clean shutdown. It can be useful to disable - # for rolling restarts with consistent naming in conjunction with - # -distributor.extend-writes=false. - # CLI flag: -ingester.unregister-on-shutdown - [unregister_on_shutdown: | default = true] - - # When enabled the readiness probe succeeds only after all instances are - # ACTIVE and healthy in the ring, otherwise only the instance itself is - # checked. 
This option should be disabled if in your cluster multiple - # instances can be rolled out simultaneously, otherwise rolling updates may be - # slowed down. - # CLI flag: -ingester.readiness-check-ring-health - [readiness_check_ring_health: | default = true] - - # IP address to advertise in the ring. - # CLI flag: -ingester.lifecycler.addr - [address: | default = ""] - - # port to advertise in consul (defaults to server.grpc-listen-port). - # CLI flag: -ingester.lifecycler.port - [port: | default = 0] - - # ID to register in the ring. - # CLI flag: -ingester.lifecycler.ID - [id: | default = ""] - -# How many flushes can happen concurrently from each stream. -# CLI flag: -ingester.concurrent-flushes -[concurrent_flushes: | default = 32] - -# How often should the ingester see if there are any blocks to flush. The first -# flush check is delayed by a random time up to 0.8x the flush check period. -# Additionally, there is +/- 1% jitter added to the interval. -# CLI flag: -ingester.flush-check-period -[flush_check_period: | default = 30s] - -# The timeout before a flush is cancelled. -# CLI flag: -ingester.flush-op-timeout -[flush_op_timeout: | default = 10m] - -# How long chunks should be retained in-memory after they've been flushed. -# CLI flag: -ingester.chunks-retain-period -[chunk_retain_period: | default = 0s] - -# How long chunks should sit in-memory with no updates before being flushed if -# they don't hit the max block size. This means that half-empty chunks will -# still be flushed after a certain period as long as they receive no further -# activity. -# CLI flag: -ingester.chunks-idle-period -[chunk_idle_period: | default = 30m] - -# The targeted _uncompressed_ size in bytes of a chunk block When this threshold -# is exceeded the head block will be cut and compressed inside the chunk. -# CLI flag: -ingester.chunks-block-size -[chunk_block_size: | default = 262144] - -# A target _compressed_ size in bytes for chunks. 
This is a desired size not an -# exact size, chunks may be slightly bigger or significantly smaller if they get -# flushed for other reasons (e.g. chunk_idle_period). A value of 0 creates -# chunks with a fixed 10 blocks, a non zero value will create chunks with a -# variable number of blocks to meet the target size. -# CLI flag: -ingester.chunk-target-size -[chunk_target_size: | default = 1572864] - -# The algorithm to use for compressing chunk. (none, gzip, lz4-64k, snappy, -# lz4-256k, lz4-1M, lz4, flate, zstd) -# CLI flag: -ingester.chunk-encoding -[chunk_encoding: | default = "gzip"] - -# The maximum duration of a timeseries chunk in memory. If a timeseries runs for -# longer than this, the current chunk will be flushed to the store and a new -# chunk created. -# CLI flag: -ingester.max-chunk-age -[max_chunk_age: | default = 2h] - -# Forget about ingesters having heartbeat timestamps older than -# `ring.kvstore.heartbeat_timeout`. This is equivalent to clicking on the -# `/ring` `forget` button in the UI: the ingester is removed from the ring. This -# is a useful setting when you are sure that an unhealthy node won't return. An -# example is when not using stateful sets or the equivalent. Use -# `memberlist.rejoin_interval` > 0 to handle network partition cases when using -# a memberlist. -# CLI flag: -ingester.autoforget-unhealthy -[autoforget_unhealthy: | default = false] - -# Parameters used to synchronize ingesters to cut chunks at the same moment. -# Sync period is used to roll over incoming entry to a new chunk. If chunk's -# utilization isn't high enough (eg. less than 50% when sync_min_utilization is -# set to 0.5), then this chunk rollover doesn't happen. -# CLI flag: -ingester.sync-period -[sync_period: | default = 1h] - -# Minimum utilization of chunk when doing synchronization. -# CLI flag: -ingester.sync-min-utilization -[sync_min_utilization: | default = 0.1] - -# The maximum number of errors a stream will report to the user when a push -# fails. 
0 to make unlimited. -# CLI flag: -ingester.max-ignored-stream-errors -[max_returned_stream_errors: | default = 10] - -# How far back should an ingester be allowed to query the store for data, for -# use only with boltdb-shipper/tsdb index and filesystem object store. -1 for -# infinite. -# CLI flag: -ingester.query-store-max-look-back-period -[query_store_max_look_back_period: | default = 0s] - -# The ingester WAL (Write Ahead Log) records incoming logs and stores them on -# the local file systems in order to guarantee persistence of acknowledged data -# in the event of a process crash. -wal: - # Enable writing of ingested data into WAL. - # CLI flag: -ingester.wal-enabled - [enabled: | default = true] - - # Directory where the WAL data is stored and/or recovered from. - # CLI flag: -ingester.wal-dir - [dir: | default = "wal"] - - # Interval at which checkpoints should be created. - # CLI flag: -ingester.checkpoint-duration - [checkpoint_duration: | default = 5m] - - # When WAL is enabled, should chunks be flushed to long-term storage on - # shutdown. - # CLI flag: -ingester.flush-on-shutdown - [flush_on_shutdown: | default = false] - - # Maximum memory size the WAL may use during replay. After hitting this, it - # will flush data to storage before continuing. A unit suffix (KB, MB, GB) may - # be applied. - # CLI flag: -ingester.wal-replay-memory-ceiling - [replay_memory_ceiling: | default = 4GB] - -# Shard factor used in the ingesters for the in process reverse index. This MUST -# be evenly divisible by ALL schema shard factors or Loki will not start. -# CLI flag: -ingester.index-shards -[index_shards: | default = 32] - -# Maximum number of dropped streams to keep in memory during tailing. -# CLI flag: -ingester.tailer.max-dropped-streams -[max_dropped_streams: | default = 10] - -# Path where the shutdown marker file is stored. If not set and -# common.path_prefix is set then common.path_prefix will be used. 
-# CLI flag: -ingester.shutdown-marker-path -[shutdown_marker_path: | default = ""] -``` - -### index_gateway - -The `index_gateway` block configures the Loki index gateway server, responsible for serving index queries without the need to constantly interact with the object store. - -```yaml -# Defines in which mode the index gateway server will operate (default to -# 'simple'). It supports two modes: -# - 'simple': an index gateway server instance is responsible for handling, -# storing and returning requests for all indices for all tenants. -# - 'ring': an index gateway server instance is responsible for a subset of -# tenants instead of all tenants. -# CLI flag: -index-gateway.mode -[mode: | default = "simple"] - -# Defines the ring to be used by the index gateway servers and clients in case -# the servers are configured to run in 'ring' mode. In case this isn't -# configured, this block supports inheriting configuration from the common ring -# section. -ring: - kvstore: - # Backend storage to use for the ring. Supported values are: consul, etcd, - # inmemory, memberlist, multi. - # CLI flag: -index-gateway.ring.store - [store: | default = "consul"] - - # The prefix for the keys in the store. Should end with a /. - # CLI flag: -index-gateway.ring.prefix - [prefix: | default = "collectors/"] - - # Configuration for a Consul client. Only applies if the selected kvstore is - # consul. - # The CLI flags prefix for this block configuration is: index-gateway.ring - [consul: ] - - # Configuration for an ETCD v3 client. Only applies if the selected kvstore - # is etcd. - # The CLI flags prefix for this block configuration is: index-gateway.ring - [etcd: ] - - multi: - # Primary backend storage used by multi-client. - # CLI flag: -index-gateway.ring.multi.primary - [primary: | default = ""] - - # Secondary backend storage used by multi-client. - # CLI flag: -index-gateway.ring.multi.secondary - [secondary: | default = ""] - - # Mirror writes to secondary store. 
- # CLI flag: -index-gateway.ring.multi.mirror-enabled - [mirror_enabled: | default = false] - - # Timeout for storing value to secondary store. - # CLI flag: -index-gateway.ring.multi.mirror-timeout - [mirror_timeout: | default = 2s] - - # Period at which to heartbeat to the ring. 0 = disabled. - # CLI flag: -index-gateway.ring.heartbeat-period - [heartbeat_period: | default = 15s] - - # The heartbeat timeout after which compactors are considered unhealthy within - # the ring. 0 = never (timeout disabled). - # CLI flag: -index-gateway.ring.heartbeat-timeout - [heartbeat_timeout: | default = 1m] - - # File path where tokens are stored. If empty, tokens are not stored at - # shutdown and restored at startup. - # CLI flag: -index-gateway.ring.tokens-file-path - [tokens_file_path: | default = ""] - - # True to enable zone-awareness and replicate blocks across different - # availability zones. - # CLI flag: -index-gateway.ring.zone-awareness-enabled - [zone_awareness_enabled: | default = false] - - # Deprecated: How many index gateway instances are assigned to each tenant. - # Use -index-gateway.shard-size instead. The shard size is also a per-tenant - # setting. - # CLI flag: -replication-factor - [replication_factor: | default = 3] - - # Instance ID to register in the ring. - # CLI flag: -index-gateway.ring.instance-id - [instance_id: | default = ""] - - # Name of network interface to read address from. - # CLI flag: -index-gateway.ring.instance-interface-names - [instance_interface_names: | default = []] - - # Port to advertise in the ring (defaults to server.grpc-listen-port). - # CLI flag: -index-gateway.ring.instance-port - [instance_port: | default = 0] - - # IP address to advertise in the ring. - # CLI flag: -index-gateway.ring.instance-addr - [instance_addr: | default = ""] - - # The availability zone where this instance is running. Required if - # zone-awareness is enabled. 
- # CLI flag: -index-gateway.ring.instance-availability-zone - [instance_availability_zone: | default = ""] - - # Enable using a IPv6 instance address. - # CLI flag: -index-gateway.ring.instance-enable-ipv6 - [instance_enable_ipv6: | default = false] -``` - -### bloom_gateway - -The `bloom_gateway` block configures the Loki bloom gateway server, responsible for serving queries for filtering chunks based on filter expressions. - -```yaml -# Defines the ring to be used by the bloom gateway servers and clients. In case -# this isn't configured, this block supports inheriting configuration from the -# common ring section. -ring: - kvstore: - # Backend storage to use for the ring. Supported values are: consul, etcd, - # inmemory, memberlist, multi. - # CLI flag: -bloom-gateway.ring.store - [store: | default = "consul"] - - # The prefix for the keys in the store. Should end with a /. - # CLI flag: -bloom-gateway.ring.prefix - [prefix: | default = "collectors/"] - - # Configuration for a Consul client. Only applies if the selected kvstore is - # consul. - # The CLI flags prefix for this block configuration is: bloom-gateway.ring - [consul: ] - - # Configuration for an ETCD v3 client. Only applies if the selected kvstore - # is etcd. - # The CLI flags prefix for this block configuration is: bloom-gateway.ring - [etcd: ] - - multi: - # Primary backend storage used by multi-client. - # CLI flag: -bloom-gateway.ring.multi.primary - [primary: | default = ""] - - # Secondary backend storage used by multi-client. - # CLI flag: -bloom-gateway.ring.multi.secondary - [secondary: | default = ""] - - # Mirror writes to secondary store. - # CLI flag: -bloom-gateway.ring.multi.mirror-enabled - [mirror_enabled: | default = false] - - # Timeout for storing value to secondary store. - # CLI flag: -bloom-gateway.ring.multi.mirror-timeout - [mirror_timeout: | default = 2s] - - # Period at which to heartbeat to the ring. 0 = disabled. 
- # CLI flag: -bloom-gateway.ring.heartbeat-period - [heartbeat_period: | default = 15s] - - # The heartbeat timeout after which compactors are considered unhealthy within - # the ring. 0 = never (timeout disabled). - # CLI flag: -bloom-gateway.ring.heartbeat-timeout - [heartbeat_timeout: | default = 1m] - - # File path where tokens are stored. If empty, tokens are not stored at - # shutdown and restored at startup. - # CLI flag: -bloom-gateway.ring.tokens-file-path - [tokens_file_path: | default = ""] - - # True to enable zone-awareness and replicate blocks across different - # availability zones. - # CLI flag: -bloom-gateway.ring.zone-awareness-enabled - [zone_awareness_enabled: | default = false] - - # Number of tokens to use in the ring. The bigger the number of tokens, the - # more fingerprint ranges the compactor will own, but the smaller these ranges - # will be. Bigger number of tokens means that more but smaller requests will - # be handled by each gateway. - # CLI flag: -bloom-gateway.ring.tokens - [num_tokens: | default = 16] - - # Factor for data replication. - # CLI flag: -bloom-gateway.ring.replication-factor - [replication_factor: | default = 3] - - # Instance ID to register in the ring. - # CLI flag: -bloom-gateway.ring.instance-id - [instance_id: | default = ""] - - # Name of network interface to read address from. - # CLI flag: -bloom-gateway.ring.instance-interface-names - [instance_interface_names: | default = []] - - # Port to advertise in the ring (defaults to server.grpc-listen-port). - # CLI flag: -bloom-gateway.ring.instance-port - [instance_port: | default = 0] - - # IP address to advertise in the ring. - # CLI flag: -bloom-gateway.ring.instance-addr - [instance_addr: | default = ""] - - # The availability zone where this instance is running. Required if - # zone-awareness is enabled. - # CLI flag: -bloom-gateway.ring.instance-availability-zone - [instance_availability_zone: | default = ""] - - # Enable using a IPv6 instance address. 
- # CLI flag: -bloom-gateway.ring.instance-enable-ipv6 - [instance_enable_ipv6: | default = false] - -# Flag to enable or disable the bloom gateway component globally. -# CLI flag: -bloom-gateway.enabled -[enabled: | default = false] - -client: - # Configures the behavior of the connection pool. - pool_config: - [client_cleanup_period: ] - - [health_check_ingesters: ] - - [remote_timeout: ] - - # The grpc_client block configures the gRPC client used to communicate between - # two Loki components. - # The CLI flags prefix for this block configuration is: - # bloom-gateway-client.grpc - [grpc_client_config: ] - - # Flag to control whether requests sent to the gateway should be logged or - # not. - # CLI flag: -bloom-gateway-client.log-gateway-requests - [log_gateway_requests: | default = false] - - results_cache: - # The cache block configures the cache backend. - # The CLI flags prefix for this block configuration is: - # bloom-gateway-client.cache - [cache: ] - - # Use compression in cache. The default is an empty value '', which disables - # compression. Supported values are: 'snappy' and ''. - # CLI flag: -bloom-gateway-client.cache.compression - [compression: | default = ""] - - # Flag to control whether to cache bloom gateway client requests/responses. - # CLI flag: -bloom-gateway-client.cache_results - [cache_results: | default = false] - -# Number of workers to use for filtering chunks concurrently. -# CLI flag: -bloom-gateway.worker-concurrency -[worker_concurrency: | default = 4] - -# Maximum number of outstanding tasks per tenant. -# CLI flag: -bloom-gateway.max-outstanding-per-tenant -[max_outstanding_per_tenant: | default = 1024] - -# How many tasks are multiplexed at once. -# CLI flag: -bloom-gateway.num-multiplex-tasks -[num_multiplex_tasks: | default = 512] -``` - -### storage_config - -The `storage_config` block configures one of many possible stores for both the index and chunks. 
Which configuration to be picked should be defined in schema_config block. - -```yaml -# The alibabacloud_storage_config block configures the connection to Alibaba -# Cloud Storage object storage backend. -# The CLI flags prefix for this block configuration is: common -[alibabacloud: ] - -# The aws_storage_config block configures the connection to dynamoDB and S3 -# object storage. Either one of them or both can be configured. -[aws: ] - -# The azure_storage_config block configures the connection to Azure object -# storage backend. -[azure: ] - -# The bos_storage_config block configures the connection to Baidu Object Storage -# (BOS) object storage backend. -[bos: ] - -# Deprecated: Configures storing indexes in Bigtable. Required fields only -# required when bigtable is defined in config. -bigtable: - # Bigtable project ID. - # CLI flag: -bigtable.project - [project: | default = ""] - - # Bigtable instance ID. Please refer to - # https://cloud.google.com/docs/authentication/production for more information - # about how to configure authentication. - # CLI flag: -bigtable.instance - [instance: | default = ""] - - # The grpc_client block configures the gRPC client used to communicate between - # two Loki components. - # The CLI flags prefix for this block configuration is: bigtable - [grpc_client_config: ] - - # If enabled, once a tables info is fetched, it is cached. - # CLI flag: -bigtable.table-cache.enabled - [table_cache_enabled: | default = true] - - # Duration to cache tables before checking again. - # CLI flag: -bigtable.table-cache.expiration - [table_cache_expiration: | default = 30m] - -# Configures storing chunks in GCS. Required fields only required when gcs is -# defined in config. -[gcs: ] - -# Deprecated: Configures storing chunks and/or the index in Cassandra. -cassandra: - # Comma-separated hostnames or IPs of Cassandra instances. 
- # CLI flag: -cassandra.addresses - [addresses: | default = ""] - - # Port that Cassandra is running on - # CLI flag: -cassandra.port - [port: | default = 9042] - - # Keyspace to use in Cassandra. - # CLI flag: -cassandra.keyspace - [keyspace: | default = ""] - - # Consistency level for Cassandra. - # CLI flag: -cassandra.consistency - [consistency: | default = "QUORUM"] - - # Replication factor to use in Cassandra. - # CLI flag: -cassandra.replication-factor - [replication_factor: | default = 3] - - # Instruct the cassandra driver to not attempt to get host info from the - # system.peers table. - # CLI flag: -cassandra.disable-initial-host-lookup - [disable_initial_host_lookup: | default = false] - - # Use SSL when connecting to cassandra instances. - # CLI flag: -cassandra.ssl - [SSL: | default = false] - - # Require SSL certificate validation. - # CLI flag: -cassandra.host-verification - [host_verification: | default = true] - - # Policy for selecting Cassandra host. Supported values are: round-robin, - # token-aware. - # CLI flag: -cassandra.host-selection-policy - [host_selection_policy: | default = "round-robin"] - - # Path to certificate file to verify the peer. - # CLI flag: -cassandra.ca-path - [CA_path: | default = ""] - - # Path to certificate file used by TLS. - # CLI flag: -cassandra.tls-cert-path - [tls_cert_path: | default = ""] - - # Path to private key file used by TLS. - # CLI flag: -cassandra.tls-key-path - [tls_key_path: | default = ""] - - # Enable password authentication when connecting to cassandra. - # CLI flag: -cassandra.auth - [auth: | default = false] - - # Username to use when connecting to cassandra. - # CLI flag: -cassandra.username - [username: | default = ""] - - # Password to use when connecting to cassandra. - # CLI flag: -cassandra.password - [password: | default = ""] - - # File containing password to use when connecting to cassandra. 
- # CLI flag: -cassandra.password-file - [password_file: | default = ""] - - # If set, when authenticating with cassandra a custom authenticator will be - # expected during the handshake. This flag can be set multiple times. - # CLI flag: -cassandra.custom-authenticator - [custom_authenticators: | default = []] - - # Timeout when connecting to cassandra. - # CLI flag: -cassandra.timeout - [timeout: | default = 2s] - - # Initial connection timeout, used during initial dial to server. - # CLI flag: -cassandra.connect-timeout - [connect_timeout: | default = 5s] - - # Interval to retry connecting to cassandra nodes marked as DOWN. - # CLI flag: -cassandra.reconnent-interval - [reconnect_interval: | default = 1s] - - # Number of retries to perform on a request. Set to 0 to disable retries. - # CLI flag: -cassandra.max-retries - [max_retries: | default = 0] - - # Maximum time to wait before retrying a failed request. - # CLI flag: -cassandra.retry-max-backoff - [retry_max_backoff: | default = 10s] - - # Minimum time to wait before retrying a failed request. - # CLI flag: -cassandra.retry-min-backoff - [retry_min_backoff: | default = 100ms] - - # Limit number of concurrent queries to Cassandra. Set to 0 to disable the - # limit. - # CLI flag: -cassandra.query-concurrency - [query_concurrency: | default = 0] - - # Number of TCP connections per host. - # CLI flag: -cassandra.num-connections - [num_connections: | default = 2] - - # Convict hosts of being down on failure. - # CLI flag: -cassandra.convict-hosts-on-failure - [convict_hosts_on_failure: | default = true] - - # Table options used to create index or chunk tables. This value is used as - # plain text in the table `WITH` like this, "CREATE TABLE - # (...) WITH ". For details, - # see https://cortexmetrics.io/docs/production/cassandra. By default it will - # use the default table options of your Cassandra cluster. 
- # CLI flag: -cassandra.table-options - [table_options: | default = ""] - -# Deprecated: Configures storing index in BoltDB. Required fields only required -# when boltdb is present in the configuration. -boltdb: - # Location of BoltDB index files. - # CLI flag: -boltdb.dir - [directory: | default = ""] - -# Configures storing the chunks on the local file system. Required fields only -# required when filesystem is present in the configuration. -[filesystem: ] - -# The swift_storage_config block configures the connection to OpenStack Object -# Storage (Swift) object storage backend. -[swift: ] - -# Deprecated: -grpc_store: - # Hostname or IP of the gRPC store instance. - # CLI flag: -grpc-store.server-address - [server_address: | default = ""] - -hedging: - # If set to a non-zero value a second request will be issued at the provided - # duration. Default is 0 (disabled) - # CLI flag: -store.hedge-requests-at - [at: | default = 0s] - - # The maximum of hedge requests allowed. - # CLI flag: -store.hedge-requests-up-to - [up_to: | default = 2] - - # The maximum of hedge requests allowed per seconds. - # CLI flag: -store.hedge-max-per-second - [max_per_second: | default = 5] - -# Configures additional object stores for a given storage provider. -# Supported stores: aws, azure, bos, filesystem, gcs, swift. -# Example: -# storage_config: -# named_stores: -# aws: -# store-1: -# endpoint: s3://foo-bucket -# region: us-west1 -# Named store from this example can be used by setting object_store to store-1 -# in period_config. -[named_stores: ] - -# The cos_storage_config block configures the connection to IBM Cloud Object -# Storage (COS) backend. -[cos: ] - -# Cache validity for active index entries. Should be no higher than -# -ingester.max-chunk-idle. -# CLI flag: -store.index-cache-validity -[index_cache_validity: | default = 5m] - -congestion_control: - # Use storage congestion control (default: disabled). 
- # CLI flag: -store.congestion-control.enabled - [enabled: | default = false] - - controller: - # Congestion control strategy to use (default: none, options: 'aimd'). - # CLI flag: -store.congestion-control.strategy - [strategy: | default = ""] - - aimd: - # AIMD starting throughput window size: how many requests can be sent per - # second (default: 2000). - # CLI flag: -store.congestion-control.strategy.aimd.start - [start: | default = 2000] - - # AIMD maximum throughput window size: upper limit of requests sent per - # second (default: 10000). - # CLI flag: -store.congestion-control.strategy.aimd.upper-bound - [upper_bound: | default = 10000] - - # AIMD backoff factor when upstream service is throttled to decrease - # number of requests sent per second (default: 0.5). - # CLI flag: -store.congestion-control.strategy.aimd.backoff-factor - [backoff_factor: | default = 0.5] - - retry: - # Congestion control retry strategy to use (default: none, options: - # 'limited'). - # CLI flag: -store.congestion-control.retry.strategy - [strategy: | default = ""] - - # Maximum number of retries allowed. - # CLI flag: -store.congestion-control.retry.strategy.limited.limit - [limit: | default = 2] - - hedging: - config: - [at: ] - - [up_to: ] - - [max_per_second: ] - - # Congestion control hedge strategy to use (default: none, options: - # 'limited'). - # CLI flag: -store.congestion-control.hedge.strategy - [strategy: | default = ""] - -# Experimental. Sets a constant prefix for all keys inserted into object -# storage. Example: loki/ -# CLI flag: -store.object-prefix -[object_prefix: | default = ""] - -# The cache block configures the cache backend. -# The CLI flags prefix for this block configuration is: store.index-cache-read -[index_queries_cache_config: ] - -# Disable broad index queries which results in reduced cache usage and faster -# query performance at the expense of somewhat higher QPS on the index store. 
-# CLI flag: -store.disable-broad-index-queries -[disable_broad_index_queries: | default = false] - -# Maximum number of parallel chunk reads. -# CLI flag: -store.max-parallel-get-chunk -[max_parallel_get_chunk: | default = 150] - -# Enable the thanos.io/objstore to be the backend for object storage -# CLI flag: -thanos.enable -[thanos_objstore: | default = false] - -objstore_config: - # Backend storage to use. Supported backends are: s3, gcs, azure, swift, - # filesystem. - # CLI flag: -thanos.backend - [backend: | default = "filesystem"] - - s3: - # The S3 bucket endpoint. It could be an AWS S3 endpoint listed at - # https://docs.aws.amazon.com/general/latest/gr/s3.html or the address of an - # S3-compatible service in hostname:port format. - # CLI flag: -thanos.s3.endpoint - [endpoint: | default = ""] - - # S3 region. If unset, the client will issue a S3 GetBucketLocation API call - # to autodetect it. - # CLI flag: -thanos.s3.region - [region: | default = ""] - - # S3 bucket name - # CLI flag: -thanos.s3.bucket-name - [bucket_name: | default = ""] - - # S3 secret access key - # CLI flag: -thanos.s3.secret-access-key - [secret_access_key: | default = ""] - - # S3 session token - # CLI flag: -thanos.s3.session-token - [session_token: | default = ""] - - # S3 access key ID - # CLI flag: -thanos.s3.access-key-id - [access_key_id: | default = ""] - - # If enabled, use http:// for the S3 endpoint instead of https://. This - # could be useful in local dev/test environments while using an - # S3-compatible backend storage, like Minio. - # CLI flag: -thanos.s3.insecure - [insecure: | default = false] - - # The S3 storage class to use. Details can be found at - # https://aws.amazon.com/s3/storage-classes/. - # CLI flag: -thanos.s3.storage-class - [storage_class: | default = "STANDARD"] - - sse: - # Enable AWS Server Side Encryption. Supported values: SSE-KMS, SSE-S3. 
- # CLI flag: -thanos.s3.sse.type - [type: | default = ""] - - # KMS Key ID used to encrypt objects in S3 - # CLI flag: -thanos.s3.sse.kms-key-id - [kms_key_id: | default = ""] - - # KMS Encryption Context used for object encryption. It expects JSON - # formatted string. - # CLI flag: -thanos.s3.sse.kms-encryption-context - [kms_encryption_context: | default = ""] - - http: - # The time an idle connection will remain idle before closing. - # CLI flag: -thanos.s3.http.idle-conn-timeout - [idle_conn_timeout: | default = 1m30s] - - # The amount of time the client will wait for a servers response headers. - # CLI flag: -thanos.s3.http.response-header-timeout - [response_header_timeout: | default = 2m] - - # If the client connects via HTTPS and this option is enabled, the client - # will accept any certificate and hostname. - # CLI flag: -thanos.s3.http.insecure-skip-verify - [insecure_skip_verify: | default = false] - - # Maximum time to wait for a TLS handshake. 0 means no limit. - # CLI flag: -thanos.s3.http.tls-handshake-timeout - [tls_handshake_timeout: | default = 10s] - - # The time to wait for a server's first response headers after fully - # writing the request headers if the request has an Expect header. 0 to - # send the request body immediately. - # CLI flag: -thanos.s3.http.expect-continue-timeout - [expect_continue_timeout: | default = 1s] - - # Maximum number of idle (keep-alive) connections across all hosts. 0 - # means no limit. - # CLI flag: -thanos.s3.http.max-idle-connections - [max_idle_connections: | default = 100] - - # Maximum number of idle (keep-alive) connections to keep per-host. If 0, - # a built-in default value is used. - # CLI flag: -thanos.s3.http.max-idle-connections-per-host - [max_idle_connections_per_host: | default = 100] - - # Maximum number of connections per host. 0 means no limit. 
- # CLI flag: -thanos.s3.http.max-connections-per-host - [max_connections_per_host: | default = 0] - - # Path to the trusted CA file that signed the SSL certificate of the - # object storage endpoint. - # CLI flag: -thanos.s3.http.ca-file - [ca_file: | default = ""] - - gcs: - # GCS bucket name - # CLI flag: -thanos.gcs.bucket-name - [bucket_name: | default = ""] - - # JSON representing either a Google Developers Console - # client_credentials.json file or a Google Developers service account key - # file. If empty, fallback to Google default logic. - # CLI flag: -thanos.gcs.service-account - [service_account: | default = ""] - - azure: - # Azure storage account name - # CLI flag: -thanos.azure.account-name - [account_name: | default = ""] - - # Azure storage account key - # CLI flag: -thanos.azure.account-key - [account_key: | default = ""] - - # If `connection-string` is set, the values of `account-name` and - # `endpoint-suffix` values will not be used. Use this method over - # `account-key` if you need to authenticate via a SAS token. Or if you use - # the Azurite emulator. - # CLI flag: -thanos.azure.connection-string - [connection_string: | default = ""] - - # Azure storage container name - # CLI flag: -thanos.azure.container-name - [container_name: | default = "loki"] - - # Azure storage endpoint suffix without schema. The account name will be - # prefixed to this value to create the FQDN - # CLI flag: -thanos.azure.endpoint-suffix - [endpoint_suffix: | default = ""] - - # Number of retries for recoverable errors - # CLI flag: -thanos.azure.max-retries - [max_retries: | default = 20] - - http: - # The time an idle connection will remain idle before closing. - # CLI flag: -thanos.azure.idle-conn-timeout - [idle_conn_timeout: | default = 1m30s] - - # The amount of time the client will wait for a servers response headers. 
- # CLI flag: -thanos.azure.response-header-timeout - [response_header_timeout: | default = 2m] - - # If the client connects via HTTPS and this option is enabled, the client - # will accept any certificate and hostname. - # CLI flag: -thanos.azure.insecure-skip-verify - [insecure_skip_verify: | default = false] - - # Maximum time to wait for a TLS handshake. 0 means no limit. - # CLI flag: -thanos.azure.tls-handshake-timeout - [tls_handshake_timeout: | default = 10s] - - # The time to wait for a server's first response headers after fully - # writing the request headers if the request has an Expect header. 0 to - # send the request body immediately. - # CLI flag: -thanos.azure.expect-continue-timeout - [expect_continue_timeout: | default = 1s] - - # Maximum number of idle (keep-alive) connections across all hosts. 0 - # means no limit. - # CLI flag: -thanos.azure.max-idle-connections - [max_idle_connections: | default = 100] - - # Maximum number of idle (keep-alive) connections to keep per-host. If 0, - # a built-in default value is used. - # CLI flag: -thanos.azure.max-idle-connections-per-host - [max_idle_connections_per_host: | default = 100] - - # Maximum number of connections per host. 0 means no limit. - # CLI flag: -thanos.azure.max-connections-per-host - [max_connections_per_host: | default = 0] - - # Path to the trusted CA file that signed the SSL certificate of the - # object storage endpoint. - # CLI flag: -thanos.azure.ca-file - [ca_file: | default = ""] - - swift: - # OpenStack Swift authentication API version. 0 to autodetect. - # CLI flag: -thanos.swift.auth-version - [auth_version: | default = 0] - - # OpenStack Swift authentication URL - # CLI flag: -thanos.swift.auth-url - [auth_url: | default = ""] - - # Set this to true to use the internal OpenStack Swift endpoint URL - # CLI flag: -thanos.swift.internal - [internal: | default = false] - - # OpenStack Swift username. 
- # CLI flag: -thanos.swift.username - [username: | default = ""] - - # OpenStack Swift user's domain name. - # CLI flag: -thanos.swift.user-domain-name - [user_domain_name: | default = ""] - - # OpenStack Swift user's domain ID. - # CLI flag: -thanos.swift.user-domain-id - [user_domain_id: | default = ""] - - # OpenStack Swift user ID. - # CLI flag: -thanos.swift.user-id - [user_id: | default = ""] - - # OpenStack Swift API key. - # CLI flag: -thanos.swift.password - [password: | default = ""] - - # OpenStack Swift user's domain ID. - # CLI flag: -thanos.swift.domain-id - [domain_id: | default = ""] - - # OpenStack Swift user's domain name. - # CLI flag: -thanos.swift.domain-name - [domain_name: | default = ""] - - # OpenStack Swift project ID (v2,v3 auth only). - # CLI flag: -thanos.swift.project-id - [project_id: | default = ""] - - # OpenStack Swift project name (v2,v3 auth only). - # CLI flag: -thanos.swift.project-name - [project_name: | default = ""] - - # ID of the OpenStack Swift project's domain (v3 auth only), only needed if - # it differs the from user domain. - # CLI flag: -thanos.swift.project-domain-id - [project_domain_id: | default = ""] - - # Name of the OpenStack Swift project's domain (v3 auth only), only needed - # if it differs from the user domain. - # CLI flag: -thanos.swift.project-domain-name - [project_domain_name: | default = ""] - - # OpenStack Swift Region to use (v2,v3 auth only). - # CLI flag: -thanos.swift.region-name - [region_name: | default = ""] - - # Name of the OpenStack Swift container to put chunks in. - # CLI flag: -thanos.swift.container-name - [container_name: | default = ""] - - # Max retries on requests error. - # CLI flag: -thanos.swift.max-retries - [max_retries: | default = 3] - - # Time after which a connection attempt is aborted. - # CLI flag: -thanos.swift.connect-timeout - [connect_timeout: | default = 10s] - - # Time after which an idle request is aborted. 
The timeout watchdog is reset - # each time some data is received, so the timeout triggers after X time no - # data is received on a request. - # CLI flag: -thanos.swift.request-timeout - [request_timeout: | default = 5s] - - filesystem: - # Local filesystem storage directory. - # CLI flag: -thanos.filesystem.dir - [dir: | default = ""] - - # Prefix for all objects stored in the backend storage. For simplicity, it may - # only contain digits and English alphabet letters. - # CLI flag: -thanos.storage-prefix - [storage_prefix: | default = ""] - -# The maximum number of chunks to fetch per batch. -# CLI flag: -store.max-chunk-batch-size -[max_chunk_batch_size: | default = 50] - -# Configures storing index in an Object Store -# (GCS/S3/Azure/Swift/COS/Filesystem) in the form of boltdb files. Required -# fields only required when boltdb-shipper is defined in config. -boltdb_shipper: - # Directory where ingesters would write index files which would then be - # uploaded by shipper to configured storage - # CLI flag: -boltdb.shipper.active-index-directory - [active_index_directory: | default = ""] - - # Cache location for restoring index files from storage for queries - # CLI flag: -boltdb.shipper.cache-location - [cache_location: | default = ""] - - # TTL for index files restored in cache for queries - # CLI flag: -boltdb.shipper.cache-ttl - [cache_ttl: | default = 24h] - - # Resync downloaded files with the storage - # CLI flag: -boltdb.shipper.resync-interval - [resync_interval: | default = 5m] - - # Number of days of common index to be kept downloaded for queries. For per - # tenant index query readiness, use limits overrides config. - # CLI flag: -boltdb.shipper.query-ready-num-days - [query_ready_num_days: | default = 0] - - index_gateway_client: - # The grpc_client block configures the gRPC client used to communicate - # between two Loki components. 
- # The CLI flags prefix for this block configuration is: - # boltdb.shipper.index-gateway-client.grpc - [grpc_client_config: ] - - # Hostname or IP of the Index Gateway gRPC server running in simple mode. - # Can also be prefixed with dns+, dnssrv+, or dnssrvnoa+ to resolve a DNS A - # record with multiple IP's, a DNS SRV record with a followup A record - # lookup, or a DNS SRV record without a followup A record lookup, - # respectively. - # CLI flag: -boltdb.shipper.index-gateway-client.server-address - [server_address: | default = ""] - - # Whether requests sent to the gateway should be logged or not. - # CLI flag: -boltdb.shipper.index-gateway-client.log-gateway-requests - [log_gateway_requests: | default = false] - - [ingestername: | default = ""] - - [mode: | default = ""] - - [ingesterdbretainperiod: ] - - # Build per tenant index files - # CLI flag: -boltdb.shipper.build-per-tenant-index - [build_per_tenant_index: | default = false] - -# Configures storing index in an Object Store -# (GCS/S3/Azure/Swift/COS/Filesystem) in a prometheus TSDB-like format. Required -# fields only required when TSDB is defined in config. -tsdb_shipper: - # Directory where ingesters would write index files which would then be - # uploaded by shipper to configured storage - # CLI flag: -tsdb.shipper.active-index-directory - [active_index_directory: | default = ""] - - # Cache location for restoring index files from storage for queries - # CLI flag: -tsdb.shipper.cache-location - [cache_location: | default = ""] - - # TTL for index files restored in cache for queries - # CLI flag: -tsdb.shipper.cache-ttl - [cache_ttl: | default = 24h] - - # Resync downloaded files with the storage - # CLI flag: -tsdb.shipper.resync-interval - [resync_interval: | default = 5m] - - # Number of days of common index to be kept downloaded for queries. For per - # tenant index query readiness, use limits overrides config. 
- # CLI flag: -tsdb.shipper.query-ready-num-days - [query_ready_num_days: | default = 0] - - index_gateway_client: - # The grpc_client block configures the gRPC client used to communicate - # between two Loki components. - # The CLI flags prefix for this block configuration is: - # tsdb.shipper.index-gateway-client.grpc - [grpc_client_config: ] - - # Hostname or IP of the Index Gateway gRPC server running in simple mode. - # Can also be prefixed with dns+, dnssrv+, or dnssrvnoa+ to resolve a DNS A - # record with multiple IP's, a DNS SRV record with a followup A record - # lookup, or a DNS SRV record without a followup A record lookup, - # respectively. - # CLI flag: -tsdb.shipper.index-gateway-client.server-address - [server_address: | default = ""] - - # Whether requests sent to the gateway should be logged or not. - # CLI flag: -tsdb.shipper.index-gateway-client.log-gateway-requests - [log_gateway_requests: | default = false] - - [ingestername: | default = ""] - - [mode: | default = ""] - - [ingesterdbretainperiod: ] - -# Configures Bloom Shipper. -bloom_shipper: - # Working directory to store downloaded Bloom Blocks. - # CLI flag: -bloom.shipper.working-directory - [working_directory: | default = "bloom-shipper"] - - blocks_downloading_queue: - # The count of parallel workers that download Bloom Blocks. - # CLI flag: -bloom.shipper.blocks-downloading-queue.workers-count - [workers_count: | default = 100] - - # Maximum number of task in queue per tenant per bloom-gateway. Enqueuing - # the tasks above this limit will fail an error. - # CLI flag: -bloom.shipper.blocks-downloading-queue.max_tasks_enqueued_per_tenant - [max_tasks_enqueued_per_tenant: | default = 10000] - - blocks_cache: - # Cache for bloom blocks. Whether embedded cache is enabled. - # CLI flag: -bloom.blocks-cache.enabled - [enabled: | default = false] - - # Cache for bloom blocks. Maximum memory size of the cache in MB. 
- # CLI flag: -bloom.blocks-cache.max-size-mb - [max_size_mb: | default = 100] - - # Cache for bloom blocks. Maximum number of entries in the cache. - # CLI flag: -bloom.blocks-cache.max-size-items - [max_size_items: | default = 0] - - # Cache for bloom blocks. The time to live for items in the cache before - # they get purged. - # CLI flag: -bloom.blocks-cache.ttl - [ttl: | default = 24h] - - # The cache block configures the cache backend. - # The CLI flags prefix for this block configuration is: bloom.metas-cache - [metas_cache: ] -``` - -### chunk_store_config - -The `chunk_store_config` block configures how chunks will be cached and how long to wait before saving them to the backing store. - -```yaml -# The cache block configures the cache backend. -# The CLI flags prefix for this block configuration is: store.chunks-cache -[chunk_cache_config: ] - -# Write dedupe cache is deprecated along with legacy index types (aws, -# aws-dynamo, bigtable, bigtable-hashed, cassandra, gcp, gcp-columnkey, -# grpc-store). -# Consider using TSDB index which does not require a write dedupe cache. -# The CLI flags prefix for this block configuration is: store.index-cache-write -[write_dedupe_cache_config: ] - -# Cache index entries older than this period. 0 to disable. -# CLI flag: -store.cache-lookups-older-than -[cache_lookups_older_than: | default = 0s] -``` - -### schema_config - -Configures the chunk index schema and where it is stored. - -```yaml -[configs: ] -``` - -### compactor - -The `compactor` block configures the compactor component, which compacts index shards for performance. - -```yaml -# Directory where files can be downloaded for compaction. -# CLI flag: -compactor.working-directory -[working_directory: | default = ""] - -# Interval at which to re-run the compaction operation. -# CLI flag: -compactor.compaction-interval -[compaction_interval: | default = 10m] - -# Interval at which to apply/enforce retention. 0 means run at same interval as -# compaction. 
If non-zero, it should always be a multiple of compaction -# interval. -# CLI flag: -compactor.apply-retention-interval -[apply_retention_interval: | default = 0s] - -# (Experimental) Activate custom (per-stream,per-tenant) retention. -# CLI flag: -compactor.retention-enabled -[retention_enabled: | default = false] - -# Delay after which chunks will be fully deleted during retention. -# CLI flag: -compactor.retention-delete-delay -[retention_delete_delay: | default = 2h] - -# The total amount of worker to use to delete chunks. -# CLI flag: -compactor.retention-delete-worker-count -[retention_delete_worker_count: | default = 150] - -# The maximum amount of time to spend running retention and deletion on any -# given table in the index. -# CLI flag: -compactor.retention-table-timeout -[retention_table_timeout: | default = 0s] - -# Store used for managing delete requests. -# CLI flag: -compactor.delete-request-store -[delete_request_store: | default = ""] - -# Path prefix for storing delete requests. -# CLI flag: -compactor.delete-request-store.key-prefix -[delete_request_store_key_prefix: | default = "index/"] - -# The max number of delete requests to run per compaction cycle. -# CLI flag: -compactor.delete-batch-size -[delete_batch_size: | default = 70] - -# Allow cancellation of delete request until duration after they are created. -# Data would be deleted only after delete requests have been older than this -# duration. Ideally this should be set to at least 24h. -# CLI flag: -compactor.delete-request-cancel-period -[delete_request_cancel_period: | default = 24h] - -# Constrain the size of any single delete request. When a delete request > -# delete_max_interval is input, the request is sharded into smaller requests of -# no more than delete_max_interval -# CLI flag: -compactor.delete-max-interval -[delete_max_interval: | default = 24h] - -# Maximum number of tables to compact in parallel. 
While increasing this value, -# please make sure compactor has enough disk space allocated to be able to store -# and compact as many tables. -# CLI flag: -compactor.max-compaction-parallelism -[max_compaction_parallelism: | default = 1] - -# Number of upload/remove operations to execute in parallel when finalizing a -# compaction. NOTE: This setting is per compaction operation, which can be -# executed in parallel. The upper bound on the number of concurrent uploads is -# upload_parallelism * max_compaction_parallelism. -# CLI flag: -compactor.upload-parallelism -[upload_parallelism: | default = 10] - -# The hash ring configuration used by compactors to elect a single instance for -# running compactions. The CLI flags prefix for this block config is: -# compactor.ring -compactor_ring: - kvstore: - # Backend storage to use for the ring. Supported values are: consul, etcd, - # inmemory, memberlist, multi. - # CLI flag: -compactor.ring.store - [store: | default = "consul"] - - # The prefix for the keys in the store. Should end with a /. - # CLI flag: -compactor.ring.prefix - [prefix: | default = "collectors/"] - - # Configuration for a Consul client. Only applies if the selected kvstore is - # consul. - # The CLI flags prefix for this block configuration is: compactor.ring - [consul: ] - - # Configuration for an ETCD v3 client. Only applies if the selected kvstore - # is etcd. - # The CLI flags prefix for this block configuration is: compactor.ring - [etcd: ] - - multi: - # Primary backend storage used by multi-client. - # CLI flag: -compactor.ring.multi.primary - [primary: | default = ""] - - # Secondary backend storage used by multi-client. - # CLI flag: -compactor.ring.multi.secondary - [secondary: | default = ""] - - # Mirror writes to secondary store. - # CLI flag: -compactor.ring.multi.mirror-enabled - [mirror_enabled: | default = false] - - # Timeout for storing value to secondary store. 
- # CLI flag: -compactor.ring.multi.mirror-timeout - [mirror_timeout: | default = 2s] - - # Period at which to heartbeat to the ring. 0 = disabled. - # CLI flag: -compactor.ring.heartbeat-period - [heartbeat_period: | default = 15s] - - # The heartbeat timeout after which compactors are considered unhealthy within - # the ring. 0 = never (timeout disabled). - # CLI flag: -compactor.ring.heartbeat-timeout - [heartbeat_timeout: | default = 1m] - - # File path where tokens are stored. If empty, tokens are not stored at - # shutdown and restored at startup. - # CLI flag: -compactor.ring.tokens-file-path - [tokens_file_path: | default = ""] - - # True to enable zone-awareness and replicate blocks across different - # availability zones. - # CLI flag: -compactor.ring.zone-awareness-enabled - [zone_awareness_enabled: | default = false] - - # Instance ID to register in the ring. - # CLI flag: -compactor.ring.instance-id - [instance_id: | default = ""] - - # Name of network interface to read address from. - # CLI flag: -compactor.ring.instance-interface-names - [instance_interface_names: | default = []] - - # Port to advertise in the ring (defaults to server.grpc-listen-port). - # CLI flag: -compactor.ring.instance-port - [instance_port: | default = 0] - - # IP address to advertise in the ring. - # CLI flag: -compactor.ring.instance-addr - [instance_addr: | default = ""] - - # The availability zone where this instance is running. Required if - # zone-awareness is enabled. - # CLI flag: -compactor.ring.instance-availability-zone - [instance_availability_zone: | default = ""] - - # Enable using a IPv6 instance address. - # CLI flag: -compactor.ring.instance-enable-ipv6 - [instance_enable_ipv6: | default = false] - -# Number of tables that compactor will try to compact. Newer tables are chosen -# when this is less than the number of tables available. -# CLI flag: -compactor.tables-to-compact -[tables_to_compact: | default = 0] - -# Do not compact N latest tables. 
Together with -compactor.run-once and -# -compactor.tables-to-compact, this is useful when clearing compactor backlogs. -# CLI flag: -compactor.skip-latest-n-tables -[skip_latest_n_tables: | default = 0] -``` - -### bloom_compactor - -The `bloom_compactor` block configures the Loki bloom compactor server, responsible for compacting stream indexes into bloom filters and merging them as bloom blocks - -```yaml -# Defines the ring to be used by the bloom-compactor servers. In case this isn't -# configured, this block supports inheriting configuration from the common ring -# section. -ring: - kvstore: - # Backend storage to use for the ring. Supported values are: consul, etcd, - # inmemory, memberlist, multi. - # CLI flag: -bloom-compactor.ring.store - [store: | default = "consul"] - - # The prefix for the keys in the store. Should end with a /. - # CLI flag: -bloom-compactor.ring.prefix - [prefix: | default = "collectors/"] - - # Configuration for a Consul client. Only applies if the selected kvstore is - # consul. - # The CLI flags prefix for this block configuration is: bloom-compactor.ring - [consul: ] - - # Configuration for an ETCD v3 client. Only applies if the selected kvstore - # is etcd. - # The CLI flags prefix for this block configuration is: bloom-compactor.ring - [etcd: ] - - multi: - # Primary backend storage used by multi-client. - # CLI flag: -bloom-compactor.ring.multi.primary - [primary: | default = ""] - - # Secondary backend storage used by multi-client. - # CLI flag: -bloom-compactor.ring.multi.secondary - [secondary: | default = ""] - - # Mirror writes to secondary store. - # CLI flag: -bloom-compactor.ring.multi.mirror-enabled - [mirror_enabled: | default = false] - - # Timeout for storing value to secondary store. - # CLI flag: -bloom-compactor.ring.multi.mirror-timeout - [mirror_timeout: | default = 2s] - - # Period at which to heartbeat to the ring. 0 = disabled. 
- # CLI flag: -bloom-compactor.ring.heartbeat-period - [heartbeat_period: | default = 15s] - - # The heartbeat timeout after which compactors are considered unhealthy within - # the ring. 0 = never (timeout disabled). - # CLI flag: -bloom-compactor.ring.heartbeat-timeout - [heartbeat_timeout: | default = 1m] - - # File path where tokens are stored. If empty, tokens are not stored at - # shutdown and restored at startup. - # CLI flag: -bloom-compactor.ring.tokens-file-path - [tokens_file_path: | default = ""] - - # True to enable zone-awareness and replicate blocks across different - # availability zones. - # CLI flag: -bloom-compactor.ring.zone-awareness-enabled - [zone_awareness_enabled: | default = false] - - # Number of tokens to use in the ring per compactor. Higher number of tokens - # will result in more and smaller files (metas and blocks.) - # CLI flag: -bloom-compactor.ring.num-tokens - [num_tokens: | default = 10] - - # Instance ID to register in the ring. - # CLI flag: -bloom-compactor.ring.instance-id - [instance_id: | default = ""] - - # Name of network interface to read address from. - # CLI flag: -bloom-compactor.ring.instance-interface-names - [instance_interface_names: | default = []] - - # Port to advertise in the ring (defaults to server.grpc-listen-port). - # CLI flag: -bloom-compactor.ring.instance-port - [instance_port: | default = 0] - - # IP address to advertise in the ring. - # CLI flag: -bloom-compactor.ring.instance-addr - [instance_addr: | default = ""] - - # The availability zone where this instance is running. Required if - # zone-awareness is enabled. - # CLI flag: -bloom-compactor.ring.instance-availability-zone - [instance_availability_zone: | default = ""] - - # Enable using a IPv6 instance address. - # CLI flag: -bloom-compactor.ring.instance-enable-ipv6 - [instance_enable_ipv6: | default = false] - -# Flag to enable or disable the usage of the bloom-compactor component. 
-# CLI flag: -bloom-compactor.enabled -[enabled: | default = false] - -# Interval at which to re-run the compaction operation. -# CLI flag: -bloom-compactor.compaction-interval -[compaction_interval: | default = 10m] - -# How many index periods (days) to wait before building bloom filters for a -# table. This can be used to lower cost by not re-writing data to object storage -# too frequently since recent data changes more often. -# CLI flag: -bloom-compactor.min-table-compaction-period -[min_table_compaction_period: | default = 1] - -# The maximum number of index periods (days) to build bloom filters for a table. -# This can be used to lower cost by not trying to compact older data which -# doesn't change. This can be optimized by aligning it with the maximum -# `reject_old_samples_max_age` setting of any tenant. -# CLI flag: -bloom-compactor.max-table-compaction-period -[max_table_compaction_period: | default = 7] - -# Number of workers to run in parallel for compaction. -# CLI flag: -bloom-compactor.worker-parallelism -[worker_parallelism: | default = 1] - -# Minimum backoff time between retries. -# CLI flag: -bloom-compactor.compaction-retries-min-backoff -[compaction_retries_min_backoff: | default = 10s] - -# Maximum backoff time between retries. -# CLI flag: -bloom-compactor.compaction-retries-max-backoff -[compaction_retries_max_backoff: | default = 1m] - -# Number of retries to perform when compaction fails. -# CLI flag: -bloom-compactor.compaction-retries -[compaction_retries: | default = 3] - -# Maximum number of tables to compact in parallel. While increasing this value, -# please make sure compactor has enough disk space allocated to be able to store -# and compact as many tables. -# CLI flag: -bloom-compactor.max-compaction-parallelism -[max_compaction_parallelism: | default = 1] -``` - -### limits_config - -The `limits_config` block configures global and per-tenant limits in Loki. 
- -```yaml -# Whether the ingestion rate limit should be applied individually to each -# distributor instance (local), or evenly shared across the cluster (global). -# The ingestion rate strategy cannot be overridden on a per-tenant basis. -# - local: enforces the limit on a per distributor basis. The actual effective -# rate limit will be N times higher, where N is the number of distributor -# replicas. -# - global: enforces the limit globally, configuring a per-distributor local -# rate limiter as 'ingestion_rate / N', where N is the number of distributor -# replicas (it's automatically adjusted if the number of replicas change). The -# global strategy requires the distributors to form their own ring, which is -# used to keep track of the current number of healthy distributor replicas. -# CLI flag: -distributor.ingestion-rate-limit-strategy -[ingestion_rate_strategy: | default = "global"] - -# Per-user ingestion rate limit in sample size per second. Units in MB. -# CLI flag: -distributor.ingestion-rate-limit-mb -[ingestion_rate_mb: | default = 4] - -# Per-user allowed ingestion burst size (in sample size). Units in MB. The burst -# size refers to the per-distributor local rate limiter even in the case of the -# 'global' strategy, and should be set at least to the maximum logs size -# expected in a single push request. -# CLI flag: -distributor.ingestion-burst-size-mb -[ingestion_burst_size_mb: | default = 6] - -# Maximum length accepted for label names. -# CLI flag: -validation.max-length-label-name -[max_label_name_length: | default = 1024] - -# Maximum length accepted for label value. This setting also applies to the -# metric name. -# CLI flag: -validation.max-length-label-value -[max_label_value_length: | default = 2048] - -# Maximum number of label names per series. -# CLI flag: -validation.max-label-names-per-series -[max_label_names_per_series: | default = 15] - -# Whether or not old samples will be rejected. 
-# CLI flag: -validation.reject-old-samples -[reject_old_samples: | default = true] - -# Maximum accepted sample age before rejecting. -# CLI flag: -validation.reject-old-samples.max-age -[reject_old_samples_max_age: | default = 1w] - -# Duration which table will be created/deleted before/after it's needed; we -# won't accept sample from before this time. -# CLI flag: -validation.create-grace-period -[creation_grace_period: | default = 10m] - -# Maximum line size on ingestion path. Example: 256kb. Any log line exceeding -# this limit will be discarded unless `distributor.max-line-size-truncate` is -# set which in case it is truncated instead of discarding it completely. There -# is no limit when unset or set to 0. -# CLI flag: -distributor.max-line-size -[max_line_size: | default = 256KB] - -# Whether to truncate lines that exceed max_line_size. -# CLI flag: -distributor.max-line-size-truncate -[max_line_size_truncate: | default = false] - -# Alter the log line timestamp during ingestion when the timestamp is the same -# as the previous entry for the same stream. When enabled, if a log line in a -# push request has the same timestamp as the previous line for the same stream, -# one nanosecond is added to the log line. This will preserve the received order -# of log lines with the exact same timestamp when they are queried, by slightly -# altering their stored timestamp. NOTE: This is imperfect, because Loki accepts -# out of order writes, and another push request for the same stream could -# contain duplicate timestamps to existing entries and they will not be -# incremented. -# CLI flag: -validation.increment-duplicate-timestamps -[increment_duplicate_timestamp: | default = false] - -# Maximum number of active streams per user, per ingester. 0 to disable. -# CLI flag: -ingester.max-streams-per-user -[max_streams_per_user: | default = 0] - -# Maximum number of active streams per user, across the cluster. 0 to disable. 
-# When the global limit is enabled, each ingester is configured with a dynamic -# local limit based on the replication factor and the current number of healthy -# ingesters, and is kept updated whenever the number of ingesters change. -# CLI flag: -ingester.max-global-streams-per-user -[max_global_streams_per_user: | default = 5000] - -# Deprecated. When true, out-of-order writes are accepted. -# CLI flag: -ingester.unordered-writes -[unordered_writes: | default = true] - -# Maximum byte rate per second per stream, also expressible in human readable -# forms (1MB, 256KB, etc). -# CLI flag: -ingester.per-stream-rate-limit -[per_stream_rate_limit: | default = 3MB] - -# Maximum burst bytes per stream, also expressible in human readable forms (1MB, -# 256KB, etc). This is how far above the rate limit a stream can 'burst' before -# the stream is limited. -# CLI flag: -ingester.per-stream-rate-limit-burst -[per_stream_rate_limit_burst: | default = 15MB] - -# Maximum number of chunks that can be fetched in a single query. -# CLI flag: -store.query-chunk-limit -[max_chunks_per_query: | default = 2000000] - -# Limit the maximum of unique series that is returned by a metric query. When -# the limit is reached an error is returned. -# CLI flag: -querier.max-query-series -[max_query_series: | default = 500] - -# Limit how far back in time series data and metadata can be queried, up until -# lookback duration ago. This limit is enforced in the query frontend, the -# querier and the ruler. If the requested time range is outside the allowed -# range, the request will not fail, but will be modified to only query data -# within the allowed time range. The default value of 0 does not set a limit. -# CLI flag: -querier.max-query-lookback -[max_query_lookback: | default = 0s] - -# The limit to length of chunk store queries. 0 to disable. -# CLI flag: -store.max-query-length -[max_query_length: | default = 30d1h] - -# Limit the length of the [range] inside a range query. 
Default is 0 or -# unlimited -# CLI flag: -querier.max-query-range -[max_query_range: | default = 0s] - -# Maximum number of queries that will be scheduled in parallel by the frontend. -# CLI flag: -querier.max-query-parallelism -[max_query_parallelism: | default = 32] - -# Maximum number of queries will be scheduled in parallel by the frontend for -# TSDB schemas. -# CLI flag: -querier.tsdb-max-query-parallelism -[tsdb_max_query_parallelism: | default = 128] - -# Maximum number of bytes assigned to a single sharded query. Also expressible -# in human readable forms (1GB, etc). -# CLI flag: -querier.tsdb-max-bytes-per-shard -[tsdb_max_bytes_per_shard: | default = 600MB] - -# Cardinality limit for index queries. -# CLI flag: -store.cardinality-limit -[cardinality_limit: | default = 100000] - -# Maximum number of stream matchers per query. -# CLI flag: -querier.max-streams-matcher-per-query -[max_streams_matchers_per_query: | default = 1000] - -# Maximum number of concurrent tail requests. -# CLI flag: -querier.max-concurrent-tail-requests -[max_concurrent_tail_requests: | default = 10] - -# Maximum number of log entries that will be returned for a query. -# CLI flag: -validation.max-entries-limit -[max_entries_limit_per_query: | default = 5000] - -# Most recent allowed cacheable result per-tenant, to prevent caching very -# recent results that might still be in flux. -# CLI flag: -frontend.max-cache-freshness -[max_cache_freshness_per_query: | default = 10m] - -# Do not cache metadata request if the end time is within the -# frontend.max-metadata-cache-freshness window. Set this to 0 to apply no such -# limits. Defaults to 24h. -# CLI flag: -frontend.max-metadata-cache-freshness -[max_metadata_cache_freshness: | default = 1d] - -# Do not cache requests with an end time that falls within Now minus this -# duration. 0 disables this feature (default). 
-# CLI flag: -frontend.max-stats-cache-freshness -[max_stats_cache_freshness: | default = 10m] - -# Maximum number of queriers that can handle requests for a single tenant. If -# set to 0 or value higher than number of available queriers, *all* queriers -# will handle requests for the tenant. Each frontend (or query-scheduler, if -# used) will select the same set of queriers for the same tenant (given that all -# queriers are connected to all frontends / query-schedulers). This option only -# works with queriers connecting to the query-frontend / query-scheduler, not -# when using downstream URL. -# CLI flag: -frontend.max-queriers-per-tenant -[max_queriers_per_tenant: | default = 0] - -# How much of the available query capacity ("querier" components in distributed -# mode, "read" components in SSD mode) can be used by a single tenant. Allowed -# values are 0.0 to 1.0. For example, setting this to 0.5 would allow a tenant -# to use half of the available queriers for processing the query workload. If -# set to 0, query capacity is determined by frontend.max-queriers-per-tenant. -# When both frontend.max-queriers-per-tenant and frontend.max-query-capacity are -# configured, smaller value of the resulting querier replica count is -# considered: min(frontend.max-queriers-per-tenant, ceil(querier_replicas * -# frontend.max-query-capacity)). *All* queriers will handle requests for the -# tenant if neither limits are applied. This option only works with queriers -# connecting to the query-frontend / query-scheduler, not when using downstream -# URL. Use this feature in a multi-tenant setup where you need to limit query -# capacity for certain tenants. -# CLI flag: -frontend.max-query-capacity -[max_query_capacity: | default = 0] - -# Number of days of index to be kept always downloaded for queries. Applies only -# to per user index in boltdb-shipper index store. 0 to disable. 
-# CLI flag: -store.query-ready-index-num-days -[query_ready_index_num_days: | default = 0] - -# Timeout when querying backends (ingesters or storage) during the execution of -# a query request. When a specific per-tenant timeout is used, the global -# timeout is ignored. -# CLI flag: -querier.query-timeout -[query_timeout: | default = 1m] - -# Split queries by a time interval and execute in parallel. The value 0 disables -# splitting by time. This also determines how cache keys are chosen when result -# caching is enabled. -# CLI flag: -querier.split-queries-by-interval -[split_queries_by_interval: | default = 1h] - -# Split metadata queries by a time interval and execute in parallel. The value 0 -# disables splitting metadata queries by time. This also determines how cache -# keys are chosen when label/series result caching is enabled. -# CLI flag: -querier.split-metadata-queries-by-interval -[split_metadata_queries_by_interval: | default = 1d] - -# Experimental. Split interval to use for the portion of metadata request that -# falls within `recent_metadata_query_window`. Rest of the request which is -# outside the window still uses `split_metadata_queries_by_interval`. If set to -# 0, the entire request defaults to using a split interval of -# `split_metadata_queries_by_interval.`. -# CLI flag: -experimental.querier.split-recent-metadata-queries-by-interval -[split_recent_metadata_queries_by_interval: | default = 1h] - -# Experimental. Metadata query window inside which -# `split_recent_metadata_queries_by_interval` gets applied, portion of the -# metadata request that falls in this window is split using -# `split_recent_metadata_queries_by_interval`. The value 0 disables using a -# different split interval for recent metadata queries. -# -# This is added to improve cacheability of recent metadata queries. Query split -# interval also determines the interval used in cache key. 
The default split -# interval of 24h is useful for caching long queries, each cache key holding 1 -# day's results. But metadata queries are often shorter than 24h, to cache them -# effectively we need a smaller split interval. `recent_metadata_query_window` -# along with `split_recent_metadata_queries_by_interval` help configure a -# shorter split interval for recent metadata queries. -# CLI flag: -experimental.querier.recent-metadata-query-window -[recent_metadata_query_window: | default = 0s] - -# Split instant metric queries by a time interval and execute in parallel. The -# value 0 disables splitting instant metric queries by time. This also -# determines how cache keys are chosen when instant metric query result caching -# is enabled. -# CLI flag: -querier.split-instant-metric-queries-by-interval -[split_instant_metric_queries_by_interval: | default = 1h] - -# Interval to use for time-based splitting when a request is within the -# `query_ingesters_within` window; defaults to `split-queries-by-interval` by -# setting to 0. -# CLI flag: -querier.split-ingester-queries-by-interval -[split_ingester_queries_by_interval: | default = 0s] - -# Limit queries that can be sharded. Queries within the time range of now and -# now minus this sharding lookback are not sharded. The default value of 0s -# disables the lookback, causing sharding of all queries at all times. -# CLI flag: -frontend.min-sharding-lookback -[min_sharding_lookback: | default = 0s] - -# Max number of bytes a query can fetch. Enforced in log and metric queries only -# when TSDB is used. The default value of 0 disables this limit. -# CLI flag: -frontend.max-query-bytes-read -[max_query_bytes_read: | default = 0B] - -# Max number of bytes a query can fetch after splitting and sharding. Enforced -# in log and metric queries only when TSDB is used. The default value of 0 -# disables this limit. 
-# CLI flag: -frontend.max-querier-bytes-read -[max_querier_bytes_read: | default = 150GB] - -# Enable log-volume endpoints. -[volume_enabled: ] - -# The maximum number of aggregated series in a log-volume response -# CLI flag: -limits.volume-max-series -[volume_max_series: | default = 1000] - -# Maximum number of rules per rule group per-tenant. 0 to disable. -# CLI flag: -ruler.max-rules-per-rule-group -[ruler_max_rules_per_rule_group: | default = 0] - -# Maximum number of rule groups per-tenant. 0 to disable. -# CLI flag: -ruler.max-rule-groups-per-tenant -[ruler_max_rule_groups_per_tenant: | default = 0] - -# The default tenant's shard size when shuffle-sharding is enabled in the ruler. -# When this setting is specified in the per-tenant overrides, a value of 0 -# disables shuffle sharding for the tenant. -# CLI flag: -ruler.tenant-shard-size -[ruler_tenant_shard_size: | default = 0] - -# Disable recording rules remote-write. -[ruler_remote_write_disabled: ] - -# Deprecated: Use 'ruler_remote_write_config' instead. The URL of the endpoint -# to send samples to. -[ruler_remote_write_url: | default = ""] - -# Deprecated: Use 'ruler_remote_write_config' instead. Timeout for requests to -# the remote write endpoint. -[ruler_remote_write_timeout: ] - -# Deprecated: Use 'ruler_remote_write_config' instead. Custom HTTP headers to be -# sent along with each remote write request. Be aware that headers that are set -# by Loki itself can't be overwritten. -[ruler_remote_write_headers: ] - -# Deprecated: Use 'ruler_remote_write_config' instead. List of remote write -# relabel configurations. -[ruler_remote_write_relabel_configs: ] - -# Deprecated: Use 'ruler_remote_write_config' instead. Number of samples to -# buffer per shard before we block reading of more samples from the WAL. It is -# recommended to have enough capacity in each shard to buffer several requests -# to keep throughput up while processing occasional slow remote requests. 
-[ruler_remote_write_queue_capacity: ] - -# Deprecated: Use 'ruler_remote_write_config' instead. Minimum number of shards, -# i.e. amount of concurrency. -[ruler_remote_write_queue_min_shards: ] - -# Deprecated: Use 'ruler_remote_write_config' instead. Maximum number of shards, -# i.e. amount of concurrency. -[ruler_remote_write_queue_max_shards: ] - -# Deprecated: Use 'ruler_remote_write_config' instead. Maximum number of samples -# per send. -[ruler_remote_write_queue_max_samples_per_send: ] - -# Deprecated: Use 'ruler_remote_write_config' instead. Maximum time a sample -# will wait in buffer. -[ruler_remote_write_queue_batch_send_deadline: ] - -# Deprecated: Use 'ruler_remote_write_config' instead. Initial retry delay. Gets -# doubled for every retry. -[ruler_remote_write_queue_min_backoff: ] - -# Deprecated: Use 'ruler_remote_write_config' instead. Maximum retry delay. -[ruler_remote_write_queue_max_backoff: ] - -# Deprecated: Use 'ruler_remote_write_config' instead. Retry upon receiving a -# 429 status code from the remote-write storage. This is experimental and might -# change in the future. -[ruler_remote_write_queue_retry_on_ratelimit: ] - -# Deprecated: Use 'ruler_remote_write_config' instead. Configures AWS's -# Signature Verification 4 signing process to sign every remote write request. -ruler_remote_write_sigv4_config: - [region: | default = ""] - - [access_key: | default = ""] - - [secret_key: | default = ""] - - [profile: | default = ""] - - [role_arn: | default = ""] - -# Configures global and per-tenant limits for remote write clients. A map with -# remote client id as key. -[ruler_remote_write_config: ] - -# Timeout for a remote rule evaluation. Defaults to the value of -# 'querier.query-timeout'. -[ruler_remote_evaluation_timeout: ] - -# Maximum size (in bytes) of the allowable response size from a remote rule -# evaluation. Set to 0 to allow any response size (default). -[ruler_remote_evaluation_max_response_size: ] - -# Deletion mode. 
Can be one of 'disabled', 'filter-only', or -# 'filter-and-delete'. When set to 'filter-only' or 'filter-and-delete', and if -# retention_enabled is true, then the log entry deletion API endpoints are -# available. -# CLI flag: -compactor.deletion-mode -[deletion_mode: | default = "filter-and-delete"] - -# Retention period to apply to stored data, only applies if retention_enabled is -# true in the compactor config. As of version 2.8.0, a zero value of 0 or 0s -# disables retention. In previous releases, Loki did not properly honor a zero -# value to disable retention and a really large value should be used instead. -# CLI flag: -store.retention -[retention_period: | default = 0s] - -# Per-stream retention to apply, if the retention is enable on the compactor -# side. -# Example: -# retention_stream: -# - selector: '{namespace="dev"}' -# priority: 1 -# period: 24h -# - selector: '{container="nginx"}' -# priority: 1 -# period: 744h -# Selector is a Prometheus labels matchers that will apply the 'period' -# retention only if the stream is matching. In case multiple stream are -# matching, the highest priority will be picked. If no rule is matched the -# 'retention_period' is used. -[retention_stream: ] - -# Feature renamed to 'runtime configuration', flag deprecated in favor of -# -runtime-config.file (runtime_config.file in YAML). -# CLI flag: -limits.per-user-override-config -[per_tenant_override_config: | default = ""] - -# Feature renamed to 'runtime configuration'; flag deprecated in favor of -# -runtime-config.reload-period (runtime_config.period in YAML). -# CLI flag: -limits.per-user-override-period -[per_tenant_override_period: | default = 10s] - -# Deprecated: Use deletion_mode per tenant configuration instead. -[allow_deletes: ] - -shard_streams: - [enabled: ] - - [logging_enabled: ] - - [desired_rate: ] - -[blocked_queries: ] - -# Define a list of required selector labels. -[required_labels: ] - -# Minimum number of label matchers a query should contain. 
-[minimum_labels_number: ] - -# The shard size defines how many index gateways should be used by a tenant for -# querying. If the global shard factor is 0, the global shard factor is set to -# the deprecated -replication-factor for backwards compatibility reasons. -# CLI flag: -index-gateway.shard-size -[index_gateway_shard_size: | default = 0] - -# The shard size defines how many bloom gateways should be used by a tenant for -# querying. -# CLI flag: -bloom-gateway.shard-size -[bloom_gateway_shard_size: | default = 0] - -# Whether to use the bloom gateway component in the read path to filter chunks. -# CLI flag: -bloom-gateway.enable-filtering -[bloom_gateway_enable_filtering: | default = false] - -# The shard size defines how many bloom compactors should be used by a tenant -# when computing blooms. If it's set to 0, shuffle sharding is disabled. -# CLI flag: -bloom-compactor.shard-size -[bloom_compactor_shard_size: | default = 0] - -# Whether to compact chunks into bloom filters. -# CLI flag: -bloom-compactor.enable-compaction -[bloom_compactor_enable_compaction: | default = false] - -# Length of the n-grams created when computing blooms from log lines. -# CLI flag: -bloom-compactor.ngram-length -[bloom_ngram_length: | default = 4] - -# Skip factor for the n-grams created when computing blooms from log lines. -# CLI flag: -bloom-compactor.ngram-skip -[bloom_ngram_skip: | default = 1] - -# Scalable Bloom Filter desired false-positive rate. -# CLI flag: -bloom-compactor.false-positive-rate -[bloom_false_positive_rate: | default = 0.01] - -# Maximum number of blocks will be downloaded in parallel by the Bloom Gateway. -# CLI flag: -bloom-gateway.blocks-downloading-parallelism -[bloom_gateway_blocks_downloading_parallelism: | default = 50] - -# Interval for computing the cache key in the Bloom Gateway. -# CLI flag: -bloom-gateway.cache-key-interval -[bloom_gateway_cache_key_interval: | default = 15m] - -# The maximum bloom block size. 
A value of 0 sets an unlimited size. Default is -# 200MB. The actual block size might exceed this limit since blooms will be -# added to blocks until the block exceeds the maximum block size. -# CLI flag: -bloom-compactor.max-block-size -[bloom_compactor_max_block_size: | default = 200MB] - -# Allow user to send structured metadata in push payload. -# CLI flag: -validation.allow-structured-metadata -[allow_structured_metadata: | default = false] - -# Maximum size accepted for structured metadata per log line. -# CLI flag: -limits.max-structured-metadata-size -[max_structured_metadata_size: | default = 64KB] - -# Maximum number of structured metadata entries per log line. -# CLI flag: -limits.max-structured-metadata-entries-count -[max_structured_metadata_entries_count: | default = 128] - -# OTLP log ingestion configurations -otlp_config: - # Configuration for resource attributes to store them as index labels or - # Structured Metadata or drop them altogether - resource_attributes: - # Configure whether to ignore the default list of resource attributes set in - # 'distributor.otlp.default_resource_attributes_as_index_labels' to be - # stored as index labels and only use the given resource attributes config - [ignore_defaults: | default = false] - - [attributes_config: ] - - # Configuration for scope attributes to store them as Structured Metadata or - # drop them altogether - [scope_attributes: ] - - # Configuration for log attributes to store them as Structured Metadata or - # drop them altogether - [log_attributes: ] -``` - -### frontend_worker - -The `frontend_worker` configures the worker - running within the Loki querier - picking up and executing queries enqueued by the query-frontend. - -```yaml -# Address of query frontend service, in host:port format. If -# -querier.scheduler-address is set as well, querier will use scheduler instead. -# Only one of -querier.frontend-address or -querier.scheduler-address can be -# set. 
If neither is set, queries are only received via HTTP endpoint. -# CLI flag: -querier.frontend-address -[frontend_address: | default = ""] - -# Hostname (and port) of scheduler that querier will periodically resolve, -# connect to and receive queries from. Only one of -querier.frontend-address or -# -querier.scheduler-address can be set. If neither is set, queries are only -# received via HTTP endpoint. -# CLI flag: -querier.scheduler-address -[scheduler_address: | default = ""] - -# How often to query DNS for query-frontend or query-scheduler address. Also -# used to determine how often to poll the scheduler-ring for addresses if the -# scheduler-ring is configured. -# CLI flag: -querier.dns-lookup-period -[dns_lookup_duration: | default = 3s] - -# Querier ID, sent to frontend service to identify requests from the same -# querier. Defaults to hostname. -# CLI flag: -querier.id -[id: | default = ""] - -# The grpc_client block configures the gRPC client used to communicate between -# two Loki components. -# The CLI flags prefix for this block configuration is: querier.frontend-client -[grpc_client_config: ] -``` - -### table_manager - -The `table_manager` block configures the table manager for retention. - -```yaml -# If true, disable all changes to DB capacity -# CLI flag: -table-manager.throughput-updates-disabled -[throughput_updates_disabled: | default = false] - -# If true, enables retention deletes of DB tables -# CLI flag: -table-manager.retention-deletes-enabled -[retention_deletes_enabled: | default = false] - -# Tables older than this retention period are deleted. Must be either 0 -# (disabled) or a multiple of 24h. When enabled, be aware this setting is -# destructive to data! -# CLI flag: -table-manager.retention-period -[retention_period: | default = 0s] - -# How frequently to poll backend to learn our capacity. 
-# CLI flag: -table-manager.poll-interval -[poll_interval: | default = 2m] - -# Periodic tables grace period (duration which table will be created/deleted -# before/after it's needed). -# CLI flag: -table-manager.periodic-table.grace-period -[creation_grace_period: | default = 10m] - -index_tables_provisioning: - # Enables on demand throughput provisioning for the storage provider (if - # supported). Applies only to tables which are not autoscaled. Supported by - # DynamoDB - # CLI flag: -table-manager.index-table.enable-ondemand-throughput-mode - [enable_ondemand_throughput_mode: | default = false] - - # Table default write throughput. Supported by DynamoDB - # CLI flag: -table-manager.index-table.write-throughput - [provisioned_write_throughput: | default = 1000] - - # Table default read throughput. Supported by DynamoDB - # CLI flag: -table-manager.index-table.read-throughput - [provisioned_read_throughput: | default = 300] - - write_scale: - # Should we enable autoscale for the table. - # CLI flag: -table-manager.index-table.write-throughput.scale.enabled - [enabled: | default = false] - - # AWS AutoScaling role ARN - # CLI flag: -table-manager.index-table.write-throughput.scale.role-arn - [role_arn: | default = ""] - - # DynamoDB minimum provision capacity. - # CLI flag: -table-manager.index-table.write-throughput.scale.min-capacity - [min_capacity: | default = 3000] - - # DynamoDB maximum provision capacity. - # CLI flag: -table-manager.index-table.write-throughput.scale.max-capacity - [max_capacity: | default = 6000] - - # DynamoDB minimum seconds between each autoscale up. - # CLI flag: -table-manager.index-table.write-throughput.scale.out-cooldown - [out_cooldown: | default = 1800] - - # DynamoDB minimum seconds between each autoscale down. - # CLI flag: -table-manager.index-table.write-throughput.scale.in-cooldown - [in_cooldown: | default = 1800] - - # DynamoDB target ratio of consumed capacity to provisioned capacity. 
- # CLI flag: -table-manager.index-table.write-throughput.scale.target-value - [target: | default = 80] - - read_scale: - # Should we enable autoscale for the table. - # CLI flag: -table-manager.index-table.read-throughput.scale.enabled - [enabled: | default = false] - - # AWS AutoScaling role ARN - # CLI flag: -table-manager.index-table.read-throughput.scale.role-arn - [role_arn: | default = ""] - - # DynamoDB minimum provision capacity. - # CLI flag: -table-manager.index-table.read-throughput.scale.min-capacity - [min_capacity: | default = 3000] - - # DynamoDB maximum provision capacity. - # CLI flag: -table-manager.index-table.read-throughput.scale.max-capacity - [max_capacity: | default = 6000] - - # DynamoDB minimum seconds between each autoscale up. - # CLI flag: -table-manager.index-table.read-throughput.scale.out-cooldown - [out_cooldown: | default = 1800] - - # DynamoDB minimum seconds between each autoscale down. - # CLI flag: -table-manager.index-table.read-throughput.scale.in-cooldown - [in_cooldown: | default = 1800] - - # DynamoDB target ratio of consumed capacity to provisioned capacity. - # CLI flag: -table-manager.index-table.read-throughput.scale.target-value - [target: | default = 80] - - # Enables on demand throughput provisioning for the storage provider (if - # supported). Applies only to tables which are not autoscaled. Supported by - # DynamoDB - # CLI flag: -table-manager.index-table.inactive-enable-ondemand-throughput-mode - [enable_inactive_throughput_on_demand_mode: | default = false] - - # Table write throughput for inactive tables. Supported by DynamoDB - # CLI flag: -table-manager.index-table.inactive-write-throughput - [inactive_write_throughput: | default = 1] - - # Table read throughput for inactive tables. Supported by DynamoDB - # CLI flag: -table-manager.index-table.inactive-read-throughput - [inactive_read_throughput: | default = 300] - - inactive_write_scale: - # Should we enable autoscale for the table. 
- # CLI flag: -table-manager.index-table.inactive-write-throughput.scale.enabled - [enabled: | default = false] - - # AWS AutoScaling role ARN - # CLI flag: -table-manager.index-table.inactive-write-throughput.scale.role-arn - [role_arn: | default = ""] - - # DynamoDB minimum provision capacity. - # CLI flag: -table-manager.index-table.inactive-write-throughput.scale.min-capacity - [min_capacity: | default = 3000] - - # DynamoDB maximum provision capacity. - # CLI flag: -table-manager.index-table.inactive-write-throughput.scale.max-capacity - [max_capacity: | default = 6000] - - # DynamoDB minimum seconds between each autoscale up. - # CLI flag: -table-manager.index-table.inactive-write-throughput.scale.out-cooldown - [out_cooldown: | default = 1800] - - # DynamoDB minimum seconds between each autoscale down. - # CLI flag: -table-manager.index-table.inactive-write-throughput.scale.in-cooldown - [in_cooldown: | default = 1800] - - # DynamoDB target ratio of consumed capacity to provisioned capacity. - # CLI flag: -table-manager.index-table.inactive-write-throughput.scale.target-value - [target: | default = 80] - - inactive_read_scale: - # Should we enable autoscale for the table. - # CLI flag: -table-manager.index-table.inactive-read-throughput.scale.enabled - [enabled: | default = false] - - # AWS AutoScaling role ARN - # CLI flag: -table-manager.index-table.inactive-read-throughput.scale.role-arn - [role_arn: | default = ""] - - # DynamoDB minimum provision capacity. - # CLI flag: -table-manager.index-table.inactive-read-throughput.scale.min-capacity - [min_capacity: | default = 3000] - - # DynamoDB maximum provision capacity. - # CLI flag: -table-manager.index-table.inactive-read-throughput.scale.max-capacity - [max_capacity: | default = 6000] - - # DynamoDB minimum seconds between each autoscale up. 
- # CLI flag: -table-manager.index-table.inactive-read-throughput.scale.out-cooldown - [out_cooldown: | default = 1800] - - # DynamoDB minimum seconds between each autoscale down. - # CLI flag: -table-manager.index-table.inactive-read-throughput.scale.in-cooldown - [in_cooldown: | default = 1800] - - # DynamoDB target ratio of consumed capacity to provisioned capacity. - # CLI flag: -table-manager.index-table.inactive-read-throughput.scale.target-value - [target: | default = 80] - - # Number of last inactive tables to enable write autoscale. - # CLI flag: -table-manager.index-table.inactive-write-throughput.scale-last-n - [inactive_write_scale_lastn: | default = 4] - - # Number of last inactive tables to enable read autoscale. - # CLI flag: -table-manager.index-table.inactive-read-throughput.scale-last-n - [inactive_read_scale_lastn: | default = 4] - -chunk_tables_provisioning: - # Enables on demand throughput provisioning for the storage provider (if - # supported). Applies only to tables which are not autoscaled. Supported by - # DynamoDB - # CLI flag: -table-manager.chunk-table.enable-ondemand-throughput-mode - [enable_ondemand_throughput_mode: | default = false] - - # Table default write throughput. Supported by DynamoDB - # CLI flag: -table-manager.chunk-table.write-throughput - [provisioned_write_throughput: | default = 1000] - - # Table default read throughput. Supported by DynamoDB - # CLI flag: -table-manager.chunk-table.read-throughput - [provisioned_read_throughput: | default = 300] - - write_scale: - # Should we enable autoscale for the table. - # CLI flag: -table-manager.chunk-table.write-throughput.scale.enabled - [enabled: | default = false] - - # AWS AutoScaling role ARN - # CLI flag: -table-manager.chunk-table.write-throughput.scale.role-arn - [role_arn: | default = ""] - - # DynamoDB minimum provision capacity. 
- # CLI flag: -table-manager.chunk-table.write-throughput.scale.min-capacity - [min_capacity: | default = 3000] - - # DynamoDB maximum provision capacity. - # CLI flag: -table-manager.chunk-table.write-throughput.scale.max-capacity - [max_capacity: | default = 6000] - - # DynamoDB minimum seconds between each autoscale up. - # CLI flag: -table-manager.chunk-table.write-throughput.scale.out-cooldown - [out_cooldown: | default = 1800] - - # DynamoDB minimum seconds between each autoscale down. - # CLI flag: -table-manager.chunk-table.write-throughput.scale.in-cooldown - [in_cooldown: | default = 1800] - - # DynamoDB target ratio of consumed capacity to provisioned capacity. - # CLI flag: -table-manager.chunk-table.write-throughput.scale.target-value - [target: | default = 80] - - read_scale: - # Should we enable autoscale for the table. - # CLI flag: -table-manager.chunk-table.read-throughput.scale.enabled - [enabled: | default = false] - - # AWS AutoScaling role ARN - # CLI flag: -table-manager.chunk-table.read-throughput.scale.role-arn - [role_arn: | default = ""] - - # DynamoDB minimum provision capacity. - # CLI flag: -table-manager.chunk-table.read-throughput.scale.min-capacity - [min_capacity: | default = 3000] - - # DynamoDB maximum provision capacity. - # CLI flag: -table-manager.chunk-table.read-throughput.scale.max-capacity - [max_capacity: | default = 6000] - - # DynamoDB minimum seconds between each autoscale up. - # CLI flag: -table-manager.chunk-table.read-throughput.scale.out-cooldown - [out_cooldown: | default = 1800] - - # DynamoDB minimum seconds between each autoscale down. - # CLI flag: -table-manager.chunk-table.read-throughput.scale.in-cooldown - [in_cooldown: | default = 1800] - - # DynamoDB target ratio of consumed capacity to provisioned capacity. - # CLI flag: -table-manager.chunk-table.read-throughput.scale.target-value - [target: | default = 80] - - # Enables on demand throughput provisioning for the storage provider (if - # supported). 
Applies only to tables which are not autoscaled. Supported by - # DynamoDB - # CLI flag: -table-manager.chunk-table.inactive-enable-ondemand-throughput-mode - [enable_inactive_throughput_on_demand_mode: | default = false] - - # Table write throughput for inactive tables. Supported by DynamoDB - # CLI flag: -table-manager.chunk-table.inactive-write-throughput - [inactive_write_throughput: | default = 1] - - # Table read throughput for inactive tables. Supported by DynamoDB - # CLI flag: -table-manager.chunk-table.inactive-read-throughput - [inactive_read_throughput: | default = 300] - - inactive_write_scale: - # Should we enable autoscale for the table. - # CLI flag: -table-manager.chunk-table.inactive-write-throughput.scale.enabled - [enabled: | default = false] - - # AWS AutoScaling role ARN - # CLI flag: -table-manager.chunk-table.inactive-write-throughput.scale.role-arn - [role_arn: | default = ""] - - # DynamoDB minimum provision capacity. - # CLI flag: -table-manager.chunk-table.inactive-write-throughput.scale.min-capacity - [min_capacity: | default = 3000] - - # DynamoDB maximum provision capacity. - # CLI flag: -table-manager.chunk-table.inactive-write-throughput.scale.max-capacity - [max_capacity: | default = 6000] - - # DynamoDB minimum seconds between each autoscale up. - # CLI flag: -table-manager.chunk-table.inactive-write-throughput.scale.out-cooldown - [out_cooldown: | default = 1800] - - # DynamoDB minimum seconds between each autoscale down. - # CLI flag: -table-manager.chunk-table.inactive-write-throughput.scale.in-cooldown - [in_cooldown: | default = 1800] - - # DynamoDB target ratio of consumed capacity to provisioned capacity. - # CLI flag: -table-manager.chunk-table.inactive-write-throughput.scale.target-value - [target: | default = 80] - - inactive_read_scale: - # Should we enable autoscale for the table. 
- # CLI flag: -table-manager.chunk-table.inactive-read-throughput.scale.enabled - [enabled: | default = false] - - # AWS AutoScaling role ARN - # CLI flag: -table-manager.chunk-table.inactive-read-throughput.scale.role-arn - [role_arn: | default = ""] - - # DynamoDB minimum provision capacity. - # CLI flag: -table-manager.chunk-table.inactive-read-throughput.scale.min-capacity - [min_capacity: | default = 3000] - - # DynamoDB maximum provision capacity. - # CLI flag: -table-manager.chunk-table.inactive-read-throughput.scale.max-capacity - [max_capacity: | default = 6000] - - # DynamoDB minimum seconds between each autoscale up. - # CLI flag: -table-manager.chunk-table.inactive-read-throughput.scale.out-cooldown - [out_cooldown: | default = 1800] - - # DynamoDB minimum seconds between each autoscale down. - # CLI flag: -table-manager.chunk-table.inactive-read-throughput.scale.in-cooldown - [in_cooldown: | default = 1800] - - # DynamoDB target ratio of consumed capacity to provisioned capacity. - # CLI flag: -table-manager.chunk-table.inactive-read-throughput.scale.target-value - [target: | default = 80] - - # Number of last inactive tables to enable write autoscale. - # CLI flag: -table-manager.chunk-table.inactive-write-throughput.scale-last-n - [inactive_write_scale_lastn: | default = 4] - - # Number of last inactive tables to enable read autoscale. - # CLI flag: -table-manager.chunk-table.inactive-read-throughput.scale-last-n - [inactive_read_scale_lastn: | default = 4] -``` - -### runtime_config - -Configuration for 'runtime config' module, responsible for reloading runtime configuration file. - -```yaml -# How often to check runtime config files. -# CLI flag: -runtime-config.reload-period -[period: | default = 10s] - -# Comma separated list of yaml files with the configuration that can be updated -# at runtime. Runtime config files will be merged from left to right. 
-# CLI flag: -runtime-config.file -[file: | default = ""] -``` - -### tracing - -Configuration for `tracing`. - -```yaml -# Set to false to disable tracing. -# CLI flag: -tracing.enabled -[enabled: | default = true] -``` - -### analytics - -Configuration for `analytics`. - -```yaml -# Enable anonymous usage reporting. -# CLI flag: -reporting.enabled -[reporting_enabled: | default = true] - -# URL to which reports are sent -# CLI flag: -reporting.usage-stats-url -[usage_stats_url: | default = "https://stats.grafana.org/loki-usage-report"] -``` - -### common - -Common configuration to be shared between multiple modules. If a more specific configuration is given in other sections, the related configuration within this section will be ignored. - -```yaml -[path_prefix: | default = ""] - -storage: - # The s3_storage_config block configures the connection to Amazon S3 object - # storage backend. - # The CLI flags prefix for this block configuration is: common - [s3: ] - - # The gcs_storage_config block configures the connection to Google Cloud - # Storage object storage backend. - # The CLI flags prefix for this block configuration is: common.storage - [gcs: ] - - # The azure_storage_config block configures the connection to Azure object - # storage backend. - # The CLI flags prefix for this block configuration is: common.storage - [azure: ] - - # The alibabacloud_storage_config block configures the connection to Alibaba - # Cloud Storage object storage backend. - [alibabacloud: ] - - # The bos_storage_config block configures the connection to Baidu Object - # Storage (BOS) object storage backend. - # The CLI flags prefix for this block configuration is: common.storage - [bos: ] - - # The swift_storage_config block configures the connection to OpenStack Object - # Storage (Swift) object storage backend. - # The CLI flags prefix for this block configuration is: common.storage - [swift: ] - - filesystem: - # Directory to store chunks in. 
- # CLI flag: -common.storage.filesystem.chunk-directory - [chunks_directory: | default = ""] - - # Directory to store rules in. - # CLI flag: -common.storage.filesystem.rules-directory - [rules_directory: | default = ""] - - hedging: - # If set to a non-zero value a second request will be issued at the provided - # duration. Default is 0 (disabled) - # CLI flag: -common.storage.hedge-requests-at - [at: | default = 0s] - - # The maximum of hedge requests allowed. - # CLI flag: -common.storage.hedge-requests-up-to - [up_to: | default = 2] - - # The maximum of hedge requests allowed per seconds. - # CLI flag: -common.storage.hedge-max-per-second - [max_per_second: | default = 5] - - # The cos_storage_config block configures the connection to IBM Cloud Object - # Storage (COS) backend. - # The CLI flags prefix for this block configuration is: common.storage - [cos: ] - - congestion_control: - # Use storage congestion control (default: disabled). - # CLI flag: -common.storage.congestion-control.enabled - [enabled: | default = false] - - controller: - # Congestion control strategy to use (default: none, options: 'aimd'). - # CLI flag: -common.storage.congestion-control.strategy - [strategy: | default = ""] - - aimd: - # AIMD starting throughput window size: how many requests can be sent - # per second (default: 2000). - # CLI flag: -common.storage.congestion-control.strategy.aimd.start - [start: | default = 2000] - - # AIMD maximum throughput window size: upper limit of requests sent per - # second (default: 10000). - # CLI flag: -common.storage.congestion-control.strategy.aimd.upper-bound - [upper_bound: | default = 10000] - - # AIMD backoff factor when upstream service is throttled to decrease - # number of requests sent per second (default: 0.5). - # CLI flag: -common.storage.congestion-control.strategy.aimd.backoff-factor - [backoff_factor: | default = 0.5] - - retry: - # Congestion control retry strategy to use (default: none, options: - # 'limited'). 
- # CLI flag: -common.storage.congestion-control.retry.strategy - [strategy: | default = ""] - - # Maximum number of retries allowed. - # CLI flag: -common.storage.congestion-control.retry.strategy.limited.limit - [limit: | default = 2] - - hedging: - config: - [at: ] - - [up_to: ] - - [max_per_second: ] - - # Congestion control hedge strategy to use (default: none, options: - # 'limited'). - # CLI flag: -common.storage.congestion-control.hedge.strategy - [strategy: | default = ""] - - # Enable the thanos.io/objstore to be the backend for object storage - # CLI flag: -common.thanos.enable - [thanos_objstore: | default = false] - - objstore_config: - # Backend storage to use. Supported backends are: s3, gcs, azure, swift, - # filesystem. - # CLI flag: -common.storage.thanos.backend - [backend: | default = "filesystem"] - - s3: - # The S3 bucket endpoint. It could be an AWS S3 endpoint listed at - # https://docs.aws.amazon.com/general/latest/gr/s3.html or the address of - # an S3-compatible service in hostname:port format. - # CLI flag: -common.storage.thanos.s3.endpoint - [endpoint: | default = ""] - - # S3 region. If unset, the client will issue a S3 GetBucketLocation API - # call to autodetect it. - # CLI flag: -common.storage.thanos.s3.region - [region: | default = ""] - - # S3 bucket name - # CLI flag: -common.storage.thanos.s3.bucket-name - [bucket_name: | default = ""] - - # S3 secret access key - # CLI flag: -common.storage.thanos.s3.secret-access-key - [secret_access_key: | default = ""] - - # S3 session token - # CLI flag: -common.storage.thanos.s3.session-token - [session_token: | default = ""] - - # S3 access key ID - # CLI flag: -common.storage.thanos.s3.access-key-id - [access_key_id: | default = ""] - - # If enabled, use http:// for the S3 endpoint instead of https://. This - # could be useful in local dev/test environments while using an - # S3-compatible backend storage, like Minio. 
- # CLI flag: -common.storage.thanos.s3.insecure - [insecure: | default = false] - - # The S3 storage class to use. Details can be found at - # https://aws.amazon.com/s3/storage-classes/. - # CLI flag: -common.storage.thanos.s3.storage-class - [storage_class: | default = "STANDARD"] - - sse: - # Enable AWS Server Side Encryption. Supported values: SSE-KMS, SSE-S3. - # CLI flag: -common.storage.thanos.s3.sse.type - [type: | default = ""] - - # KMS Key ID used to encrypt objects in S3 - # CLI flag: -common.storage.thanos.s3.sse.kms-key-id - [kms_key_id: | default = ""] - - # KMS Encryption Context used for object encryption. It expects JSON - # formatted string. - # CLI flag: -common.storage.thanos.s3.sse.kms-encryption-context - [kms_encryption_context: | default = ""] - - http: - # The time an idle connection will remain idle before closing. - # CLI flag: -common.storage.thanos.s3.http.idle-conn-timeout - [idle_conn_timeout: | default = 1m30s] - - # The amount of time the client will wait for a servers response - # headers. - # CLI flag: -common.storage.thanos.s3.http.response-header-timeout - [response_header_timeout: | default = 2m] - - # If the client connects via HTTPS and this option is enabled, the - # client will accept any certificate and hostname. - # CLI flag: -common.storage.thanos.s3.http.insecure-skip-verify - [insecure_skip_verify: | default = false] - - # Maximum time to wait for a TLS handshake. 0 means no limit. - # CLI flag: -common.storage.thanos.s3.http.tls-handshake-timeout - [tls_handshake_timeout: | default = 10s] - - # The time to wait for a server's first response headers after fully - # writing the request headers if the request has an Expect header. 0 to - # send the request body immediately. - # CLI flag: -common.storage.thanos.s3.http.expect-continue-timeout - [expect_continue_timeout: | default = 1s] - - # Maximum number of idle (keep-alive) connections across all hosts. 0 - # means no limit. 
- # CLI flag: -common.storage.thanos.s3.http.max-idle-connections - [max_idle_connections: | default = 100] - - # Maximum number of idle (keep-alive) connections to keep per-host. If - # 0, a built-in default value is used. - # CLI flag: -common.storage.thanos.s3.http.max-idle-connections-per-host - [max_idle_connections_per_host: | default = 100] - - # Maximum number of connections per host. 0 means no limit. - # CLI flag: -common.storage.thanos.s3.http.max-connections-per-host - [max_connections_per_host: | default = 0] - - # Path to the trusted CA file that signed the SSL certificate of the - # object storage endpoint. - # CLI flag: -common.storage.thanos.s3.http.ca-file - [ca_file: | default = ""] - - gcs: - # GCS bucket name - # CLI flag: -common.storage.thanos.gcs.bucket-name - [bucket_name: | default = ""] - - # JSON representing either a Google Developers Console - # client_credentials.json file or a Google Developers service account key - # file. If empty, fallback to Google default logic. - # CLI flag: -common.storage.thanos.gcs.service-account - [service_account: | default = ""] - - azure: - # Azure storage account name - # CLI flag: -common.storage.thanos.azure.account-name - [account_name: | default = ""] - - # Azure storage account key - # CLI flag: -common.storage.thanos.azure.account-key - [account_key: | default = ""] - - # If `connection-string` is set, the values of `account-name` and - # `endpoint-suffix` values will not be used. Use this method over - # `account-key` if you need to authenticate via a SAS token. Or if you use - # the Azurite emulator. - # CLI flag: -common.storage.thanos.azure.connection-string - [connection_string: | default = ""] - - # Azure storage container name - # CLI flag: -common.storage.thanos.azure.container-name - [container_name: | default = "loki"] - - # Azure storage endpoint suffix without schema. 
The account name will be - # prefixed to this value to create the FQDN - # CLI flag: -common.storage.thanos.azure.endpoint-suffix - [endpoint_suffix: | default = ""] - - # Number of retries for recoverable errors - # CLI flag: -common.storage.thanos.azure.max-retries - [max_retries: | default = 20] - - http: - # The time an idle connection will remain idle before closing. - # CLI flag: -common.storage.thanos.azure.idle-conn-timeout - [idle_conn_timeout: | default = 1m30s] - - # The amount of time the client will wait for a servers response - # headers. - # CLI flag: -common.storage.thanos.azure.response-header-timeout - [response_header_timeout: | default = 2m] - - # If the client connects via HTTPS and this option is enabled, the - # client will accept any certificate and hostname. - # CLI flag: -common.storage.thanos.azure.insecure-skip-verify - [insecure_skip_verify: | default = false] - - # Maximum time to wait for a TLS handshake. 0 means no limit. - # CLI flag: -common.storage.thanos.azure.tls-handshake-timeout - [tls_handshake_timeout: | default = 10s] - - # The time to wait for a server's first response headers after fully - # writing the request headers if the request has an Expect header. 0 to - # send the request body immediately. - # CLI flag: -common.storage.thanos.azure.expect-continue-timeout - [expect_continue_timeout: | default = 1s] - - # Maximum number of idle (keep-alive) connections across all hosts. 0 - # means no limit. - # CLI flag: -common.storage.thanos.azure.max-idle-connections - [max_idle_connections: | default = 100] - - # Maximum number of idle (keep-alive) connections to keep per-host. If - # 0, a built-in default value is used. - # CLI flag: -common.storage.thanos.azure.max-idle-connections-per-host - [max_idle_connections_per_host: | default = 100] - - # Maximum number of connections per host. 0 means no limit. 
- # CLI flag: -common.storage.thanos.azure.max-connections-per-host - [max_connections_per_host: | default = 0] - - # Path to the trusted CA file that signed the SSL certificate of the - # object storage endpoint. - # CLI flag: -common.storage.thanos.azure.ca-file - [ca_file: | default = ""] - - swift: - # OpenStack Swift authentication API version. 0 to autodetect. - # CLI flag: -common.storage.thanos.swift.auth-version - [auth_version: | default = 0] - - # OpenStack Swift authentication URL - # CLI flag: -common.storage.thanos.swift.auth-url - [auth_url: | default = ""] - - # Set this to true to use the internal OpenStack Swift endpoint URL - # CLI flag: -common.storage.thanos.swift.internal - [internal: | default = false] - - # OpenStack Swift username. - # CLI flag: -common.storage.thanos.swift.username - [username: | default = ""] - - # OpenStack Swift user's domain name. - # CLI flag: -common.storage.thanos.swift.user-domain-name - [user_domain_name: | default = ""] - - # OpenStack Swift user's domain ID. - # CLI flag: -common.storage.thanos.swift.user-domain-id - [user_domain_id: | default = ""] - - # OpenStack Swift user ID. - # CLI flag: -common.storage.thanos.swift.user-id - [user_id: | default = ""] - - # OpenStack Swift API key. - # CLI flag: -common.storage.thanos.swift.password - [password: | default = ""] - - # OpenStack Swift user's domain ID. - # CLI flag: -common.storage.thanos.swift.domain-id - [domain_id: | default = ""] - - # OpenStack Swift user's domain name. - # CLI flag: -common.storage.thanos.swift.domain-name - [domain_name: | default = ""] - - # OpenStack Swift project ID (v2,v3 auth only). - # CLI flag: -common.storage.thanos.swift.project-id - [project_id: | default = ""] - - # OpenStack Swift project name (v2,v3 auth only). - # CLI flag: -common.storage.thanos.swift.project-name - [project_name: | default = ""] - - # ID of the OpenStack Swift project's domain (v3 auth only), only needed - # if it differs the from user domain. 
- # CLI flag: -common.storage.thanos.swift.project-domain-id - [project_domain_id: | default = ""] - - # Name of the OpenStack Swift project's domain (v3 auth only), only needed - # if it differs from the user domain. - # CLI flag: -common.storage.thanos.swift.project-domain-name - [project_domain_name: | default = ""] - - # OpenStack Swift Region to use (v2,v3 auth only). - # CLI flag: -common.storage.thanos.swift.region-name - [region_name: | default = ""] - - # Name of the OpenStack Swift container to put chunks in. - # CLI flag: -common.storage.thanos.swift.container-name - [container_name: | default = ""] - - # Max retries on requests error. - # CLI flag: -common.storage.thanos.swift.max-retries - [max_retries: | default = 3] - - # Time after which a connection attempt is aborted. - # CLI flag: -common.storage.thanos.swift.connect-timeout - [connect_timeout: | default = 10s] - - # Time after which an idle request is aborted. The timeout watchdog is - # reset each time some data is received, so the timeout triggers after X - # time no data is received on a request. - # CLI flag: -common.storage.thanos.swift.request-timeout - [request_timeout: | default = 5s] - - filesystem: - # Local filesystem storage directory. - # CLI flag: -common.storage.thanos.filesystem.dir - [dir: | default = ""] - - # Prefix for all objects stored in the backend storage. For simplicity, it - # may only contain digits and English alphabet letters. - # CLI flag: -common.storage.thanos.storage-prefix - [storage_prefix: | default = ""] - -[persist_tokens: ] - -[replication_factor: ] - -ring: - kvstore: - # Backend storage to use for the ring. Supported values are: consul, etcd, - # inmemory, memberlist, multi. - # CLI flag: -common.storage.ring.store - [store: | default = "consul"] - - # The prefix for the keys in the store. Should end with a /. - # CLI flag: -common.storage.ring.prefix - [prefix: | default = "collectors/"] - - # Configuration for a Consul client. 
Only applies if the selected kvstore is - # consul. - # The CLI flags prefix for this block configuration is: common.storage.ring - [consul: ] - - # Configuration for an ETCD v3 client. Only applies if the selected kvstore - # is etcd. - # The CLI flags prefix for this block configuration is: common.storage.ring - [etcd: ] - - multi: - # Primary backend storage used by multi-client. - # CLI flag: -common.storage.ring.multi.primary - [primary: | default = ""] - - # Secondary backend storage used by multi-client. - # CLI flag: -common.storage.ring.multi.secondary - [secondary: | default = ""] - - # Mirror writes to secondary store. - # CLI flag: -common.storage.ring.multi.mirror-enabled - [mirror_enabled: | default = false] - - # Timeout for storing value to secondary store. - # CLI flag: -common.storage.ring.multi.mirror-timeout - [mirror_timeout: | default = 2s] - - # Period at which to heartbeat to the ring. 0 = disabled. - # CLI flag: -common.storage.ring.heartbeat-period - [heartbeat_period: | default = 15s] - - # The heartbeat timeout after which compactors are considered unhealthy within - # the ring. 0 = never (timeout disabled). - # CLI flag: -common.storage.ring.heartbeat-timeout - [heartbeat_timeout: | default = 1m] - - # File path where tokens are stored. If empty, tokens are not stored at - # shutdown and restored at startup. - # CLI flag: -common.storage.ring.tokens-file-path - [tokens_file_path: | default = ""] - - # True to enable zone-awareness and replicate blocks across different - # availability zones. - # CLI flag: -common.storage.ring.zone-awareness-enabled - [zone_awareness_enabled: | default = false] - - # Number of tokens to own in the ring. - # CLI flag: -common.storage.ring.num-tokens - [num_tokens: | default = 128] - - # Factor for data replication. - # CLI flag: -common.storage.ring.replication-factor - [replication_factor: | default = 3] - - # Instance ID to register in the ring. 
- # CLI flag: -common.storage.ring.instance-id - [instance_id: | default = ""] - - # Name of network interface to read address from. - # CLI flag: -common.storage.ring.instance-interface-names - [instance_interface_names: | default = []] - - # Port to advertise in the ring (defaults to server.grpc-listen-port). - # CLI flag: -common.storage.ring.instance-port - [instance_port: | default = 0] - - # IP address to advertise in the ring. - # CLI flag: -common.storage.ring.instance-addr - [instance_addr: | default = ""] - - # The availability zone where this instance is running. Required if - # zone-awareness is enabled. - # CLI flag: -common.storage.ring.instance-availability-zone - [instance_availability_zone: | default = ""] - - # Enable using a IPv6 instance address. - # CLI flag: -common.storage.ring.instance-enable-ipv6 - [instance_enable_ipv6: | default = false] - -[instance_interface_names: | default = []] - -[instance_addr: | default = ""] - -# the http address of the compactor in the form http://host:port -# CLI flag: -common.compactor-address -[compactor_address: | default = ""] - -# the grpc address of the compactor in the form host:port -# CLI flag: -common.compactor-grpc-address -[compactor_grpc_address: | default = ""] -``` - -### consul - -Configuration for a Consul client. Only applies if the selected kvstore is `consul`. The supported CLI flags `` used to reference this configuration block are: - -- `bloom-compactor.ring` -- `bloom-gateway.ring` -- `common.storage.ring` -- `compactor.ring` -- `distributor.ring` -- `index-gateway.ring` -- `query-scheduler.ring` -- `ruler.ring` - -  - -```yaml -# Hostname and port of Consul. -# CLI flag: -.consul.hostname -[host: | default = "localhost:8500"] - -# ACL Token used to interact with Consul. -# CLI flag: -.consul.acl-token -[acl_token: | default = ""] - -# HTTP timeout when talking to Consul -# CLI flag: -.consul.client-timeout -[http_client_timeout: | default = 20s] - -# Enable consistent reads to Consul. 
-# CLI flag: -.consul.consistent-reads -[consistent_reads: | default = false] - -# Rate limit when watching key or prefix in Consul, in requests per second. 0 -# disables the rate limit. -# CLI flag: -.consul.watch-rate-limit -[watch_rate_limit: | default = 1] - -# Burst size used in rate limit. Values less than 1 are treated as 1. -# CLI flag: -.consul.watch-burst-size -[watch_burst_size: | default = 1] - -# Maximum duration to wait before retrying a Compare And Swap (CAS) operation. -# CLI flag: -.consul.cas-retry-delay -[cas_retry_delay: | default = 1s] -``` - -### etcd - -Configuration for an ETCD v3 client. Only applies if the selected kvstore is `etcd`. The supported CLI flags `` used to reference this configuration block are: - -- `bloom-compactor.ring` -- `bloom-gateway.ring` -- `common.storage.ring` -- `compactor.ring` -- `distributor.ring` -- `index-gateway.ring` -- `query-scheduler.ring` -- `ruler.ring` - -  - -```yaml -# The etcd endpoints to connect to. -# CLI flag: -.etcd.endpoints -[endpoints: | default = []] - -# The dial timeout for the etcd connection. -# CLI flag: -.etcd.dial-timeout -[dial_timeout: | default = 10s] - -# The maximum number of retries to do for failed ops. -# CLI flag: -.etcd.max-retries -[max_retries: | default = 10] - -# Enable TLS. -# CLI flag: -.etcd.tls-enabled -[tls_enabled: | default = false] - -# Path to the client certificate, which will be used for authenticating with the -# server. Also requires the key path to be configured. -# CLI flag: -.etcd.tls-cert-path -[tls_cert_path: | default = ""] - -# Path to the key for the client certificate. Also requires the client -# certificate to be configured. -# CLI flag: -.etcd.tls-key-path -[tls_key_path: | default = ""] - -# Path to the CA certificates to validate server certificate against. If not -# set, the host's root CA certificates are used. -# CLI flag: -.etcd.tls-ca-path -[tls_ca_path: | default = ""] - -# Override the expected name on the server certificate. 
-# CLI flag: -.etcd.tls-server-name -[tls_server_name: | default = ""] - -# Skip validating server certificate. -# CLI flag: -.etcd.tls-insecure-skip-verify -[tls_insecure_skip_verify: | default = false] - -# Override the default cipher suite list (separated by commas). Allowed values: -# -# Secure Ciphers: -# - TLS_RSA_WITH_AES_128_CBC_SHA -# - TLS_RSA_WITH_AES_256_CBC_SHA -# - TLS_RSA_WITH_AES_128_GCM_SHA256 -# - TLS_RSA_WITH_AES_256_GCM_SHA384 -# - TLS_AES_128_GCM_SHA256 -# - TLS_AES_256_GCM_SHA384 -# - TLS_CHACHA20_POLY1305_SHA256 -# - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA -# - TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA -# - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA -# - TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA -# - TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 -# - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 -# - TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 -# - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 -# - TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 -# - TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 -# -# Insecure Ciphers: -# - TLS_RSA_WITH_RC4_128_SHA -# - TLS_RSA_WITH_3DES_EDE_CBC_SHA -# - TLS_RSA_WITH_AES_128_CBC_SHA256 -# - TLS_ECDHE_ECDSA_WITH_RC4_128_SHA -# - TLS_ECDHE_RSA_WITH_RC4_128_SHA -# - TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA -# - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 -# - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 -# CLI flag: -.etcd.tls-cipher-suites -[tls_cipher_suites: | default = ""] - -# Override the default minimum TLS version. Allowed values: VersionTLS10, -# VersionTLS11, VersionTLS12, VersionTLS13 -# CLI flag: -.etcd.tls-min-version -[tls_min_version: | default = ""] - -# Etcd username. -# CLI flag: -.etcd.username -[username: | default = ""] - -# Etcd password. -# CLI flag: -.etcd.password -[password: | default = ""] -``` - -### memberlist - -Configuration for `memberlist` client. Only applies if the selected kvstore is memberlist. 
- -When a memberlist config with atleast 1 join_members is defined, kvstore of type memberlist is automatically selected for all the components that require a ring unless otherwise specified in the component's configuration section. - -```yaml -# Name of the node in memberlist cluster. Defaults to hostname. -# CLI flag: -memberlist.nodename -[node_name: | default = ""] - -# Add random suffix to the node name. -# CLI flag: -memberlist.randomize-node-name -[randomize_node_name: | default = true] - -# The timeout for establishing a connection with a remote node, and for -# read/write operations. -# CLI flag: -memberlist.stream-timeout -[stream_timeout: | default = 10s] - -# Multiplication factor used when sending out messages (factor * log(N+1)). -# CLI flag: -memberlist.retransmit-factor -[retransmit_factor: | default = 4] - -# How often to use pull/push sync. -# CLI flag: -memberlist.pullpush-interval -[pull_push_interval: | default = 30s] - -# How often to gossip. -# CLI flag: -memberlist.gossip-interval -[gossip_interval: | default = 200ms] - -# How many nodes to gossip to. -# CLI flag: -memberlist.gossip-nodes -[gossip_nodes: | default = 3] - -# How long to keep gossiping to dead nodes, to give them chance to refute their -# death. -# CLI flag: -memberlist.gossip-to-dead-nodes-time -[gossip_to_dead_nodes_time: | default = 30s] - -# How soon can dead node's name be reclaimed with new address. 0 to disable. -# CLI flag: -memberlist.dead-node-reclaim-time -[dead_node_reclaim_time: | default = 0s] - -# Enable message compression. This can be used to reduce bandwidth usage at the -# cost of slightly more CPU utilization. -# CLI flag: -memberlist.compression-enabled -[compression_enabled: | default = true] - -# Gossip address to advertise to other members in the cluster. Used for NAT -# traversal. -# CLI flag: -memberlist.advertise-addr -[advertise_addr: | default = ""] - -# Gossip port to advertise to other members in the cluster. Used for NAT -# traversal. 
-# CLI flag: -memberlist.advertise-port -[advertise_port: | default = 7946] - -# The cluster label is an optional string to include in outbound packets and -# gossip streams. Other members in the memberlist cluster will discard any -# message whose label doesn't match the configured one, unless the -# 'cluster-label-verification-disabled' configuration option is set to true. -# CLI flag: -memberlist.cluster-label -[cluster_label: | default = ""] - -# When true, memberlist doesn't verify that inbound packets and gossip streams -# have the cluster label matching the configured one. This verification should -# be disabled while rolling out the change to the configured cluster label in a -# live memberlist cluster. -# CLI flag: -memberlist.cluster-label-verification-disabled -[cluster_label_verification_disabled: | default = false] - -# Other cluster members to join. Can be specified multiple times. It can be an -# IP, hostname or an entry specified in the DNS Service Discovery format. -# CLI flag: -memberlist.join -[join_members: | default = []] - -# Min backoff duration to join other cluster members. -# CLI flag: -memberlist.min-join-backoff -[min_join_backoff: | default = 1s] - -# Max backoff duration to join other cluster members. -# CLI flag: -memberlist.max-join-backoff -[max_join_backoff: | default = 1m] - -# Max number of retries to join other cluster members. -# CLI flag: -memberlist.max-join-retries -[max_join_retries: | default = 10] - -# If this node fails to join memberlist cluster, abort. -# CLI flag: -memberlist.abort-if-join-fails -[abort_if_cluster_join_fails: | default = false] - -# If not 0, how often to rejoin the cluster. Occasional rejoin can help to fix -# the cluster split issue, and is harmless otherwise. For example when using -# only few components as a seed nodes (via -memberlist.join), then it's -# recommended to use rejoin. If -memberlist.join points to dynamic service that -# resolves to all gossiping nodes (eg. 
Kubernetes headless service), then rejoin -# is not needed. -# CLI flag: -memberlist.rejoin-interval -[rejoin_interval: | default = 0s] - -# How long to keep LEFT ingesters in the ring. -# CLI flag: -memberlist.left-ingesters-timeout -[left_ingesters_timeout: | default = 5m] - -# Timeout for leaving memberlist cluster. -# CLI flag: -memberlist.leave-timeout -[leave_timeout: | default = 20s] - -# How much space to use for keeping received and sent messages in memory for -# troubleshooting (two buffers). 0 to disable. -# CLI flag: -memberlist.message-history-buffer-bytes -[message_history_buffer_bytes: | default = 0] - -# IP address to listen on for gossip messages. Multiple addresses may be -# specified. Defaults to 0.0.0.0 -# CLI flag: -memberlist.bind-addr -[bind_addr: | default = []] - -# Port to listen on for gossip messages. -# CLI flag: -memberlist.bind-port -[bind_port: | default = 7946] - -# Timeout used when connecting to other nodes to send packet. -# CLI flag: -memberlist.packet-dial-timeout -[packet_dial_timeout: | default = 2s] - -# Timeout for writing 'packet' data. -# CLI flag: -memberlist.packet-write-timeout -[packet_write_timeout: | default = 5s] - -# Enable TLS on the memberlist transport layer. -# CLI flag: -memberlist.tls-enabled -[tls_enabled: | default = false] - -# Path to the client certificate, which will be used for authenticating with the -# server. Also requires the key path to be configured. -# CLI flag: -memberlist.tls-cert-path -[tls_cert_path: | default = ""] - -# Path to the key for the client certificate. Also requires the client -# certificate to be configured. -# CLI flag: -memberlist.tls-key-path -[tls_key_path: | default = ""] - -# Path to the CA certificates to validate server certificate against. If not -# set, the host's root CA certificates are used. -# CLI flag: -memberlist.tls-ca-path -[tls_ca_path: | default = ""] - -# Override the expected name on the server certificate. 
-# CLI flag: -memberlist.tls-server-name -[tls_server_name: | default = ""] - -# Skip validating server certificate. -# CLI flag: -memberlist.tls-insecure-skip-verify -[tls_insecure_skip_verify: | default = false] - -# Override the default cipher suite list (separated by commas). Allowed values: -# -# Secure Ciphers: -# - TLS_RSA_WITH_AES_128_CBC_SHA -# - TLS_RSA_WITH_AES_256_CBC_SHA -# - TLS_RSA_WITH_AES_128_GCM_SHA256 -# - TLS_RSA_WITH_AES_256_GCM_SHA384 -# - TLS_AES_128_GCM_SHA256 -# - TLS_AES_256_GCM_SHA384 -# - TLS_CHACHA20_POLY1305_SHA256 -# - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA -# - TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA -# - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA -# - TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA -# - TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 -# - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 -# - TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 -# - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 -# - TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 -# - TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 -# -# Insecure Ciphers: -# - TLS_RSA_WITH_RC4_128_SHA -# - TLS_RSA_WITH_3DES_EDE_CBC_SHA -# - TLS_RSA_WITH_AES_128_CBC_SHA256 -# - TLS_ECDHE_ECDSA_WITH_RC4_128_SHA -# - TLS_ECDHE_RSA_WITH_RC4_128_SHA -# - TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA -# - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 -# - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 -# CLI flag: -memberlist.tls-cipher-suites -[tls_cipher_suites: | default = ""] - -# Override the default minimum TLS version. Allowed values: VersionTLS10, -# VersionTLS11, VersionTLS12, VersionTLS13 -# CLI flag: -memberlist.tls-min-version -[tls_min_version: | default = ""] -``` - -### grpc_client - -The `grpc_client` block configures the gRPC client used to communicate between two Loki components. 
The supported CLI flags `` used to reference this configuration block are: - -- `bigtable` -- `bloom-gateway-client.grpc` -- `boltdb.shipper.index-gateway-client.grpc` -- `frontend.grpc-client-config` -- `ingester.client` -- `querier.frontend-client` -- `query-scheduler.grpc-client-config` -- `ruler.client` -- `tsdb.shipper.index-gateway-client.grpc` - -  - -```yaml -# gRPC client max receive message size (bytes). -# CLI flag: -.grpc-max-recv-msg-size -[max_recv_msg_size: | default = 104857600] - -# gRPC client max send message size (bytes). -# CLI flag: -.grpc-max-send-msg-size -[max_send_msg_size: | default = 104857600] - -# Use compression when sending messages. Supported values are: 'gzip', 'snappy' -# and '' (disable compression) -# CLI flag: -.grpc-compression -[grpc_compression: | default = ""] - -# Rate limit for gRPC client; 0 means disabled. -# CLI flag: -.grpc-client-rate-limit -[rate_limit: | default = 0] - -# Rate limit burst for gRPC client. -# CLI flag: -.grpc-client-rate-limit-burst -[rate_limit_burst: | default = 0] - -# Enable backoff and retry when we hit rate limits. -# CLI flag: -.backoff-on-ratelimits -[backoff_on_ratelimits: | default = false] - -backoff_config: - # Minimum delay when backing off. - # CLI flag: -.backoff-min-period - [min_period: | default = 100ms] - - # Maximum delay when backing off. - # CLI flag: -.backoff-max-period - [max_period: | default = 10s] - - # Number of times to backoff and retry before failing. - # CLI flag: -.backoff-retries - [max_retries: | default = 10] - -# Initial stream window size. Values less than the default are not supported and -# are ignored. Setting this to a value other than the default disables the BDP -# estimator. -# CLI flag: -.initial-stream-window-size -[initial_stream_window_size: | default = 63KiB1023B] - -# Initial connection window size. Values less than the default are not supported -# and are ignored. Setting this to a value other than the default disables the -# BDP estimator. 
-# CLI flag: -.initial-connection-window-size -[initial_connection_window_size: | default = 63KiB1023B] - -# Enable TLS in the gRPC client. This flag needs to be enabled when any other -# TLS flag is set. If set to false, insecure connection to gRPC server will be -# used. -# CLI flag: -.tls-enabled -[tls_enabled: | default = false] - -# Path to the client certificate, which will be used for authenticating with the -# server. Also requires the key path to be configured. -# CLI flag: -.tls-cert-path -[tls_cert_path: | default = ""] - -# Path to the key for the client certificate. Also requires the client -# certificate to be configured. -# CLI flag: -.tls-key-path -[tls_key_path: | default = ""] - -# Path to the CA certificates to validate server certificate against. If not -# set, the host's root CA certificates are used. -# CLI flag: -.tls-ca-path -[tls_ca_path: | default = ""] - -# Override the expected name on the server certificate. -# CLI flag: -.tls-server-name -[tls_server_name: | default = ""] - -# Skip validating server certificate. -# CLI flag: -.tls-insecure-skip-verify -[tls_insecure_skip_verify: | default = false] - -# Override the default cipher suite list (separated by commas). 
Allowed values: -# -# Secure Ciphers: -# - TLS_RSA_WITH_AES_128_CBC_SHA -# - TLS_RSA_WITH_AES_256_CBC_SHA -# - TLS_RSA_WITH_AES_128_GCM_SHA256 -# - TLS_RSA_WITH_AES_256_GCM_SHA384 -# - TLS_AES_128_GCM_SHA256 -# - TLS_AES_256_GCM_SHA384 -# - TLS_CHACHA20_POLY1305_SHA256 -# - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA -# - TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA -# - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA -# - TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA -# - TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 -# - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 -# - TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 -# - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 -# - TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 -# - TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 -# -# Insecure Ciphers: -# - TLS_RSA_WITH_RC4_128_SHA -# - TLS_RSA_WITH_3DES_EDE_CBC_SHA -# - TLS_RSA_WITH_AES_128_CBC_SHA256 -# - TLS_ECDHE_ECDSA_WITH_RC4_128_SHA -# - TLS_ECDHE_RSA_WITH_RC4_128_SHA -# - TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA -# - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 -# - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 -# CLI flag: -.tls-cipher-suites -[tls_cipher_suites: | default = ""] - -# Override the default minimum TLS version. Allowed values: VersionTLS10, -# VersionTLS11, VersionTLS12, VersionTLS13 -# CLI flag: -.tls-min-version -[tls_min_version: | default = ""] - -# The maximum amount of time to establish a connection. A value of 0 means -# default gRPC client connect timeout and backoff. -# CLI flag: -.connect-timeout -[connect_timeout: | default = 5s] - -# Initial backoff delay after first connection failure. Only relevant if -# ConnectTimeout > 0. -# CLI flag: -.connect-backoff-base-delay -[connect_backoff_base_delay: | default = 1s] - -# Maximum backoff delay when establishing a connection. Only relevant if -# ConnectTimeout > 0. -# CLI flag: -.connect-backoff-max-delay -[connect_backoff_max_delay: | default = 5s] -``` - -### tls_config - -The TLS configuration. 
- -```yaml -# Path to the client certificate, which will be used for authenticating with the -# server. Also requires the key path to be configured. -# CLI flag: -frontend.tail-tls-config.tls-cert-path -[tls_cert_path: | default = ""] - -# Path to the key for the client certificate. Also requires the client -# certificate to be configured. -# CLI flag: -frontend.tail-tls-config.tls-key-path -[tls_key_path: | default = ""] - -# Path to the CA certificates to validate server certificate against. If not -# set, the host's root CA certificates are used. -# CLI flag: -frontend.tail-tls-config.tls-ca-path -[tls_ca_path: | default = ""] - -# Override the expected name on the server certificate. -# CLI flag: -frontend.tail-tls-config.tls-server-name -[tls_server_name: | default = ""] - -# Skip validating server certificate. -# CLI flag: -frontend.tail-tls-config.tls-insecure-skip-verify -[tls_insecure_skip_verify: | default = false] - -# Override the default cipher suite list (separated by commas). 
Allowed values: -# -# Secure Ciphers: -# - TLS_RSA_WITH_AES_128_CBC_SHA -# - TLS_RSA_WITH_AES_256_CBC_SHA -# - TLS_RSA_WITH_AES_128_GCM_SHA256 -# - TLS_RSA_WITH_AES_256_GCM_SHA384 -# - TLS_AES_128_GCM_SHA256 -# - TLS_AES_256_GCM_SHA384 -# - TLS_CHACHA20_POLY1305_SHA256 -# - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA -# - TLS_ECDHE_ECDSA_WITH_AES_256_CBC_SHA -# - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA -# - TLS_ECDHE_RSA_WITH_AES_256_CBC_SHA -# - TLS_ECDHE_ECDSA_WITH_AES_128_GCM_SHA256 -# - TLS_ECDHE_ECDSA_WITH_AES_256_GCM_SHA384 -# - TLS_ECDHE_RSA_WITH_AES_128_GCM_SHA256 -# - TLS_ECDHE_RSA_WITH_AES_256_GCM_SHA384 -# - TLS_ECDHE_RSA_WITH_CHACHA20_POLY1305_SHA256 -# - TLS_ECDHE_ECDSA_WITH_CHACHA20_POLY1305_SHA256 -# -# Insecure Ciphers: -# - TLS_RSA_WITH_RC4_128_SHA -# - TLS_RSA_WITH_3DES_EDE_CBC_SHA -# - TLS_RSA_WITH_AES_128_CBC_SHA256 -# - TLS_ECDHE_ECDSA_WITH_RC4_128_SHA -# - TLS_ECDHE_RSA_WITH_RC4_128_SHA -# - TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA -# - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 -# - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 -# CLI flag: -frontend.tail-tls-config.tls-cipher-suites -[tls_cipher_suites: | default = ""] - -# Override the default minimum TLS version. Allowed values: VersionTLS10, -# VersionTLS11, VersionTLS12, VersionTLS13 -# CLI flag: -frontend.tail-tls-config.tls-min-version -[tls_min_version: | default = ""] -``` - -### cache_config - -The cache block configures the cache backend. The supported CLI flags `` used to reference this configuration block are: - -- `bloom-gateway-client.cache` -- `bloom.metas-cache` -- `frontend` -- `frontend.index-stats-results-cache` -- `frontend.instant-metric-results-cache` -- `frontend.label-results-cache` -- `frontend.series-results-cache` -- `frontend.volume-results-cache` -- `store.chunks-cache` -- `store.index-cache-read` -- `store.index-cache-write` - -  - -```yaml -# The default validity of entries for caches unless overridden. 
-# CLI flag: -.default-validity -[default_validity: | default = 1h] - -background: - # At what concurrency to write back to cache. - # CLI flag: -.background.write-back-concurrency - [writeback_goroutines: | default = 10] - - # How many key batches to buffer for background write-back. - # CLI flag: -.background.write-back-buffer - [writeback_buffer: | default = 10000] - - # Size limit in bytes for background write-back. - # CLI flag: -.background.write-back-size-limit - [writeback_size_limit: | default = 1GB] - -memcached: - # How long keys stay in the memcache. - # CLI flag: -.memcached.expiration - [expiration: | default = 0s] - - # How many keys to fetch in each batch. - # CLI flag: -.memcached.batchsize - [batch_size: | default = 256] - - # Maximum active requests to memcache. - # CLI flag: -.memcached.parallelism - [parallelism: | default = 10] - -memcached_client: - # Hostname for memcached service to use. If empty and if addresses is unset, - # no memcached will be used. - # CLI flag: -.memcached.hostname - [host: | default = ""] - - # SRV service used to discover memcache servers. - # CLI flag: -.memcached.service - [service: | default = "memcached"] - - # EXPERIMENTAL: Comma separated addresses list in DNS Service Discovery - # format: - # https://cortexmetrics.io/docs/configuration/arguments/#dns-service-discovery - # CLI flag: -.memcached.addresses - [addresses: | default = ""] - - # Maximum time to wait before giving up on memcached requests. - # CLI flag: -.memcached.timeout - [timeout: | default = 100ms] - - # Maximum number of idle connections in pool. - # CLI flag: -.memcached.max-idle-conns - [max_idle_conns: | default = 16] - - # The maximum size of an item stored in memcached. Bigger items are not - # stored. If set to 0, no maximum size is enforced. - # CLI flag: -.memcached.max-item-size - [max_item_size: | default = 0] - - # Period with which to poll DNS for memcache servers. 
- # CLI flag: -.memcached.update-interval - [update_interval: | default = 1m] - - # Use consistent hashing to distribute to memcache servers. - # CLI flag: -.memcached.consistent-hash - [consistent_hash: | default = true] - - # Trip circuit-breaker after this number of consecutive dial failures (if zero - # then circuit-breaker is disabled). - # CLI flag: -.memcached.circuit-breaker-consecutive-failures - [circuit_breaker_consecutive_failures: | default = 10] - - # Duration circuit-breaker remains open after tripping (if zero then 60 - # seconds is used). - # CLI flag: -.memcached.circuit-breaker-timeout - [circuit_breaker_timeout: | default = 10s] - - # Reset circuit-breaker counts after this long (if zero then never reset). - # CLI flag: -.memcached.circuit-breaker-interval - [circuit_breaker_interval: | default = 10s] - -redis: - # Redis Server or Cluster configuration endpoint to use for caching. A - # comma-separated list of endpoints for Redis Cluster or Redis Sentinel. If - # empty, no redis will be used. - # CLI flag: -.redis.endpoint - [endpoint: | default = ""] - - # Redis Sentinel master name. An empty string for Redis Server or Redis - # Cluster. - # CLI flag: -.redis.master-name - [master_name: | default = ""] - - # Maximum time to wait before giving up on redis requests. - # CLI flag: -.redis.timeout - [timeout: | default = 500ms] - - # How long keys stay in the redis. - # CLI flag: -.redis.expiration - [expiration: | default = 0s] - - # Database index. - # CLI flag: -.redis.db - [db: | default = 0] - - # Maximum number of connections in the pool. - # CLI flag: -.redis.pool-size - [pool_size: | default = 0] - - # Username to use when connecting to redis. - # CLI flag: -.redis.username - [username: | default = ""] - - # Password to use when connecting to redis. - # CLI flag: -.redis.password - [password: | default = ""] - - # Enable connecting to redis with TLS. 
- # CLI flag: -.redis.tls-enabled - [tls_enabled: | default = false] - - # Skip validating server certificate. - # CLI flag: -.redis.tls-insecure-skip-verify - [tls_insecure_skip_verify: | default = false] - - # Close connections after remaining idle for this duration. If the value is - # zero, then idle connections are not closed. - # CLI flag: -.redis.idle-timeout - [idle_timeout: | default = 0s] - - # Close connections older than this duration. If the value is zero, then the - # pool does not close connections based on age. - # CLI flag: -.redis.max-connection-age - [max_connection_age: | default = 0s] - - # By default, the Redis client only reads from the master node. Enabling this - # option can lower pressure on the master node by randomly routing read-only - # commands to the master and any available replicas. - # CLI flag: -.redis.route-randomly - [route_randomly: | default = false] - -embedded_cache: - # Whether embedded cache is enabled. - # CLI flag: -.embedded-cache.enabled - [enabled: | default = false] - - # Maximum memory size of the cache in MB. - # CLI flag: -.embedded-cache.max-size-mb - [max_size_mb: | default = 100] - - # Maximum number of entries in the cache. - # CLI flag: -.embedded-cache.max-size-items - [max_size_items: | default = 0] - - # The time to live for items in the cache before they get purged. - # CLI flag: -.embedded-cache.ttl - [ttl: | default = 1h] - -# The maximum number of concurrent asynchronous writeback cache can occur. -# CLI flag: -.max-async-cache-write-back-concurrency -[async_cache_write_back_concurrency: | default = 16] - -# The maximum number of enqueued asynchronous writeback cache allowed. -# CLI flag: -.max-async-cache-write-back-buffer-size -[async_cache_write_back_buffer_size: | default = 500] -``` - -### period_config - -The `period_config` block configures what index schemas should be used for from specific time periods. - -```yaml -# The date of the first day that index buckets should be created. 
Use a date in -# the past if this is your only period_config, otherwise use a date when you -# want the schema to switch over. In YYYY-MM-DD format, for example: 2018-04-15. -[from: ] - -# store and object_store below affect which key is used. Which -# index to use. Either tsdb or boltdb-shipper. Following stores are deprecated: -# aws, aws-dynamo, gcp, gcp-columnkey, bigtable, bigtable-hashed, cassandra, -# grpc. -[store: | default = ""] - -# Which store to use for the chunks. Either aws (alias s3), azure, gcs, -# alibabacloud, bos, cos, swift, filesystem, or a named_store (refer to -# named_stores_config). Following stores are deprecated: aws-dynamo, gcp, -# gcp-columnkey, bigtable, bigtable-hashed, cassandra, grpc. -[object_store: | default = ""] - -# The schema version to use, current recommended schema is v12. -[schema: | default = ""] - -# Configures how the index is updated and stored. -index: - # Path prefix for index tables. Prefix always needs to end with a path - # delimiter '/', except when the prefix is empty. - [path_prefix: | default = "index/"] - - # Table prefix for all period tables. - [prefix: | default = ""] - - # Table period. - [period: ] - - # A map to be added to all managed tables. - [tags: ] - -# Configured how the chunks are updated and stored. -chunks: - # Table prefix for all period tables. - [prefix: | default = ""] - - # Table period. - [period: ] - - # A map to be added to all managed tables. - [tags: ] - -# How many shards will be created. Only used if schema is v10 or greater. -[row_shards: | default = 16] -``` - -### aws_storage_config - -The `aws_storage_config` block configures the connection to dynamoDB and S3 object storage. Either one of them or both can be configured. - -```yaml -# Deprecated: Configures storing indexes in DynamoDB. -dynamodb: - # DynamoDB endpoint URL with escaped Key and Secret encoded. If only region is - # specified as a host, proper endpoint will be deduced. 
Use - # inmemory:/// to use a mock in-memory implementation. - # CLI flag: -dynamodb.url - [dynamodb_url: ] - - # DynamoDB table management requests per second limit. - # CLI flag: -dynamodb.api-limit - [api_limit: | default = 2] - - # DynamoDB rate cap to back off when throttled. - # CLI flag: -dynamodb.throttle-limit - [throttle_limit: | default = 10] - - metrics: - # Use metrics-based autoscaling, via this query URL - # CLI flag: -metrics.url - [url: | default = ""] - - # Queue length above which we will scale up capacity - # CLI flag: -metrics.target-queue-length - [target_queue_length: | default = 100000] - - # Scale up capacity by this multiple - # CLI flag: -metrics.scale-up-factor - [scale_up_factor: | default = 1.3] - - # Ignore throttling below this level (rate per second) - # CLI flag: -metrics.ignore-throttle-below - [ignore_throttle_below: | default = 1] - - # query to fetch ingester queue length - # CLI flag: -metrics.queue-length-query - [queue_length_query: | default = "sum(avg_over_time(loki_ingester_flush_queue_length{job=\"cortex/ingester\"}[2m])) or sum(avg_over_time(cortex_ingester_flush_queue_length{job=\"cortex/ingester\"}[2m]))"] - - # query to fetch throttle rates per table - # CLI flag: -metrics.write-throttle-query - [write_throttle_query: | default = "sum(rate(cortex_dynamo_throttled_total{operation=\"DynamoDB.BatchWriteItem\"}[1m])) by (table) > 0"] - - # query to fetch write capacity usage per table - # CLI flag: -metrics.usage-query - [write_usage_query: | default = "sum(rate(cortex_dynamo_consumed_capacity_total{operation=\"DynamoDB.BatchWriteItem\"}[15m])) by (table) > 0"] - - # query to fetch read capacity usage per table - # CLI flag: -metrics.read-usage-query - [read_usage_query: | default = "sum(rate(cortex_dynamo_consumed_capacity_total{operation=\"DynamoDB.QueryPages\"}[1h])) by (table) > 0"] - - # query to fetch read errors per table - # CLI flag: -metrics.read-error-query - [read_error_query: | default = 
"sum(increase(cortex_dynamo_failures_total{operation=\"DynamoDB.QueryPages\",error=\"ProvisionedThroughputExceededException\"}[1m])) by (table) > 0"] - - # Number of chunks to group together to parallelise fetches (zero to disable) - # CLI flag: -dynamodb.chunk-gang-size - [chunk_gang_size: | default = 10] - - # Max number of chunk-get operations to start in parallel - # CLI flag: -dynamodb.chunk.get-max-parallelism - [chunk_get_max_parallelism: | default = 32] - - backoff_config: - # Minimum backoff time - # CLI flag: -dynamodb.min-backoff - [min_period: | default = 100ms] - - # Maximum backoff time - # CLI flag: -dynamodb.max-backoff - [max_period: | default = 50s] - - # Maximum number of times to retry an operation - # CLI flag: -dynamodb.max-retries - [max_retries: | default = 20] - - # KMS key used for encrypting DynamoDB items. DynamoDB will use an Amazon - # owned KMS key if not provided. - # CLI flag: -dynamodb.kms-key-id - [kms_key_id: | default = ""] - -# S3 endpoint URL with escaped Key and Secret encoded. If only region is -# specified as a host, proper endpoint will be deduced. Use -# inmemory:/// to use a mock in-memory implementation. -# CLI flag: -s3.url -[s3: ] - -# Set this to `true` to force the request to use path-style addressing. -# CLI flag: -s3.force-path-style -[s3forcepathstyle: | default = false] - -# Comma separated list of bucket names to evenly distribute chunks over. -# Overrides any buckets specified in s3.url flag -# CLI flag: -s3.buckets -[bucketnames: | default = ""] - -# S3 Endpoint to connect to. -# CLI flag: -s3.endpoint -[endpoint: | default = ""] - -# AWS region to use. -# CLI flag: -s3.region -[region: | default = ""] - -# AWS Access Key ID -# CLI flag: -s3.access-key-id -[access_key_id: | default = ""] - -# AWS Secret Access Key -# CLI flag: -s3.secret-access-key -[secret_access_key: | default = ""] - -# AWS Session Token -# CLI flag: -s3.session-token -[session_token: | default = ""] - -# Disable https on s3 connection. 
-# CLI flag: -s3.insecure -[insecure: | default = false] - -http_config: - # Timeout specifies a time limit for requests made by s3 Client. - # CLI flag: -s3.http.timeout - [timeout: | default = 0s] - - # The maximum amount of time an idle connection will be held open. - # CLI flag: -s3.http.idle-conn-timeout - [idle_conn_timeout: | default = 1m30s] - - # If non-zero, specifies the amount of time to wait for a server's response - # headers after fully writing the request. - # CLI flag: -s3.http.response-header-timeout - [response_header_timeout: | default = 0s] - - # Set to true to skip verifying the certificate chain and hostname. - # CLI flag: -s3.http.insecure-skip-verify - [insecure_skip_verify: | default = false] - - # Path to the trusted CA file that signed the SSL certificate of the S3 - # endpoint. - # CLI flag: -s3.http.ca-file - [ca_file: | default = ""] - -# The signature version to use for authenticating against S3. Supported values -# are: v4. -# CLI flag: -s3.signature-version -[signature_version: | default = "v4"] - -# The S3 storage class which objects will use. Supported values are: GLACIER, -# DEEP_ARCHIVE, GLACIER_IR, INTELLIGENT_TIERING, ONEZONE_IA, OUTPOSTS, -# REDUCED_REDUNDANCY, STANDARD, STANDARD_IA. -# CLI flag: -s3.storage-class -[storage_class: | default = "STANDARD"] - -sse: - # Enable AWS Server Side Encryption. Supported values: SSE-KMS, SSE-S3. - # CLI flag: -s3.sse.type - [type: | default = ""] - - # KMS Key ID used to encrypt objects in S3 - # CLI flag: -s3.sse.kms-key-id - [kms_key_id: | default = ""] - - # KMS Encryption Context used for object encryption. It expects JSON formatted - # string. - # CLI flag: -s3.sse.kms-encryption-context - [kms_encryption_context: | default = ""] - -# Configures back off when S3 get Object. 
-backoff_config: - # Minimum backoff time when s3 get Object - # CLI flag: -s3.min-backoff - [min_period: | default = 100ms] - - # Maximum backoff time when s3 get Object - # CLI flag: -s3.max-backoff - [max_period: | default = 3s] - - # Maximum number of times to retry when s3 get Object - # CLI flag: -s3.max-retries - [max_retries: | default = 5] -``` - -### azure_storage_config - -The `azure_storage_config` block configures the connection to Azure object storage backend. The supported CLI flags `` used to reference this configuration block are: - -- `common.storage` -- `ruler.storage` - -  - -```yaml -# Azure Cloud environment. Supported values are: AzureGlobal, AzureChinaCloud, -# AzureGermanCloud, AzureUSGovernment. -# CLI flag: -.azure.environment -[environment: | default = "AzureGlobal"] - -# Azure storage account name. -# CLI flag: -.azure.account-name -[account_name: | default = ""] - -# Azure storage account key. -# CLI flag: -.azure.account-key -[account_key: | default = ""] - -# If `connection-string` is set, the values of `account-name` and -# `endpoint-suffix` values will not be used. Use this method over `account-key` -# if you need to authenticate via a SAS token. Or if you use the Azurite -# emulator. -# CLI flag: -.azure.connection-string -[connection_string: | default = ""] - -# Name of the storage account blob container used to store chunks. This -# container must be created before running cortex. -# CLI flag: -.azure.container-name -[container_name: | default = "loki"] - -# Azure storage endpoint suffix without schema. The storage account name will be -# prefixed to this value to create the FQDN. -# CLI flag: -.azure.endpoint-suffix -[endpoint_suffix: | default = ""] - -# Use Managed Identity to authenticate to the Azure storage account. -# CLI flag: -.azure.use-managed-identity -[use_managed_identity: | default = false] - -# Use Federated Token to authenticate to the Azure storage account. 
-# CLI flag: -.azure.use-federated-token -[use_federated_token: | default = false] - -# User assigned identity ID to authenticate to the Azure storage account. -# CLI flag: -.azure.user-assigned-id -[user_assigned_id: | default = ""] - -# Use Service Principal to authenticate through Azure OAuth. -# CLI flag: -.azure.use-service-principal -[use_service_principal: | default = false] - -# Azure Service Principal ID(GUID). -# CLI flag: -.azure.client-id -[client_id: | default = ""] - -# Azure Service Principal secret key. -# CLI flag: -.azure.client-secret -[client_secret: | default = ""] - -# Azure Tenant ID is used to authenticate through Azure OAuth. -# CLI flag: -.azure.tenant-id -[tenant_id: | default = ""] - -# Chunk delimiter for blob ID to be used -# CLI flag: -.azure.chunk-delimiter -[chunk_delimiter: | default = "-"] - -# Preallocated buffer size for downloads. -# CLI flag: -.azure.download-buffer-size -[download_buffer_size: | default = 512000] - -# Preallocated buffer size for uploads. -# CLI flag: -.azure.upload-buffer-size -[upload_buffer_size: | default = 256000] - -# Number of buffers used to used to upload a chunk. -# CLI flag: -.azure.download-buffer-count -[upload_buffer_count: | default = 1] - -# Timeout for requests made against azure blob storage. -# CLI flag: -.azure.request-timeout -[request_timeout: | default = 30s] - -# Number of retries for a request which times out. -# CLI flag: -.azure.max-retries -[max_retries: | default = 5] - -# Minimum time to wait before retrying a request. -# CLI flag: -.azure.min-retry-delay -[min_retry_delay: | default = 10ms] - -# Maximum time to wait before retrying a request. -# CLI flag: -.azure.max-retry-delay -[max_retry_delay: | default = 500ms] -``` - -### alibabacloud_storage_config - -The `alibabacloud_storage_config` block configures the connection to Alibaba Cloud Storage object storage backend. 
The supported CLI flags `` used to reference this configuration block are: - -- `common` -- `ruler` - -  - -```yaml -# Name of OSS bucket. -# CLI flag: -common.storage.oss.bucketname -[bucket: | default = ""] - -# oss Endpoint to connect to. -# CLI flag: -common.storage.oss.endpoint -[endpoint: | default = ""] - -# alibabacloud Access Key ID -# CLI flag: -common.storage.oss.access-key-id -[access_key_id: | default = ""] - -# alibabacloud Secret Access Key -# CLI flag: -common.storage.oss.secret-access-key -[secret_access_key: | default = ""] -``` - -### gcs_storage_config - -The `gcs_storage_config` block configures the connection to Google Cloud Storage object storage backend. The supported CLI flags `` used to reference this configuration block are: - -- `common.storage` -- `ruler.storage` - -  - -```yaml -# Name of GCS bucket. Please refer to -# https://cloud.google.com/docs/authentication/production for more information -# about how to configure authentication. -# CLI flag: -.gcs.bucketname -[bucket_name: | default = ""] - -# Service account key content in JSON format, refer to -# https://cloud.google.com/iam/docs/creating-managing-service-account-keys for -# creation. -# CLI flag: -.gcs.service-account -[service_account: | default = ""] - -# The size of the buffer that GCS client for each PUT request. 0 to disable -# buffering. -# CLI flag: -.gcs.chunk-buffer-size -[chunk_buffer_size: | default = 0] - -# The duration after which the requests to GCS should be timed out. -# CLI flag: -.gcs.request-timeout -[request_timeout: | default = 0s] - -# Enable OpenCensus (OC) instrumentation for all requests. -# CLI flag: -.gcs.enable-opencensus -[enable_opencensus: | default = true] - -# Enable HTTP2 connections. -# CLI flag: -.gcs.enable-http2 -[enable_http2: | default = true] - -# Enable automatic retries of failed idempotent requests. 
-# CLI flag: -.gcs.enable-retries -[enable_retries: | default = true] -``` - -### s3_storage_config - -The `s3_storage_config` block configures the connection to Amazon S3 object storage backend. The supported CLI flags `` used to reference this configuration block are: - -- `common` -- `ruler` - -  - -```yaml -# S3 endpoint URL with escaped Key and Secret encoded. If only region is -# specified as a host, proper endpoint will be deduced. Use -# inmemory:/// to use a mock in-memory implementation. -# CLI flag: -.storage.s3.url -[s3: ] - -# Set this to `true` to force the request to use path-style addressing. -# CLI flag: -.storage.s3.force-path-style -[s3forcepathstyle: | default = false] - -# Comma separated list of bucket names to evenly distribute chunks over. -# Overrides any buckets specified in s3.url flag -# CLI flag: -.storage.s3.buckets -[bucketnames: | default = ""] - -# S3 Endpoint to connect to. -# CLI flag: -.storage.s3.endpoint -[endpoint: | default = ""] - -# AWS region to use. -# CLI flag: -.storage.s3.region -[region: | default = ""] - -# AWS Access Key ID -# CLI flag: -.storage.s3.access-key-id -[access_key_id: | default = ""] - -# AWS Secret Access Key -# CLI flag: -.storage.s3.secret-access-key -[secret_access_key: | default = ""] - -# AWS Session Token -# CLI flag: -.storage.s3.session-token -[session_token: | default = ""] - -# Disable https on s3 connection. -# CLI flag: -.storage.s3.insecure -[insecure: | default = false] - -http_config: - # Timeout specifies a time limit for requests made by s3 Client. - # CLI flag: -.storage.s3.http.timeout - [timeout: | default = 0s] - - # The maximum amount of time an idle connection will be held open. - # CLI flag: -.storage.s3.http.idle-conn-timeout - [idle_conn_timeout: | default = 1m30s] - - # If non-zero, specifies the amount of time to wait for a server's response - # headers after fully writing the request. 
- # CLI flag: -.storage.s3.http.response-header-timeout - [response_header_timeout: | default = 0s] - - # Set to true to skip verifying the certificate chain and hostname. - # CLI flag: -.storage.s3.http.insecure-skip-verify - [insecure_skip_verify: | default = false] - - # Path to the trusted CA file that signed the SSL certificate of the S3 - # endpoint. - # CLI flag: -.storage.s3.http.ca-file - [ca_file: | default = ""] - -# The signature version to use for authenticating against S3. Supported values -# are: v4. -# CLI flag: -.storage.s3.signature-version -[signature_version: | default = "v4"] - -# The S3 storage class which objects will use. Supported values are: GLACIER, -# DEEP_ARCHIVE, GLACIER_IR, INTELLIGENT_TIERING, ONEZONE_IA, OUTPOSTS, -# REDUCED_REDUNDANCY, STANDARD, STANDARD_IA. -# CLI flag: -.storage.s3.storage-class -[storage_class: | default = "STANDARD"] - -sse: - # Enable AWS Server Side Encryption. Supported values: SSE-KMS, SSE-S3. - # CLI flag: -.storage.s3.sse.type - [type: | default = ""] - - # KMS Key ID used to encrypt objects in S3 - # CLI flag: -.storage.s3.sse.kms-key-id - [kms_key_id: | default = ""] - - # KMS Encryption Context used for object encryption. It expects JSON formatted - # string. - # CLI flag: -.storage.s3.sse.kms-encryption-context - [kms_encryption_context: | default = ""] - -# Configures back off when S3 get Object. -backoff_config: - # Minimum backoff time when s3 get Object - # CLI flag: -.storage.s3.min-backoff - [min_period: | default = 100ms] - - # Maximum backoff time when s3 get Object - # CLI flag: -.storage.s3.max-backoff - [max_period: | default = 3s] - - # Maximum number of times to retry when s3 get Object - # CLI flag: -.storage.s3.max-retries - [max_retries: | default = 5] -``` - -### bos_storage_config - -The `bos_storage_config` block configures the connection to Baidu Object Storage (BOS) object storage backend. 
The supported CLI flags `` used to reference this configuration block are: - -- `common.storage` -- `ruler.storage` - -  - -```yaml -# Name of BOS bucket. -# CLI flag: -.bos.bucket-name -[bucket_name: | default = ""] - -# BOS endpoint to connect to. -# CLI flag: -.bos.endpoint -[endpoint: | default = "bj.bcebos.com"] - -# Baidu Cloud Engine (BCE) Access Key ID. -# CLI flag: -.bos.access-key-id -[access_key_id: | default = ""] - -# Baidu Cloud Engine (BCE) Secret Access Key. -# CLI flag: -.bos.secret-access-key -[secret_access_key: | default = ""] -``` - -### swift_storage_config - -The `swift_storage_config` block configures the connection to OpenStack Object Storage (Swift) object storage backend. The supported CLI flags `` used to reference this configuration block are: - -- `common.storage` -- `ruler.storage` - -  - -```yaml -# OpenStack Swift authentication API version. 0 to autodetect. -# CLI flag: -.swift.auth-version -[auth_version: | default = 0] - -# OpenStack Swift authentication URL -# CLI flag: -.swift.auth-url -[auth_url: | default = ""] - -# Set this to true to use the internal OpenStack Swift endpoint URL -# CLI flag: -.swift.internal -[internal: | default = false] - -# OpenStack Swift username. -# CLI flag: -.swift.username -[username: | default = ""] - -# OpenStack Swift user's domain name. -# CLI flag: -.swift.user-domain-name -[user_domain_name: | default = ""] - -# OpenStack Swift user's domain ID. -# CLI flag: -.swift.user-domain-id -[user_domain_id: | default = ""] - -# OpenStack Swift user ID. -# CLI flag: -.swift.user-id -[user_id: | default = ""] - -# OpenStack Swift API key. -# CLI flag: -.swift.password -[password: | default = ""] - -# OpenStack Swift user's domain ID. -# CLI flag: -.swift.domain-id -[domain_id: | default = ""] - -# OpenStack Swift user's domain name. -# CLI flag: -.swift.domain-name -[domain_name: | default = ""] - -# OpenStack Swift project ID (v2,v3 auth only). 
-# CLI flag: -.swift.project-id -[project_id: | default = ""] - -# OpenStack Swift project name (v2,v3 auth only). -# CLI flag: -.swift.project-name -[project_name: | default = ""] - -# ID of the OpenStack Swift project's domain (v3 auth only), only needed if it -# differs the from user domain. -# CLI flag: -.swift.project-domain-id -[project_domain_id: | default = ""] - -# Name of the OpenStack Swift project's domain (v3 auth only), only needed if it -# differs from the user domain. -# CLI flag: -.swift.project-domain-name -[project_domain_name: | default = ""] - -# OpenStack Swift Region to use (v2,v3 auth only). -# CLI flag: -.swift.region-name -[region_name: | default = ""] - -# Name of the OpenStack Swift container to put chunks in. -# CLI flag: -.swift.container-name -[container_name: | default = ""] - -# Max retries on requests error. -# CLI flag: -.swift.max-retries -[max_retries: | default = 3] - -# Time after which a connection attempt is aborted. -# CLI flag: -.swift.connect-timeout -[connect_timeout: | default = 10s] - -# Time after which an idle request is aborted. The timeout watchdog is reset -# each time some data is received, so the timeout triggers after X time no data -# is received on a request. -# CLI flag: -.swift.request-timeout -[request_timeout: | default = 5s] -``` - -### cos_storage_config - -The `cos_storage_config` block configures the connection to IBM Cloud Object Storage (COS) backend. The supported CLI flags `` used to reference this configuration block are: - -- `common.storage` -- `ruler.storage` - -  - -```yaml -# Set this to `true` to force the request to use path-style addressing. -# CLI flag: -.cos.force-path-style -[forcepathstyle: | default = false] - -# Comma separated list of bucket names to evenly distribute chunks over. -# CLI flag: -.cos.buckets -[bucketnames: | default = ""] - -# COS Endpoint to connect to. -# CLI flag: -.cos.endpoint -[endpoint: | default = ""] - -# COS region to use. 
-# CLI flag: -.cos.region -[region: | default = ""] - -# COS HMAC Access Key ID. -# CLI flag: -.cos.access-key-id -[access_key_id: | default = ""] - -# COS HMAC Secret Access Key. -# CLI flag: -.cos.secret-access-key -[secret_access_key: | default = ""] - -http_config: - # The maximum amount of time an idle connection will be held open. - # CLI flag: -.cos.http.idle-conn-timeout - [idle_conn_timeout: | default = 1m30s] - - # If non-zero, specifies the amount of time to wait for a server's response - # headers after fully writing the request. - # CLI flag: -.cos.http.response-header-timeout - [response_header_timeout: | default = 0s] - -# Configures back off when cos get Object. -backoff_config: - # Minimum backoff time when cos get Object. - # CLI flag: -.cos.min-backoff - [min_period: | default = 100ms] - - # Maximum backoff time when cos get Object. - # CLI flag: -.cos.max-backoff - [max_period: | default = 3s] - - # Maximum number of times to retry when cos get Object. - # CLI flag: -.cos.max-retries - [max_retries: | default = 5] - -# IAM API key to access COS. -# CLI flag: -.cos.api-key -[api_key: | default = ""] - -# COS service instance id to use. -# CLI flag: -.cos.service-instance-id -[service_instance_id: | default = ""] - -# IAM Auth Endpoint for authentication. -# CLI flag: -.cos.auth-endpoint -[auth_endpoint: | default = "https://iam.cloud.ibm.com/identity/token"] - -# Compute resource token file path. -# CLI flag: -.cos.cr-token-file-path -[cr_token_file_path: | default = ""] - -# Name of the trusted profile. -# CLI flag: -.cos.trusted-profile-name -[trusted_profile_name: | default = ""] - -# ID of the trusted profile. -# CLI flag: -.cos.trusted-profile-id -[trusted_profile_id: | default = ""] -``` - -### local_storage_config - -The `local_storage_config` block configures the usage of local file system as object storage backend. - -```yaml -# Directory to store chunks in. 
-# CLI flag: -local.chunk-directory -[directory: | default = ""] -``` - -### named_stores_config - -Configures additional object stores for a given storage provider. -Supported stores: aws, azure, bos, filesystem, gcs, swift. -Example: -storage_config: - named_stores: - aws: - store-1: - endpoint: s3://foo-bucket - region: us-west1 -Named store from this example can be used by setting object_store to store-1 in period_config. - -```yaml -[aws: ] - -[azure: ] - -[bos: ] - -[filesystem: ] - -[gcs: ] - -[alibabacloud: ] - -[swift: ] - -[cos: ] -``` - -### attributes_config - -Define actions for matching OpenTelemetry (OTEL) attributes. - -```yaml -# Configures action to take on matching attributes. It allows one of -# [structured_metadata, drop] for all attribute types. It additionally allows -# index_label action for resource attributes -[action: | default = ""] - -# List of attributes to configure how to store them or drop them altogether -[attributes: ] - -# Regex to choose attributes to configure how to store them or drop them -# altogether -[regex: ] -``` - -## Runtime Configuration file - -Loki has a concept of "runtime config" file, which is simply a file that is reloaded while Loki is running. It is used by some Loki components to allow operator to change some aspects of Loki configuration without restarting it. File is specified by using `-runtime-config.file=` flag and reload period (which defaults to 10 seconds) can be changed by `-runtime-config.reload-period=` flag. Previously this mechanism was only used by limits overrides, and flags were called `-limits.per-user-override-config=` and `-limits.per-user-override-period=10s` respectively. These are still used, if `-runtime-config.file=` is not specified. - -At the moment, two components use runtime configuration: limits and multi KV store. - -Options for runtime configuration reload can also be configured via YAML: - -```yaml -# Configuration file to periodically check and reload. 
-[file: : default = empty] - -# How often to check the file. -[period: : default 10s] -``` - -Example runtime configuration file: - -```yaml -overrides: - tenant1: - ingestion_rate_mb: 10 - max_streams_per_user: 100000 - max_chunks_per_query: 100000 - tenant2: - max_streams_per_user: 1000000 - max_chunks_per_query: 1000000 - -multi_kv_config: - mirror-enabled: false - primary: consul -``` - -## Accept out-of-order writes - -Since the beginning of Loki, log entries had to be written to Loki in order -by time. -This limitation has been lifted. -Out-of-order writes are enabled globally by default, but can be disabled/enabled -on a cluster or per-tenant basis. - -- To disable out-of-order writes for all tenants, -place in the `limits_config` section: - - ``` - limits_config: - unordered_writes: false - ``` - -- To disable out-of-order writes for specific tenants, -configure a runtime configuration file: - - ``` - runtime_config: - file: overrides.yaml - ``` - - In the `overrides.yaml` file, add `unordered_writes` for each tenant - permitted to have out-of-order writes: - - ``` - overrides: - "tenantA": - unordered_writes: false - ``` - -How far into the past accepted out-of-order log entries may be -is configurable with `max_chunk_age`. -`max_chunk_age` defaults to 2 hours. -Loki calculates the earliest time that out-of-order entries may have -and be accepted with - -``` -time_of_most_recent_line - (max_chunk_age/2) -``` - -Log entries with timestamps that are after this earliest time are accepted. -Log entries further back in time return an out-of-order error. + -For example, if `max_chunk_age` is 2 hours -and the stream `{foo="bar"}` has one entry at `8:00`, -Loki will accept data for that stream as far back in time as `7:00`. -If another log line is written at `10:00`, -Loki will accept data for that stream as far back in time as `9:00`. 
+{{< docs/shared lookup="configuration.md" source="loki" version="" >}} diff --git a/docs/sources/configure/bp-configure.md b/docs/sources/configure/bp-configure.md index 23175d7c1b1a5..28feb68b1ba4c 100644 --- a/docs/sources/configure/bp-configure.md +++ b/docs/sources/configure/bp-configure.md @@ -14,7 +14,7 @@ Loki can cache data at many levels, which can drastically improve performance. D ## Time ordering of logs -Loki [accepts out-of-order writes]({{< relref "../configure#accept-out-of-order-writes" >}}) _by default_. +Loki [accepts out-of-order writes](https://grafana.com/docs/loki//configure/#accept-out-of-order-writes) _by default_. This section identifies best practices when Loki is _not_ configured to accept out-of-order writes. One issue many people have with Loki is their client receiving errors for out of order log entries. This happens because of this hard and fast rule within Loki: @@ -46,7 +46,7 @@ What can we do about this? What if this was because the sources of these logs we {job="syslog", instance="host2"} 00:00:02 i'm a syslog! <- Accepted, still in order for stream 2 ``` -But what if the application itself generated logs that were out of order? Well, I'm afraid this is a problem. If you are extracting the timestamp from the log line with something like [the Promtail pipeline stage](/docs/loki/latest/send-data/promtail/stages/timestamp/), you could instead _not_ do this and let Promtail assign a timestamp to the log lines. Or you can hopefully fix it in the application itself. +But what if the application itself generated logs that were out of order? Well, I'm afraid this is a problem. If you are extracting the timestamp from the log line with something like [the Promtail pipeline stage](/docs/loki//send-data/promtail/stages/timestamp/), you could instead _not_ do this and let Promtail assign a timestamp to the log lines. Or you can hopefully fix it in the application itself. 
It's also worth noting that the batching nature of the Loki push API can lead to some instances of out of order errors being received which are really false positives. (Perhaps a batch partially succeeded and was present; or anything that previously succeeded would return an out of order entry; or anything new would be accepted.) diff --git a/docs/sources/configure/examples/configuration-examples.md b/docs/sources/configure/examples/configuration-examples.md index 6fb77d78a6c4f..a644f27ccb412 100644 --- a/docs/sources/configure/examples/configuration-examples.md +++ b/docs/sources/configure/examples/configuration-examples.md @@ -30,15 +30,12 @@ schema_config: - from: 2020-05-15 store: tsdb object_store: filesystem - schema: v12 + schema: v13 index: prefix: index_ period: 24h storage_config: - tsdb_shipper: - active_index_directory: /tmp/loki/index - cache_location: /tmp/loki/index_cache filesystem: directory: /tmp/loki/chunks @@ -71,7 +68,7 @@ schema_config: - from: 2020-05-15 store: tsdb object_store: s3 - schema: v12 + schema: v13 index: prefix: index_ period: 24h @@ -127,7 +124,7 @@ schema_config: - from: 2020-05-15 store: tsdb object_store: gcs - schema: v12 + schema: v13 index: prefix: index_ period: 24h @@ -148,21 +145,20 @@ storage_config: # This is a partial configuration to deploy Loki backed by Baidu Object Storage (BOS). # The index will be shipped to the storage via tsdb-shipper. 
+common: + path_prefix: /tmp/loki schema_config: configs: - from: 2020-05-15 store: tsdb object_store: bos - schema: v12 + schema: v13 index: prefix: index_ period: 24h storage_config: - tsdb_shipper: - active_index_directory: /loki/index - cache_location: /loki/index_cache bos: bucket_name: bucket_name_1 endpoint: bj.bcebos.com @@ -190,6 +186,9 @@ compactor: ```yaml +common: + path_prefix: /tmp/loki + schema_config: configs: # Starting from 2018-04-15 Loki should store indexes on BoltDB with the v11 schema @@ -203,12 +202,12 @@ schema_config: period: 24h prefix: index_ - # Starting from 2023-6-15 Loki should store indexes on TSDB with the v12 schema + # Starting from 2023-6-15 Loki should store indexes on TSDB with the v13 schema # using daily periodic tables and chunks on AWS S3. - from: "2023-06-15" store: tsdb object_store: s3 - schema: v12 + schema: v13 index: period: 24h prefix: index_ @@ -221,13 +220,15 @@ schema_config: ```yaml # This partial configuration uses Alibaba for chunk storage. +common: + path_prefix: /tmp/loki schema_config: configs: - from: 2020-05-15 store: tsdb object_store: alibabacloud - schema: v12 + schema: v13 index: prefix: index_ period: 24h @@ -291,21 +292,20 @@ storage_config: ```yaml # This partial configuration uses IBM Cloud Object Storage (COS) for chunk storage. HMAC will be used for authenticating with COS. +common: + path_prefix: /tmp/loki schema_config: configs: - from: 2020-10-01 store: tsdb object_store: cos - schema: v12 + schema: v13 index: period: 24h prefix: index_ storage_config: - tsdb_shipper: - active_index_directory: /loki/index - cache_location: /loki/index_cache cos: bucketnames: endpoint: @@ -321,21 +321,20 @@ storage_config: ```yaml # This partial configuration uses IBM Cloud Object Storage (COS) for chunk storage. APIKey will be used for authenticating with COS. 
+common: + path_prefix: /tmp/loki schema_config: configs: - from: 2020-10-01 store: tsdb object_store: cos - schema: v12 + schema: v13 index: period: 24h prefix: index_ storage_config: - tsdb_shipper: - active_index_directory: /loki/index - cache_location: /loki/index_cache cos: bucketnames: endpoint: @@ -358,21 +357,20 @@ storage_config: # the same trusted profile. # In order to use trusted profile authentication we need to follow an additional step to create a trusted profile. # For more details about creating a trusted profile, see https://cloud.ibm.com/docs/account?topic=account-create-trusted-profile&interface=ui. +common: + path_prefix: /tmp/loki schema_config: configs: - from: 2020-10-01 store: tsdb object_store: cos - schema: v12 + schema: v13 index: period: 24h prefix: index_ storage_config: - tsdb_shipper: - active_index_directory: /loki/index - cache_location: /loki/index_cache cos: bucketnames: endpoint: @@ -404,61 +402,3 @@ memberlist: ``` - -## 16-(Deprecated)-Cassandra-Snippet.yaml - -```yaml - -# This is a partial config that uses the local filesystem for chunk storage and Cassandra for index storage -# WARNING - DEPRECATED: The Cassandra index store is deprecated and will be removed in a future release. - -schema_config: - configs: - - from: 2020-05-15 - store: cassandra - object_store: filesystem - schema: v12 - index: - prefix: cassandra_table - period: 168h - -storage_config: - cassandra: - username: cassandra - password: cassandra - addresses: 127.0.0.1 - auth: true - keyspace: lokiindex - - filesystem: - directory: /tmp/loki/chunks - - -``` - - -## 17-(Deprecated)-S3-And-DynamoDB-Snippet.yaml - -```yaml - -# This partial configuration uses S3 for chunk storage and uses DynamoDB for index storage -# WARNING - DEPRECATED: The DynamoDB index store is deprecated and will be removed in a future release. 
- -schema_config: - configs: - - from: 2020-05-15 - store: aws - object_store: s3 - schema: v12 - index: - prefix: loki_ - -storage_config: - aws: - s3: s3://access_key:secret_access_key@region/bucket_name - dynamodb: - dynamodb_url: dynamodb://access_key:secret_access_key@region - - -``` - diff --git a/docs/sources/configure/examples/yaml/1-Local-Configuration-Example.yaml b/docs/sources/configure/examples/yaml/1-Local-Configuration-Example.yaml index e4b3ab0c6a28c..5d575922d3d07 100644 --- a/docs/sources/configure/examples/yaml/1-Local-Configuration-Example.yaml +++ b/docs/sources/configure/examples/yaml/1-Local-Configuration-Example.yaml @@ -19,14 +19,11 @@ schema_config: - from: 2020-05-15 store: tsdb object_store: filesystem - schema: v12 + schema: v13 index: prefix: index_ period: 24h storage_config: - tsdb_shipper: - active_index_directory: /tmp/loki/index - cache_location: /tmp/loki/index_cache filesystem: directory: /tmp/loki/chunks diff --git a/docs/sources/configure/examples/yaml/11-COS-HMAC-Example.yaml b/docs/sources/configure/examples/yaml/11-COS-HMAC-Example.yaml index 642d0f3316dae..a246d0e491cae 100644 --- a/docs/sources/configure/examples/yaml/11-COS-HMAC-Example.yaml +++ b/docs/sources/configure/examples/yaml/11-COS-HMAC-Example.yaml @@ -1,19 +1,18 @@ # This partial configuration uses IBM Cloud Object Storage (COS) for chunk storage. HMAC will be used for authenticating with COS. 
+common: + path_prefix: /tmp/loki schema_config: configs: - from: 2020-10-01 store: tsdb object_store: cos - schema: v12 + schema: v13 index: period: 24h prefix: index_ storage_config: - tsdb_shipper: - active_index_directory: /loki/index - cache_location: /loki/index_cache cos: bucketnames: endpoint: diff --git a/docs/sources/configure/examples/yaml/12-COS-APIKey-Example.yaml b/docs/sources/configure/examples/yaml/12-COS-APIKey-Example.yaml index d50d1c39863cb..2247d96d5da49 100644 --- a/docs/sources/configure/examples/yaml/12-COS-APIKey-Example.yaml +++ b/docs/sources/configure/examples/yaml/12-COS-APIKey-Example.yaml @@ -1,19 +1,18 @@ # This partial configuration uses IBM Cloud Object Storage (COS) for chunk storage. APIKey will be used for authenticating with COS. +common: + path_prefix: /tmp/loki schema_config: configs: - from: 2020-10-01 store: tsdb object_store: cos - schema: v12 + schema: v13 index: period: 24h prefix: index_ storage_config: - tsdb_shipper: - active_index_directory: /loki/index - cache_location: /loki/index_cache cos: bucketnames: endpoint: diff --git a/docs/sources/configure/examples/yaml/13-COS-Trusted-Profile-Example.yaml b/docs/sources/configure/examples/yaml/13-COS-Trusted-Profile-Example.yaml index 90e6b9673353d..5f3651f4d0014 100644 --- a/docs/sources/configure/examples/yaml/13-COS-Trusted-Profile-Example.yaml +++ b/docs/sources/configure/examples/yaml/13-COS-Trusted-Profile-Example.yaml @@ -5,21 +5,20 @@ # the same trusted profile. # In order to use trusted profile authentication we need to follow an additional step to create a trusted profile. # For more details about creating a trusted profile, see https://cloud.ibm.com/docs/account?topic=account-create-trusted-profile&interface=ui. 
+common: + path_prefix: /tmp/loki schema_config: configs: - from: 2020-10-01 store: tsdb object_store: cos - schema: v12 + schema: v13 index: period: 24h prefix: index_ storage_config: - tsdb_shipper: - active_index_directory: /loki/index - cache_location: /loki/index_cache cos: bucketnames: endpoint: diff --git a/docs/sources/configure/examples/yaml/16-(Deprecated)-Cassandra-Snippet.yaml b/docs/sources/configure/examples/yaml/16-(Deprecated)-Cassandra-Snippet.yaml deleted file mode 100644 index 71cacb5743687..0000000000000 --- a/docs/sources/configure/examples/yaml/16-(Deprecated)-Cassandra-Snippet.yaml +++ /dev/null @@ -1,24 +0,0 @@ -# This is a partial config that uses the local filesystem for chunk storage and Cassandra for index storage -# WARNING - DEPRECATED: The Cassandra index store is deprecated and will be removed in a future release. - -schema_config: - configs: - - from: 2020-05-15 - store: cassandra - object_store: filesystem - schema: v12 - index: - prefix: cassandra_table - period: 168h - -storage_config: - cassandra: - username: cassandra - password: cassandra - addresses: 127.0.0.1 - auth: true - keyspace: lokiindex - - filesystem: - directory: /tmp/loki/chunks - \ No newline at end of file diff --git a/docs/sources/configure/examples/yaml/17-(Deprecated)-S3-And-DynamoDB-Snippet.yaml b/docs/sources/configure/examples/yaml/17-(Deprecated)-S3-And-DynamoDB-Snippet.yaml deleted file mode 100644 index 0b297b02650b6..0000000000000 --- a/docs/sources/configure/examples/yaml/17-(Deprecated)-S3-And-DynamoDB-Snippet.yaml +++ /dev/null @@ -1,18 +0,0 @@ -# This partial configuration uses S3 for chunk storage and uses DynamoDB for index storage -# WARNING - DEPRECATED: The DynamoDB index store is deprecated and will be removed in a future release. 
- -schema_config: - configs: - - from: 2020-05-15 - store: aws - object_store: s3 - schema: v12 - index: - prefix: loki_ - -storage_config: - aws: - s3: s3://access_key:secret_access_key@region/bucket_name - dynamodb: - dynamodb_url: dynamodb://access_key:secret_access_key@region - \ No newline at end of file diff --git a/docs/sources/configure/examples/yaml/2-S3-Cluster-Example.yaml b/docs/sources/configure/examples/yaml/2-S3-Cluster-Example.yaml index 4dd9a3ae04f03..cde37ed5b2e8a 100644 --- a/docs/sources/configure/examples/yaml/2-S3-Cluster-Example.yaml +++ b/docs/sources/configure/examples/yaml/2-S3-Cluster-Example.yaml @@ -20,7 +20,7 @@ schema_config: - from: 2020-05-15 store: tsdb object_store: s3 - schema: v12 + schema: v13 index: prefix: index_ period: 24h diff --git a/docs/sources/configure/examples/yaml/4-GCS-Example.yaml b/docs/sources/configure/examples/yaml/4-GCS-Example.yaml index 7a227d064a417..330e94006aeef 100644 --- a/docs/sources/configure/examples/yaml/4-GCS-Example.yaml +++ b/docs/sources/configure/examples/yaml/4-GCS-Example.yaml @@ -19,7 +19,7 @@ schema_config: - from: 2020-05-15 store: tsdb object_store: gcs - schema: v12 + schema: v13 index: prefix: index_ period: 24h diff --git a/docs/sources/configure/examples/yaml/5-BOS-Example.yaml b/docs/sources/configure/examples/yaml/5-BOS-Example.yaml index 3a024ee849c70..5367106ed42da 100644 --- a/docs/sources/configure/examples/yaml/5-BOS-Example.yaml +++ b/docs/sources/configure/examples/yaml/5-BOS-Example.yaml @@ -1,20 +1,19 @@ # This is a partial configuration to deploy Loki backed by Baidu Object Storage (BOS). # The index will be shipped to the storage via tsdb-shipper. 
+common: + path_prefix: /tmp/loki schema_config: configs: - from: 2020-05-15 store: tsdb object_store: bos - schema: v12 + schema: v13 index: prefix: index_ period: 24h storage_config: - tsdb_shipper: - active_index_directory: /loki/index - cache_location: /loki/index_cache bos: bucket_name: bucket_name_1 endpoint: bj.bcebos.com diff --git a/docs/sources/configure/examples/yaml/7-Schema-Migration-Snippet.yaml b/docs/sources/configure/examples/yaml/7-Schema-Migration-Snippet.yaml index 37c12e034ba26..0c59e40807185 100644 --- a/docs/sources/configure/examples/yaml/7-Schema-Migration-Snippet.yaml +++ b/docs/sources/configure/examples/yaml/7-Schema-Migration-Snippet.yaml @@ -1,3 +1,6 @@ +common: + path_prefix: /tmp/loki + schema_config: configs: # Starting from 2018-04-15 Loki should store indexes on BoltDB with the v11 schema @@ -11,12 +14,12 @@ schema_config: period: 24h prefix: index_ - # Starting from 2023-6-15 Loki should store indexes on TSDB with the v12 schema + # Starting from 2023-6-15 Loki should store indexes on TSDB with the v13 schema # using daily periodic tables and chunks on AWS S3. - from: "2023-06-15" store: tsdb object_store: s3 - schema: v12 + schema: v13 index: period: 24h prefix: index_ diff --git a/docs/sources/configure/examples/yaml/8-alibaba-cloud-storage-Snippet.yaml b/docs/sources/configure/examples/yaml/8-alibaba-cloud-storage-Snippet.yaml index 48b7edeb7b0bb..7d48a54fe8994 100644 --- a/docs/sources/configure/examples/yaml/8-alibaba-cloud-storage-Snippet.yaml +++ b/docs/sources/configure/examples/yaml/8-alibaba-cloud-storage-Snippet.yaml @@ -1,11 +1,13 @@ # This partial configuration uses Alibaba for chunk storage. 
+common: + path_prefix: /tmp/loki schema_config: configs: - from: 2020-05-15 store: tsdb object_store: alibabacloud - schema: v12 + schema: v13 index: prefix: index_ period: 24h diff --git a/docs/sources/configure/index.template b/docs/sources/configure/index.template deleted file mode 100644 index 1cbcbe6d2279e..0000000000000 --- a/docs/sources/configure/index.template +++ /dev/null @@ -1,185 +0,0 @@ ---- -title: Grafana Loki configuration parameters -menuTitle: Configure -description: Configuration reference for the parameters used to configure Grafana Loki. -aliases: - - ./configuration # /docs/loki//configuration/ -weight: 400 ---- - -# Grafana Loki configuration parameters - -{{ .GeneratedFileWarning }} - -Grafana Loki is configured in a YAML file (usually referred to as `loki.yaml` ) -which contains information on the Loki server and its individual components, -depending on which mode Loki is launched in. - -Configuration examples can be found in the [Configuration Examples]({{ `{{< relref "./examples/configuration-examples" >}}` }}) document. - -## Printing Loki config at runtime - -If you pass Loki the flag `-print-config-stderr` or `-log-config-reverse-order`, (or `-print-config-stderr=true`) -Loki will dump the entire config object it has created from the built-in defaults combined first with -overrides from config file, and second by overrides from flags. - -The result is the value for every config object in the Loki config struct, which is very large... - -Many values will not be relevant to your install such as storage configs which you are not using and which you did not define, -this is expected as every option has a default value if it is being used or not. - -This config is what Loki will use to run, it can be invaluable for debugging issues related to configuration and -is especially useful in making sure your config files and flags are being read and loaded properly. - -`-print-config-stderr` is nice when running Loki directly e.g. 
`./loki ` as you can get a quick output of the entire Loki config. - -`-log-config-reverse-order` is the flag we run Loki with in all our environments, the config entries are reversed so -that the order of configs reads correctly top to bottom when viewed in Grafana's Explore. - -## Reload at runtime - -Promtail can reload its configuration at runtime. If the new configuration -is not well-formed, the changes will not be applied. -A configuration reload is triggered by sending a `SIGHUP` to the Promtail process or -sending a HTTP POST request to the `/reload` endpoint (when the `--server.enable-runtime-reload` flag is enabled). - -## Configuration file reference - -To specify which configuration file to load, pass the `-config.file` flag at the -command line. The value can be a list of comma separated paths, then the first -file that exists will be used. -If no `-config.file` argument is specified, Loki will look up the `config.yaml` in the -current working directory and the `config/` subdirectory and try to use that. - -The file is written in [YAML -format](https://en.wikipedia.org/wiki/YAML), defined by the scheme below. -Brackets indicate that a parameter is optional. For non-list parameters the -value is set to the specified default. - -### Use environment variables in the configuration - -> **Note:** This feature is only available in Loki 2.1+. - -You can use environment variable references in the configuration file to set values that need to be configurable during deployment. -To do this, pass `-config.expand-env=true` and use: - -``` -${VAR} -``` - -Where VAR is the name of the environment variable. - -Each variable reference is replaced at startup by the value of the environment variable. -The replacement is case-sensitive and occurs before the YAML file is parsed. -References to undefined variables are replaced by empty strings unless you specify a default value or custom error text. 
- -To specify a default value, use: - -``` -${VAR:-default_value} -``` - -Where default_value is the value to use if the environment variable is undefined. - -Pass the `-config.expand-env` flag at the command line to enable this way of setting configs. - -### Generic placeholders - -- `` : a boolean that can take the values `true` or `false` -- `` : any integer matching the regular expression `[1-9]+[0-9]*` -- `` : a duration matching the regular expression `[0-9]+(ns|us|µs|ms|[smh])` -- `` : a string matching the regular expression `[a-zA-Z_][a-zA-Z0-9_]*` -- `` : a string of unicode characters -- `` : a valid path relative to current working directory or an absolute path. -- `` : a valid string consisting of a hostname or IP followed by an optional port number -- `` : a string -- `` : a string that represents a secret, such as a password - -### Supported contents and default values of `loki.yaml` - -{{ .ConfigFile }} - -## Runtime Configuration file - -Loki has a concept of "runtime config" file, which is simply a file that is reloaded while Loki is running. It is used by some Loki components to allow operator to change some aspects of Loki configuration without restarting it. File is specified by using `-runtime-config.file=` flag and reload period (which defaults to 10 seconds) can be changed by `-runtime-config.reload-period=` flag. Previously this mechanism was only used by limits overrides, and flags were called `-limits.per-user-override-config=` and `-limits.per-user-override-period=10s` respectively. These are still used, if `-runtime-config.file=` is not specified. - -At the moment, two components use runtime configuration: limits and multi KV store. - -Options for runtime configuration reload can also be configured via YAML: - -```yaml -# Configuration file to periodically check and reload. -[file: : default = empty] - -# How often to check the file. 
-[period: : default 10s] -``` - -Example runtime configuration file: - -```yaml -overrides: - tenant1: - ingestion_rate_mb: 10 - max_streams_per_user: 100000 - max_chunks_per_query: 100000 - tenant2: - max_streams_per_user: 1000000 - max_chunks_per_query: 1000000 - -multi_kv_config: - mirror-enabled: false - primary: consul -``` - -## Accept out-of-order writes - -Since the beginning of Loki, log entries had to be written to Loki in order -by time. -This limitation has been lifted. -Out-of-order writes are enabled globally by default, but can be disabled/enabled -on a cluster or per-tenant basis. - -- To disable out-of-order writes for all tenants, -place in the `limits_config` section: - - ``` - limits_config: - unordered_writes: false - ``` - -- To disable out-of-order writes for specific tenants, -configure a runtime configuration file: - - ``` - runtime_config: - file: overrides.yaml - ``` - - In the `overrides.yaml` file, add `unordered_writes` for each tenant - permitted to have out-of-order writes: - - ``` - overrides: - "tenantA": - unordered_writes: false - ``` - -How far into the past accepted out-of-order log entries may be -is configurable with `max_chunk_age`. -`max_chunk_age` defaults to 2 hour. -Loki calculates the earliest time that out-of-order entries may have -and be accepted with - -``` -time_of_most_recent_line - (max_chunk_age/2) -``` - -Log entries with timestamps that are after this earliest time are accepted. -Log entries further back in time return an out-of-order error. - -For example, if `max_chunk_age` is 2 hours -and the stream `{foo="bar"}` has one entry at `8:00`, -Loki will accept data for that stream as far back in time as `7:00`. -If another log line is written at `10:00`, -Loki will accept data for that stream as far back in time as `9:00`. 
diff --git a/docs/sources/configure/storage.md b/docs/sources/configure/storage.md new file mode 100644 index 0000000000000..a4786d1cdebe5 --- /dev/null +++ b/docs/sources/configure/storage.md @@ -0,0 +1,499 @@ +--- +title: Storage +description: Describes Loki storage. +aliases: + - ../storage/ # /docs/loki/latest/storage/ +weight: 475 +--- +# Storage + +Unlike other logging systems, Grafana Loki is built around the idea of only indexing +metadata about your logs: labels (just like Prometheus labels). Log data itself +is then compressed and stored in chunks in object stores such as S3 or GCS, or +even locally on the filesystem. A small index and highly compressed chunks +simplifies the operation and significantly lowers the cost of Loki. + +Loki 2.8 introduced TSDB as a new mode for the Single Store and is now the recommended way to persist data in Loki. +More detailed information about TSDB can be found under the [manage section](https://grafana.com/docs/loki//operations/storage/tsdb/). + +Loki 2.0 introduced an index mechanism named 'boltdb-shipper' and is what we now call [Single Store](#single-store). +This type only requires one store, the object store, for both the index and chunks. +More detailed information about 'boltdb-shipper' can be found under the [manage section](https://grafana.com/docs/loki//operations/storage/boltdb-shipper/). + +Prior to Loki 2.0, chunks and index data were stored in separate backends: +object storage (or filesystem) for chunk data and NoSQL/Key-Value databases for index data. These "multistore" backends have been deprecated, as noted below. + +You can find more detailed information about all of the storage options in the [manage section](https://grafana.com/docs/loki//operations/storage/). + +## Single Store + +Single Store refers to using object storage as the storage medium for both Loki's index as well as its data ("chunks"). 
There are two supported modes: + +### TSDB (recommended) + +Starting in Loki 2.8, the [TSDB index store](https://grafana.com/docs/loki//operations/storage/tsdb/) improves query performance, reduces TCO and has the same feature parity as "boltdb-shipper". + +### BoltDB (deprecated) + +Also known as "boltdb-shipper" during development (and is still the schema `store` name). The single store configurations for Loki utilize the chunk store for both chunks and the index, requiring just one store to run Loki. + +Performance is comparable to a dedicated index type while providing a much less expensive and less complicated deployment. +When using Single Store, no extra [Chunk storage](#chunk-storage) and [Index storage](#index-storage) are necessary. + +### Supported storage backends + +See [Object Storage](#object-storage) for supported backends. + +## Chunk storage + +### File system + +The file system is the simplest backend for chunks, although it's also susceptible to data loss as it's unreplicated. This is common for single binary deployments though, as well as for those trying out Loki or doing local development on the project. It is similar in concept to many Prometheus deployments where a single Prometheus is responsible for monitoring a fleet. + +### Object storage + +#### Google Cloud Storage (GCS) + +GCS is a hosted object store offered by Google. It is a good candidate for a managed object store, especially when you're already running on GCP, and is production safe. + +#### Amazon Simple Storage Service (S3) + +S3 is AWS's hosted object store. It is a good candidate for a managed object store, especially when you're already running on AWS, and is production safe. + +#### Azure Blob Storage + +Blob Storage is Microsoft Azure's hosted object store. It is a good candidate for a managed object store, especially when you're already running on Azure, and is production safe. 
+You can authenticate Blob Storage access by using a storage account name and key or by using a Service Principal. + +#### IBM Cloud Object Storage (COS) + +[COS](https://www.ibm.com/cloud/object-storage) is IBM Cloud hosted object store. It is a good candidate for a managed object store, especially when you're already running on IBM Cloud, and is production safe. + +#### Baidu Object Storage (BOS) + +[BOS](https://intl.cloud.baidu.com/product/bos.html) is the Baidu Cloud hosted object storage. + +#### Alibaba Object Storage Service (OSS) + +[OSS](https://www.alibabacloud.com/product/object-storage-service) is the Alibaba Cloud hosted object storage. + +#### Other notable mentions + +You may use any substitutable services, such as those that implement the S3 API like [MinIO](https://min.io/). + +### Cassandra (deprecated) + +Cassandra is a popular database and one of Loki's possible chunk stores and is production safe. + +{{< collapse title="Title of hidden content" >}} +This storage type for chunks is deprecated and may be removed in future major versions of Loki. +{{< /collapse >}} + +## Index storage + +### Cassandra (deprecated) + +Cassandra can also be utilized for the index store and aside from the [boltdb-shipper](https://grafana.com/docs/loki//operations/storage/boltdb-shipper/), it's the only non-cloud offering that can be used for the index that's horizontally scalable and has configurable replication. It's a good candidate when you already run Cassandra, are running on-prem, or do not wish to use a managed cloud offering. + +{{< collapse title="Title of hidden content" >}} +This storage type for indexes is deprecated and may be removed in future major versions of Loki. +{{< /collapse >}} + +### BigTable (deprecated) + +Bigtable is a cloud database offered by Google. It is a good candidate for a managed index store if you're already using it (due to its heavy fixed costs) or wish to run in GCP. 
+ +{{< collapse title="Title of hidden content" >}} +This storage type for indexes is deprecated and may be removed in future major versions of Loki. +{{< /collapse >}} + +### DynamoDB (deprecated) + +DynamoDB is a cloud database offered by AWS. It is a good candidate for a managed index store, especially if you're already running in AWS. + +{{< collapse title="Title of hidden content" >}} +This storage type for indexes is deprecated and may be removed in future major versions of Loki. +{{< /collapse >}} + +#### Rate limiting + +DynamoDB is susceptible to rate limiting, particularly due to overconsuming what is called [provisioned capacity](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.ReadWriteCapacityMode.html). This can be controlled via the [provisioning](#provisioning) configs in the table manager. + +### BoltDB (deprecated) + +BoltDB is an embedded database on disk. It is not replicated and thus cannot be used for high availability or clustered Loki deployments, but is commonly paired with a `filesystem` chunk store for proof of concept deployments, trying out Loki, and development. The [boltdb-shipper](https://grafana.com/docs/loki//operations/storage/boltdb-shipper/) aims to support clustered deployments using `boltdb` as an index. + +{{< collapse title="Title of hidden content" >}} +This storage type for indexes is deprecated and may be removed in future major versions of Loki. +{{< /collapse >}} + +## Schema Config + +Loki aims to be backwards compatible and over the course of its development has had many internal changes that facilitate better and more efficient storage/querying. Loki allows incrementally upgrading to these new storage _schemas_ and can query across them transparently. This makes upgrading a breeze. 
+For instance, this is what it looks like when migrating from BoltDB with v11 schema to TSDB with v13 schema starting 2023-07-01: + +```yaml +schema_config: + configs: + - from: 2019-07-01 + store: boltdb + object_store: filesystem + schema: v11 + index: + prefix: index_ + period: 24h + - from: 2023-07-01 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h +``` + +For all data ingested before 2023-07-01, Loki used BoltDB with the v11 schema, and then switched after that point to the more effective TSDB with the v13 schema. This dramatically simplifies upgrading, ensuring it's simple to take advantage of new storage optimizations. These configs should be immutable for as long as you care about retention. + +## Table Manager (deprecated) + +One of the subcomponents in Loki is the `table-manager`. It is responsible for pre-creating and expiring index tables. This helps partition the writes and reads in Loki across a set of distinct indices in order to prevent unbounded growth. + +```yaml +table_manager: + # The retention period must be a multiple of the index / chunks + # table "period" (see period_config). + retention_deletes_enabled: true + # This is 15 weeks retention, based on the 168h (1week) period durations used in the rest of the examples. + retention_period: 2520h +``` + +For more information, see the [table manager](https://grafana.com/docs/loki//operations/storage/tsdb/) documentation. + +### Provisioning + +In the case of AWS DynamoDB, you'll likely want to tune the provisioned throughput for your tables as well. This is to prevent your tables being rate limited on one hand and assuming unnecessary cost on the other. 
By default Loki uses a [provisioned capacity](https://docs.aws.amazon.com/amazondynamodb/latest/developerguide/HowItWorks.ReadWriteCapacityMode.html) strategy for DynamoDB tables like so: + +``` +table_manager: + index_tables_provisioning: + # Read/write throughput requirements for the current table + # (the table which would handle writes/reads for data timestamped at the current time) + provisioned_write_throughput: | default = 3000 + provisioned_read_throughput: | default = 300 + + # Read/write throughput requirements for non-current tables + inactive_write_throughput: | default = 1 + inactive_read_throughput: | Default = 300 +``` + +Note, there are a few other DynamoDB provisioning options including DynamoDB autoscaling and on-demand capacity. See the [provisioning configuration](https://grafana.com/docs/loki//configure/#table_manager) in the `table_manager` block documentation for more information. + +## Upgrading Schemas + +When a new schema is released and you want to gain the advantages it provides, you can! Loki can transparently query and merge data from across schema boundaries so there is no disruption of service and upgrading is easy. + +First, you'll want to create a new [period_config](https://grafana.com/docs/loki//configure/#period_config) entry in your [schema_config](https://grafana.com/docs/loki//configure/#schema_config). The important thing to remember here is to set this at some point in the _future_ and then roll out the config file changes to Loki. This allows the table manager to create the required table in advance of writes and ensures that existing data isn't queried as if it adheres to the new schema. 
+ +As an example, let's say it's 2023-07-14 and we want to start using the `v13` schema on the 20th: +```yaml +schema_config: + configs: + - from: 2019-07-14 + store: tsdb + object_store: filesystem + schema: v11 + index: + prefix: index_ + period: 24h + - from: 2023-07-20 + store: tsdb + object_store: filesystem + schema: v13 + index: + prefix: index_ + period: 24h +``` + +It's that easy; we just created a new entry starting on the 20th. + +## Retention + +With the exception of the `filesystem` chunk store, Loki will not delete old chunk stores. This is generally handled instead by configuring TTLs (time to live) in the chunk store of your choice (bucket lifecycles in S3/GCS, and TTLs in Cassandra). Neither will Loki currently delete old data when your local disk fills when using the `filesystem` chunk store -- deletion is only determined by retention duration. + +We're interested in adding targeted deletion in future Loki releases (think tenant or stream level granularity) and may include other strategies as well. + +For more information, see the [retention configuration](https://grafana.com/docs/loki//operations/storage/retention/) documentation. + + +## Examples + +### Single machine/local development (boltdb+filesystem) + +[The repo contains a working example](https://github.com/grafana/loki/blob/main/cmd/loki/loki-local-config.yaml), you may want to checkout a tag of the repo to make sure you get a compatible example. 
+ +### GCP deployment (GCS Single Store) + +```yaml +storage_config: + tsdb_shipper: + active_index_directory: /loki/index + cache_location: /loki/index_cache + cache_ttl: 24h # Can be increased for faster performance over longer query periods, uses more disk space + gcs: + bucket_name: + +schema_config: + configs: + - from: 2020-07-01 + store: tsdb + object_store: gcs + schema: v13 + index: + prefix: index_ + period: 24h +``` + +### AWS deployment (S3 Single Store) + +```yaml +storage_config: + tsdb_shipper: + active_index_directory: /loki/index + cache_location: /loki/index_cache + cache_ttl: 24h # Can be increased for faster performance over longer query periods, uses more disk space + aws: + s3: s3://:@ + bucketnames: + +schema_config: + configs: + - from: 2020-07-01 + store: tsdb + object_store: aws + schema: v13 + index: + prefix: index_ + period: 24h +``` + +If you don't wish to hard-code S3 credentials, you can also configure an EC2 +instance role by changing the `storage_config` section: + +```yaml +storage_config: + aws: + s3: s3://region + bucketnames: +``` + +The role should have a policy with the following permissions attached. + +```json +{ + "Version": "2012-10-17", + "Statement": [ + { + "Sid": "LokiStorage", + "Effect": "Allow", + "Principal": { + "AWS": [ + "arn:aws:iam::" + ] + }, + "Action": [ + "s3:ListBucket", + "s3:PutObject", + "s3:GetObject", + "s3:DeleteObject" + ], + "Resource": [ + "arn:aws:s3:::", + "arn:aws:s3:::/*" + ] + } + ] +} +``` + +**To setup an S3 bucket and an IAM role and policy:** + +This guide assumes a provisioned EKS cluster. + +1. Checkout the Loki repository and navigate to [production/terraform/modules/s3](https://github.com/grafana/loki/tree/main/production/terraform/modules/s3). + +2. Initialize Terraform `terraform init`. + +3. Export the AWS profile and region if not done so: + + ``` + export AWS_PROFILE= + export AWS_REGION= + ``` + +4. 
Save the OIDC provider in an environment variable: + + ``` + oidc_provider=$(aws eks describe-cluster --name --query "cluster.identity.oidc.issuer" --output text | sed -e "s/^https:\/\///") + ``` + + See the [IAM OIDC provider guide](https://docs.aws.amazon.com/eks/latest/userguide/enable-iam-roles-for-service-accounts.html) for instructions on creating a provider. + +5. Apply the Terraform module `terraform apply -var region="$AWS_REGION" -var cluster_name= -var oidc_id="$oidc_provider"` + + Note, the bucket name defaults to `loki-data` but can be changed via the + `bucket_name` variable. + + +### Azure deployment (Azure Blob Storage Single Store) + +#### Using account name and key + +```yaml +schema_config: + configs: + - from: "2020-12-11" + index: + period: 24h + prefix: index_ + object_store: azure + schema: v13 + store: tsdb +storage_config: + azure: + # Your Azure storage account name + account_name: + # For the account-key, see docs: https://docs.microsoft.com/en-us/azure/storage/common/storage-account-keys-manage?tabs=azure-portal + account_key: + # See https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction#containers + container_name: + use_managed_identity: + # Providing a user assigned ID will override use_managed_identity + user_assigned_id: + request_timeout: 0 + # Configure this if you are using private azure cloud like azure stack hub and will use this endpoint suffix to compose container and blob storage URL. Ex: https://account_name.endpoint_suffix/container_name/blob_name + endpoint_suffix: + # If `connection_string` is set, the values of `account_name` and `endpoint_suffix` will not be used. Use this method over `account_key` if you need to authenticate via a SAS token. Or if you use the Azurite emulator. 
+ connection_string: + tsdb_shipper: + active_index_directory: /loki/index + cache_location: /loki/index_cache + cache_ttl: 24h + filesystem: + directory: /loki/chunks +``` + +#### Using a service principal + +```yaml +schema_config: + configs: + - from: "2020-12-11" + index: + period: 24h + prefix: index_ + object_store: azure + schema: v13 + store: tsdb +storage_config: + azure: + use_service_principal: true + # Azure tenant ID used to authenticate through Azure OAuth + tenant_id : + # Azure Service Principal ID + client_id: + # Azure Service Principal secret key + client_secret: + # See https://docs.microsoft.com/en-us/azure/storage/blobs/storage-blobs-introduction#containers + container_name: + request_timeout: 0 + tsdb_shipper: + active_index_directory: /loki/index + cache_location: /loki/index_cache + cache_ttl: 24h + filesystem: + directory: /loki/chunks +``` + +### IBM Deployment (COS Single Store) + +```yaml +schema_config: + configs: + - from: 2020-10-01 + index: + period: 24h + prefix: loki_index_ + object_store: cos + schema: v13 + store: tsdb + +storage_config: + tsdb_shipper: + active_index_directory: /loki/index + cache_location: /loki/index_cache + cos: + bucketnames: + endpoint: + api_key: + region: + service_instance_id: + auth_endpoint: +``` + +### On premise deployment (Cassandra+Cassandra) + +{{< collapse title="Title of hidden content" >}} +Cassandra as storage backend for chunks and indexes is deprecated. +{{< /collapse >}} + +**Keeping this for posterity, but this is likely not a common config. 
Cassandra should work and could be faster in some situations but is likely much more expensive.** + +```yaml +storage_config: + cassandra: + addresses: + keyspace: + auth: + username: # only applicable when auth=true + password: # only applicable when auth=true + +schema_config: + configs: + - from: 2020-07-01 + store: cassandra + object_store: cassandra + schema: v11 + index: + prefix: index_ + period: 168h + chunks: + prefix: chunk_ + period: 168h + +``` + +### On premise deployment (MinIO Single Store) + +We configure MinIO by using the AWS config because MinIO implements the S3 API: + +```yaml +storage_config: + aws: + # Note: use a fully qualified domain name (fqdn), like localhost. + # full example: http://loki:supersecret@localhost.:9000 + s3: http://:@: + s3forcepathstyle: true + tsdb_shipper: + active_index_directory: /loki/index + cache_location: /loki/index_cache + cache_ttl: 24h # Can be increased for faster performance over longer query periods, uses more disk space + +schema_config: + configs: + - from: 2020-07-01 + store: tsdb + object_store: s3 + schema: v13 + index: + prefix: index_ + period: 24h +``` diff --git a/docs/sources/get-started/_index.md b/docs/sources/get-started/_index.md index 36daa54cff0bc..f82d5f9f089c2 100644 --- a/docs/sources/get-started/_index.md +++ b/docs/sources/get-started/_index.md @@ -17,30 +17,28 @@ To collect logs and view your log data generally involves the following steps: ![Loki implementation steps](loki-install.png) -1. Install Loki on Kubernetes in simple scalable mode, using the recommended [Helm chart](https://grafana.com/docs/loki/latest/setup/install/helm/install-scalable/). Supply the Helm chart with your object storage authentication details. 
- - [Storage options](https://grafana.com/docs/loki/latest/operations/storage/) - - [Configuration reference](https://grafana.com/docs/loki/latest/configure/) - - There are [examples](https://grafana.com/docs/loki/latest/configure/examples/) for specific Object Storage providers that you can modify. +1. Install Loki on Kubernetes in simple scalable mode, using the recommended [Helm chart](https://grafana.com/docs/loki//setup/install/helm/install-scalable/). Supply the Helm chart with your object storage authentication details. + - [Storage options](https://grafana.com/docs/loki//operations/storage/) + - [Configuration reference](https://grafana.com/docs/loki//configure/) + - There are [examples](https://grafana.com/docs/loki//configure/examples/) for specific Object Storage providers that you can modify. 1. Deploy the [Grafana Agent](https://grafana.com/docs/agent/latest/flow/) to collect logs from your applications. 1. On Kubernetes, deploy the Grafana Agent using the Helm chart. Configure Grafana Agent to scrape logs from your Kubernetes cluster, and add your Loki endpoint details. See the following section for an example Grafana Agent Flow configuration file. - 1. Add [labels](https://grafana.com/docs/loki/latest/get-started/labels/) to your logs following our [best practices](https://grafana.com/docs/loki/latest/get-started/labels/bp-labels/). Most Loki users start by adding labels which describe where the logs are coming from (region, cluster, environment, etc.). + 1. Add [labels](https://grafana.com/docs/loki//get-started/labels/) to your logs following our [best practices](https://grafana.com/docs/loki//get-started/labels/bp-labels/). Most Loki users start by adding labels which describe where the logs are coming from (region, cluster, environment, etc.). 1. 
Deploy [Grafana](https://grafana.com/docs/grafana/latest/setup-grafana/) or [Grafana Cloud](https://grafana.com/docs/grafana-cloud/quickstart/) and configure a [Loki datasource](https://grafana.com/docs/grafana/latest/datasources/loki/configure-loki-data-source/). 1. Select the [Explore feature](https://grafana.com/docs/grafana/latest/explore/) in the Grafana main menu. To [view logs in Explore](https://grafana.com/docs/grafana/latest/explore/logs-integration/): 1. Pick a time range. 1. Choose the Loki datasource. - 1. Use [LogQL](https://grafana.com/docs/loki/latest/query/) in the [query editor](https://grafana.com/docs/grafana/latest/datasources/loki/query-editor/), use the Builder view to explore your labels, or select from sample pre-configured queries using the **Kick start your query** button. - -**Next steps:** Learn more about Loki’s query language, [LogQL](https://grafana.com/docs/loki/latest/query/). + 1. Use [LogQL](https://grafana.com/docs/loki//query/) in the [query editor](https://grafana.com/docs/grafana/latest/datasources/loki/query-editor/), use the Builder view to explore your labels, or select from sample pre-configured queries using the **Kick start your query** button. +**Next steps:** Learn more about Loki’s query language, [LogQL](https://grafana.com/docs/loki//query/). ## Example Grafana Agent configuration file to ship Kubernetes Pod logs to Loki To deploy Grafana Agent to collect Pod logs from your Kubernetes cluster and ship them to Loki, you can use the Grafana Agent Helm chart, and a `values.yaml` file. -1. Install Loki with the [Helm chart](https://grafana.com/docs/loki/latest/setup/install/helm/install-scalable/). +1. Install Loki with the [Helm chart](https://grafana.com/docs/loki//setup/install/helm/install-scalable/). 1. 
Deploy the Grafana Agent, using the [Grafana Agent Helm chart](https://grafana.com/docs/agent/latest/flow/setup/install/kubernetes/) and this example `values.yaml` file updating the value for `forward_to = [loki.write.endpoint.receiver]`: - ```yaml agent: mounts: @@ -101,14 +99,15 @@ agent: } ``` - 1. Then install Grafana Agent in your Kubernetes cluster using: ```bash helm upgrade -f values.yaml agent grafana/grafana-agent ``` + This sample file is configured to: + - Install Grafana Agent to discover Pod logs. - Add `container` and `pod` labels to the logs. - Push the logs to your Loki cluster using the tenant ID `cloud`. diff --git a/docs/sources/get-started/architecture.md b/docs/sources/get-started/architecture.md index 93ef93b3a52f4..9caeb717144bd 100644 --- a/docs/sources/get-started/architecture.md +++ b/docs/sources/get-started/architecture.md @@ -2,7 +2,7 @@ title: Loki architecture menutitle: Architecture description: Describes Grafana Loki's architecture. -weight: 300 +weight: 400 aliases: - ../architecture/ - ../fundamentals/architecture/ diff --git a/docs/sources/get-started/components.md b/docs/sources/get-started/components.md index 12c2ea2810653..2ea5349d75105 100644 --- a/docs/sources/get-started/components.md +++ b/docs/sources/get-started/components.md @@ -12,16 +12,18 @@ Loki is a modular system that contains many components that can either be run to in logical groups (in "simple scalable deployment" mode with targets `read`, `write`, `backend`), or individually (in "microservice" mode). For more information see [Deployment modes]({{< relref "./deployment-modes" >}}). 
-| Component | _individual_ | `all` | `read` | `write` | `backend` | -| ----------------------------------- | - | - | - | - | - | -| [Distributor](#distributor) | x | x | | x | | -| [Ingester](#ingester) | x | x | | x | | -| [Query Frontend](#query-frontend) | x | x | x | | | -| [Query Scheduler](#query-scheduler) | x | x | | | x | -| [Querier](#querier) | x | x | x | | | -| [Index Gateway](#index-gateway) | x | | | | x | -| [Compactor](#compactor) | x | x | | | x | -| [Ruler](#ruler) | x | x | | | x | +| Component | _individual_ | `all` | `read` | `write` | `backend` | +|----------------------------------------------------|--------------| - | - | - | - | +| [Distributor](#distributor) | x | x | | x | | +| [Ingester](#ingester) | x | x | | x | | +| [Query Frontend](#query-frontend) | x | x | x | | | +| [Query Scheduler](#query-scheduler) | x | x | | | x | +| [Querier](#querier) | x | x | x | | | +| [Index Gateway](#index-gateway) | x | | | | x | +| [Compactor](#compactor) | x | x | | | x | +| [Ruler](#ruler) | x | x | | | x | +| [Bloom Compactor (Experimental)](#bloom-compactor) | x | | | | x | +| [Bloom Gateway (Experimental)](#bloom-gateway) | x | | | | x | This page describes the responsibilities of each of these components. @@ -55,7 +57,7 @@ Currently the only way the distributor mutates incoming data is by normalizing l The distributor can also rate limit incoming logs based on the maximum data ingest rate per tenant. It does this by checking a per-tenant limit and dividing it by the current number of distributors. This allows the rate limit to be specified per tenant at the cluster level and enables us to scale the distributors up or down and have the per-distributor limit adjust accordingly. For instance, say we have 10 distributors and tenant A has a 10MB rate limit. Each distributor will allow up to 1MB/s before limiting. Now, say another large tenant joins the cluster and we need to spin up 10 more distributors. 
The now 20 distributors will adjust their rate limits for tenant A to `(10MB / 20 distributors) = 500KB/s`. This is how global limits allow much simpler and safer operation of the Loki cluster. {{% admonition type="note" %}} -The distributor uses the `ring` component under the hood to register itself amongst its peers and get the total number of active distributors. This is a different "key" than the ingesters use in the ring and comes from the distributor's own [ring configuration]({{< relref "../configure#distributor" >}}). +The distributor uses the `ring` component under the hood to register itself amongst its peers and get the total number of active distributors. This is a different "key" than the ingesters use in the ring and comes from the distributor's own [ring configuration](https://grafana.com/docs/loki//configure/#distributor). {{% /admonition %}} ### Forwarding @@ -170,7 +172,7 @@ deduplicated. ### Timestamp Ordering -Loki is configured to [accept out-of-order writes]({{< relref "../configure#accept-out-of-order-writes" >}}) by default. +Loki is configured to [accept out-of-order writes](https://grafana.com/docs/loki//configure/#accept-out-of-order-writes) by default. When not configured to accept out-of-order writes, the ingester validates that ingested log lines are in order. When an ingester receives a log line that doesn't follow the expected order, the line @@ -334,3 +336,29 @@ This mode is called remote rule evaluation and is used to gain the advantages of from the query frontend. When running multiple rulers, they use a consistent hash ring to distribute rule groups amongst available ruler instances. + +## Bloom Compactor +{{% admonition type="warning" %}} +This feature is an [experimental feature](/docs/release-life-cycle/). Engineering and on-call support is not available. No SLA is provided. +{{% /admonition %}} + +The Bloom Compactor service is responsible for building blooms for chunks in the object store. 
+The resulting blooms are grouped in bloom blocks spanning multiple series and chunks from a given day. +This component also builds metadata files to track which blocks are available for each series and TSDB index file. + +The service is horizontally scalable. When running multiple Bloom Compactors, they use a ring to shard tenants and +distribute series fingerprints among the available Bloom Compactor instances. +The ring is also used to decide which compactor should apply blooms retention. + +## Bloom Gateway +{{% admonition type="warning" %}} +This feature is an [experimental feature](/docs/release-life-cycle/). Engineering and on-call support is not available. No SLA is provided. +{{% /admonition %}} + +The Bloom Gateway service is responsible for handling and serving chunks filtering requests. +The index gateway queries the Bloom Gateway when computing chunk references, or when computing shards for a given query. +The gateway service takes a list of chunks and a filtering expression and matches them against the blooms, +filtering out any chunks that do not match the given filter expression. + +The service is horizontally scalable. When running multiple instances, they use a ring to shard tenants and +distribute series fingerprints across instances. diff --git a/docs/sources/get-started/deployment-modes.md b/docs/sources/get-started/deployment-modes.md index df4008ff332db..80d48231a9c11 100644 --- a/docs/sources/get-started/deployment-modes.md +++ b/docs/sources/get-started/deployment-modes.md @@ -2,7 +2,7 @@ menuTitle: Deployment modes title: Loki deployment modes description: Describes the three different Loki deployment models. -weight: 400 +weight: 600 aliases: - ../fundamentals/architecture/deployment-modes --- @@ -52,7 +52,7 @@ The simplest mode of operation is the monolithic deployment mode. 
You enable mon Monolithic mode is useful for getting started quickly to experiment with Loki, as well as for small read/write volumes of up to approximately 20GB per day. -You can horizontally scale a monolithic mode deployment to more instances by using a shared object store, and by configuring the [`ring` section]({{< relref "../configure#common" >}}) of the `loki.yaml` file to share state between all instances, but the recommendation is to use simple scalable mode if you need to scale your deployment. +You can horizontally scale a monolithic mode deployment to more instances by using a shared object store, and by configuring the [`ring` section](https://grafana.com/docs/loki//configure/#common) of the `loki.yaml` file to share state between all instances, but the recommendation is to use simple scalable mode if you need to scale your deployment. You can configure high availability by running two Loki instances using `memberlist_config` configuration and a shared object store and setting the `replication_factor` to `3`. You route traffic to all the Loki instances in a round robin fashion. diff --git a/docs/sources/get-started/hash-rings.md b/docs/sources/get-started/hash-rings.md index e136b0b9136b1..8bb024f4085fb 100644 --- a/docs/sources/get-started/hash-rings.md +++ b/docs/sources/get-started/hash-rings.md @@ -31,10 +31,12 @@ These components need to be connected into a hash ring: - query schedulers - compactors - rulers +- bloom compactors (Experimental) These components can optionally be connected into a hash ring: - index gateway + In an architecture that has three distributors and three ingesters defined, the hash rings for these components connect the instances of same-type components. @@ -53,7 +55,7 @@ For each node, the key-value store holds: ## Configuring rings -Define [ring configuration]({{< relref "../configure#common" >}}) within the `common.ring_config` block. 
+Define [ring configuration](https://grafana.com/docs/loki//configure/#common) within the `common.ring_config` block. Use the default `memberlist` key-value store type unless there is a compelling reason to use a different key-value store type. @@ -102,3 +104,13 @@ The ruler ring is used to determine which rulers evaluate which rule groups. ## About the index gateway ring The index gateway ring is used to determine which gateway is responsible for which tenant's indexes when queried by rulers or queriers. + +## About the Bloom Compactor ring +{{% admonition type="warning" %}} +This feature is an [experimental feature](/docs/release-life-cycle/). Engineering and on-call support is not available. No SLA is provided. +{{% /admonition %}} + +The Bloom Compactor ring is used to determine which subset of compactors own a given tenant, +and which series fingerprint ranges each compactor owns. +The ring is also used to determine which compactor owns retention. +Retention will be applied by the compactor owning the smallest token in the ring. diff --git a/docs/sources/get-started/labels/bp-labels.md b/docs/sources/get-started/labels/bp-labels.md index c2bc2e925c23b..7800345684cee 100644 --- a/docs/sources/get-started/labels/bp-labels.md +++ b/docs/sources/get-started/labels/bp-labels.md @@ -45,7 +45,7 @@ Loki has several client options: [Promtail]({{< relref "../../send-data/promtail Each of these come with ways to configure what labels are applied to create log streams. But be aware of what dynamic labels might be applied. Use the Loki series API to get an idea of what your log streams look like and see if there might be ways to reduce streams and cardinality. -Series information can be queried through the [Series API]({{< relref "../../reference/api" >}}), or you can use [logcli]({{< relref "../../query" >}}). 
+Series information can be queried through the [Series API](https://grafana.com/docs/loki//reference/loki-http-api/), or you can use [logcli]({{< relref "../../query" >}}). In Loki 1.6.0 and newer the logcli series command added the `--analyze-labels` flag specifically for debugging high cardinality labels: diff --git a/docs/sources/get-started/labels/structured-metadata.md b/docs/sources/get-started/labels/structured-metadata.md index e199402e0b000..319af3886d97c 100644 --- a/docs/sources/get-started/labels/structured-metadata.md +++ b/docs/sources/get-started/labels/structured-metadata.md @@ -6,7 +6,7 @@ description: Describes how to enable structure metadata for logs and how to quer # What is structured metadata {{% admonition type="warning" %}} -Structured metadata was added to chunk format V4 which is used if the schema version is greater or equal to `13`. (See [Schema Config]({{< relref "../../storage#schema-config" >}}) for more details about schema versions. ) +Structured metadata was added to chunk format V4 which is used if the schema version is greater or equal to `13`. See [Schema Config](https://grafana.com/docs/loki//configure/storage/#schema-config) for more details about schema versions. {{% /admonition %}} Selecting proper, low cardinality labels is critical to operating and querying Loki effectively. Some metadata, especially infrastructure related metadata, can be difficult to embed in log lines, and is too high cardinality to effectively store as indexed labels (and therefore reducing performance of the index). @@ -15,18 +15,26 @@ Structured metadata is a way to attach metadata to logs without indexing them or kubernetes pod names, process ID's, or any other label that is often used in queries but has high cardinality and is expensive to extract at query time. -Structured metadata can also be used to query commonly needed metadata from log lines without needing to apply a parser at query time. 
Large json blobs or a poorly written query using complex regex patterns, for example, come with a high performance cost. Examples of useful metadata include trace IDs or user IDs. +Structured metadata can also be used to query commonly needed metadata from log lines without needing to apply a parser at query time. Large json blobs or a poorly written query using complex regex patterns, for example, come with a high performance cost. Examples of useful metadata include container_IDs or user IDs. +## When to use structured metadata + +You should only use structured metadata in the following situations: + +- If you are ingesting data in OpenTelemetry format, using the Grafana Agent or an OpenTelemetry Collector. Structured metadata was designed to support native ingestion of OpenTelemetry data. +- If you have high cardinality metadata that should not be used as a label and does not exist in the log line. Some examples might include `process_id` or `thread_id` or Kubernetes pod names. + +It is an antipattern to extract information that already exists in your log lines and put it into structured metadata. ## Attaching structured metadata to log lines You have the option to attach structured metadata to log lines in the push payload along with each log line and the timestamp. -For more information on how to push logs to Loki via the HTTP endpoint, refer to the [HTTP API documentation]({{< relref "../../reference/api#ingest-logs" >}}). +For more information on how to push logs to Loki via the HTTP endpoint, refer to the [HTTP API documentation](https://grafana.com/docs/loki//reference/api/#ingest-logs). Alternatively, you can use the Grafana Agent or Promtail to extract and attach structured metadata to your log lines. -See the [Promtail: Structured metadata stage]({{< relref "../../send-data/promtail/stages/structured_metadata" >}}) for more information. 
+See the [Promtail: Structured metadata stage](https://grafana.com/docs/loki//send-data/promtail/stages/structured_metadata/) for more information. -With Loki version 1.2.0, support for structured metadata has been added to the Logstash output plugin. For more information, see [logstash]({{< relref "../../send-data/logstash/_index.md" >}}). +With Loki version 1.2.0, support for structured metadata has been added to the Logstash output plugin. For more information, see [logstash](https://grafana.com/docs/loki//send-data/logstash/). {{% admonition type="warning" %}} There are defaults for how much structured metadata can be attached per log line. @@ -44,7 +52,7 @@ There are defaults for how much structured metadata can be attached per log line ## Querying structured metadata Structured metadata is extracted automatically for each returned log line and added to the labels returned for the query. -You can use labels of structured metadata to filter log line using a [label filter expression]({{< relref "../../query/log_queries#label-filter-expression" >}}). +You can use labels of structured metadata to filter log line using a [label filter expression](https://grafana.com/docs/loki//query/log_queries/#label-filter-expression). For example, if you have a label `pod` attached to some of your log lines as structured metadata, you can filter log lines using: @@ -58,7 +66,7 @@ Of course, you can filter by multiple labels of structured metadata at the same {job="example"} | pod="myservice-abc1234-56789" | trace_id="0242ac120002" ``` -Note that since structured metadata is extracted automatically to the results labels, some metric queries might return an error like `maximum of series (50000) reached for a single query`. You can use the [Keep]({{< relref "../../query/log_queries#keep-labels-expression" >}}) and [Drop]({{< relref "../../query/log_queries#drop-labels-expression" >}}) stages to filter out labels that you don't need. 
+Note that since structured metadata is extracted automatically to the results labels, some metric queries might return an error like `maximum of series (50000) reached for a single query`. You can use the [Keep](https://grafana.com/docs/loki//query/log_queries/#keep-labels-expression) and [Drop](https://grafana.com/docs/loki//query/log_queries/#drop-labels-expression) stages to filter out labels that you don't need. For example: ```logql diff --git a/docs/sources/get-started/quick-start.md b/docs/sources/get-started/quick-start.md index 70cbfc2c57d21..16e14be923acc 100644 --- a/docs/sources/get-started/quick-start.md +++ b/docs/sources/get-started/quick-start.md @@ -1,13 +1,13 @@ --- -title: Quick start to run Loki locally -menuTitle: Loki quick start -weight: 550 +title: Quickstart to run Loki locally +menuTitle: Loki quickstart +weight: 200 description: How to create and use a simple local Loki cluster for testing and evaluation purposes. --- -# Quick start to run Loki locally +# Quickstart to run Loki locally -If you want to experiment with Loki, you can run Loki locally using the Docker Compose file that ships with Loki. It runs Loki in a [monolithic deployment](https://grafana.com/docs/loki/latest/get-started/deployment-modes/#monolithic-mode) mode and includes a sample application to generate logs. +If you want to experiment with Loki, you can run Loki locally using the Docker Compose file that ships with Loki. It runs Loki in a [monolithic deployment](https://grafana.com/docs/loki//get-started/deployment-modes/#monolithic-mode) mode and includes a sample application to generate logs. 
The Docker Compose configuration instantiates the following components, each in its own container: @@ -24,11 +24,12 @@ The Docker Compose configuration instantiates the following components, each in ## Installing Loki and collecting sample logs Prerequisites + - [Docker](https://docs.docker.com/install) - [Docker Compose](https://docs.docker.com/compose/install) {{% admonition type="note" %}} -This quick start assumes you are running Linux. +This quickstart assumes you are running Linux. {{% /admonition %}} **To install Loki locally, follow these steps:** @@ -57,6 +58,7 @@ This quick start assumes you are running Linux. ``` You should see something similar to the following: + ```bash ✔ Network evaluate-loki_loki Created 0.1s ✔ Container evaluate-loki-minio-1 Started 0.6s @@ -74,7 +76,7 @@ This quick start assumes you are running Linux. ## Viewing your logs in Grafana -Once you have collected logs, you will want to view them. You can view your logs using the command line interface, [LogCLI](/docs/loki/latest/query/logcli/), but the easiest way to view your logs is with Grafana. +Once you have collected logs, you will want to view them. You can view your logs using the command line interface, [LogCLI](/docs/loki//query/logcli/), but the easiest way to view your logs is with Grafana. 1. Use Grafana to query the Loki data source. @@ -84,7 +86,7 @@ Once you have collected logs, you will want to view them. You can view your log 1. From the Grafana main menu, click the **Explore** icon (1) to launch the Explore tab. To learn more about Explore, refer the [Explore](https://grafana.com/docs/grafana/latest/explore/) documentation. -1. From the menu in the dashboard header, select the Loki data source (2). This displays the Loki query editor. In the query editor you use the Loki query language, [LogQL](https://grafana.com/docs/loki/latest/query/), to query your logs. +1. From the menu in the dashboard header, select the Loki data source (2). 
This displays the Loki query editor. In the query editor you use the Loki query language, [LogQL](https://grafana.com/docs/loki//query/), to query your logs. To learn more about the query editor, refer to the [query editor documentation](https://grafana.com/docs/grafana/latest/datasources/loki/query-editor/). 1. The Loki query editor has two modes (3): @@ -99,30 +101,37 @@ Once you have collected logs, you will want to view them. You can view your log Here are some basic sample queries to get you started using LogQL. Note that these queries assume that you followed the instructions to create a directory called `evaluate-loki`. If you installed in a different directory, you’ll need to modify these queries to match your installation directory. After copying any of these queries into the query editor, click **Run Query** (4) to execute the query. 1. View all the log lines which have the container label "flog": + ```bash {container="evaluate-loki-flog-1"} ``` - In Loki, this is called a log stream. Loki uses [labels](https://grafana.com/docs/loki/latest/get-started/labels/) as metadata to describe log streams. Loki queries always start with a label selector. In the query above, the label selector is `container`. + + In Loki, this is called a log stream. Loki uses [labels](https://grafana.com/docs/loki//get-started/labels/) as metadata to describe log streams. Loki queries always start with a label selector. In the query above, the label selector is `container`. 1. To view all the log lines which have the container label "grafana": + ```bash {container="evaluate-loki-grafana-1"} ``` 1. Find all the log lines in the container=flog stream that contain the string "status": + ```bash {container="evaluate-loki-flog-1"} |= `status` ``` 1. Find all the log lines in the container=flog stream where the JSON field "status" is "404": + ```bash {container="evaluate-loki-flog-1"} | json | status=`404` ``` 1. 
Calculate the number of logs per second where the JSON field "status" is "404": + ```bash sum by(container) (rate({container="evaluate-loki-flog-1"} | json | status=`404` [$__auto])) ``` + The final query above is a metric query which returns a time series. This will trigger Grafana to draw a graph of the results. You can change the type of graph for a different view of the data. Click **Bars** to view a bar graph of the data. 1. Click the **Builder** tab (3) to return to Builder mode in the query editor. @@ -131,38 +140,45 @@ Once you have collected logs, you will want to view them. You can view your log 1. Select the first choice, **Parse log lines with logfmt parser**, by clicking **Use this query**. 1. On the Explore tab, click **Label browser**, in the dialog select a container and click **Show logs**. -For a thorough introduction to LogQL, refer to the [LogQL reference](https://grafana.com/docs/loki/latest/query/). +For a thorough introduction to LogQL, refer to the [LogQL reference](https://grafana.com/docs/loki//query/). ## Sample queries (code view) + Here are some more sample queries that you can run using the Flog sample data. To see all the log lines that flog has generated, enter the LogQL query: + ```bash {container="evaluate-loki-flog-1"}|= `` ``` -The flog app generates log lines for simulated HTTP requests. + +The flog app generates log lines for simulated HTTP requests. 
To see all `GET` log lines, enter the LogQL query: + ```bash {container="evaluate-loki-flog-1"} |= "GET" ``` To see all `POST` methods, enter the LogQL query: + ```bash {container="evaluate-loki-flog-1"} |= "POST" ``` To see every log line with a 401 status (unauthorized error), enter the LogQL query: + ```bash {container="evaluate-loki-flog-1"} | json | status="401" ``` To see every log line that does not contain the value 401: + ```bash {container="evaluate-loki-flog-1"} != "401" ``` -For more examples, refer to the [query documentation](https://grafana.com/docs/loki/latest/query/query_examples/). +For more examples, refer to the [query documentation](https://grafana.com/docs/loki//query/query_examples/). ## Complete metrics, logs, traces, and profiling example diff --git a/docs/sources/operations/automatic-stream-sharding.md b/docs/sources/operations/automatic-stream-sharding.md index 1d2d36393c369..04a46fbfe0826 100644 --- a/docs/sources/operations/automatic-stream-sharding.md +++ b/docs/sources/operations/automatic-stream-sharding.md @@ -12,23 +12,28 @@ existing streams. When properly tuned, this should eliminate issues where log pr per-stream rate limit. **To enable automatic stream sharding:** -1. Edit the global [limits_config]({{< relref "../configure#limits_config" >}}) of the Loki configuration file: +1. Edit the global [`limits_config`](https://grafana.com/docs/loki//configure/#limits_config) of the Loki configuration file: + ```yaml limits_config: shard_streams: enabled: true ``` + 1. Optionally lower the `desired_rate` in bytes if you find that the system is still hitting the `per_stream_rate_limit`: + ```yaml limits_config: shard_streams: enabled: true desired_rate: 2097152 #2MiB ``` -1. Optionally enable `logging_enabled` for debugging stream sharding. + +1. Optionally enable `logging_enabled` for debugging stream sharding. {{% admonition type="note" %}} This may affect the ingestion performance of Loki. 
{{% /admonition %}} + ```yaml limits_config: shard_streams: diff --git a/docs/sources/operations/blocking-queries.md b/docs/sources/operations/blocking-queries.md index 3c3468215d96e..1a88342bd3f16 100644 --- a/docs/sources/operations/blocking-queries.md +++ b/docs/sources/operations/blocking-queries.md @@ -10,7 +10,7 @@ In certain situations, you may not be able to control the queries being sent to may be intentionally or unintentionally expensive to run, and they may affect the overall stability or cost of running your service. -You can block queries using [per-tenant overrides]({{< relref "../configure#runtime-configuration-file" >}}), like so: +You can block queries using [per-tenant overrides](https://grafana.com/docs/loki//configure/#runtime-configuration-file), like so: ```yaml overrides: @@ -36,7 +36,7 @@ overrides: types: filter,limited ``` {{% admonition type="note" %}} -Changes to these configurations **do not require a restart**; they are defined in the [runtime configuration file]({{< relref "../configure#runtime-configuration-file" >}}). +Changes to these configurations **do not require a restart**; they are defined in the [runtime configuration file](https://grafana.com/docs/loki//configure/#runtime-configuration-file). {{% /admonition %}} The available query types are: diff --git a/docs/sources/operations/loki-canary/_index.md b/docs/sources/operations/loki-canary/_index.md index d1b68726ee336..cf2a1075d3c06 100644 --- a/docs/sources/operations/loki-canary/_index.md +++ b/docs/sources/operations/loki-canary/_index.md @@ -289,7 +289,7 @@ The `-labelname` and `-labelvalue` flags should also be provided, as these are used by Loki Canary to filter the log stream to only process logs for the current instance of the canary. Ensure that the values provided to the flags are unique to each instance of Loki Canary. Grafana Labs' Tanka config -accomplishes this by passing in the pod name as the label value. 
+accomplishes this by passing in the Pod name as the label value.
 
 If Loki Canary reports a high number of `unexpected_entries`, Loki Canary
 may not be waiting long enough and the value for the `-wait` flag should be
diff --git a/docs/sources/operations/overrides-exporter.md b/docs/sources/operations/overrides-exporter.md
index 6a16d9dbbb040..ef645ca28efde 100644
--- a/docs/sources/operations/overrides-exporter.md
+++ b/docs/sources/operations/overrides-exporter.md
@@ -11,7 +11,7 @@ Loki is a multi-tenant system that supports applying limits to each tenant as a
 
 ## Context
 
-Configuration updates to tenant limits can be applied to Loki without restart via the [`runtime_config`]({{< relref "../configure#runtime_config" >}}) feature.
+Configuration updates to tenant limits can be applied to Loki without restart via the [`runtime_config`](https://grafana.com/docs/loki//configure/#runtime_config) feature.
 
 ## Example
 
diff --git a/docs/sources/operations/query-acceleration-blooms.md b/docs/sources/operations/query-acceleration-blooms.md
new file mode 100644
index 0000000000000..c7449074c0289
--- /dev/null
+++ b/docs/sources/operations/query-acceleration-blooms.md
@@ -0,0 +1,221 @@
+---
+title: Query Acceleration with Blooms (Experimental)
+menuTitle: Query Acceleration with Blooms
+description: Describes how to enable and configure query acceleration with blooms.
+weight: 
+keywords:
+  - blooms
+  - query acceleration
+---
+
+# Query Acceleration with Blooms (Experimental)
+{{% admonition type="warning" %}}
+This feature is an [experimental feature](/docs/release-life-cycle/). Engineering and on-call support is not available. No SLA is provided.
+{{% /admonition %}}
+
+Loki 3.0 leverages [bloom filters](https://en.wikipedia.org/wiki/Bloom_filter) to speed up queries by reducing the
+amount of data Loki needs to load from the store and iterate through. Loki is often used to run “needle in a haystack”
Loki is often used to run “needle in a haystack†+queries; these are queries where a large number of log lines are searched, but only a few log lines match the [filtering +expressions]({{< relref "../query/log_queries#line-filter-expression" >}}) of the query. +Some common use cases are needing to find a specific text pattern in a message, or all logs tied to a specific customer ID. + +An example of such queries would be looking for a trace ID on a whole cluster for the past 24 hours: + +```logql +{cluster="prod"} |= "traceID=3c0e3dcd33e7" +``` + +Loki would download all the chunks for all the streams matching `{cluster=â€prodâ€}` for the last 24 hours and iterate +through each log line in the chunks checking if the string `traceID=3c0e3dcd33e7` is present. + +With accelerated filtering, Loki is able to skip most of the chunks and only process the ones where we have a +statistical confidence that the string might be present. +The underlying blooms are built by the new [Bloom Compactor](#bloom-compactor) component +and served by the new [Bloom Gateway](#bloom-gateway) component. + +## Enable Query Acceleration with Blooms +To start building and using blooms you need to: +- Deploy the [Bloom Compactor](#bloom-compactor) component and enable the component in the [Bloom Compactor config][compactor-cfg]. +- Deploy the [Bloom Gateway](#bloom-gateway) component (as a [microservice][microservices] or via the [SSD][ssd] Backend target) and enable the component in the [Bloom Gateway config][gateway-cfg]. +- Enable blooms filtering and compaction for each tenant individually, or for all of them by default. 
+ +```yaml +bloom_compactor: + enabled: true + +bloom_gateway: + enabled: true + client: + addresses: dnssrvnoa+_bloom-gateway-grpc._tcp.bloom-gateway-headless..svc.cluster.local + +# Enable blooms filtering and compaction for all tenants by default +limits_config: + bloom_gateway_enable_filtering: true + bloom_compactor_enable_compaction: true +``` + +For more configuration options refer to the [Bloom Gateways][gateway-cfg], [Bloom Compactor][compactor-cfg] and +[per tenant-limits][tenant-limits] configuration docs. +We strongly recommend reading the whole documentation for this experimental feature before using it. + +## Bloom Compactor +The Bloom Compactor component builds blooms from the chunks in the object store. +The resulting blooms are grouped in bloom blocks spanning multiple streams (also known as series) and chunks from a given day. +To learn more about how blocks and metadata files are organized, refer to the +[Building and querying blooms](#building-and-querying-blooms) section below. + +Bloom Compactors are horizontally scalable and use a [ring] for sharding tenants and stream fingerprints, +as well as determining which compactor should apply [blooms retention](#retention). +Each compactor owns a configurable number of contiguous streams fingerprint ranges for a tenant. +The compactor builds blooms for all the chunks from the tenant streams whose fingerprint +falls within its owned key-space ranges. + +You can find all the configuration options for this component in the [Configure section for the Bloom Compactor][compactor-cfg]. +Refer to the [Enable Query Acceleration with Blooms](#enable-query-acceleration-with-blooms) section below for +a configuration snippet enabling this feature. + +### Retention +One Bloom Compactor from all those running will apply retention. Retention is disabled by default. +The instance owning the smallest token in the ring owns retention. +Retention is applied to all tenants. 
The retention for each tenant is the longest of its [configured][tenant-limits] +general retention (`retention_period`) and the streams retention (`retention_stream`). + +For example, in the following example, tenant A has a bloom retention of 30 days, +and tenant B a bloom retention of 40 days. + +```yaml +overrides: + "A": + retention: 30d + "B": + retention: 30d + retention_stream: + - selector: '{namespace="prod"}' + priority: 1 + period: 40d +``` + +### Sizing +Compactors build blocks concurrently. Concurrency is [configured][compactor-cfg] via `-bloom-compactor.worker-parallelism`. +Each worker will build bloom blocks from streams and chunks. +The maximum block size is configured per tenant via `-bloom-compactor.max-block-size`. +Note that the actual block size might exceed this limit given that we append streams blooms to the block until the +block is larger than the configured maximum size. Blocks are created in memory and as soon as they are written to the +object store they are freed. Chunks and TSDB files are downloaded from the object store to the file system. +We estimate that compactors are able to process 4 MB worth of data per second per core. + +## Bloom Gateway +Bloom Gateways handle chunks filtering requests from the [index gateway](https://grafana.com/docs/loki//get-started/components/#index-gateway). +The service takes a list of chunks and a filtering expression and matches them against the blooms, +filtering out those chunks not matching the given filter expression. + +This component is horizontally scalable and every instance only owns a subset of the stream +fingerprint range for which it performs the filtering. +The sharding of the data is performed on the client side using DNS discovery of the server instances +and the [jumphash](https://arxiv.org/abs/1406.2294) algorithm for consistent hashing +and even distribution of the stream fingerprints across Bloom Gateway instances. 
+
+You can find all the configuration options for this component in the Configure section for the [Bloom Gateways][gateway-cfg].
+Refer to the [Enable Query Acceleration with Blooms](#enable-query-acceleration-with-blooms) section above for a configuration snippet enabling this feature.
+
+### Sizing
+Bloom Gateways use their local filesystem as a Least Recently Used (LRU) cache for blooms that are
+downloaded from object storage. The size of the blooms depends on the ingest volume and the log content cardinality,
+as well as on compaction settings of the blooms, namely n-gram length, skip-factor, and false-positive-rate.
+With default settings, bloom filters make up roughly 3% of the chunk data.
+
+Example calculation for storage requirements of blooms for a single tenant.
+```
+100 MB/s ingest rate ~> 8.6 TB/day chunks ~> 260 GB/day blooms
+```
+
+Since reading blooms depends heavily on disk IOPS, Bloom Gateways should make use of multiple,
+locally attached SSD disks (NVMe) to increase i/o throughput.
+Multiple directories on different disk mounts can be specified using the `-bloom.shipper.working-directory` [setting][gateway-cfg]
+when using a comma separated list of mount points, for example:
+```
+-bloom.shipper.working-directory="/mnt/data0,/mnt/data1,/mnt/data2,/mnt/data3"
+```
+
+Bloom Gateways need to deal with relatively large files: the bloom filter blocks.
+Even though the binary format of the bloom blocks allows for reading them into memory in smaller pages,
+the memory consumption depends on the amount of pages that are concurrently loaded into memory for processing.
+The product of three settings controls the maximum amount of bloom data in memory at any given
+time: `-bloom-gateway.worker-concurrency`, `-bloom-gateway.block-query-concurrency`, and `-bloom.max-query-page-size`.
+ +Example, assuming 4 CPU cores: +``` +-bloom-gateway.worker-concurrency=4 // 1x NUM_CORES +-bloom-gateway.block-query-concurrency=8 // 2x NUM_CORES +-bloom.max-query-page-size=64MiB + +4 x 8 x 64MiB = 2048MiB +``` + +Here, the memory requirement for block processing is 2GiB. +To get the minimum requirements for the Bloom Gateways, you need to double the value. + +## Building and querying blooms +Bloom filters are built per stream and aggregated together into block files. +Streams are assigned to blocks by their fingerprint, following the same ordering scheme as Loki’s TSDB and sharding calculation. +This gives a data locality benefit when querying as streams in the same shard are likely to be in the same block. + +In addition to blocks, compactors maintain a list of metadata files containing references to bloom blocks and the +TSDB index files they were built from. They also contain tombstones for old blocks which are outdated and +can be deleted in future iterations. Gateways and compactors use these metadata files to discover existing blocks. + +Every `-bloom-compactor.compaction-interval`, compactors will load the latest TSDB files for all tenants for +which bloom compaction is enabled, and compare the TSDB files with the latest bloom metadata files. +If there are new TSDB files or any of them have changed, the compactor will process all the streams and chunks pointed +by the TSDB file. In case of changes for a previously processed TSDB file, +compactors will try to reuse blooms from existing blocks instead of building new ones from scratch. + +For a given stream, the compactor owning that stream will iterate through all the log lines inside its new +chunks and build a bloom for the stream. For each log line, we compute its [n-grams](https://en.wikipedia.org/wiki/N-gram#:~:text=An%20n%2Dgram%20is%20a,pairs%20extracted%20from%20a%20genome.) +and append to the bloom both the hash for each n-gram and the hash for each n-gram plus the chunk identifier. 
+The former allows gateways to skip whole streams while the latter is for skipping individual chunks. + +For example, given a log line `abcdef` in the chunk `c6dj8g`, we compute its n-grams: `abc`, `bcd`, `cde`, `def`. +And append to the stream bloom the following hashes: `hash("abc")`, `hash("abc" + "c6dj8g")` ... `hash("def")`, `hash("def" + "c6dj8g")`. + +By adding n-grams to blooms instead of whole log lines, we can perform partial matches. +For the example above, a filter expression `|= "bcd"` would match against the bloom. +The filter `|= "bcde"` would also match the bloom since we decompose the filter into n-grams: +`bcd`, `cde` which both are present in the bloom. + +N-gram sizes are configurable. The longer the n-gram is, the fewer tokens we need to append to the blooms, +but filtering expressions need to be at least as long as the n-gram to be checked against the blooms. +For the example above, where the n-gram length is 3, we need filtering expressions that have at least 3 characters. + +### Queries for which blooms are used +Loki will check blooms for any log filtering expression within a query that satisfies the following criteria: +- The filtering expression contains at least as many characters as the n-gram length used to build the blooms. + - For example, if the n-gram length is 5, the filter `|= "foo"` will not take advantage of blooms but `|= "foobar"` would. +- If the filter is a regex, we use blooms only if we can simplify the regex to a set of simple matchers. + - For example, `|~ "(error|warn)"` would be simplified into `|= "error" or "warn"` thus would make use of blooms, + whereas `|~ "f.*oo"` would not be simplifiable. +- The filtering expression is a match (`|=`) or regex match (`|~`) filter. We don’t use blooms for not equal (`!=`) or not regex (`!~`) expressions. + - For example, `|= "level=error"` would use blooms but `!= "level=error"` would not. 
+- The filtering expression is placed before a [line format expression](https://grafana.com/docs/loki//query/log_queries/#line-format-expression). + - For example, with `|= "level=error" | logfmt | line_format "ERROR {{.err}}" |= "traceID=3ksn8d4jj3"`, + the first filter (`|= "level=error"`) will benefit from blooms but the second one (`|= "traceID=3ksn8d4jj3"`) will not. + +## Query sharding +Query acceleration does not just happen while processing chunks, +but also happens from the query planning phase where the query frontend applies [query sharding](https://lokidex.com/posts/tsdb/#sharding). +Loki 3.0 introduces a new [per-tenant configuration][tenant-limits] flag `tsdb_sharding_strategy` which defaults to computing +shards as in previous versions of Loki by using the index stats to come up with the closest power of two that would +optimistically divide the data to process in shards of roughly the same size. Unfortunately, +the amount of data each stream has is often unbalanced with the rest, +therefore, some shards end up processing more data than others. + +Query acceleration introduces a new sharding strategy: `bounded`, which uses blooms to reduce the chunks to be +processed right away during the planning phase in the query frontend, +as well as evenly distributes the amount of chunks each sharded query will need to process. 
+ +[ring]: https://grafana.com/docs/loki//get-started/hash-rings/ +[tenant-limits]: https://grafana.com/docs/loki//configure/#limits_config +[gateway-cfg]: https://grafana.com/docs/loki//configure/#bloom_gateway +[compactor-cfg]: https://grafana.com/docs/loki//configure/#bloom_compactor +[microservices]: https://grafana.com/docs/loki//get-started/deployment-modes/#microservices-mode +[ssd]: https://grafana.com/docs/loki//get-started/deployment-modes/#simple-scalable \ No newline at end of file diff --git a/docs/sources/operations/recording-rules.md b/docs/sources/operations/recording-rules.md index afac69b75e271..2254510daf7ee 100644 --- a/docs/sources/operations/recording-rules.md +++ b/docs/sources/operations/recording-rules.md @@ -9,7 +9,7 @@ weight: Recording rules are evaluated by the `ruler` component. Each `ruler` acts as its own `querier`, in the sense that it executes queries against the store without using the `query-frontend` or `querier` components. It will respect all query -[limits]({{< relref "../configure#limits_config" >}}) put in place for the `querier`. +[limits](https://grafana.com/docs/loki//configure/#limits_config) put in place for the `querier`. Loki's implementation of recording rules largely reuses Prometheus' code. @@ -77,8 +77,8 @@ so a `Persistent Volume` should be utilised. ### Per-Tenant Limits Remote-write can be configured at a global level in the base configuration, and certain parameters tuned specifically on -a per-tenant basis. Most of the configuration options [defined here]({{< relref "../configure#ruler" >}}) -have [override options]({{< relref "../configure#limits_config" >}}) (which can be also applied at runtime!). +a per-tenant basis. Most of the configuration options [defined here](https://grafana.com/docs/loki//configure/#ruler) +have [override options](https://grafana.com/docs/loki//configure/#limits_config) (which can be also applied at runtime!). 
### Tuning diff --git a/docs/sources/operations/request-validation-rate-limits.md b/docs/sources/operations/request-validation-rate-limits.md index 726d8af570f04..c5472beac3757 100644 --- a/docs/sources/operations/request-validation-rate-limits.md +++ b/docs/sources/operations/request-validation-rate-limits.md @@ -28,11 +28,11 @@ Rate-limits are enforced when Loki cannot handle more requests from a tenant. This rate-limit is enforced when a tenant has exceeded their configured log ingestion rate-limit. -One solution if you're seeing samples dropped due to `rate_limited` is simply to increase the rate limits on your Loki cluster. These limits can be modified globally in the [`limits_config`](/docs/loki/latest/configuration/#limits_config) block, or on a per-tenant basis in the [runtime overrides](/docs/loki/latest/configuration/#runtime-configuration-file) file. The config options to use are `ingestion_rate_mb` and `ingestion_burst_size_mb`. +One solution if you're seeing samples dropped due to `rate_limited` is simply to increase the rate limits on your Loki cluster. These limits can be modified globally in the [`limits_config`](/docs/loki//configuration/#limits_config) block, or on a per-tenant basis in the [runtime overrides](/docs/loki//configuration/#runtime-configuration-file) file. The config options to use are `ingestion_rate_mb` and `ingestion_burst_size_mb`. Note that you'll want to make sure your Loki cluster has sufficient resources provisioned to be able to accommodate these higher limits. Otherwise your cluster may experience performance degradation as it tries to handle this higher volume of log lines to ingest. - Another option to address samples being dropped due to `rate_limits` is simply to decrease the rate of log lines being sent to your Loki cluster. Consider collecting logs from fewer targets or setting up `drop` stages in Promtail to filter out certain log lines. 
Promtail's [limits configuration](/docs/loki/latest/send-data/promtail/configuration/#limits_config) also gives you the ability to control the volume of logs Promtail remote writes to your Loki cluster. + Another option to address samples being dropped due to `rate_limits` is simply to decrease the rate of log lines being sent to your Loki cluster. Consider collecting logs from fewer targets or setting up `drop` stages in Promtail to filter out certain log lines. Promtail's [limits configuration](/docs/loki//send-data/promtail/configuration/#limits_config) also gives you the ability to control the volume of logs Promtail remote writes to your Loki cluster. | Property | Value | @@ -50,9 +50,9 @@ This limit is enforced when a single stream reaches its rate-limit. Each stream has a rate-limit applied to it to prevent individual streams from overwhelming the set of ingesters it is distributed to (the size of that set is equal to the `replication_factor` value). -This value can be modified globally in the [`limits_config`](/docs/loki/latest/configuration/#limits_config) block, or on a per-tenant basis in the [runtime overrides](/docs/loki/latest/configuration/#runtime-configuration-file) file. The config options to adjust are `per_stream_rate_limit` and `per_stream_rate_limit_burst`. +This value can be modified globally in the [`limits_config`](/docs/loki//configuration/#limits_config) block, or on a per-tenant basis in the [runtime overrides](/docs/loki//configuration/#runtime-configuration-file) file. The config options to adjust are `per_stream_rate_limit` and `per_stream_rate_limit_burst`. -Another option you could consider to decrease the rate of samples dropped due to `per_stream_rate_limit` is to split the stream that is getting rate limited into several smaller streams. 
A third option is to use Promtail's [limit stage](/docs/loki/latest/send-data/promtail/stages/limit/#limit-stage) to limit the rate of samples sent to the stream hitting the `per_stream_rate_limit`. +Another option you could consider to decrease the rate of samples dropped due to `per_stream_rate_limit` is to split the stream that is getting rate limited into several smaller streams. A third option is to use Promtail's [limit stage](/docs/loki//send-data/promtail/stages/limit/#limit-stage) to limit the rate of samples sent to the stream hitting the `per_stream_rate_limit`. We typically recommend setting `per_stream_rate_limit` no higher than 5MB, and `per_stream_rate_limit_burst` no higher than 20MB. @@ -71,7 +71,7 @@ This limit is enforced when a tenant reaches their maximum number of active stre Active streams are held in memory buffers in the ingesters, and if this value becomes sufficiently large then it will cause the ingesters to run out of memory. -This value can be modified globally in the [`limits_config`](/docs/loki/latest/configuration/#limits_config) block, or on a per-tenant basis in the [runtime overrides](/docs/loki/latest/configuration/#runtime-configuration-file) file. To increase the allowable active streams, adjust `max_global_streams_per_user`. Alternatively, the number of active streams can be reduced by removing extraneous labels or removing excessive unique label values. +This value can be modified globally in the [`limits_config`](/docs/loki//configuration/#limits_config) block, or on a per-tenant basis in the [runtime overrides](/docs/loki//configuration/#runtime-configuration-file) file. To increase the allowable active streams, adjust `max_global_streams_per_user`. Alternatively, the number of active streams can be reduced by removing extraneous labels or removing excessive unique label values. 
| Property | Value | |-------------------------|-------------------------| @@ -90,7 +90,7 @@ Validation errors occur when a request violates a validation rule defined by Lok This error occurs when a log line exceeds the maximum allowable length in bytes. The HTTP response will include the stream to which the offending log line belongs as well as its size in bytes. -This value can be modified globally in the [`limits_config`](/docs/loki/latest/configuration/#limits_config) block, or on a per-tenant basis in the [runtime overrides](/docs/loki/latest/configuration/#runtime-configuration-file) file. To increase the maximum line size, adjust `max_line_size`. We recommend that you do not increase this value above 256kb for performance reasons. Alternatively, Loki can be configured to ingest truncated versions of log lines over the length limit by using the `max_line_size_truncate` option. +This value can be modified globally in the [`limits_config`](/docs/loki//configuration/#limits_config) block, or on a per-tenant basis in the [runtime overrides](/docs/loki//configuration/#runtime-configuration-file) file. To increase the maximum line size, adjust `max_line_size`. We recommend that you do not increase this value above 256kb for performance reasons. Alternatively, Loki can be configured to ingest truncated versions of log lines over the length limit by using the `max_line_size_truncate` option. | Property | Value | |-------------------------|------------------| @@ -129,9 +129,9 @@ This validation error is returned when a stream is submitted without any labels. The `too_far_behind` and `out_of_order` reasons are identical. Loki clusters with `unordered_writes=true` (the default value as of Loki v2.4) use `reason=too_far_behind`. Loki clusters with `unordered_writes=false` use `reason=out_of_order`. -This validation error is returned when a stream is submitted out of order. 
More details can be found [here](/docs/loki/latest/configuration/#accept-out-of-order-writes) about Loki's ordering constraints. +This validation error is returned when a stream is submitted out of order. More details can be found [here](/docs/loki//configuration/#accept-out-of-order-writes) about Loki's ordering constraints. -The `unordered_writes` config value can be modified globally in the [`limits_config`](/docs/loki/latest/configuration/#limits_config) block, or on a per-tenant basis in the [runtime overrides](/docs/loki/latest/configuration/#runtime-configuration-file) file, whereas `max_chunk_age` is a global configuration. +The `unordered_writes` config value can be modified globally in the [`limits_config`](/docs/loki//configuration/#limits_config) block, or on a per-tenant basis in the [runtime overrides](/docs/loki//configuration/#runtime-configuration-file) file, whereas `max_chunk_age` is a global configuration. This problem can be solved by ensuring that log delivery is configured correctly, or by increasing the `max_chunk_age` value. @@ -148,7 +148,7 @@ It is recommended to resist modifying the default value of `max_chunk_age` as th If the `reject_old_samples` config option is set to `true` (it is by default), then samples will be rejected with `reason=greater_than_max_sample_age` if they are older than the `reject_old_samples_max_age` value. You should not see samples rejected for `reason=greater_than_max_sample_age` if `reject_old_samples=false`. -This value can be modified globally in the [`limits_config`](/docs/loki/latest/configuration/#limits_config) block, or on a per-tenant basis in the [runtime overrides](/docs/loki/latest/configuration/#runtime-configuration-file) file. This error can be solved by increasing the `reject_old_samples_max_age` value, or investigating why log delivery is delayed for this particular stream. The stream in question will be returned in the body of the HTTP response. 
+This value can be modified globally in the [`limits_config`](/docs/loki//configuration/#limits_config) block, or on a per-tenant basis in the [runtime overrides](/docs/loki//configuration/#runtime-configuration-file) file. This error can be solved by increasing the `reject_old_samples_max_age` value, or investigating why log delivery is delayed for this particular stream. The stream in question will be returned in the body of the HTTP response. | Property | Value | |-------------------------|-------------------| @@ -163,7 +163,7 @@ This value can be modified globally in the [`limits_config`](/docs/loki/latest/c If a sample's timestamp is greater than the current timestamp, Loki allows for a certain grace period during which samples will be accepted. If the grace period is exceeded, the error will occur. -This value can be modified globally in the [`limits_config`](/docs/loki/latest/configuration/#limits_config) block, or on a per-tenant basis in the [runtime overrides](/docs/loki/latest/configuration/#runtime-configuration-file) file. This error can be solved by increasing the `creation_grace_period` value, or investigating why this particular stream has a timestamp too far into the future. The stream in question will be returned in the body of the HTTP response. +This value can be modified globally in the [`limits_config`](/docs/loki//configuration/#limits_config) block, or on a per-tenant basis in the [runtime overrides](/docs/loki//configuration/#runtime-configuration-file) file. This error can be solved by increasing the `creation_grace_period` value, or investigating why this particular stream has a timestamp too far into the future. The stream in question will be returned in the body of the HTTP response. 
| Property | Value | |-------------------------|-------------------| @@ -178,7 +178,7 @@ This value can be modified globally in the [`limits_config`](/docs/loki/latest/c If a sample is submitted with more labels than Loki has been configured to allow, it will be rejected with the `max_label_names_per_series` reason. Note that 'series' is the same thing as a 'stream' in Loki - the 'series' term is a legacy name. -This value can be modified globally in the [`limits_config`](/docs/loki/latest/configuration/#limits_config) block, or on a per-tenant basis in the [runtime overrides](/docs/loki/latest/configuration/#runtime-configuration-file) file. This error can be solved by increasing the `max_label_names_per_series` value. The stream to which the offending sample (i.e. the one with too many label names) belongs will be returned in the body of the HTTP response. +This value can be modified globally in the [`limits_config`](/docs/loki//configuration/#limits_config) block, or on a per-tenant basis in the [runtime overrides](/docs/loki//configuration/#runtime-configuration-file) file. This error can be solved by increasing the `max_label_names_per_series` value. The stream to which the offending sample (i.e. the one with too many label names) belongs will be returned in the body of the HTTP response. | Property | Value | |-------------------------|-------------------| @@ -193,7 +193,7 @@ This value can be modified globally in the [`limits_config`](/docs/loki/latest/c If a sample is sent with a label name that has a length in bytes greater than Loki has been configured to allow, it will be rejected with the `label_name_too_long` reason. -This value can be modified globally in the [`limits_config`](/docs/loki/latest/configuration/#limits_config) block, or on a per-tenant basis in the [runtime overrides](/docs/loki/latest/configuration/#runtime-configuration-file) file. 
This error can be solved by increasing the `max_label_name_length` value, though we do not recommend raising it significantly above the default value of `1024` for performance reasons. The offending stream will be returned in the body of the HTTP response. +This value can be modified globally in the [`limits_config`](/docs/loki//configuration/#limits_config) block, or on a per-tenant basis in the [runtime overrides](/docs/loki//configuration/#runtime-configuration-file) file. This error can be solved by increasing the `max_label_name_length` value, though we do not recommend raising it significantly above the default value of `1024` for performance reasons. The offending stream will be returned in the body of the HTTP response. | Property | Value | |-------------------------|-------------------| @@ -208,7 +208,7 @@ This value can be modified globally in the [`limits_config`](/docs/loki/latest/c If a sample has a label value with a length in bytes greater than Loki has been configured to allow, it will be rejected for the `label_value_too_long` reason. -This value can be modified globally in the [`limits_config`](/docs/loki/latest/configuration/#limits_config) block, or on a per-tenant basis in the [runtime overrides](/docs/loki/latest/configuration/#runtime-configuration-file) file. This error can be solved by increasing the `max_label_value_length` value. The offending stream will be returned in the body of the HTTP response. +This value can be modified globally in the [`limits_config`](/docs/loki//configuration/#limits_config) block, or on a per-tenant basis in the [runtime overrides](/docs/loki//configuration/#runtime-configuration-file) file. This error can be solved by increasing the `max_label_value_length` value. The offending stream will be returned in the body of the HTTP response. 
| Property | Value | |-------------------------|-------------------| diff --git a/docs/sources/operations/scalability.md b/docs/sources/operations/scalability.md index e916e2bbdbe70..f7117ff103477 100644 --- a/docs/sources/operations/scalability.md +++ b/docs/sources/operations/scalability.md @@ -17,7 +17,7 @@ and scaling for resource usage. The Query frontend has an in-memory queue that can be moved out into a separate process similar to the [Grafana Mimir query-scheduler](/docs/mimir/latest/operators-guide/architecture/components/query-scheduler/). This allows running multiple query frontends. -To run with the Query Scheduler, the frontend needs to be passed the scheduler's address via `-frontend.scheduler-address` and the querier processes needs to be started with `-querier.scheduler-address` set to the same address. Both options can also be defined via the [configuration file]({{< relref "../configure/_index.md" >}}). +To run with the Query Scheduler, the frontend needs to be passed the scheduler's address via `-frontend.scheduler-address` and the querier processes needs to be started with `-querier.scheduler-address` set to the same address. Both options can also be defined via the [configuration file](https://grafana.com/docs/loki//configure). It is not valid to start the querier with both a configured frontend and a scheduler address. diff --git a/docs/sources/operations/storage/_index.md b/docs/sources/operations/storage/_index.md index 75740348d0367..f1947d072b56c 100644 --- a/docs/sources/operations/storage/_index.md +++ b/docs/sources/operations/storage/_index.md @@ -2,11 +2,10 @@ title: Manage storage menuTitle: Storage description: Describes Loki's storage needs and supported stores. 
-weight: --- # Manage storage -You can read a high level overview of Loki storage [here]({{< relref "../../storage/_index.md" >}}) +You can read a high level overview of Loki storage [here](https://grafana.com/docs/loki//configure/storage/) Grafana Loki needs to store two different types of data: **chunks** and **indexes**. @@ -18,21 +17,21 @@ format](#chunk-format) for how chunks are stored internally. The **index** stores each stream's label set and links them to the individual chunks. -Refer to Loki's [configuration]({{< relref "../../configure" >}}) for details on +Refer to Loki's [configuration](https://grafana.com/docs/loki//configure/) for details on how to configure the storage and the index. For more information: -- [Table Manager]({{< relref "./table-manager" >}}) -- [Retention]({{< relref "./retention" >}}) -- [Logs Deletion]({{< relref "./logs-deletion" >}}) +- [Table Manager](https://grafana.com/docs/loki//operations/storage/table-manager/) +- [Retention](https://grafana.com/docs/loki//operations/storage/retention/) +- [Logs Deletion](https://grafana.com/docs/loki//operations/storage/logs-deletion/) ## Supported Stores The following are supported for the index: -- [TSDB]({{< relref "./tsdb" >}}) index store which stores TSDB index files in the object store. This is the recommended index store for Loki 2.8 and newer. -- [Single Store (boltdb-shipper)]({{< relref "./boltdb-shipper" >}}) index store which stores boltdb index files in the object store. +- [TSDB](https://grafana.com/docs/loki//operations/storage/tsdb/) index store which stores TSDB index files in the object store. This is the recommended index store for Loki 2.8 and newer. +- [Single Store (boltdb-shipper)](https://grafana.com/docs/loki//operations/storage/boltdb-shipper/) index store which stores boltdb index files in the object store. 
- [Amazon DynamoDB](https://aws.amazon.com/dynamodb) - [Google Bigtable](https://cloud.google.com/bigtable) - [Apache Cassandra](https://cassandra.apache.org) @@ -76,7 +75,7 @@ When using S3 as object storage, the following permissions are needed: Resources: `arn:aws:s3:::`, `arn:aws:s3:::/*` -See the [AWS deployment section]({{< relref "../../storage#aws-deployment-s3-single-store" >}}) on the storage page for a detailed setup guide. +See the [AWS deployment section](https://grafana.com/docs/loki//configure/storage/#aws-deployment-s3-single-store) on the storage page for a detailed setup guide. ### DynamoDB @@ -134,7 +133,7 @@ Resources: `arn:aws:iam:::role/` When using IBM Cloud Object Storage (COS) as object storage, IAM `Writer` role is needed. -See the [IBM Cloud Object Storage section]({{< relref "../../storage#ibm-deployment-cos-single-store" >}}) on the storage page for a detailed setup guide. +See the [IBM Cloud Object Storage section](https://grafana.com/docs/loki//configure/storage/#ibm-deployment-cos-single-store) on the storage page for a detailed setup guide. ## Chunk Format diff --git a/docs/sources/operations/storage/boltdb-shipper.md b/docs/sources/operations/storage/boltdb-shipper.md index df32b95f3eedf..0b299806a0adc 100644 --- a/docs/sources/operations/storage/boltdb-shipper.md +++ b/docs/sources/operations/storage/boltdb-shipper.md @@ -117,14 +117,14 @@ Within Kubernetes, if you are not using an Index Gateway, we recommend running Q An Index Gateway downloads and synchronizes the BoltDB index from the Object Storage in order to serve index queries to the Queriers and Rulers over gRPC. This avoids running Queriers and Rulers with a disk for persistence. Disks can become costly in a big cluster. -To run an Index Gateway, configure [StorageConfig]({{< relref "../../configure#storage_config" >}}) and set the `-target` CLI flag to `index-gateway`. 
-To connect Queriers and Rulers to the Index Gateway, set the address (with gRPC port) of the Index Gateway with the `-boltdb.shipper.index-gateway-client.server-address` CLI flag or its equivalent YAML value under [StorageConfig]({{< relref "../../configure#storage_config" >}}). +To run an Index Gateway, configure [StorageConfig](https://grafana.com/docs/loki//configure/#storage_config) and set the `-target` CLI flag to `index-gateway`. +To connect Queriers and Rulers to the Index Gateway, set the address (with gRPC port) of the Index Gateway with the `-boltdb.shipper.index-gateway-client.server-address` CLI flag or its equivalent YAML value under [StorageConfig](https://grafana.com/docs/loki//configure/#storage_config). When using the Index Gateway within Kubernetes, we recommend using a StatefulSet with persistent storage for downloading and querying index files. This can obtain better read performance, avoids [noisy neighbor problems](https://en.wikipedia.org/wiki/Cloud_computing_issues#Performance_interference_and_noisy_neighbors) by not using the node disk, and avoids the time consuming index downloading step on startup after rescheduling to a new node. ### Write Deduplication disabled -Loki does write deduplication of chunks and index using Chunks and WriteDedupe cache respectively, configured with [ChunkStoreConfig]({{< relref "../../configure#chunk_store_config" >}}). +Loki does write deduplication of chunks and index using Chunks and WriteDedupe cache respectively, configured with [ChunkStoreConfig](https://grafana.com/docs/loki//configure/#chunk_store_config). The problem with write deduplication when using `boltdb-shipper` though is ingesters only keep uploading boltdb files periodically to make them available to all the other services which means there would be a brief period where some of the services would not have received updated index yet. 
The problem due to that is if an ingester which first wrote the chunks and index goes down and all the other ingesters which were part of replication scheme skipped writing those chunks and index due to deduplication, we would end up missing those logs from query responses since only the ingester which had the index went down. This problem would be faced even during rollouts which is quite common. diff --git a/docs/sources/operations/storage/logs-deletion.md b/docs/sources/operations/storage/logs-deletion.md index cb0337c7bc904..eef99609ffd32 100644 --- a/docs/sources/operations/storage/logs-deletion.md +++ b/docs/sources/operations/storage/logs-deletion.md @@ -11,7 +11,7 @@ Log entries that fall within a specified time window and match an optional line Log entry deletion is supported _only_ when TSDB or BoltDB shipper is configured as the index store. -The compactor component exposes REST [endpoints]({{< relref "../../reference/api#compactor" >}}) that process delete requests. +The compactor component exposes REST [endpoints](https://grafana.com/docs/loki//reference/loki-http-api#compactor) that process delete requests. Hitting the endpoint specifies the streams and the time window. The deletion of the log entries takes place after a configurable cancellation time period expires. diff --git a/docs/sources/operations/storage/retention.md b/docs/sources/operations/storage/retention.md index 96880a43374e6..d211109786384 100644 --- a/docs/sources/operations/storage/retention.md +++ b/docs/sources/operations/storage/retention.md @@ -16,7 +16,7 @@ If you have a lifecycle policy configured on the object store, please ensure tha Granular retention policies to apply retention at per tenant or per stream level are also supported by the Compactor. {{% admonition type="note" %}} -The Compactor does not support retention on [legacy index types]({{< relref "../../storage#index-storage" >}}). 
Please use the [Table Manager]({{< relref "./table-manager" >}}) when using legacy index types. +The Compactor does not support retention on [legacy index types](https://grafana.com/docs/loki//configure/storage/#index-storage). Please use the [Table Manager](https://grafana.com/docs/loki//operations/storage/table-manager/) when using legacy index types. Both the Table manager and legacy index types are deprecated and may be removed in future major versions of Loki. {{% /admonition %}} @@ -50,7 +50,7 @@ Chunks cannot be deleted immediately for the following reasons: Marker files should be stored on a persistent disk to ensure that the chunks pending for deletion are processed even if the Compactor process restarts. {{% admonition type="note" %}} -We recommend running Compactor as a stateful deployment (StatefulSet when using Kubernetes) with a persistent storage for storing marker files. +Grafana Labs recommends running Compactor as a stateful deployment (StatefulSet when using Kubernetes) with a persistent storage for storing marker files. {{% /admonition %}} ### Retention Configuration @@ -72,7 +72,7 @@ schema_config: period: 24h prefix: index_ object_store: gcs - schema: v12 + schema: v13 store: tsdb storage_config: tsdb_shipper: @@ -100,7 +100,7 @@ Retention is only available if the index period is 24h. Single store TSDB and si #### Configuring the retention period -Retention period is configured within the [`limits_config`]({{< relref "../../configure#limits_config" >}}) configuration section. +Retention period is configured within the [`limits_config`](https://grafana.com/docs/loki//configure/#limits_config) configuration section. There are two ways of setting retention policies: @@ -129,7 +129,7 @@ limits_config: You can only use label matchers in the `selector` field of a `retention_stream` definition. Arbitrary LogQL expressions are not supported. 
{{% /admonition %}} -Per tenant retention can be defined by configuring [runtime overrides]({{< relref "../../configure#runtime-configuration-file" >}}). For example: +Per tenant retention can be defined by configuring [runtime overrides](https://grafana.com/docs/loki//configure/#runtime-configuration-file). For example: ```yaml overrides: @@ -181,13 +181,13 @@ The example configurations defined above will result in the following retention ## Table Manager (deprecated) -Retention through the [Table Manager]({{< relref "./table-manager" >}}) is +Retention through the [Table Manager](https://grafana.com/docs/loki//operations/storage/table-manager/) is achieved by relying on the object store TTL feature, and will work for both -[boltdb-shipper]({{< relref "./boltdb-shipper" >}}) store and chunk/index stores. +[boltdb-shipper](https://grafana.com/docs/loki//operations/storage/boltdb-shipper/) store and chunk/index stores. In order to enable the retention support, the Table Manager needs to be configured to enable deletions and a retention period. Please refer to the -[`table_manager`]({{< relref "../../configure#table_manager" >}}) +[`table_manager`](https://grafana.com/docs/loki//configure/#table_manager) section of the Loki configuration reference for all available options. Alternatively, the `table-manager.retention-period` and `table-manager.retention-deletes-enabled` command line flags can be used. The @@ -196,15 +196,13 @@ can be parsed using the Prometheus common model [ParseDuration](https://pkg.go.d {{% admonition type="warning" %}} The retention period must be a multiple of the index and chunks table -`period`, configured in the [`period_config`]({{< relref "../../configure#period_config" >}}) -block. See the [Table Manager]({{< relref "./table-manager#retention" >}}) documentation for +`period`, configured in the [`period_config`](https://grafana.com/docs/loki//configure/#period_config) block. 
+See the [Table Manager](https://grafana.com/docs/loki//operations/storage/table-manager/#retention) documentation for more information. {{% /admonition %}} {{% admonition type="note" %}} -To avoid querying of data beyond the retention period, -`max_query_lookback` config in [`limits_config`]({{< relref "../../configure#limits_config" >}}) must be set to a value less than or equal to -what is set in `table_manager.retention_period`. +To avoid querying of data beyond the retention period, `max_query_lookback` config in [`limits_config`](https://grafana.com/docs/loki//configure/#limits_config) must be set to a value less than or equal to what is set in `table_manager.retention_period`. {{% /admonition %}} When using S3 or GCS, the bucket storing the chunks needs to have the expiry @@ -225,7 +223,7 @@ intact; you will still be able to see related labels but will be unable to retrieve the deleted log content. For further details on the Table Manager internals, refer to the -[Table Manager]({{< relref "./table-manager" >}}) documentation. +[Table Manager](https://grafana.com/docs/loki//operations/storage/table-manager/) documentation. ## Example Configuration @@ -238,7 +236,7 @@ schema_config: - from: 2018-04-15 store: tsdb object_store: gcs - schema: v12 + schema: v13 index: prefix: loki_index_ period: 24h diff --git a/docs/sources/operations/storage/schema/_index.md b/docs/sources/operations/storage/schema/_index.md index e5a14b3ed8a61..9cdca92cb09f2 100644 --- a/docs/sources/operations/storage/schema/_index.md +++ b/docs/sources/operations/storage/schema/_index.md @@ -14,6 +14,32 @@ Loki uses the defined schemas to determine which format to use when storing and Use of a schema allows Loki to iterate over the storage layer without requiring migration of existing data. 
+## New Loki installs +For a new Loki install with no previous data, here is an example schema configuration with recommended values + +``` +schema_config: + configs: + - from: 2024-04-01 + object_store: s3 + store: tsdb + schema: v13 + index: + prefix: index_ + period: 24h +``` + + +| Property | Description | +|--------------|--------------------------------------------------------------------------------------------------------------------------------------------------------| +| from | for a new install, this must be a date in the past, use a recent date. Format is YYYY-MM-DD. | +| object_store | s3, azure, gcs, alibabacloud, bos, cos, swift, filesystem, or a named_store (see [StorageConfig](https://grafana.com/docs/loki//configure/#storage_config)). | +| store | `tsdb` is the current and only recommended value for store. | +| schema | `v13` is the most recent schema and recommended value. | +| prefix: | any value without spaces is acceptable. | +| period: | must be `24h`. | + + ## Changing the schema Here are items to consider when changing the schema; if schema changes are not done properly, a scenario can be created which prevents data from being read. @@ -23,7 +49,7 @@ Here are items to consider when changing the schema; if schema changes are not d Be aware of your relation to UTC when using the current date. Make sure that UTC 00:00:00 has not already passed for your current date. - As an example, assume that the current date is 2022-04-10, and you want to update to the v12 schema, so you restart Loki with 2022-04-11 as the `from` date for the new schema. If you forget to take into account that your timezone is UTC -5:00 and it’s currently 20:00 hours in your local timezone, that is actually 2022-04-11T01:00:00 UTC. When Loki starts it will see the new schema and begin to write and store objects following that new schema. 
If you then try to query data that was written between 00:00:00 and 01:00:00 UTC, Loki will use the new schema and the data will be unreadable, because it was created with the previous schema. + As an example, assume that the current date is 2022-04-10, and you want to update to the v13 schema, so you restart Loki with 2022-04-11 as the `from` date for the new schema. If you forget to take into account that your timezone is UTC -5:00 and it’s currently 20:00 hours in your local timezone, that is actually 2022-04-11T01:00:00 UTC. When Loki starts it will see the new schema and begin to write and store objects following that new schema. If you then try to query data that was written between 00:00:00 and 01:00:00 UTC, Loki will use the new schema and the data will be unreadable, because it was created with the previous schema. - You cannot undo or roll back a schema change. @@ -33,19 +59,19 @@ Here are items to consider when changing the schema; if schema changes are not d ``` schema_config: - configs: - - from: "2020-07-31" - index: - period: 24h - prefix: loki_ops_index_ - object_store: gcs - schema: v11 - store: tsdb - - from: "2022-01-20" - index: - period: 24h - prefix: loki_ops_index_ - object_store: gcs - schema: v12 - store: tsdb + configs: + - from: "2020-07-31" + index: + period: 24h + prefix: loki_ops_index_ + object_store: gcs + schema: v11 + store: tsdb + - from: "2022-01-20" + index: + period: 24h + prefix: loki_ops_index_ + object_store: gcs + schema: v13 + store: tsdb ``` diff --git a/docs/sources/operations/storage/table-manager/_index.md b/docs/sources/operations/storage/table-manager/_index.md index 0e6ba42cc71ff..148d1fdf7043d 100644 --- a/docs/sources/operations/storage/table-manager/_index.md +++ b/docs/sources/operations/storage/table-manager/_index.md @@ -7,7 +7,7 @@ weight: # Table manager {{% admonition type="note" %}} -Table manager is only needed if you are using a multi-store [backend]({{< relref "../../storage" >}}). 
If you are using either TSDB (recommended), or BoltDB (deprecated) you do not need the Table Manager. +Table manager is only needed if you are using a multi-store [backend](https://grafana.com/docs/loki//configure/storage/). If you are using either TSDB (recommended), or BoltDB (deprecated) you do not need the Table Manager. {{% /admonition %}} Grafana Loki supports storing indexes and chunks in table-based data storages. When @@ -49,7 +49,7 @@ to store chunks, are not managed by the Table Manager, and a custom bucket polic should be set to delete old data. For detailed information on configuring the Table Manager, refer to the -[`table_manager`]({{< relref "../../../configure#table_manager" >}}) +[`table_manager`](https://grafana.com/docs/loki//configure/#table_manager) section in the Loki configuration document. @@ -58,10 +58,10 @@ section in the Loki configuration document. A periodic table stores the index or chunk data relative to a specific period of time. The duration of the time range of the data stored in a single table and its storage type is configured in the -[`schema_config`]({{< relref "../../../configure#schema_config" >}}) configuration +[`schema_config`](https://grafana.com/docs/loki//configure/#schema_config) configuration block. -The [`schema_config`]({{< relref "../../../configure#schema_config" >}}) can contain +The [`schema_config`](https://grafana.com/docs/loki//configure/#schema_config) can contain one or more `configs`. Each config, defines the storage used between the day set in `from` (in the format `yyyy-mm-dd`) and the next config, or "now" in the case of the last schema config entry. @@ -115,7 +115,7 @@ order to make sure that the new table is ready once the current table end period is reached. 
The `creation_grace_period` property - in the -[`table_manager`]({{< relref "../../../configure#table_manager" >}}) +[`table_manager`](https://grafana.com/docs/loki//configure/#table_manager) configuration block - defines how long before a table should be created. @@ -161,7 +161,7 @@ documentation. A table can be active or inactive. A table is considered **active** if the current time is within the range: -- Table start period - [`creation_grace_period`]({{< relref "../../../configure#table_manager" >}}) +- Table start period - [`creation_grace_period`](https://grafana.com/docs/loki//configure/#table_manager) - Table end period + max chunk age (hardcoded to `12h`) ![active_vs_inactive_tables](./table-manager-active-vs-inactive-tables.png) diff --git a/docs/sources/operations/storage/tsdb.md b/docs/sources/operations/storage/tsdb.md index 82e3a6dbf33db..8f640f83f3bdd 100644 --- a/docs/sources/operations/storage/tsdb.md +++ b/docs/sources/operations/storage/tsdb.md @@ -29,7 +29,7 @@ schema_config: period: 24h prefix: index_ object_store: gcs - schema: v12 + schema: v13 store: tsdb storage_config: @@ -73,7 +73,7 @@ We've added a user per-tenant limit called `tsdb_max_query_parallelism` in the ` ### Dynamic Query Sharding -Previously we would statically shard queries based on the index row shards configured [here]({{< relref "../../configure#period_config" >}}). +Previously we would statically shard queries based on the index row shards configured [here](https://grafana.com/docs/loki//configure/#period_config). TSDB does Dynamic Query Sharding based on how much data a query is going to be processing. We additionally store size(KB) and number of lines for each chunk in the TSDB index which is then used by the [Query Frontend]({{< relref "../../get-started/components#query-frontend" >}}) for planning the query. Based on our experience from operating many Loki clusters, we have configured TSDB to aim for processing 300-600 MBs of data per query shard. 
@@ -81,4 +81,4 @@ This means with TSDB we will be running more, smaller queries. ### Index Caching not required -TSDB is a compact and optimized format. Loki does not currently use an index cache for TSDB. If you are already using Loki with other index types, it is recommended to keep the index caching until all of your existing data falls out of [retention]({{< relref "./retention" >}}) or your configured `max_query_lookback` under [limits_config]({{< relref "../../configure#limits_config" >}}). After that, we suggest running without an index cache (it isn't used in TSDB). +TSDB is a compact and optimized format. Loki does not currently use an index cache for TSDB. If you are already using Loki with other index types, it is recommended to keep the index caching until all of your existing data falls out of [retention](https://grafana.com/docs/loki//operations/storage/retention/) or your configured `max_query_lookback` under [limits_config](https://grafana.com/docs/loki//configure/#limits_config). After that, we suggest running without an index cache (it isn't used in TSDB). diff --git a/docs/sources/operations/storage/wal.md b/docs/sources/operations/storage/wal.md index 6baf78adc5f4e..2bf9010c948bc 100644 --- a/docs/sources/operations/storage/wal.md +++ b/docs/sources/operations/storage/wal.md @@ -38,7 +38,7 @@ The WAL also includes a backpressure mechanism to allow a large WAL to be replay ## Changes to deployment -1. Since ingesters need to have the same persistent volume across restarts/rollout, all the ingesters should be run on [statefulset](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/) with fixed volumes. +1. Since ingesters need to have the same persistent volume across restarts/rollout, all the ingesters should be run on [StatefulSet](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/) with fixed volumes. 2. 
Following flags needs to be set * `--ingester.wal-enabled` to `true` which enables writing to WAL during ingestion. @@ -48,7 +48,9 @@ The WAL also includes a backpressure mechanism to allow a large WAL to be replay ## Changes in lifecycle when WAL is enabled -1. Flushing of data to chunk store during rollouts or scale down is disabled. This is because during a rollout of statefulset there are no ingesters that are simultaneously leaving and joining, rather the same ingester is shut down and brought back again with updated config. Hence flushing is skipped and the data is recovered from the WAL. + +Flushing of data to chunk store during rollouts or scale down is disabled. This is because during a rollout of statefulset there are no ingesters that are simultaneously leaving and joining, rather the same ingester is shut down and brought back again with updated config. Hence flushing is skipped and the data is recovered from the WAL. If you need to ensure that data is always flushed to the chunk store when your pod shuts down, you can set the `--ingester.flush-on-shutdown` flag to `true`. + ## Disk space requirements @@ -62,7 +64,7 @@ You should not target 100% disk utilisation. ## Migrating from stateless deployments -The ingester _deployment without WAL_ and _statefulset with WAL_ should be scaled down and up respectively in sync without transfer of data between them to ensure that any ingestion after migration is reliable immediately. +The ingester _Deployment without WAL_ and _StatefulSet with WAL_ should be scaled down and up respectively in sync without transfer of data between them to ensure that any ingestion after migration is reliable immediately. Let's take an example of 4 ingesters. The migration would look something like this: @@ -83,25 +85,30 @@ Scaling up is same as what you would do without WAL or statefulsets. Nothing to When scaling down, we must ensure existing data on the leaving ingesters are flushed to storage instead of just the WAL. 
This is because we won't be replaying the WAL on an ingester that will no longer exist and we need to make sure the data is not orphaned. -Consider you have 4 ingesters `ingester-0 ingester-1 ingester-2 ingester-3` and you want to scale down to 2 ingesters, the ingesters which will be shutdown according to statefulset rules are `ingester-3` and then `ingester-2`. +Consider you have 4 ingesters `ingester-0 ingester-1 ingester-2 ingester-3` and you want to scale down to 2 ingesters, the ingesters which will be shut down according to StatefulSet rules are `ingester-3` and then `ingester-2`. -Hence before actually scaling down in Kubernetes, port forward those ingesters and hit the [`/ingester/shutdown?flush=true`]({{< relref "../../reference/api#flush-in-memory-chunks-and-shut-down" >}}) endpoint. This will flush the chunks and remove itself from the ring, after which it will register as unready and may be deleted. +Hence before actually scaling down in Kubernetes, port forward those ingesters and hit the [`/ingester/shutdown?flush=true`](https://grafana.com/docs/loki//reference/loki-http-api#flush-in-memory-chunks-and-shut-down) endpoint. This will flush the chunks and remove itself from the ring, after which it will register as unready and may be deleted. After hitting the endpoint for `ingester-2 ingester-3`, scale down the ingesters to 2. +Also you can set the `--ingester.flush-on-shutdown` flag to `true`. This enables chunks to be flushed to long-term storage when the ingester is shut down. + + ## Additional notes ### Kubernetes hacking Statefulsets are significantly more cumbersome to work with, upgrade, and so on. Much of this stems from immutable fields on the specification. For example, if one wants to start using the WAL with single store Loki and wants separate volume mounts for the WAL and the boltdb-shipper, you may see immutability errors when attempting updates the Kubernetes statefulsets. 
-In this case, try `kubectl -n delete sts ingester --cascade=false`. This will leave the pods alive but delete the statefulset. Then you may recreate the (updated) statefulset and one-by-one start deleting the `ingester-0` through `ingester-n` pods _in that order_, allowing the statefulset to spin up new pods to replace them. +In this case, try `kubectl -n delete sts ingester --cascade=false`. +This will leave the Pods alive but delete the StatefulSet. +Then you may recreate the (updated) StatefulSet and one-by-one start deleting the `ingester-0` through `ingester-n` Pods _in that order_, allowing the StatefulSet to spin up new pods to replace them. #### Scaling Down Using `/flush_shutdown` Endpoint and Lifecycle Hook 1. **StatefulSets for Ordered Scaling Down**: Loki's ingesters should be scaled down one by one, which is efficiently handled by Kubernetes StatefulSets. This ensures an ordered and reliable scaling process, as described in the [Deployment and Scaling Guarantees](https://kubernetes.io/docs/concepts/workloads/controllers/statefulset/#deployment-and-scaling-guarantees) documentation. -2. **Using PreStop Lifecycle Hook**: During the pod scaling down process, the PreStop [lifecycle hook](https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/) triggers the `/flush_shutdown` endpoint on the ingester. This action flushes the chunks and removes the ingester from the ring, allowing it to register as unready and become eligible for deletion. +2. **Using PreStop Lifecycle Hook**: During the Pod scaling down process, the PreStop [lifecycle hook](https://kubernetes.io/docs/concepts/containers/container-lifecycle-hooks/) triggers the `/flush_shutdown` endpoint on the ingester. This action flushes the chunks and removes the ingester from the ring, allowing it to register as unready and become eligible for deletion. 3. 
**Using terminationGracePeriodSeconds**: Provides time for the ingester to flush its data before being deleted, if flushing data takes more than 30 minutes, you may need to increase it. diff --git a/docs/sources/operations/troubleshooting.md b/docs/sources/operations/troubleshooting.md index 914a432f30686..d99436e181a00 100644 --- a/docs/sources/operations/troubleshooting.md +++ b/docs/sources/operations/troubleshooting.md @@ -81,7 +81,7 @@ Loki cache generation number errors(Loki >= 2.6) - Check the metric `loki_delete_cache_gen_load_failures_total` on `/metrics`, which is an indicator for the occurrence of the problem. If the value is greater than 1, it means that there is a problem with that component. - Try Http GET request to route: /loki/api/v1/cache/generation_numbers - - If response is equal as `"deletion is not available for this tenant"`, this means the deletion API is not enabled for the tenant. To enable this api, set `allow_deletes: true` for this tenant via the configuration settings. Check more docs: /docs/loki/latest/operations/storage/logs-deletion/ + - If the response is equal to `"deletion is not available for this tenant"`, this means the deletion API is not enabled for the tenant. To enable this API, set `allow_deletes: true` for this tenant via the configuration settings. Check more [deletion docs](/docs/loki//operations/storage/logs-deletion/) ## Troubleshooting targets @@ -123,14 +123,14 @@ promtail -log.level=debug The Promtail configuration contains a `__path__` entry to a directory that Promtail cannot find. -## Connecting to a Promtail pod to troubleshoot +## Connecting to a Promtail Pod to troubleshoot First check [Troubleshooting targets](#troubleshooting-targets) section above. -If that doesn't help answer your questions, you can connect to the Promtail pod +If that doesn't help answer your questions, you can connect to the Promtail Pod to investigate further. 
If you are running Promtail as a DaemonSet in your cluster, you will have a -Promtail pod on each node, so figure out which Promtail you need to debug first: +Promtail Pod on each node, so figure out which Promtail you need to debug first: ```shell @@ -145,10 +145,10 @@ promtail-bth9q 1/1 Running 0 3h 10.56. That output is truncated to highlight just the two pods we are interested in, you can see with the `-o wide` flag the NODE on which they are running. -You'll want to match the node for the pod you are interested in, in this example +You'll want to match the node for the Pod you are interested in, in this example NGINX, to the Promtail running on the same node. -To debug you can connect to the Promtail pod: +To debug you can connect to the Promtail Pod: ```shell kubectl exec -it promtail-bth9q -- /bin/sh @@ -182,12 +182,12 @@ $ helm upgrade --install loki loki/loki --set "loki.tracing.enabled=true" ## Running Loki with Istio Sidecars -An Istio sidecar runs alongside a pod. It intercepts all traffic to and from the pod. -When a pod tries to communicate with another pod using a given protocol, Istio inspects the destination's service using [Protocol Selection](https://istio.io/latest/docs/ops/configuration/traffic-management/protocol-selection/). +An Istio sidecar runs alongside a Pod. It intercepts all traffic to and from the Pod. +When a Pod tries to communicate with another Pod using a given protocol, Istio inspects the destination's service using [Protocol Selection](https://istio.io/latest/docs/ops/configuration/traffic-management/protocol-selection/). This mechanism uses a convention on the port name (for example, `http-my-port` or `grpc-my-port`) to determine how to handle this outgoing traffic. Istio can then do operations such as authorization and smart routing. -This works fine when one pod communicates with another pod using a hostname. But, +This works fine when one Pod communicates with another Pod using a hostname. 
But, Istio does not allow pods to communicate with other pods using IP addresses, unless the traffic type is `tcp`. diff --git a/docs/sources/operations/upgrade.md b/docs/sources/operations/upgrade.md new file mode 100644 index 0000000000000..8b47232dff5bb --- /dev/null +++ b/docs/sources/operations/upgrade.md @@ -0,0 +1,11 @@ +--- +title: Upgrade +description: Links to Loki upgrade documentation. +weight: +--- + +# Upgrade + +- [Upgrade](https://grafana.com/docs/loki//setup/upgrade/) from one Loki version to a newer version. + +- [Upgrade Helm](https://grafana.com/docs/loki//setup/upgrade/) from Helm v2.x to Helm v3.x. diff --git a/docs/sources/operations/zone-ingesters.md b/docs/sources/operations/zone-ingesters.md index 63c6c843fde8f..ded92065b2255 100644 --- a/docs/sources/operations/zone-ingesters.md +++ b/docs/sources/operations/zone-ingesters.md @@ -7,25 +7,25 @@ weight: # Zone aware ingesters -Loki's zone aware ingesters are used by Grafana Labs in order to allow for easier rollouts of large Loki deployments. You can think of them as three logical zones, however with some extra Kubernetes config you could deploy them in separate zones. +Loki's zone aware ingesters are used by Grafana Labs in order to allow for easier rollouts of large Loki deployments. You can think of them as three logical zones, however with some extra Kubernetes configuration you could deploy them in separate zones. -By default, an incoming log stream's logs are replicated to 3 random ingesters. Except in the case of some replica scaling up or down, a given stream will always be replicated to the same 3 ingesters. This means that if one of those ingesters is restarted no data is lost, but two restarting can mean data is lost and also impacts the systems ability to ingest logs because of an unhealthy ring status. +By default, an incoming log stream's logs are replicated to 3 random ingesters. 
Except in the case of some replica scaling up or down, a given stream will always be replicated to the same 3 ingesters. This means that if one of those ingesters is restarted no data is lost. However, two or more ingesters restarting can result in data loss and also impacts the system's ability to ingest logs because of an unhealthy ring status. -With zone awareness enabled, an incoming log line will be replicated to one ingester in each zone. This means that we're not only concerned if ingesters in multiple zones restart at the same time. We can now rollout, or lose, an entire zone at once and not impact the system. This allows deployments with a large number of ingesters to be deployed too much more quickly. +With zone awareness enabled, an incoming log line will be replicated to one ingester in each zone. This means that we're not only concerned about ingesters in multiple zones restarting at the same time, we can now rollout or lose an entire zone at once without impacting writes. This allows deployments with a large number of ingesters to be deployed much more quickly. -We also make use of [rollout-operator](https://github.com/grafana/rollout-operator) to manage rollouts to the 3 StatefulSets gracefully. The rollout-operator looks for labels on StatefulSets to know which StatefulSets are part of a certain rollout group, and coordinate rollouts of pods only from a single StatefulSet in the group at a time. See the README in the rollout-operator repo. for a more in depth explanation. +At Grafana Labs, we also make use of [rollout-operator](https://github.com/grafana/rollout-operator) to manage rollouts to the 3 StatefulSets gracefully. The rollout-operator looks for labels on StatefulSets to know which ones are part of a certain rollout group, and coordinates rollouts of pods only from a single StatefulSet in the group at a time. See the README in the rollout-operator repo for a more in-depth explanation. 
## Migration Migrating from a single ingester StatefulSet to 3 zone aware ingester StatefulSets. The migration follows a few general steps, regardless of deployment method. 1. Configure your existing ingesters to be part of a zone, for example `zone-default`, this will allow us to later exclude them from the write path while still allowing for graceful shutdowns. -1. Prep for the increase in active streams (due to the way streams are split between ingesters) by increasing the # of active streams allowed for your tenants. -1. Add and scale up your new zone aware ingester StatefulSets such that each has 1/3 of the total # of replicas you want to run. -1. Enable zone awareness on the write path by setting `distributor.zone-awareness-enabled` to true for distributors and rulers. -1. Wait some time to ensure that the new zone aware ingesters have data for the time period they are queried for (`query_ingesters_within`). +1. Prep for the increase in active streams (due to the way streams are split between ingesters) by increasing the number of active streams allowed for your tenants. +1. Add and scale up your new zone-aware ingester StatefulSets such that each has 1/3rd of the total number of replicas you want to run. +1. Enable zone awareness on the write path by setting `distributor.zone-awareness-enabled` to `true` for distributors and rulers. +1. Wait some time to ensure that the new zone-aware ingesters have data for the time period they are queried for (`query_ingesters_within`). 1. Enable zone awareness on the read path by setting `distributor.zone-awareness-enabled` to true for queriers. -1. Configure distributors and rulers to exclude ingesters in the `zone-default` so those ingesters no longer receive write traffic via `distributor.excluded-zones`. +1. Configure distributors and rulers to exclude ingesters in the `zone-default` so those ingesters no longer receive write traffic using `distributor.excluded-zones`. 1. 
Use the shutdown endpoint to flush data from the default ingesters, then scale down and remove the associated StatefulSet. 1. Clean up any config remaining from the migration. @@ -35,32 +35,31 @@ The following are steps to live migrate (no downtime) an existing Loki deploymen These instructions assume you are using the zone aware ingester jsonnet deployment code from this repo, see [here](https://github.com/grafana/loki/blob/main/production/ksonnet/loki/multi-zone.libsonnet). **If you are not using jsonnet see the relevant annotations in some steps that describe how to perform that step manually.** -1. Configure the zone for the existing “ingester†StatefulSet as zone-default by setting multi_zone_default_ingester_zone: true, this allows us to later filter out that zone from the write path. -1. Configure ingester-pdb with maxUnavailable=0 and deploy 3x zone-aware StatefulSets with 0 replicas by setting +1. Configure the zone for the existing ingester StatefulSet as `zone-default` by setting `multi_zone_default_ingester_zone: true`, this allows us to later filter out that zone from the write path. +1. Configure ingester-pdb with `maxUnavailable` as 0 and deploy 3x zone-aware StatefulSets with 0 replicas by setting - ``` + ```jsonnet _config+:: { - multi_zone_ingester_enabled: true, - multi_zone_ingester_migration_enabled: true, - multi_zone_ingester_replicas: 0, - // These last two lines are necessary now that we enable zone aware ingester by default - // so that newly created cells will not be migrated later on. If you miss them you will - // break writes in the cell. - multi_zone_ingester_replication_write_path_enabled: false, - multi_zone_ingester_replication_read_path_enabled: false, + multi_zone_ingester_enabled: true, + multi_zone_ingester_migration_enabled: true, + multi_zone_ingester_replicas: 0, + // These last two lines are necessary now that we enable zone aware ingester by default + // so that newly created cells will not be migrated later on. 
If you miss them you will + // break writes in the cell. + multi_zone_ingester_replication_write_path_enabled: false, + multi_zone_ingester_replication_read_path_enabled: false, }, - ``` - - If you're not using jsonnet the new ingester StatefulSets should have a label with `rollout-group: ingester`, annotation `rollout-max-unavailable: x` (put a placeholder value in, later you should set the value of this to be some portion of the StatefulSets total replicas, for example in jsonnet we template this so that each StatefulSet runs 1/3 of the total replicas and the max unavailable is 1/3 of each StatefulSets replicas), and set the update strategy to `OnDelete`. + + If you're not using jsonnet, the new ingester StatefulSets should have a label with `rollout-group: ingester`, annotation `rollout-max-unavailable: x` (put a placeholder value in, later you should set the value of this to be some portion of the StatefulSets total replicas, for example in jsonnet we template this so that each StatefulSet runs 1/3 of the total replicas and the max unavailable is 1/3 of each StatefulSets replicas), and set the update strategy to `OnDelete`. 1. Diff ingester and ingester-zone-a StatefulSets and make sure all config matches - ``` + ```bash kubectl get statefulset -n loki-dev-008 ingester -o yaml > ingester.yaml kubectl get statefulset -n loki-dev-008 ingester-zone-a -o yaml > ingester-zone-a.yaml diff ingester.yaml ingester-zone-a.yaml ``` - expected diffs are things like: creation time and revision #, the zone, fields used by rollout operator, # of replicas, anything related to kustomize/flux, and PVC for the wal since the containers don't exist yet. + Expected in diffs are values like: creation time and revision number, the zone, fields used by rollout operator, number of replicas, anything related to kustomize/flux, and PVC for the WAL since the containers don't exist yet. 1. 
Temporarily double max series limits for users that are using more than 50% of their current limit, the queries are as follows (add label selectors as appropriate): ``` sum by (tenant)(sum (loki_ingester_memory_streams) by (cluster, namespace, tenant) / on (namespace) group_left max by(namespace) (loki_distributor_replication_factor)) @@ -76,15 +75,15 @@ These instructions assume you are using the zone aware ingester jsonnet deployme unless on (tenant) ( (label_replace(loki_overrides{limit_name="max_global_streams_per_user"},"tenant", "$1", "user", "(.+)"))) ``` -1. Scale up zone-aware StatefulSets until they have ⅓ of replicas each. In small cells you can do this all at once, in larger cells it might be safer to do it in chunks. The config value you need to change is `multi_zone_ingester_replicas: 6`, the value will be split across the three StatefulSets. So in this case each StatefulSet would run 2 replicas. +1. Scale up zone-aware StatefulSets until they have 1/3rd of replicas each. In smaller cells you can do this all at once, in larger cells it is safer to do it in chunks. The config value you need to change is `multi_zone_ingester_replicas: 6`, the value will be split across the three StatefulSets. In this case, each StatefulSet would run 2 replicas. - If you're not using jsonnet this is the step where you would also set the annotation `rollout-max-unavailable` to some value that is less than or equal to the # of replicas each StatefulSet is running. + If you're not using jsonnet, this is the step where you would also set the annotation `rollout-max-unavailable` to some value that is less than or equal to the number of replicas each StatefulSet is running. -1. enable zone awareness on the write path via `multi_zone_ingester_replication_write_path_enabled: true`, this causes distributors and rulers to reshuffle series to distributors in each zone, be sure to check that all the distributors and rulers have restarted properly. +1.
Enable zone awareness on the write path by setting `multi_zone_ingester_replication_write_path_enabled: true`, this causes distributors and rulers to reshuffle series to distributors in each zone. Be sure to check that all the distributors and rulers have restarted properly. - If you're not using jsonnet enable zone awareness on the write path by setting `distributor.zone-awareness-enabled` to true for distributors and rulers. + If you're not using jsonnet, enable zone awareness on the write path by setting `distributor.zone-awareness-enabled` to true for distributors and rulers. -1. Wait `query_ingesters_within` configured hours, by default this is 3h. This ensures that no data will be missing if we query a new ingester. However, because we cut chunks at least every 30m due to `chunk_idle_period` we can likely reduce this amount of time. +1. Wait for `query_ingesters_within` configured hours. The default is `3h`. This ensures that no data will be missing if we query a new ingester. However, because we cut chunks at least every 30m due to `chunk_idle_period` we can likely reduce this amount of time. 1. Check that rule evaluations are still correct on the migration, look for increases in the rate for metrics with names with the following suffixes: @@ -92,25 +91,24 @@ These instructions assume you are using the zone aware ingester jsonnet deployme rule_evaluations_total rule_evaluation_failures_total rule_group_iterations_missed_total - ``` -1. Enable zone-aware replication on the read path `multi_zone_ingester_replication_read_path_enabled: true` or if you're not using jsonnet set `distributor.zone-awareness-enabled` to true for queriers. +1. Enable zone-aware replication on the read path `multi_zone_ingester_replication_read_path_enabled: true`. If you're not using jsonnet, set `distributor.zone-awareness-enabled` to true for queriers. 1. 
Check that queries are still executing correctly, for example look at `loki_logql_querystats_latency_seconds_count` to see that you don't have a big increase in latency or error count for a specific query type. -1. Configure distributor / ruler to exclude ingesters in the “zone-default” so those ingesters no longer receive write traffic `multi_zone_ingester_exclude_default: true` or if you're not using jsonnet set `distributor.excluded-zones` on distributors and rulers. +1. Configure distributor / ruler to exclude ingesters in the `zone-default` so those ingesters no longer receive write traffic by setting `multi_zone_ingester_exclude_default: true`. If you're not using jsonnet set `distributor.excluded-zones` on distributors and rulers. - It's a good idea to check rules evaluations again at this point, and also that the zone aware ingester StatefulSet is now receiving all the write traffic, you can compare `sum(loki_ingester_memory_streams{cluster="",job=~"()/ingester"})` to `sum(loki_ingester_memory_streams{cluster="",job=~"()/ingester-zone.*"})` + It is a good idea to check rules evaluations again at this point, and also that the zone aware ingester StatefulSet is now receiving all the write traffic, you can compare `sum(loki_ingester_memory_streams{cluster="",job=~"()/ingester"})` to `sum(loki_ingester_memory_streams{cluster="",job=~"()/ingester-zone.*"})` -1. if you're using an automated reconcilliation/deployment system like flux, disable it now (for example using flux ignore), if possible for just the default ingester StatefulSet +1. If you're using an automated reconciliation or deployment system like flux, disable it now (for example using flux ignore) if possible for just the default ingester StatefulSet. -1. Shutdown flush the default ingesters, unregistering them from the ring, you can do this by port-forwarding each ingester pod and using the endpoint: `"http://url:PORT/ingester/shutdown?flush=true&delete_ring_tokens=true&terminate=false"` +1.
Shutdown flush the default ingesters, unregistering them from the ring, you can do this by port-forwarding each ingester Pod and using the endpoint: `"http://url:PORT/ingester/shutdown?flush=true&delete_ring_tokens=true&terminate=false"` -1. manually scale down the default ingester StatefulSet to 0 replicas, we do this via `tk apply` but you could do it via modifying the yaml +1. manually scale down the default ingester StatefulSet to 0 replicas, we do this via `tk apply` but you could do it via modifying the yaml. -1. merge a PR to your central config repo to keep the StatefulSet 0'd, and then remove the flux ignore +1. merge a PR to your central config repo to keep the StatefulSet 0'd, and then remove the flux ignore. -1. clean up any remaining temporary config from the migration, for example `multi_zone_ingester_migration_enabled: true` is no longer needed +1. clean up any remaining temporary config from the migration, for example `multi_zone_ingester_migration_enabled: true` is no longer needed. -1. ensure that all the old default ingester PVC/PV are removed \ No newline at end of file +1. ensure that all the old default ingester PVC/PV are removed. \ No newline at end of file diff --git a/docs/sources/query/_index.md b/docs/sources/query/_index.md index cf2aeb3f8acbc..0746d199b7b06 100644 --- a/docs/sources/query/_index.md +++ b/docs/sources/query/_index.md @@ -137,6 +137,38 @@ Same as above, but vectors have their values set to `1` if they pass the compari sum without(app) (count_over_time({app="foo"}[1m])) > bool sum without(app) (count_over_time({app="bar"}[1m])) ``` +### Pattern match filter operators + +- `|>` (line match pattern) +- `!>` (line match not pattern) + +Pattern Filter not only enhances efficiency but also simplifies the process of writing LogQL queries. By eliminating the need for complex regex patterns, users can create queries using a more intuitive syntax, reducing the cognitive load and potential for errors. 
+ +Within the pattern syntax the `<_>` serves as a wildcard, representing any arbitrary text. This allows the query to match log lines where the specified pattern occurs, such as log lines containing static content, with variable content in between. + +Line match pattern example: + +```logql +{service_name=`distributor`} |> `<_> caller=http.go:194 level=debug <_> msg="POST /push.v1.PusherService/Push <_>` +``` + +Line match not pattern example: + +```logql +{service_name=`distributor`} !> `<_> caller=http.go:194 level=debug <_> msg="POST /push.v1.PusherService/Push <_>` +``` + +For example, the example queries above will respectively match and not match the following log line from the `distributor` service: + +```log +ts=2024-04-05T08:40:13.585911094Z caller=http.go:194 level=debug traceID=23e54a271db607cc orgID=3648 msg="POST /push.v1.PusherService/Push (200) 12.684035ms" +ts=2024-04-05T08:41:06.551403339Z caller=http.go:194 level=debug traceID=54325a1a15b42e2d orgID=1218 msg="POST /push.v1.PusherService/Push (200) 1.664285ms" +ts=2024-04-05T08:41:06.506524777Z caller=http.go:194 level=debug traceID=69d4271da1595bcb orgID=1218 msg="POST /push.v1.PusherService/Push (200) 1.783818ms" +ts=2024-04-05T08:41:06.473740396Z caller=http.go:194 level=debug traceID=3b8ec973e6397814 orgID=3648 msg="POST /push.v1.PusherService/Push (200) 1.893987ms" +ts=2024-04-05T08:41:05.88999067Z caller=http.go:194 level=debug traceID=6892d7ef67b4d65c orgID=3648 msg="POST /push.v1.PusherService/Push (200) 2.314337ms" +ts=2024-04-05T08:41:05.826266414Z caller=http.go:194 level=debug traceID=0bb76e910cfd008d orgID=3648 msg="POST /push.v1.PusherService/Push (200) 3.625744ms" +``` + ### Order of operations When chaining or combining operators, you have to consider operator precedence: diff --git a/docs/sources/query/log_queries/_index.md b/docs/sources/query/log_queries/_index.md index eb66dd0b1e5c5..1f8968ecbc20a 100644 --- a/docs/sources/query/log_queries/_index.md +++ 
b/docs/sources/query/log_queries/_index.md @@ -95,7 +95,7 @@ An example that mutates is the expression ``` -Log pipeline expressions fall into one of three categories: +Log pipeline expressions fall into one of four categories: - Filtering expressions: [line filter expressions](#line-filter-expression) and @@ -104,6 +104,7 @@ and - Formatting expressions: [line format expressions](#line-format-expression) and [label format expressions](#labels-format-expression) +- Labels expressions: [drop labels expression](#drop-labels-expression) and [keep labels expression](#keep-labels-expression) ### Line filter expression diff --git a/docs/sources/query/logcli.md b/docs/sources/query/logcli.md index 297730a589ee4..aa09a3c4712f4 100644 --- a/docs/sources/query/logcli.md +++ b/docs/sources/query/logcli.md @@ -229,7 +229,7 @@ Commands: For more information about log queries and metric queries, refer to the LogQL documentation: - https://grafana.com/docs/loki/latest/logql/ + https://grafana.com/docs/loki//logql/ labels [] [