diff --git a/.devcontainer/devcontainer.json b/.devcontainer/devcontainer.json index 2b6fb69076ca..b4c223478cc6 100644 --- a/.devcontainer/devcontainer.json +++ b/.devcontainer/devcontainer.json @@ -1,5 +1,5 @@ { - "image": "grafana/loki-build-image:0.33.6", + "image": "grafana/loki-build-image:0.34.0", "containerEnv": { "BUILD_IN_CONTAINER": "false" }, diff --git a/.drone/docker-manifest-build-image.tmpl b/.drone/docker-manifest-build-image.tmpl deleted file mode 100644 index b870dd5c6165..000000000000 --- a/.drone/docker-manifest-build-image.tmpl +++ /dev/null @@ -1,17 +0,0 @@ -image: grafana/{{config.target}} -tags: -{{#if build.tags}} -{{#each build.tags}} - - {{this}} -{{/each}} -{{/if}} -manifests: - - image: grafana/{{config.target}}-amd64 - platform: - architecture: amd64 - os: linux - - image: grafana/{{config.target}}-arm64 - platform: - architecture: arm64 - os: linux - variant: v8 diff --git a/.drone/docker-manifest-ecr.tmpl b/.drone/docker-manifest-ecr.tmpl deleted file mode 100644 index 1c8c98417ac0..000000000000 --- a/.drone/docker-manifest-ecr.tmpl +++ /dev/null @@ -1,21 +0,0 @@ -image: public.ecr.aws/grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}} -tags: - - main -{{#if build.tag}} - - latest -{{/if}} -{{#if build.tags}} -{{#each build.tags}} - - {{this}} -{{/each}} -{{/if}} -manifests: - - image: public.ecr.aws/grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}}-amd64 - platform: - architecture: amd64 - os: linux - - image: public.ecr.aws/grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}}-arm64 - platform: - architecture: arm64 - os: linux - variant: v8 diff --git a/.drone/docker-manifest-operator.tmpl b/.drone/docker-manifest-operator.tmpl deleted file mode 100644 index f5aaa400867d..000000000000 --- a/.drone/docker-manifest-operator.tmpl +++ /dev/null @@ -1,26 +0,0 @@ -image: grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "operator/v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}} -tags: - - main -{{#if build.tag}} - - latest -{{/if}} -{{#if build.tags}} -{{#each build.tags}} - - {{this}} -{{/each}} -{{/if}} -manifests: - - image: grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "operator/v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}}-amd64 - platform: - architecture: amd64 - os: linux - - image: grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "operator/v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}}-arm64 - platform: - architecture: arm64 - os: linux - variant: v8 - - image: grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "operator/v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}}-arm - platform: - architecture: arm - os: linux - variant: v7 diff --git a/.drone/docker-manifest.tmpl b/.drone/docker-manifest.tmpl deleted file mode 100644 index 7191e602b4ac..000000000000 --- a/.drone/docker-manifest.tmpl +++ /dev/null @@ -1,26 +0,0 @@ -image: grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}} -tags: - - main -{{#if build.tag}} - - latest -{{/if}} -{{#if build.tags}} -{{#each build.tags}} - - {{this}} -{{/each}} -{{/if}} -manifests: - - image: grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "v" 
build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}}-amd64 - platform: - architecture: amd64 - os: linux - - image: grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}}-arm64 - platform: - architecture: arm64 - os: linux - variant: v8 - - image: grafana/{{config.target}}:{{#if build.tag}}{{trimPrefix "v" build.tag}}{{else}}{{build.branch}}-{{substr 0 7 build.commit}}{{/if}}-arm - platform: - architecture: arm - os: linux - variant: v7 diff --git a/.drone/drone.jsonnet b/.drone/drone.jsonnet deleted file mode 100644 index 718f0a84aa88..000000000000 --- a/.drone/drone.jsonnet +++ /dev/null @@ -1,669 +0,0 @@ -local apps = ['loki', 'loki-canary', 'loki-canary-boringcrypto', 'logcli']; -local archs = ['amd64', 'arm64', 'arm']; - -local build_image_version = std.extVar('__build-image-version'); - -local drone_updater_plugin_image = 'us.gcr.io/kubernetes-dev/drone/plugins/updater@sha256:cbcb09c74f96a34c528f52bf9b4815a036b11fed65f685be216e0c8b8e84285b'; - -local onPRs = { - event: ['pull_request'], -}; - -local onTagOrMain = { - event: ['push', 'tag'], -}; - -local onTag = { - event: ['tag'], -}; - -local onPath(path) = { - paths+: [path], -}; - -local pipeline(name) = { - kind: 'pipeline', - name: name, - steps: [], - trigger: { - // Only trigger pipelines for PRs, tags (v*), or pushes to "main". Excluding runs on grafana/loki (non fork) branches - ref: ['refs/heads/main', 'refs/heads/k???', 'refs/tags/v*', 'refs/pull/*/head'], - }, -}; - -local secret(name, vault_path, vault_key) = { - kind: 'secret', - name: name, - get: { - path: vault_path, - name: vault_key, - }, -}; -local docker_username_secret = secret('docker_username', 'infra/data/ci/docker_hub', 'username'); -local docker_password_secret = secret('docker_password', 'infra/data/ci/docker_hub', 'password'); -local ecr_key = secret('ecr_key', 'infra/data/ci/loki/aws-credentials', 'access_key_id'); -local ecr_secret_key = secret('ecr_secret_key', 'infra/data/ci/loki/aws-credentials', 'secret_access_key'); -local pull_secret = secret('dockerconfigjson', 'secret/data/common/gcr', '.dockerconfigjson'); -local github_secret = secret('github_token', 'infra/data/ci/github/grafanabot', 'pat'); -local gpg_passphrase = secret('gpg_passphrase', 'infra/data/ci/packages-publish/gpg', 'passphrase'); -local gpg_private_key = secret('gpg_private_key', 'infra/data/ci/packages-publish/gpg', 'private-key'); - -// Injected in a secret because this is a public repository and having the config here would leak our environment names -local updater_config_template = secret('updater_config_template', 'secret/data/common/loki_ci_autodeploy', 'updater-config-template.json'); -local helm_chart_auto_update_config_template = secret('helm-chart-update-config-template', 'secret/data/common/loki-helm-chart-auto-update', 'on-loki-release-config.json'); - - -local run(name, commands, env={}, image='grafana/loki-build-image:%s' % build_image_version) = { - name: name, - image: image, - commands: commands, - environment: env, -}; - -local make(target, container=true, args=[]) = run(target, [ - std.join(' ', [ - 'make', - 'BUILD_IN_CONTAINER=' + container, - target, - ] + args), -]); - -// The only indication we have that we're running in a fork is the presence of a secret. -// If a secret is blank, it means we're running in a fork. 
-local skipMissingSecretPipelineStep(secretName) = run( - 'skip pipeline if missing secret', - [ - 'if [ "$${#TEST_SECRET}" -eq 0 ]; then', - ' echo "Missing a secret to run this pipeline. This branch needs to be re-pushed as a branch in main grafana/loki repository in order to run." && exit 78', - 'fi', - ], - image='alpine', - env={ - TEST_SECRET: { from_secret: secretName }, - }, -); - -local docker(arch, app) = { - name: '%s-image' % if $.settings.dry_run then 'build-' + app else 'publish-' + app, - image: if arch == 'arm' then 'plugins/docker:linux-arm' else 'plugins/docker', - settings: { - repo: 'grafana/%s' % app, - dockerfile: 'cmd/%s/Dockerfile' % app, - username: { from_secret: docker_username_secret.name }, - password: { from_secret: docker_password_secret.name }, - dry_run: false, - }, -}; - -local clients_docker(arch, app) = { - name: '%s-image' % if $.settings.dry_run then 'build-' + app else 'publish-' + app, - image: if arch == 'arm' then 'plugins/docker:linux-arm' else 'plugins/docker', - settings: { - repo: 'grafana/%s' % app, - dockerfile: 'clients/cmd/%s/Dockerfile' % app, - username: { from_secret: docker_username_secret.name }, - password: { from_secret: docker_password_secret.name }, - dry_run: false, - }, -}; - -local docker_operator(arch, operator) = { - name: '%s-image' % if $.settings.dry_run then 'build-' + operator else 'publish-' + operator, - image: if arch == 'arm' then 'plugins/docker:linux-arm' else 'plugins/docker', - settings: { - repo: 'grafana/%s' % operator, - context: 'operator', - dockerfile: 'operator/Dockerfile', - username: { from_secret: docker_username_secret.name }, - password: { from_secret: docker_password_secret.name }, - dry_run: false, - }, -}; - -local lambda_promtail_ecr(app) = { - name: '%s-image' % if $.settings.dry_run then 'build-' + app else 'publish-' + app, - image: 'cstyan/ecr', - privileged: true, - settings: { - repo: 'public.ecr.aws/grafana/lambda-promtail', - registry: 'public.ecr.aws/grafana', - dockerfile: 'tools/%s/Dockerfile' % app, - access_key: { from_secret: ecr_key.name }, - secret_key: { from_secret: ecr_secret_key.name }, - dry_run: false, - region: 'us-east-1', - }, -}; - -local arch_image(arch, tags='') = { - platform: { - os: 'linux', - arch: arch, - }, - steps: [{ - name: 'image-tag', - image: 'alpine', - commands: [ - 'apk add --no-cache bash git', - 'git fetch origin --tags', - 'echo $(./tools/image-tag)-%s > .tags' % arch, - ] + if tags != '' then ['echo ",%s" >> .tags' % tags] else [], - }], -}; - -local querytee() = pipeline('querytee-amd64') + arch_image('amd64', 'main') { - steps+: [ - // publish for tag or main - docker('amd64', 'querytee') { - depends_on: ['image-tag'], - when: onTagOrMain, - settings+: { - repo: 'grafana/loki-query-tee', - }, - }, - ], -}; - -local fluentbit(arch) = pipeline('fluent-bit-' + arch) + arch_image(arch) { - steps+: [ - // publish for tag or main - clients_docker(arch, 'fluent-bit') { - depends_on: ['image-tag'], - when: onTagOrMain, - settings+: { - repo: 'grafana/fluent-bit-plugin-loki', - }, - }, - ], -}; - -local fluentd() = pipeline('fluentd-amd64') + arch_image('amd64', 'main') { - steps+: [ - // publish for tag or main - clients_docker('amd64', 'fluentd') { - depends_on: ['image-tag'], - when: onTagOrMain, - settings+: { - repo: 'grafana/fluent-plugin-loki', - }, - }, - ], -}; - -local logstash() = pipeline('logstash-amd64') + arch_image('amd64', 'main') { - steps+: [ - // publish for tag or main - clients_docker('amd64', 'logstash') { - depends_on: ['image-tag'], - 
when: onTagOrMain, - settings+: { - repo: 'grafana/logstash-output-loki', - }, - }, - ], -}; - -local promtail(arch) = pipeline('promtail-' + arch) + arch_image(arch) { - steps+: [ - // publish for tag or main - clients_docker(arch, 'promtail') { - depends_on: ['image-tag'], - when: onTagOrMain, - settings+: {}, - }, - ], -}; - -local lambda_promtail(arch) = pipeline('lambda-promtail-' + arch) + arch_image(arch) { - local skipStep = skipMissingSecretPipelineStep(ecr_key.name), // Needs ECR secrets to run - - steps+: [ - skipStep, - // publish for tag or main - lambda_promtail_ecr('lambda-promtail') { - depends_on: ['image-tag'], - when: onTagOrMain, - settings+: {}, - }, - ], -}; - -local lokioperator(arch) = pipeline('lokioperator-' + arch) + arch_image(arch) { - steps+: [ - // publish for tag or main - docker_operator(arch, 'loki-operator') { - depends_on: ['image-tag'], - when: onTagOrMain { - ref: ['refs/heads/main', 'refs/tags/operator/v*'], - }, - settings+: {}, - }, - ], -}; - -local logql_analyzer() = pipeline('logql-analyzer') + arch_image('amd64') { - steps+: [ - // publish for tag or main - docker('amd64', 'logql-analyzer') { - depends_on: ['image-tag'], - when: onTagOrMain, - settings+: { - repo: 'grafana/logql-analyzer', - }, - }, - ], -}; - -local multiarch_image(arch) = pipeline('docker-' + arch) + arch_image(arch) { - steps+: [ - // publish for tag or main - docker(arch, app) { - depends_on: ['image-tag'], - when: onTagOrMain, - settings+: {}, - } - for app in apps - ], -}; - -local manifest(apps) = pipeline('manifest') { - steps: std.foldl( - function(acc, app) acc + [{ - name: 'manifest-' + app, - image: 'plugins/manifest:1.4.0', - settings: { - // the target parameter is abused for the app's name, - // as it is unused in spec mode. See docker-manifest.tmpl - target: app, - spec: '.drone/docker-manifest.tmpl', - ignore_missing: false, - username: { from_secret: docker_username_secret.name }, - password: { from_secret: docker_password_secret.name }, - }, - depends_on: ['clone'] + ( - // Depend on the previous app, if any. - if std.length(acc) > 0 - then [acc[std.length(acc) - 1].name] - else [] - ), - }], - apps, - [], - ), - depends_on: [ - 'docker-%s' % arch - for arch in archs - ] + [ - 'promtail-%s' % arch - for arch in archs - ] + [ - 'fluent-bit-%s' % arch - for arch in archs - ], -}; - -local manifest_operator(app) = pipeline('manifest-operator') { - steps: [{ - name: 'manifest-' + app, - image: 'plugins/manifest:1.4.0', - settings: { - // the target parameter is abused for the app's name, - // as it is unused in spec mode. See docker-manifest-operator.tmpl - target: app, - spec: '.drone/docker-manifest-operator.tmpl', - ignore_missing: false, - username: { from_secret: docker_username_secret.name }, - password: { from_secret: docker_password_secret.name }, - }, - depends_on: ['clone'], - }], - depends_on: [ - 'lokioperator-%s' % arch - for arch in archs - ], -}; - - -local manifest_ecr(apps, archs) = pipeline('manifest-ecr') { - steps: std.foldl( - function(acc, app) acc + [{ - name: 'manifest-' + app, - image: 'plugins/manifest:1.4.0', - volumes: [{ - name: 'dockerconf', - path: '/.docker', - }], - settings: { - // the target parameter is abused for the app's name, - // as it is unused in spec mode. See docker-manifest-ecr.tmpl - target: app, - spec: '.drone/docker-manifest-ecr.tmpl', - ignore_missing: true, - }, - depends_on: ['clone'] + ( - // Depend on the previous app, if any. 
- if std.length(acc) > 0 - then [acc[std.length(acc) - 1].name] - else [] - ), - }], - apps, - [{ - name: 'ecr-login', - image: 'docker:dind', - volumes: [{ - name: 'dockerconf', - path: '/root/.docker', - }], - environment: { - AWS_ACCESS_KEY_ID: { from_secret: ecr_key.name }, - AWS_SECRET_ACCESS_KEY: { from_secret: ecr_secret_key.name }, - }, - commands: [ - 'apk add --no-cache aws-cli', - 'docker login --username AWS --password $(aws ecr-public get-login-password --region us-east-1) public.ecr.aws', - ], - depends_on: ['clone'], - }], - ), - volumes: [{ - name: 'dockerconf', - temp: {}, - }], - depends_on: [ - 'lambda-promtail-%s' % arch - for arch in archs - ], -}; - -local build_image_tag = '0.33.2'; -[ - pipeline('loki-build-image-' + arch) { - workspace: { - base: '/src', - path: 'loki', - }, - platform: { - os: 'linux', - arch: arch, - }, - steps: [ - { - name: 'push', - image: 'plugins/docker', - when: onTagOrMain + onPath('loki-build-image/**'), - environment: { - DOCKER_BUILDKIT: 1, - }, - settings: { - repo: 'grafana/loki-build-image', - context: 'loki-build-image', - dockerfile: 'loki-build-image/Dockerfile', - username: { from_secret: docker_username_secret.name }, - password: { from_secret: docker_password_secret.name }, - tags: [build_image_tag + '-' + arch], - dry_run: false, - }, - }, - ], - } - for arch in ['amd64', 'arm64'] -] + [ - pipeline('loki-build-image-publish') { - steps: [ - { - name: 'manifest', - image: 'plugins/manifest:1.4.0', - when: onTagOrMain + onPath('loki-build-image/**'), - settings: { - // the target parameter is abused for the app's name, as it is unused in spec mode. - target: 'loki-build-image:' + build_image_tag, - spec: '.drone/docker-manifest-build-image.tmpl', - ignore_missing: false, - username: { from_secret: docker_username_secret.name }, - password: { from_secret: docker_password_secret.name }, - }, - }, - ], - depends_on: [ - 'loki-build-image-%s' % arch - for arch in ['amd64', 'arm64'] - ], - }, - pipeline('helm-test-image') { - workspace: { - base: '/src', - path: 'loki', - }, - steps: [ - { - name: 'push-image', - image: 'plugins/docker', - when: onTagOrMain + onPath('production/helm/loki/src/helm-test/**'), - settings: { - repo: 'grafana/loki-helm-test', - dockerfile: 'production/helm/loki/src/helm-test/Dockerfile', - username: { from_secret: docker_username_secret.name }, - password: { from_secret: docker_password_secret.name }, - dry_run: false, - }, - }, - ], - }, - pipeline('documentation-checks') { - workspace: { - base: '/src', - path: 'loki', - }, - steps: [ - make('documentation-helm-reference-check', container=false) { - depends_on: ['clone'], - }, - ], - }, -] + [ - multiarch_image(arch) - for arch in archs -] + [ - promtail(arch) + ( - // When we're building Promtail for ARM, we want to use Dockerfile.arm32 to fix - // a problem with the published Drone image. See Dockerfile.arm32 for more - // information. - // - // This is really really hacky and a better more permanent solution will be to use - // buildkit. 
- if arch == 'arm' - then { - steps: [ - step + ( - if std.objectHas(step, 'settings') && step.settings.dockerfile == 'clients/cmd/promtail/Dockerfile' - then { - settings+: { - dockerfile: 'clients/cmd/promtail/Dockerfile.arm32', - }, - } - else {} - ) - for step in super.steps - ], - } - else {} - ) - for arch in archs -] + [ - lokioperator(arch) { - trigger+: { - ref: [ - 'refs/heads/main', - 'refs/tags/operator/v*', - 'refs/pull/*/head', - ], - }, - } - for arch in archs -] + [ - fluentbit(arch) - for arch in archs -] + [ - fluentd(), - logstash(), - querytee(), - manifest(['promtail', 'loki', 'loki-canary', 'loki-canary-boringcrypto', 'fluent-bit-plugin-loki']) { - trigger+: onTagOrMain, - }, - manifest_operator('loki-operator') { - trigger+: onTagOrMain { - ref: [ - 'refs/heads/main', - 'refs/tags/operator/v*', - ], - }, - }, - pipeline('deploy') { - local configFileName = 'updater-config.json', - trigger: onTagOrMain { - ref: ['refs/heads/main', 'refs/tags/v*'], - }, - depends_on: ['manifest'], - image_pull_secrets: [pull_secret.name], - steps: [ - { - name: 'prepare-updater-config', - image: 'alpine', - environment: { - MAJOR_MINOR_VERSION_REGEXP: '([0-9]+\\.[0-9]+)', - RELEASE_TAG_REGEXP: '^([0-9]+\\.[0-9]+\\.[0-9]+)$', - }, - commands: [ - 'apk add --no-cache bash git', - 'git fetch origin --tags', - 'echo $(./tools/image-tag) > .tag', - 'export RELEASE_TAG=$(cat .tag)', - // if the tag matches the pattern `D.D.D` then RELEASE_NAME="D-D-x", otherwise RELEASE_NAME="next" - 'export RELEASE_NAME=$([[ $RELEASE_TAG =~ $RELEASE_TAG_REGEXP ]] && echo $RELEASE_TAG | grep -oE $MAJOR_MINOR_VERSION_REGEXP | sed "s/\\./-/g" | sed "s/$/-x/" || echo "next")', - 'echo $RELEASE_NAME', - 'echo $PLUGIN_CONFIG_TEMPLATE > %s' % configFileName, - // replace placeholders with RELEASE_NAME and RELEASE TAG - 'sed -i "s/\\"{{release}}\\"/\\"$RELEASE_NAME\\"/g" %s' % configFileName, - 'sed -i "s/{{version}}/$RELEASE_TAG/g" %s' % configFileName, - ], - settings: { - config_template: { from_secret: updater_config_template.name }, - }, - depends_on: ['clone'], - }, - { - name: 'trigger', - image: drone_updater_plugin_image, - settings: { - github_token: { from_secret: github_secret.name }, - config_file: configFileName, - }, - depends_on: ['prepare-updater-config'], - }, - ], - }, - pipeline('update-loki-helm-chart-on-loki-release') { - local configFileName = 'updater-config.json', - depends_on: ['manifest'], - image_pull_secrets: [pull_secret.name], - trigger: { - // we need to run it only on Loki tags that starts with `v`. - ref: ['refs/tags/v*'], - }, - steps: [ - { - name: 'check-version-is-latest', - image: 'alpine', - when: onTag, - commands: [ - 'apk add --no-cache bash git', - 'git fetch --tags', - "latest_version=$(git tag -l 'v[0-9]*.[0-9]*.[0-9]*' | sort -V | tail -n 1 | sed 's/v//g')", - 'RELEASE_TAG=$(./tools/image-tag)', - 'if [ "$RELEASE_TAG" != "$latest_version" ]; then echo "Current version $RELEASE_TAG is not the latest version of Loki. 
The latest version is $latest_version" && exit 78; fi', - ], - }, - { - name: 'prepare-helm-chart-update-config', - image: 'alpine', - depends_on: ['check-version-is-latest'], - commands: [ - 'apk add --no-cache bash git', - 'git fetch origin --tags', - 'RELEASE_TAG=$(./tools/image-tag)', - 'echo $PLUGIN_CONFIG_TEMPLATE > %s' % configFileName, - // replace placeholders with RELEASE TAG - 'sed -i -E "s/\\{\\{release\\}\\}/$RELEASE_TAG/g" %s' % configFileName, - ], - settings: { - config_template: { from_secret: helm_chart_auto_update_config_template.name }, - }, - }, - { - name: 'trigger-helm-chart-update', - image: drone_updater_plugin_image, - settings: { - github_token: { - from_secret: github_secret.name, - }, - config_file: configFileName, - }, - depends_on: ['prepare-helm-chart-update-config'], - }, - ], - }, - logql_analyzer(), - pipeline('docker-driver') { - trigger+: onTagOrMain, - steps: [ - { - name: 'build and push', - image: 'grafana/loki-build-image:%s' % build_image_version, - depends_on: ['clone'], - environment: { - DOCKER_USERNAME: { from_secret: docker_username_secret.name }, - DOCKER_PASSWORD: { from_secret: docker_password_secret.name }, - }, - commands: [ - 'git fetch origin --tags', - 'make docker-driver-push', - ], - volumes: [ - { - name: 'docker', - path: '/var/run/docker.sock', - }, - ], - privileged: true, - }, - ], - volumes: [ - { - name: 'docker', - host: { - path: '/var/run/docker.sock', - }, - }, - ], - }, -] -+ [ - lambda_promtail(arch) - for arch in ['amd64', 'arm64'] -] + [ - manifest_ecr(['lambda-promtail'], ['amd64', 'arm64']) { - trigger+: { event: ['push'] }, - }, -] + [ - github_secret, - pull_secret, - docker_username_secret, - docker_password_secret, - ecr_key, - ecr_secret_key, - updater_config_template, - helm_chart_auto_update_config_template, - gpg_passphrase, - gpg_private_key, -] diff --git a/.drone/drone.yml b/.drone/drone.yml deleted file mode 100644 index 3710a4b9d3dd..000000000000 --- a/.drone/drone.yml +++ /dev/null @@ -1,1313 +0,0 @@ ---- -kind: pipeline -name: loki-build-image-amd64 -platform: - arch: amd64 - os: linux -steps: -- environment: - DOCKER_BUILDKIT: 1 - image: plugins/docker - name: push - settings: - context: loki-build-image - dockerfile: loki-build-image/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-build-image - tags: - - 0.33.2-amd64 - username: - from_secret: docker_username - when: - event: - - push - - tag - paths: - - loki-build-image/** -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head -workspace: - base: /src - path: loki ---- -kind: pipeline -name: loki-build-image-arm64 -platform: - arch: arm64 - os: linux -steps: -- environment: - DOCKER_BUILDKIT: 1 - image: plugins/docker - name: push - settings: - context: loki-build-image - dockerfile: loki-build-image/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-build-image - tags: - - 0.33.2-arm64 - username: - from_secret: docker_username - when: - event: - - push - - tag - paths: - - loki-build-image/** -trigger: - ref: - - refs/heads/main - - refs/heads/k??? 
- - refs/tags/v* - - refs/pull/*/head -workspace: - base: /src - path: loki ---- -depends_on: -- loki-build-image-amd64 -- loki-build-image-arm64 -kind: pipeline -name: loki-build-image-publish -steps: -- image: plugins/manifest:1.4.0 - name: manifest - settings: - ignore_missing: false - password: - from_secret: docker_password - spec: .drone/docker-manifest-build-image.tmpl - target: loki-build-image:0.33.2 - username: - from_secret: docker_username - when: - event: - - push - - tag - paths: - - loki-build-image/** -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: helm-test-image -steps: -- image: plugins/docker - name: push-image - settings: - dockerfile: production/helm/loki/src/helm-test/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-helm-test - username: - from_secret: docker_username - when: - event: - - push - - tag - paths: - - production/helm/loki/src/helm-test/** -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head -workspace: - base: /src - path: loki ---- -kind: pipeline -name: documentation-checks -steps: -- commands: - - make BUILD_IN_CONTAINER=false documentation-helm-reference-check - depends_on: - - clone - environment: {} - image: grafana/loki-build-image:0.33.6 - name: documentation-helm-reference-check -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head -workspace: - base: /src - path: loki ---- -kind: pipeline -name: docker-amd64 -platform: - arch: amd64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-amd64 > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-loki-image - settings: - dockerfile: cmd/loki/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki - username: - from_secret: docker_username - when: - event: - - push - - tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-loki-canary-image - settings: - dockerfile: cmd/loki-canary/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-canary - username: - from_secret: docker_username - when: - event: - - push - - tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-loki-canary-boringcrypto-image - settings: - dockerfile: cmd/loki-canary-boringcrypto/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-canary-boringcrypto - username: - from_secret: docker_username - when: - event: - - push - - tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-logcli-image - settings: - dockerfile: cmd/logcli/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/logcli - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? 
- - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: docker-arm64 -platform: - arch: arm64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-arm64 > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-loki-image - settings: - dockerfile: cmd/loki/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki - username: - from_secret: docker_username - when: - event: - - push - - tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-loki-canary-image - settings: - dockerfile: cmd/loki-canary/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-canary - username: - from_secret: docker_username - when: - event: - - push - - tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-loki-canary-boringcrypto-image - settings: - dockerfile: cmd/loki-canary-boringcrypto/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-canary-boringcrypto - username: - from_secret: docker_username - when: - event: - - push - - tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-logcli-image - settings: - dockerfile: cmd/logcli/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/logcli - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: docker-arm -platform: - arch: arm - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-arm > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker:linux-arm - name: publish-loki-image - settings: - dockerfile: cmd/loki/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki - username: - from_secret: docker_username - when: - event: - - push - - tag -- depends_on: - - image-tag - image: plugins/docker:linux-arm - name: publish-loki-canary-image - settings: - dockerfile: cmd/loki-canary/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-canary - username: - from_secret: docker_username - when: - event: - - push - - tag -- depends_on: - - image-tag - image: plugins/docker:linux-arm - name: publish-loki-canary-boringcrypto-image - settings: - dockerfile: cmd/loki-canary-boringcrypto/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-canary-boringcrypto - username: - from_secret: docker_username - when: - event: - - push - - tag -- depends_on: - - image-tag - image: plugins/docker:linux-arm - name: publish-logcli-image - settings: - dockerfile: cmd/logcli/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/logcli - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? 
- - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: promtail-amd64 -platform: - arch: amd64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-amd64 > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-promtail-image - settings: - dockerfile: clients/cmd/promtail/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/promtail - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: promtail-arm64 -platform: - arch: arm64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-arm64 > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-promtail-image - settings: - dockerfile: clients/cmd/promtail/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/promtail - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: promtail-arm -platform: - arch: arm - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-arm > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker:linux-arm - name: publish-promtail-image - settings: - dockerfile: clients/cmd/promtail/Dockerfile.arm32 - dry_run: false - password: - from_secret: docker_password - repo: grafana/promtail - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? 
- - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: lokioperator-amd64 -platform: - arch: amd64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-amd64 > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-loki-operator-image - settings: - context: operator - dockerfile: operator/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-operator - username: - from_secret: docker_username - when: - event: - - push - - tag - ref: - - refs/heads/main - - refs/tags/operator/v* -trigger: - ref: - - refs/heads/main - - refs/tags/operator/v* - - refs/pull/*/head ---- -kind: pipeline -name: lokioperator-arm64 -platform: - arch: arm64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-arm64 > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-loki-operator-image - settings: - context: operator - dockerfile: operator/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-operator - username: - from_secret: docker_username - when: - event: - - push - - tag - ref: - - refs/heads/main - - refs/tags/operator/v* -trigger: - ref: - - refs/heads/main - - refs/tags/operator/v* - - refs/pull/*/head ---- -kind: pipeline -name: lokioperator-arm -platform: - arch: arm - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-arm > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker:linux-arm - name: publish-loki-operator-image - settings: - context: operator - dockerfile: operator/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-operator - username: - from_secret: docker_username - when: - event: - - push - - tag - ref: - - refs/heads/main - - refs/tags/operator/v* -trigger: - ref: - - refs/heads/main - - refs/tags/operator/v* - - refs/pull/*/head ---- -kind: pipeline -name: fluent-bit-amd64 -platform: - arch: amd64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-amd64 > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-fluent-bit-image - settings: - dockerfile: clients/cmd/fluent-bit/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/fluent-bit-plugin-loki - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: fluent-bit-arm64 -platform: - arch: arm64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-arm64 > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-fluent-bit-image - settings: - dockerfile: clients/cmd/fluent-bit/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/fluent-bit-plugin-loki - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? 
- - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: fluent-bit-arm -platform: - arch: arm - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-arm > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker:linux-arm - name: publish-fluent-bit-image - settings: - dockerfile: clients/cmd/fluent-bit/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/fluent-bit-plugin-loki - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: fluentd-amd64 -platform: - arch: amd64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-amd64 > .tags - - echo ",main" >> .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-fluentd-image - settings: - dockerfile: clients/cmd/fluentd/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/fluent-plugin-loki - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: logstash-amd64 -platform: - arch: amd64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-amd64 > .tags - - echo ",main" >> .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-logstash-image - settings: - dockerfile: clients/cmd/logstash/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/logstash-output-loki - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: querytee-amd64 -platform: - arch: amd64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-amd64 > .tags - - echo ",main" >> .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-querytee-image - settings: - dockerfile: cmd/querytee/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/loki-query-tee - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? 
- - refs/tags/v* - - refs/pull/*/head ---- -depends_on: -- docker-amd64 -- docker-arm64 -- docker-arm -- promtail-amd64 -- promtail-arm64 -- promtail-arm -- fluent-bit-amd64 -- fluent-bit-arm64 -- fluent-bit-arm -kind: pipeline -name: manifest -steps: -- depends_on: - - clone - image: plugins/manifest:1.4.0 - name: manifest-promtail - settings: - ignore_missing: false - password: - from_secret: docker_password - spec: .drone/docker-manifest.tmpl - target: promtail - username: - from_secret: docker_username -- depends_on: - - clone - - manifest-promtail - image: plugins/manifest:1.4.0 - name: manifest-loki - settings: - ignore_missing: false - password: - from_secret: docker_password - spec: .drone/docker-manifest.tmpl - target: loki - username: - from_secret: docker_username -- depends_on: - - clone - - manifest-loki - image: plugins/manifest:1.4.0 - name: manifest-loki-canary - settings: - ignore_missing: false - password: - from_secret: docker_password - spec: .drone/docker-manifest.tmpl - target: loki-canary - username: - from_secret: docker_username -- depends_on: - - clone - - manifest-loki-canary - image: plugins/manifest:1.4.0 - name: manifest-loki-canary-boringcrypto - settings: - ignore_missing: false - password: - from_secret: docker_password - spec: .drone/docker-manifest.tmpl - target: loki-canary-boringcrypto - username: - from_secret: docker_username -- depends_on: - - clone - - manifest-loki-canary-boringcrypto - image: plugins/manifest:1.4.0 - name: manifest-fluent-bit-plugin-loki - settings: - ignore_missing: false - password: - from_secret: docker_password - spec: .drone/docker-manifest.tmpl - target: fluent-bit-plugin-loki - username: - from_secret: docker_username -trigger: - event: - - push - - tag - ref: - - refs/heads/main - - refs/heads/k??? 
- - refs/tags/v* - - refs/pull/*/head ---- -depends_on: -- lokioperator-amd64 -- lokioperator-arm64 -- lokioperator-arm -kind: pipeline -name: manifest-operator -steps: -- depends_on: - - clone - image: plugins/manifest:1.4.0 - name: manifest-loki-operator - settings: - ignore_missing: false - password: - from_secret: docker_password - spec: .drone/docker-manifest-operator.tmpl - target: loki-operator - username: - from_secret: docker_username -trigger: - event: - - push - - tag - ref: - - refs/heads/main - - refs/tags/operator/v* ---- -depends_on: -- manifest -image_pull_secrets: -- dockerconfigjson -kind: pipeline -name: deploy -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag) > .tag - - export RELEASE_TAG=$(cat .tag) - - export RELEASE_NAME=$([[ $RELEASE_TAG =~ $RELEASE_TAG_REGEXP ]] && echo $RELEASE_TAG - | grep -oE $MAJOR_MINOR_VERSION_REGEXP | sed "s/\./-/g" | sed "s/$/-x/" || echo - "next") - - echo $RELEASE_NAME - - echo $PLUGIN_CONFIG_TEMPLATE > updater-config.json - - sed -i "s/\"{{release}}\"/\"$RELEASE_NAME\"/g" updater-config.json - - sed -i "s/{{version}}/$RELEASE_TAG/g" updater-config.json - depends_on: - - clone - environment: - MAJOR_MINOR_VERSION_REGEXP: ([0-9]+\.[0-9]+) - RELEASE_TAG_REGEXP: ^([0-9]+\.[0-9]+\.[0-9]+)$ - image: alpine - name: prepare-updater-config - settings: - config_template: - from_secret: updater_config_template -- depends_on: - - prepare-updater-config - image: us.gcr.io/kubernetes-dev/drone/plugins/updater@sha256:cbcb09c74f96a34c528f52bf9b4815a036b11fed65f685be216e0c8b8e84285b - name: trigger - settings: - config_file: updater-config.json - github_token: - from_secret: github_token -trigger: - event: - - push - - tag - ref: - - refs/heads/main - - refs/tags/v* ---- -depends_on: -- manifest -image_pull_secrets: -- dockerconfigjson -kind: pipeline -name: update-loki-helm-chart-on-loki-release -steps: -- commands: - - apk add --no-cache bash git - - git fetch --tags - - latest_version=$(git tag -l 'v[0-9]*.[0-9]*.[0-9]*' | sort -V | tail -n 1 | sed - 's/v//g') - - RELEASE_TAG=$(./tools/image-tag) - - if [ "$RELEASE_TAG" != "$latest_version" ]; then echo "Current version $RELEASE_TAG - is not the latest version of Loki. 
The latest version is $latest_version" && exit - 78; fi - image: alpine - name: check-version-is-latest - when: - event: - - tag -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - RELEASE_TAG=$(./tools/image-tag) - - echo $PLUGIN_CONFIG_TEMPLATE > updater-config.json - - sed -i -E "s/\{\{release\}\}/$RELEASE_TAG/g" updater-config.json - depends_on: - - check-version-is-latest - image: alpine - name: prepare-helm-chart-update-config - settings: - config_template: - from_secret: helm-chart-update-config-template -- depends_on: - - prepare-helm-chart-update-config - image: us.gcr.io/kubernetes-dev/drone/plugins/updater@sha256:cbcb09c74f96a34c528f52bf9b4815a036b11fed65f685be216e0c8b8e84285b - name: trigger-helm-chart-update - settings: - config_file: updater-config.json - github_token: - from_secret: github_token -trigger: - ref: - - refs/tags/v* ---- -kind: pipeline -name: logql-analyzer -platform: - arch: amd64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-amd64 > .tags - image: alpine - name: image-tag -- depends_on: - - image-tag - image: plugins/docker - name: publish-logql-analyzer-image - settings: - dockerfile: cmd/logql-analyzer/Dockerfile - dry_run: false - password: - from_secret: docker_password - repo: grafana/logql-analyzer - username: - from_secret: docker_username - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: docker-driver -steps: -- commands: - - git fetch origin --tags - - make docker-driver-push - depends_on: - - clone - environment: - DOCKER_PASSWORD: - from_secret: docker_password - DOCKER_USERNAME: - from_secret: docker_username - image: grafana/loki-build-image:0.33.6 - name: build and push - privileged: true - volumes: - - name: docker - path: /var/run/docker.sock -trigger: - event: - - push - - tag - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head -volumes: -- host: - path: /var/run/docker.sock - name: docker ---- -kind: pipeline -name: lambda-promtail-amd64 -platform: - arch: amd64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-amd64 > .tags - image: alpine - name: image-tag -- commands: - - if [ "$${#TEST_SECRET}" -eq 0 ]; then - - ' echo "Missing a secret to run this pipeline. This branch needs to be re-pushed - as a branch in main grafana/loki repository in order to run." && exit 78' - - fi - environment: - TEST_SECRET: - from_secret: ecr_key - image: alpine - name: skip pipeline if missing secret -- depends_on: - - image-tag - image: cstyan/ecr - name: publish-lambda-promtail-image - privileged: true - settings: - access_key: - from_secret: ecr_key - dockerfile: tools/lambda-promtail/Dockerfile - dry_run: false - region: us-east-1 - registry: public.ecr.aws/grafana - repo: public.ecr.aws/grafana/lambda-promtail - secret_key: - from_secret: ecr_secret_key - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head ---- -kind: pipeline -name: lambda-promtail-arm64 -platform: - arch: arm64 - os: linux -steps: -- commands: - - apk add --no-cache bash git - - git fetch origin --tags - - echo $(./tools/image-tag)-arm64 > .tags - image: alpine - name: image-tag -- commands: - - if [ "$${#TEST_SECRET}" -eq 0 ]; then - - ' echo "Missing a secret to run this pipeline. 
This branch needs to be re-pushed - as a branch in main grafana/loki repository in order to run." && exit 78' - - fi - environment: - TEST_SECRET: - from_secret: ecr_key - image: alpine - name: skip pipeline if missing secret -- depends_on: - - image-tag - image: cstyan/ecr - name: publish-lambda-promtail-image - privileged: true - settings: - access_key: - from_secret: ecr_key - dockerfile: tools/lambda-promtail/Dockerfile - dry_run: false - region: us-east-1 - registry: public.ecr.aws/grafana - repo: public.ecr.aws/grafana/lambda-promtail - secret_key: - from_secret: ecr_secret_key - when: - event: - - push - - tag -trigger: - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head ---- -depends_on: -- lambda-promtail-amd64 -- lambda-promtail-arm64 -kind: pipeline -name: manifest-ecr -steps: -- commands: - - apk add --no-cache aws-cli - - docker login --username AWS --password $(aws ecr-public get-login-password --region - us-east-1) public.ecr.aws - depends_on: - - clone - environment: - AWS_ACCESS_KEY_ID: - from_secret: ecr_key - AWS_SECRET_ACCESS_KEY: - from_secret: ecr_secret_key - image: docker:dind - name: ecr-login - volumes: - - name: dockerconf - path: /root/.docker -- depends_on: - - clone - - ecr-login - image: plugins/manifest:1.4.0 - name: manifest-lambda-promtail - settings: - ignore_missing: true - spec: .drone/docker-manifest-ecr.tmpl - target: lambda-promtail - volumes: - - name: dockerconf - path: /.docker -trigger: - event: - - push - ref: - - refs/heads/main - - refs/heads/k??? - - refs/tags/v* - - refs/pull/*/head -volumes: -- name: dockerconf - temp: {} ---- -get: - name: pat - path: infra/data/ci/github/grafanabot -kind: secret -name: github_token ---- -get: - name: .dockerconfigjson - path: secret/data/common/gcr -kind: secret -name: dockerconfigjson ---- -get: - name: username - path: infra/data/ci/docker_hub -kind: secret -name: docker_username ---- -get: - name: password - path: infra/data/ci/docker_hub -kind: secret -name: docker_password ---- -get: - name: access_key_id - path: infra/data/ci/loki/aws-credentials -kind: secret -name: ecr_key ---- -get: - name: secret_access_key - path: infra/data/ci/loki/aws-credentials -kind: secret -name: ecr_secret_key ---- -get: - name: updater-config-template.json - path: secret/data/common/loki_ci_autodeploy -kind: secret -name: updater_config_template ---- -get: - name: on-loki-release-config.json - path: secret/data/common/loki-helm-chart-auto-update -kind: secret -name: helm-chart-update-config-template ---- -get: - name: passphrase - path: infra/data/ci/packages-publish/gpg -kind: secret -name: gpg_passphrase ---- -get: - name: private-key - path: infra/data/ci/packages-publish/gpg -kind: secret -name: gpg_private_key ---- -kind: signature -hmac: 3b3b039769ab8c44318749efec569ffe50c4cfb173f577422ec9d514054f0a9e - -... 
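[Editor's note] The Drone pipelines deleted above are superseded by GitHub Actions workflows generated from .github/release-workflows.jsonnet against the vendored grafana/loki-release library, whose ref is bumped in the jsonnetfile changes below. A minimal sketch of regenerating the workflows after such a bump, assuming the jb (jsonnet-bundler) and jsonnet CLIs and that BUILD_IMAGE_VERSION is the only required external variable — the repository's Makefile may wrap this differently:

    # Hypothetical regeneration steps; release-workflows.jsonnet may expect
    # additional external variables, so check the Makefile before relying on this.
    cd .github
    jb install        # re-vendor grafana/loki-release per jsonnetfile.json
    jsonnet -S -m workflows --ext-str BUILD_IMAGE_VERSION=0.34.0 release-workflows.jsonnet

Here -S emits raw strings (each top-level field is std.manifestYamlDoc output) and -m workflows writes each field, e.g. check.yml and the new images.yml, as a file under .github/workflows/.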
diff --git a/.github/issue_commands.json b/.github/issue_commands.json deleted file mode 100644 index a1f4ec217871..000000000000 --- a/.github/issue_commands.json +++ /dev/null @@ -1,10 +0,0 @@ -[ - { - "type": "label", - "name": "type/docs", - "action": "addToProject", - "addToProject": { - "url": "https://github.com/orgs/grafana/projects/69" - } - } -] diff --git a/.github/jsonnetfile.json b/.github/jsonnetfile.json index 1038aebdcd66..bb6fd2eb41c9 100644 --- a/.github/jsonnetfile.json +++ b/.github/jsonnetfile.json @@ -8,7 +8,7 @@ "subdir": "workflows" } }, - "version": "87cb5090c36b5332e7f21b5c59e136962d5f4f56" + "version": "d900569c04b53e02de6ef208fa77cba41ec5f709" } ], "legacyImports": true diff --git a/.github/jsonnetfile.lock.json b/.github/jsonnetfile.lock.json index 9eef1872519b..7c45536e4f49 100644 --- a/.github/jsonnetfile.lock.json +++ b/.github/jsonnetfile.lock.json @@ -8,8 +8,8 @@ "subdir": "workflows" } }, - "version": "87cb5090c36b5332e7f21b5c59e136962d5f4f56", - "sum": "kVlVZPpPz8d/D6UGK9Hto+NeGy7z8NvGygcB1QboxWw=" + "version": "d900569c04b53e02de6ef208fa77cba41ec5f709", + "sum": "+uAzU+b+aJtp3k+JX5mDxuh8LNY23+cHvUOwzCQ8CS8=" } ], "legacyImports": false diff --git a/.github/release-workflows.jsonnet b/.github/release-workflows.jsonnet index 5b179b3006d8..6c16af50ad74 100644 --- a/.github/release-workflows.jsonnet +++ b/.github/release-workflows.jsonnet @@ -17,11 +17,23 @@ local imageJobs = { querytee: build.image('loki-query-tee', 'cmd/querytee', platform=['linux/amd64']), }; +local weeklyImageJobs = { + loki: build.weeklyImage('loki', 'cmd/loki'), + fluentd: build.weeklyImage('fluent-plugin-loki', 'clients/cmd/fluentd', platform=['linux/amd64']), + 'fluent-bit': build.weeklyImage('fluent-bit-plugin-loki', 'clients/cmd/fluent-bit', platform=['linux/amd64']), + logstash: build.weeklyImage('logstash-output-loki', 'clients/cmd/logstash', platform=['linux/amd64']), + logcli: build.weeklyImage('logcli', 'cmd/logcli'), + 'loki-canary': build.weeklyImage('loki-canary', 'cmd/loki-canary'), + 'loki-canary-boringcrypto': build.weeklyImage('loki-canary-boringcrypto', 'cmd/loki-canary-boringcrypto'), + promtail: build.weeklyImage('promtail', 'clients/cmd/promtail'), + querytee: build.weeklyImage('loki-query-tee', 'cmd/querytee', platform=['linux/amd64']), +}; + local buildImageVersion = std.extVar('BUILD_IMAGE_VERSION'); local buildImage = 'grafana/loki-build-image:%s' % buildImageVersion; -local golangCiLintVersion = 'v1.55.1'; +local golangCiLintVersion = 'v1.60.3'; -local imageBuildTimeoutMin = 40; +local imageBuildTimeoutMin = 60; local imagePrefix = 'grafana'; { @@ -70,7 +82,7 @@ local imagePrefix = 'grafana'; imagePrefix='grafana', releaseLibRef=releaseLibRef, releaseRepo='grafana/loki', - useGitHubAppToken=false, + useGitHubAppToken=true, ), false, false ), 'check.yml': std.manifestYamlDoc({ @@ -94,4 +106,42 @@ local imagePrefix = 'grafana'; }, }, }), + 'images.yml': std.manifestYamlDoc({ + name: 'publish images', + on: { + push: { + branches: [ + 'k[0-9]+*', // This is a weird glob pattern, not a regexp, do not use ".*", see https://docs.github.com/en/actions/using-workflows/workflow-syntax-for-github-actions#filter-pattern-cheat-sheet + 'main', + ], + }, + }, + permissions: { + 'id-token': 'write', + contents: 'write', + 'pull-requests': 'write', + }, + jobs: { + check: { + uses: checkTemplate, + with: { + build_image: buildImage, + golang_ci_lint_version: golangCiLintVersion, + release_lib_ref: releaseLibRef, + skip_validation: false, + use_github_app_token: true, + }, + }, 
+ } + std.mapWithKey(function(name, job) + job + + lokiRelease.job.withNeeds(['check']) + + { + env: { + BUILD_TIMEOUT: imageBuildTimeoutMin, + RELEASE_REPO: 'grafana/loki', + RELEASE_LIB_REF: releaseLibRef, + IMAGE_PREFIX: imagePrefix, + }, + }, weeklyImageJobs), + }), } diff --git a/.github/vendor/github.com/grafana/loki-release/workflows/build.libsonnet b/.github/vendor/github.com/grafana/loki-release/workflows/build.libsonnet index bd9d2e2e9b11..7343c7d72963 100644 --- a/.github/vendor/github.com/grafana/loki-release/workflows/build.libsonnet +++ b/.github/vendor/github.com/grafana/loki-release/workflows/build.libsonnet @@ -42,7 +42,7 @@ local releaseLibStep = common.releaseLibStep; echo "platform_short=$(echo ${{ matrix.platform }} | cut -d / -f 2)" >> $GITHUB_OUTPUT |||), - step.new('Build and export', 'docker/build-push-action@v5') + step.new('Build and export', 'docker/build-push-action@v6') + step.withTimeoutMinutes('${{ fromJSON(env.BUILD_TIMEOUT) }}') + step.withIf('${{ fromJSON(needs.version.outputs.pr_created) }}') + step.withEnv({ @@ -93,7 +93,7 @@ local releaseLibStep = common.releaseLibStep; echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT |||), - step.new('Build and push', 'docker/build-push-action@v5') + step.new('Build and push', 'docker/build-push-action@v6') + step.withTimeoutMinutes('${{ fromJSON(env.BUILD_TIMEOUT) }}') + step.with({ context: context, diff --git a/.github/vendor/github.com/grafana/loki-release/workflows/main.jsonnet b/.github/vendor/github.com/grafana/loki-release/workflows/main.jsonnet index d274d21a0571..b4d7b24246cf 100644 --- a/.github/vendor/github.com/grafana/loki-release/workflows/main.jsonnet +++ b/.github/vendor/github.com/grafana/loki-release/workflows/main.jsonnet @@ -9,13 +9,13 @@ releasePRWorkflow: function( branches=['release-[0-9]+.[0-9]+.x', 'k[0-9]+'], buildArtifactsBucket='loki-build-artifacts', - buildImage='grafana/loki-build-image:0.33.0', + buildImage='grafana/loki-build-image:0.34.0', changelogPath='CHANGELOG.md', checkTemplate='./.github/workflows/check.yml', distMakeTargets=['dist', 'packages'], dryRun=false, dockerUsername='grafana', - golangCiLintVersion='v1.55.1', + golangCiLintVersion='v1.60.3', imageBuildTimeoutMin=25, imageJobs={}, imagePrefix='grafana', @@ -139,7 +139,7 @@ type: 'boolean', }, golang_ci_lint_version: { - default: 'v1.55.1', + default: 'v1.60.3', description: 'version of golangci-lint to use', required: false, type: 'string', @@ -190,7 +190,7 @@ type: 'boolean', }, golang_ci_lint_version: { - default: 'v1.55.1', + default: 'v1.60.3', description: 'version of golangci-lint to use', required: false, type: 'string', diff --git a/.github/vendor/github.com/grafana/loki-release/workflows/validate.libsonnet b/.github/vendor/github.com/grafana/loki-release/workflows/validate.libsonnet index 40bf097049e8..44f4984e4b78 100644 --- a/.github/vendor/github.com/grafana/loki-release/workflows/validate.libsonnet +++ b/.github/vendor/github.com/grafana/loki-release/workflows/validate.libsonnet @@ -115,7 +115,6 @@ local validationJob = _validationJob(false); validationMakeStep('validate dev cluster config', 'validate-dev-cluster-config'), validationMakeStep('check example config docs', 'check-example-config-doc'), validationMakeStep('check helm reference doc', 'documentation-helm-reference-check'), - validationMakeStep('check drone drift', 'check-drone-drift'), ]) + { steps+: [ step.new('build docs website') diff --git a/.github/workflows/add-to-docs-project.yml b/.github/workflows/add-to-docs-project.yml new file 
mode 100644
index 000000000000..56f57ed9badf
--- /dev/null
+++ b/.github/workflows/add-to-docs-project.yml
@@ -0,0 +1,15 @@
+name: Add to docs project
+on:
+  issues:
+    types: [labeled]
+  pull_request:
+    types: [labeled]
+jobs:
+  main:
+    if: ${{ github.event.label.name == 'type/docs' }}
+    permissions:
+      contents: read
+      id-token: write
+    runs-on: ubuntu-latest
+    steps:
+      - uses: grafana/writers-toolkit/add-to-docs-project@add-to-docs-project/v1
diff --git a/.github/workflows/backport.yml b/.github/workflows/backport.yml
index 2efd4ef90d4e..241d66225145 100644
--- a/.github/workflows/backport.yml
+++ b/.github/workflows/backport.yml
@@ -7,6 +7,7 @@ on:
 
 jobs:
   main:
+    if: github.repository == 'grafana/loki'
     runs-on: ubuntu-latest
     steps:
       - name: Checkout Actions
@@ -17,10 +18,17 @@ jobs:
           ref: main
       - name: Install Actions
         run: npm install --production --prefix ./actions
+      - id: "get_github_app_token"
+        name: "Get GitHub app token"
+        uses: "actions/create-github-app-token@v1"
+        with:
+          app-id: "${{ secrets.APP_ID }}"
+          owner: "${{ github.repository_owner }}"
+          private-key: "${{ secrets.APP_PRIVATE_KEY }}"
       - name: Run backport
         uses: ./actions/backport
         with:
           metricsWriteAPIKey: ${{secrets.GRAFANA_MISC_STATS_API_KEY}}
-          token: ${{secrets.GH_BOT_ACCESS_TOKEN}}
+          token: ${{ steps.get_github_app_token.outputs.token }}
           labelsToAdd: "backport"
           title: "chore: [{{base}}] {{originalTitle}}"
diff --git a/.github/workflows/check.yml b/.github/workflows/check.yml
index 8e63a1e5d9ba..b9f7b83d5f97 100644
--- a/.github/workflows/check.yml
+++ b/.github/workflows/check.yml
@@ -2,8 +2,8 @@
   "check":
     "uses": "grafana/loki-release/.github/workflows/check.yml@main"
     "with":
-      "build_image": "grafana/loki-build-image:0.33.6"
-      "golang_ci_lint_version": "v1.55.1"
+      "build_image": "grafana/loki-build-image:0.34.0"
+      "golang_ci_lint_version": "v1.60.3"
       "release_lib_ref": "main"
       "skip_validation": false
       "use_github_app_token": true
@@ -12,4 +12,4 @@
   "pull_request": {}
   "push":
     "branches":
-    - "main"
\ No newline at end of file
+    - "main"
diff --git a/.github/workflows/helm-ci.yml b/.github/workflows/helm-ci.yml
index d58705d1496b..cbd51c60b8fa 100644
--- a/.github/workflows/helm-ci.yml
+++ b/.github/workflows/helm-ci.yml
@@ -27,6 +27,14 @@ jobs:
       - name: Lint Yaml
         run: make helm-lint
 
+      - id: "get_github_app_token"
+        name: "Get GitHub app token"
+        uses: "actions/create-github-app-token@v1"
+        with:
+          app-id: "${{ secrets.APP_ID }}"
+          owner: "${{ github.repository_owner }}"
+          private-key: "${{ secrets.APP_PRIVATE_KEY }}"
+
       - name: Lint Code Base
         uses: docker://github/super-linter:v3.12.0
         env:
@@ -37,7 +45,7 @@ jobs:
           VALIDATE_YAML: false
           VALIDATE_GO: false
           DEFAULT_BRANCH: main
-          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+          GITHUB_TOKEN: ${{ steps.get_github_app_token.outputs.token }}
   call-test:
     name: Test Helm Chart
     runs-on: ubuntu-latest
diff --git a/.github/workflows/helm-loki-ci.yml b/.github/workflows/helm-loki-ci.yml
new file mode 100644
index 000000000000..7a2850579118
--- /dev/null
+++ b/.github/workflows/helm-loki-ci.yml
@@ -0,0 +1,215 @@
+---
+name: helm-loki-ci
+on:
+  pull_request:
+    paths:
+      - "production/helm/loki/**"
+
+jobs:
+  publish-diff:
+    # Temporarily disable this workflow for PRs whose head branch is in a fork.
+ if: github.event.pull_request.head.repo.full_name == github.repository + name: Publish Rendered Helm Chart Diff + runs-on: ubuntu-latest + steps: + - name: Setup Helm + uses: azure/setup-helm@v4 + + - name: Add required Helm repositories + run: | + helm repo add minio https://charts.min.io/ + helm repo add grafana https://grafana.github.io/helm-charts + helm repo add grafana-operator https://grafana.github.io/helm-charts + helm repo update + + - name: Prepare directories for base and PR branches + run: | + mkdir -p ${{ github.workspace }}/base + mkdir -p ${{ github.workspace }}/pr + mkdir -p ${{ github.workspace }}/output + mkdir -p ${{ github.workspace }}/output/base + mkdir -p ${{ github.workspace }}/output/pr + + - name: Checkout base branch to 'base' folder within workspace + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.base.ref }} + repository: ${{ github.event.pull_request.base.repo.full_name }} + path: ${{ github.workspace }}/base + + - name: Checkout PR branch to 'pr' folder within workspace + uses: actions/checkout@v4 + with: + ref: ${{ github.event.pull_request.head.ref }} + repository: ${{ github.event.pull_request.head.repo.full_name }} + path: ${{ github.workspace }}/pr + + - name: Render Helm chart for each scenario in the base branch + run: | + cd ${{ github.workspace }}/base/production/helm/loki + if [ ! -d "scenarios" ]; then + echo "Scenarios directory does not exist in the base branch, skipping rendering." + exit 0 + fi + helm dependency build + for file in scenarios/*.yaml; do + cat "$file" + scenario_folder=${{ github.workspace }}/output/base/$(basename $file .yaml) + mkdir $scenario_folder + helm template loki-test-chart-name . -f $file --output-dir $scenario_folder + done + + - name: Render Helm chart for each scenario in the PR branch + run: | + cd ${{ github.workspace }}/pr/production/helm/loki + helm dependency build + for file in scenarios/*.yaml; do + cat "$file" + scenario_folder=${{ github.workspace }}/output/pr/$(basename $file .yaml) + mkdir $scenario_folder + helm template loki-test-chart-name . -f $file --output-dir $scenario_folder + done + + - name: Calculate the diff between base and PR rendered manifests for each scenario + run: | + cd ${{ github.workspace }}/pr/production/helm/loki + for scenario_file in scenarios/*.yaml; do + added_files='[]' + modified_files='[]' + removed_files='[]' + + scenario_name=$(basename $scenario_file .yaml) + base_branch_dir=${{ github.workspace }}/output/base/$scenario_name + pr_branch_dir=${{ github.workspace }}/output/pr/$scenario_name + + echo "Comparing directories: $base_branch_dir and $pr_branch_dir" + + # Find all files in the base and PR directories + base_branch_files=$(if [[ -d "$base_branch_dir" ]]; then find "$base_branch_dir" -type f | sed "s|$base_branch_dir/||"; else echo ""; fi) + pr_branch_files=$(find "$pr_branch_dir" -type f | sed "s|$pr_branch_dir/||") + + # Check for modified and removed files + for file in $base_branch_files; do + echo "check if file exists: $file" + if [[ -f "$pr_branch_dir/$file" ]]; then + echo "File exists in both directories, check if it is modified" + if !
diff -q "$base_branch_dir/$file" "$pr_branch_dir/$file" >/dev/null; then + echo "file is modified $file" + file_diff=$(diff -c "$base_branch_dir/$file" "$pr_branch_dir/$file" || true) + diff_obj=$(jq -n --arg file "$file" --arg diff "$file_diff" '{"filename": $file, "diff": $diff}') + # Append the new object to the JSON array using jq + modified_files=$(echo "$modified_files" | jq --argjson diff_obj "$diff_obj" '. += [$diff_obj]') + else + echo "file is not modified" + fi + else + echo "file is removed $file" + # File is missing in the PR directory + file_content=$(cat "$base_branch_dir/$file") + removed_obj=$(jq -n --arg filename "$file" --arg content "$file_content" '{"filename": $filename, "content": $content}') + # Append the new object to the JSON array using jq + removed_files=$(echo "$removed_files" | jq --argjson removed_obj "$removed_obj" '. += [$removed_obj]') + fi + done + + # Check for added files in the right directory + for file in $pr_branch_files; do + if [[ ! -f "$base_branch_dir/$file" ]]; then + echo "added file detected" + + # File is missing in the PR directory + file_content=$(cat "$pr_branch_dir/$file") + added_obj=$(jq -n --arg file "$file" --arg content "$file_content" '{"filename": $file, "content": $content}') + # Append the new object to the JSON array using jq + added_files=$(echo "$added_files" | jq --argjson added_obj "$added_obj" '. += [$added_obj]') + fi + done + + scenario_output_dir="${{ github.workspace }}/output/$scenario_name" + mkdir $scenario_output_dir + echo $added_files > $scenario_output_dir/added_files.json + echo $modified_files > $scenario_output_dir/modified_files.json + echo $removed_files > $scenario_output_dir/removed_files.json + echo $removed_files + done + + - name: Generate Markdown Summary + run: | + # Initialize the Markdown output file + output_file="${{ github.workspace }}/output/diff_summary.md" + echo "# Kubernetes Manifest Diff Summary" > $output_file + + # Iterate over each scenario file + for file in ${{ github.workspace }}/pr/production/helm/loki/scenarios/*.yaml; do + scenario=$(basename "$file" .yaml) + echo "Processing scenario: $scenario" + + # Read JSON data for added, modified, and removed files + added_files=$(cat ${{ github.workspace }}/output/$scenario/added_files.json) + modified_files=$(cat ${{ github.workspace }}/output/$scenario/modified_files.json) + removed_files=$(cat ${{ github.workspace }}/output/$scenario/removed_files.json) + + # Count the number of added, modified, and removed files + num_added=$(echo "$added_files" | jq length) + num_modified=$(echo "$modified_files" | jq length) + num_removed=$(echo "$removed_files" | jq length) + + # Create a header for the scenario + echo -e "\n
<details><summary>Scenario: $scenario (Added: $num_added, Modified: $num_modified, Removed: $num_removed)</summary> \n" >> $output_file + echo -e "<blockquote>\n\n" >> $output_file + + # Add summary counts + echo -e "\n**Summary:**" >> $output_file + echo -e "\n- **Added:** $num_added" >> $output_file + echo -e "\n- **Modified:** $num_modified" >> $output_file + echo -e "\n- **Removed:** $num_removed" >> $output_file + + # Add details for added files + echo -e "\n### Added Files" >> $output_file + if [[ "$num_added" -gt 0 ]]; then + echo "$added_files" | jq -c '.[]' | while read -r obj; do + filename=$(echo "$obj" | jq -r '.filename') + content=$(echo "$obj" | jq -r '.content') + echo -e "\n<details><summary>$filename</summary>" >> $output_file + echo -e "\n\`\`\`yaml\n$content\n\`\`\`\n</details>" >> $output_file + done + else + echo -e "\n_No added files_\n" >> $output_file + fi + + # Add details for modified files + echo -e "\n### Modified Files" >> $output_file + if [[ "$num_modified" -gt 0 ]]; then + echo "$modified_files" | jq -c '.[]' | while read -r obj; do + filename=$(echo "$obj" | jq -r '.filename') + diff=$(echo "$obj" | jq -r '.diff') + echo -e "\n<details><summary>$filename</summary>" >> $output_file + echo -e "\n\`\`\`diff\n$diff\n\`\`\`\n</details>" >> $output_file + done + else + echo -e "\n_No modified files_\n" >> $output_file + fi + + # Add details for removed files + echo -e "\n### Removed Files" >> $output_file + if [[ "$num_removed" -gt 0 ]]; then + echo "$removed_files" | jq -c '.[]' | while read -r obj; do + filename=$(echo "$obj" | jq -r '.filename') + content=$(echo "$obj" | jq -r '.content') + echo -e "\n<details><summary>$filename</summary>" >> $output_file + echo -e "\n\`\`\`yaml\n$content\n\`\`\`\n</details>" >> $output_file + done + else + echo -e "\n_No removed files_\n" >> $output_file + fi + + # close <blockquote> and <details> + echo -e "\n\n</blockquote>\n</details>
" >> $output_file + done + + - name: Post diff as PR comment + uses: marocchino/sticky-pull-request-comment@v2 + with: + hide_and_recreate: true + hide_classify: "OUTDATED" + path: ${{ github.workspace }}/output/diff_summary.md \ No newline at end of file diff --git a/.github/workflows/helm-release.yaml b/.github/workflows/helm-release.yaml index b1d065aa9f69..d2f9fe97ba4f 100644 --- a/.github/workflows/helm-release.yaml +++ b/.github/workflows/helm-release.yaml @@ -16,5 +16,6 @@ jobs: cr_configfile: production/helm/cr.yaml ct_configfile: production/helm/ct.yaml helm_tag_prefix: helm - secrets: - helm_repo_token: ${{ secrets.GH_BOT_ACCESS_TOKEN }} + secrets: + github_app_id: ${{ secrets.APP_ID }} + github_app_pem: ${{ secrets.APP_PRIVATE_KEY }} diff --git a/.github/workflows/images.yml b/.github/workflows/images.yml new file mode 100644 index 000000000000..97b40cb2e05b --- /dev/null +++ b/.github/workflows/images.yml @@ -0,0 +1,433 @@ +"jobs": + "check": + "uses": "grafana/loki-release/.github/workflows/check.yml@main" + "with": + "build_image": "grafana/loki-build-image:0.34.0" + "golang_ci_lint_version": "v1.60.3" + "release_lib_ref": "main" + "skip_validation": false + "use_github_app_token": true + "fluent-bit": + "env": + "BUILD_TIMEOUT": 60 + "IMAGE_PREFIX": "grafana" + "RELEASE_LIB_REF": "main" + "RELEASE_REPO": "grafana/loki" + "needs": + - "check" + "runs-on": "ubuntu-latest" + "steps": + - "name": "pull release library code" + "uses": "actions/checkout@v4" + "with": + "path": "lib" + "ref": "${{ env.RELEASE_LIB_REF }}" + "repository": "grafana/loki-release" + - "name": "pull code to release" + "uses": "actions/checkout@v4" + "with": + "path": "release" + "repository": "${{ env.RELEASE_REPO }}" + - "name": "setup node" + "uses": "actions/setup-node@v4" + "with": + "node-version": 20 + - "name": "Set up QEMU" + "uses": "docker/setup-qemu-action@v3" + - "name": "set up docker buildx" + "uses": "docker/setup-buildx-action@v3" + - "name": "Login to DockerHub (from vault)" + "uses": "grafana/shared-workflows/actions/dockerhub-login@main" + - "id": "weekly-version" + "name": "Get weekly version" + "run": | + echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT + "working-directory": "release" + - "name": "Build and push" + "timeout-minutes": "${{ fromJSON(env.BUILD_TIMEOUT) }}" + "uses": "docker/build-push-action@v6" + "with": + "build-args": "IMAGE_TAG=${{ steps.weekly-version.outputs.version }}" + "context": "release" + "file": "release/clients/cmd/fluent-bit/Dockerfile" + "platforms": "linux/amd64" + "push": true + "tags": "${{ env.IMAGE_PREFIX }}/fluent-bit-plugin-loki:${{ steps.weekly-version.outputs.version }}" + "fluentd": + "env": + "BUILD_TIMEOUT": 60 + "IMAGE_PREFIX": "grafana" + "RELEASE_LIB_REF": "main" + "RELEASE_REPO": "grafana/loki" + "needs": + - "check" + "runs-on": "ubuntu-latest" + "steps": + - "name": "pull release library code" + "uses": "actions/checkout@v4" + "with": + "path": "lib" + "ref": "${{ env.RELEASE_LIB_REF }}" + "repository": "grafana/loki-release" + - "name": "pull code to release" + "uses": "actions/checkout@v4" + "with": + "path": "release" + "repository": "${{ env.RELEASE_REPO }}" + - "name": "setup node" + "uses": "actions/setup-node@v4" + "with": + "node-version": 20 + - "name": "Set up QEMU" + "uses": "docker/setup-qemu-action@v3" + - "name": "set up docker buildx" + "uses": "docker/setup-buildx-action@v3" + - "name": "Login to DockerHub (from vault)" + "uses": "grafana/shared-workflows/actions/dockerhub-login@main" + - "id": "weekly-version" + 
"name": "Get weekly version" + "run": | + echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT + "working-directory": "release" + - "name": "Build and push" + "timeout-minutes": "${{ fromJSON(env.BUILD_TIMEOUT) }}" + "uses": "docker/build-push-action@v6" + "with": + "build-args": "IMAGE_TAG=${{ steps.weekly-version.outputs.version }}" + "context": "release" + "file": "release/clients/cmd/fluentd/Dockerfile" + "platforms": "linux/amd64" + "push": true + "tags": "${{ env.IMAGE_PREFIX }}/fluent-plugin-loki:${{ steps.weekly-version.outputs.version }}" + "logcli": + "env": + "BUILD_TIMEOUT": 60 + "IMAGE_PREFIX": "grafana" + "RELEASE_LIB_REF": "main" + "RELEASE_REPO": "grafana/loki" + "needs": + - "check" + "runs-on": "ubuntu-latest" + "steps": + - "name": "pull release library code" + "uses": "actions/checkout@v4" + "with": + "path": "lib" + "ref": "${{ env.RELEASE_LIB_REF }}" + "repository": "grafana/loki-release" + - "name": "pull code to release" + "uses": "actions/checkout@v4" + "with": + "path": "release" + "repository": "${{ env.RELEASE_REPO }}" + - "name": "setup node" + "uses": "actions/setup-node@v4" + "with": + "node-version": 20 + - "name": "Set up QEMU" + "uses": "docker/setup-qemu-action@v3" + - "name": "set up docker buildx" + "uses": "docker/setup-buildx-action@v3" + - "name": "Login to DockerHub (from vault)" + "uses": "grafana/shared-workflows/actions/dockerhub-login@main" + - "id": "weekly-version" + "name": "Get weekly version" + "run": | + echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT + "working-directory": "release" + - "name": "Build and push" + "timeout-minutes": "${{ fromJSON(env.BUILD_TIMEOUT) }}" + "uses": "docker/build-push-action@v6" + "with": + "build-args": "IMAGE_TAG=${{ steps.weekly-version.outputs.version }}" + "context": "release" + "file": "release/cmd/logcli/Dockerfile" + "platforms": "linux/amd64,linux/arm64,linux/arm" + "push": true + "tags": "${{ env.IMAGE_PREFIX }}/logcli:${{ steps.weekly-version.outputs.version }}" + "logstash": + "env": + "BUILD_TIMEOUT": 60 + "IMAGE_PREFIX": "grafana" + "RELEASE_LIB_REF": "main" + "RELEASE_REPO": "grafana/loki" + "needs": + - "check" + "runs-on": "ubuntu-latest" + "steps": + - "name": "pull release library code" + "uses": "actions/checkout@v4" + "with": + "path": "lib" + "ref": "${{ env.RELEASE_LIB_REF }}" + "repository": "grafana/loki-release" + - "name": "pull code to release" + "uses": "actions/checkout@v4" + "with": + "path": "release" + "repository": "${{ env.RELEASE_REPO }}" + - "name": "setup node" + "uses": "actions/setup-node@v4" + "with": + "node-version": 20 + - "name": "Set up QEMU" + "uses": "docker/setup-qemu-action@v3" + - "name": "set up docker buildx" + "uses": "docker/setup-buildx-action@v3" + - "name": "Login to DockerHub (from vault)" + "uses": "grafana/shared-workflows/actions/dockerhub-login@main" + - "id": "weekly-version" + "name": "Get weekly version" + "run": | + echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT + "working-directory": "release" + - "name": "Build and push" + "timeout-minutes": "${{ fromJSON(env.BUILD_TIMEOUT) }}" + "uses": "docker/build-push-action@v6" + "with": + "build-args": "IMAGE_TAG=${{ steps.weekly-version.outputs.version }}" + "context": "release" + "file": "release/clients/cmd/logstash/Dockerfile" + "platforms": "linux/amd64" + "push": true + "tags": "${{ env.IMAGE_PREFIX }}/logstash-output-loki:${{ steps.weekly-version.outputs.version }}" + "loki": + "env": + "BUILD_TIMEOUT": 60 + "IMAGE_PREFIX": "grafana" + "RELEASE_LIB_REF": "main" + "RELEASE_REPO": 
"grafana/loki" + "needs": + - "check" + "runs-on": "ubuntu-latest" + "steps": + - "name": "pull release library code" + "uses": "actions/checkout@v4" + "with": + "path": "lib" + "ref": "${{ env.RELEASE_LIB_REF }}" + "repository": "grafana/loki-release" + - "name": "pull code to release" + "uses": "actions/checkout@v4" + "with": + "path": "release" + "repository": "${{ env.RELEASE_REPO }}" + - "name": "setup node" + "uses": "actions/setup-node@v4" + "with": + "node-version": 20 + - "name": "Set up QEMU" + "uses": "docker/setup-qemu-action@v3" + - "name": "set up docker buildx" + "uses": "docker/setup-buildx-action@v3" + - "name": "Login to DockerHub (from vault)" + "uses": "grafana/shared-workflows/actions/dockerhub-login@main" + - "id": "weekly-version" + "name": "Get weekly version" + "run": | + echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT + "working-directory": "release" + - "name": "Build and push" + "timeout-minutes": "${{ fromJSON(env.BUILD_TIMEOUT) }}" + "uses": "docker/build-push-action@v6" + "with": + "build-args": "IMAGE_TAG=${{ steps.weekly-version.outputs.version }}" + "context": "release" + "file": "release/cmd/loki/Dockerfile" + "platforms": "linux/amd64,linux/arm64,linux/arm" + "push": true + "tags": "${{ env.IMAGE_PREFIX }}/loki:${{ steps.weekly-version.outputs.version }}" + "loki-canary": + "env": + "BUILD_TIMEOUT": 60 + "IMAGE_PREFIX": "grafana" + "RELEASE_LIB_REF": "main" + "RELEASE_REPO": "grafana/loki" + "needs": + - "check" + "runs-on": "ubuntu-latest" + "steps": + - "name": "pull release library code" + "uses": "actions/checkout@v4" + "with": + "path": "lib" + "ref": "${{ env.RELEASE_LIB_REF }}" + "repository": "grafana/loki-release" + - "name": "pull code to release" + "uses": "actions/checkout@v4" + "with": + "path": "release" + "repository": "${{ env.RELEASE_REPO }}" + - "name": "setup node" + "uses": "actions/setup-node@v4" + "with": + "node-version": 20 + - "name": "Set up QEMU" + "uses": "docker/setup-qemu-action@v3" + - "name": "set up docker buildx" + "uses": "docker/setup-buildx-action@v3" + - "name": "Login to DockerHub (from vault)" + "uses": "grafana/shared-workflows/actions/dockerhub-login@main" + - "id": "weekly-version" + "name": "Get weekly version" + "run": | + echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT + "working-directory": "release" + - "name": "Build and push" + "timeout-minutes": "${{ fromJSON(env.BUILD_TIMEOUT) }}" + "uses": "docker/build-push-action@v6" + "with": + "build-args": "IMAGE_TAG=${{ steps.weekly-version.outputs.version }}" + "context": "release" + "file": "release/cmd/loki-canary/Dockerfile" + "platforms": "linux/amd64,linux/arm64,linux/arm" + "push": true + "tags": "${{ env.IMAGE_PREFIX }}/loki-canary:${{ steps.weekly-version.outputs.version }}" + "loki-canary-boringcrypto": + "env": + "BUILD_TIMEOUT": 60 + "IMAGE_PREFIX": "grafana" + "RELEASE_LIB_REF": "main" + "RELEASE_REPO": "grafana/loki" + "needs": + - "check" + "runs-on": "ubuntu-latest" + "steps": + - "name": "pull release library code" + "uses": "actions/checkout@v4" + "with": + "path": "lib" + "ref": "${{ env.RELEASE_LIB_REF }}" + "repository": "grafana/loki-release" + - "name": "pull code to release" + "uses": "actions/checkout@v4" + "with": + "path": "release" + "repository": "${{ env.RELEASE_REPO }}" + - "name": "setup node" + "uses": "actions/setup-node@v4" + "with": + "node-version": 20 + - "name": "Set up QEMU" + "uses": "docker/setup-qemu-action@v3" + - "name": "set up docker buildx" + "uses": "docker/setup-buildx-action@v3" + - "name": "Login to 
DockerHub (from vault)" + "uses": "grafana/shared-workflows/actions/dockerhub-login@main" + - "id": "weekly-version" + "name": "Get weekly version" + "run": | + echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT + "working-directory": "release" + - "name": "Build and push" + "timeout-minutes": "${{ fromJSON(env.BUILD_TIMEOUT) }}" + "uses": "docker/build-push-action@v6" + "with": + "build-args": "IMAGE_TAG=${{ steps.weekly-version.outputs.version }}" + "context": "release" + "file": "release/cmd/loki-canary-boringcrypto/Dockerfile" + "platforms": "linux/amd64,linux/arm64,linux/arm" + "push": true + "tags": "${{ env.IMAGE_PREFIX }}/loki-canary-boringcrypto:${{ steps.weekly-version.outputs.version }}" + "promtail": + "env": + "BUILD_TIMEOUT": 60 + "IMAGE_PREFIX": "grafana" + "RELEASE_LIB_REF": "main" + "RELEASE_REPO": "grafana/loki" + "needs": + - "check" + "runs-on": "ubuntu-latest" + "steps": + - "name": "pull release library code" + "uses": "actions/checkout@v4" + "with": + "path": "lib" + "ref": "${{ env.RELEASE_LIB_REF }}" + "repository": "grafana/loki-release" + - "name": "pull code to release" + "uses": "actions/checkout@v4" + "with": + "path": "release" + "repository": "${{ env.RELEASE_REPO }}" + - "name": "setup node" + "uses": "actions/setup-node@v4" + "with": + "node-version": 20 + - "name": "Set up QEMU" + "uses": "docker/setup-qemu-action@v3" + - "name": "set up docker buildx" + "uses": "docker/setup-buildx-action@v3" + - "name": "Login to DockerHub (from vault)" + "uses": "grafana/shared-workflows/actions/dockerhub-login@main" + - "id": "weekly-version" + "name": "Get weekly version" + "run": | + echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT + "working-directory": "release" + - "name": "Build and push" + "timeout-minutes": "${{ fromJSON(env.BUILD_TIMEOUT) }}" + "uses": "docker/build-push-action@v6" + "with": + "build-args": "IMAGE_TAG=${{ steps.weekly-version.outputs.version }}" + "context": "release" + "file": "release/clients/cmd/promtail/Dockerfile" + "platforms": "linux/amd64,linux/arm64,linux/arm" + "push": true + "tags": "${{ env.IMAGE_PREFIX }}/promtail:${{ steps.weekly-version.outputs.version }}" + "querytee": + "env": + "BUILD_TIMEOUT": 60 + "IMAGE_PREFIX": "grafana" + "RELEASE_LIB_REF": "main" + "RELEASE_REPO": "grafana/loki" + "needs": + - "check" + "runs-on": "ubuntu-latest" + "steps": + - "name": "pull release library code" + "uses": "actions/checkout@v4" + "with": + "path": "lib" + "ref": "${{ env.RELEASE_LIB_REF }}" + "repository": "grafana/loki-release" + - "name": "pull code to release" + "uses": "actions/checkout@v4" + "with": + "path": "release" + "repository": "${{ env.RELEASE_REPO }}" + - "name": "setup node" + "uses": "actions/setup-node@v4" + "with": + "node-version": 20 + - "name": "Set up QEMU" + "uses": "docker/setup-qemu-action@v3" + - "name": "set up docker buildx" + "uses": "docker/setup-buildx-action@v3" + - "name": "Login to DockerHub (from vault)" + "uses": "grafana/shared-workflows/actions/dockerhub-login@main" + - "id": "weekly-version" + "name": "Get weekly version" + "run": | + echo "version=$(./tools/image-tag)" >> $GITHUB_OUTPUT + "working-directory": "release" + - "name": "Build and push" + "timeout-minutes": "${{ fromJSON(env.BUILD_TIMEOUT) }}" + "uses": "docker/build-push-action@v6" + "with": + "build-args": "IMAGE_TAG=${{ steps.weekly-version.outputs.version }}" + "context": "release" + "file": "release/cmd/querytee/Dockerfile" + "platforms": "linux/amd64" + "push": true + "tags": "${{ env.IMAGE_PREFIX }}/loki-query-tee:${{ 
steps.weekly-version.outputs.version }}" +"name": "publish images" +"on": + "push": + "branches": + - "k[0-9]+*" + - "main" +"permissions": + "contents": "write" + "id-token": "write" + "pull-requests": "write" diff --git a/.github/workflows/issue_commands.yml b/.github/workflows/issue_commands.yml deleted file mode 100644 index cfab551c4207..000000000000 --- a/.github/workflows/issue_commands.yml +++ /dev/null @@ -1,21 +0,0 @@ -name: Run commands when issues are labeled -on: - issues: - types: [labeled] -jobs: - main: - runs-on: ubuntu-latest - steps: - - name: Checkout Actions - uses: actions/checkout@v4 - with: - repository: "grafana/grafana-github-actions" - path: ./actions - ref: main - - name: Install Actions - run: npm install --production --prefix ./actions - - name: Run Commands - uses: ./actions/commands - with: - token: ${{secrets.ISSUE_COMMANDS_TOKEN}} - configPath: issue_commands diff --git a/.github/workflows/lint-jsonnet.yml b/.github/workflows/lint-jsonnet.yml index 37016f255ca5..e1fbf786f616 100644 --- a/.github/workflows/lint-jsonnet.yml +++ b/.github/workflows/lint-jsonnet.yml @@ -14,7 +14,7 @@ jobs: - name: setup go uses: actions/setup-go@v5 with: - go-version: '1.22.2' + go-version: '1.23.1' - name: setup jsonnet run: | go install github.com/google/go-jsonnet/cmd/jsonnet@v0.20.0 diff --git a/.github/workflows/metrics-collector.yml b/.github/workflows/metrics-collector.yml index 86befcfd4d8b..d086aaf7e9ec 100644 --- a/.github/workflows/metrics-collector.yml +++ b/.github/workflows/metrics-collector.yml @@ -5,7 +5,7 @@ on: jobs: main: - if: github.owner == "grafana" + if: github.owner == 'grafana' runs-on: ubuntu-latest steps: - name: Checkout Actions diff --git a/.github/workflows/minor-release-pr.yml b/.github/workflows/minor-release-pr.yml index 74c00fdf23c4..a5c52d0fb2ee 100644 --- a/.github/workflows/minor-release-pr.yml +++ b/.github/workflows/minor-release-pr.yml @@ -2,7 +2,7 @@ concurrency: group: "create-release-pr-${{ github.sha }}" env: BUILD_ARTIFACTS_BUCKET: "loki-build-artifacts" - BUILD_TIMEOUT: 40 + BUILD_TIMEOUT: 60 CHANGELOG_PATH: "CHANGELOG.md" DOCKER_USERNAME: "grafana" DRY_RUN: false @@ -16,8 +16,8 @@ jobs: check: uses: "grafana/loki-release/.github/workflows/check.yml@main" with: - build_image: "grafana/loki-build-image:0.33.6" - golang_ci_lint_version: "v1.55.1" + build_image: "grafana/loki-build-image:0.34.0" + golang_ci_lint_version: "v1.60.3" release_lib_ref: "main" skip_validation: false use_github_app_token: true @@ -143,7 +143,7 @@ jobs: --env SKIP_ARM \ --volume .:/src/loki \ --workdir /src/loki \ - --entrypoint /bin/sh "grafana/loki-build-image:0.33.6" + --entrypoint /bin/sh "grafana/loki-build-image:0.34.0" git config --global --add safe.directory /src/loki echo "${NFPM_SIGNING_KEY}" > $NFPM_SIGNING_KEY_FILE make dist packages @@ -198,7 +198,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -260,7 +260,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -322,7 +322,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) 
}}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -386,7 +386,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -448,7 +448,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -512,7 +512,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -576,7 +576,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -640,7 +640,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -704,7 +704,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -828,4 +828,4 @@ name: "Prepare Minor Release PR from Weekly" permissions: contents: "write" id-token: "write" - pull-requests: "write" \ No newline at end of file + pull-requests: "write" diff --git a/.github/workflows/patch-release-pr.yml b/.github/workflows/patch-release-pr.yml index 0a62fa1c9228..800f9afd7106 100644 --- a/.github/workflows/patch-release-pr.yml +++ b/.github/workflows/patch-release-pr.yml @@ -2,7 +2,7 @@ concurrency: group: "create-release-pr-${{ github.sha }}" env: BUILD_ARTIFACTS_BUCKET: "loki-build-artifacts" - BUILD_TIMEOUT: 40 + BUILD_TIMEOUT: 60 CHANGELOG_PATH: "CHANGELOG.md" DOCKER_USERNAME: "grafana" DRY_RUN: false @@ -16,8 +16,8 @@ jobs: check: uses: "grafana/loki-release/.github/workflows/check.yml@main" with: - build_image: "grafana/loki-build-image:0.33.6" - golang_ci_lint_version: "v1.55.1" + build_image: "grafana/loki-build-image:0.34.0" + golang_ci_lint_version: "v1.60.3" release_lib_ref: "main" skip_validation: false use_github_app_token: true @@ -143,7 +143,7 @@ jobs: --env SKIP_ARM \ --volume .:/src/loki \ --workdir /src/loki \ - --entrypoint /bin/sh "grafana/loki-build-image:0.33.6" + --entrypoint /bin/sh "grafana/loki-build-image:0.34.0" git config --global --add safe.directory /src/loki echo "${NFPM_SIGNING_KEY}" > $NFPM_SIGNING_KEY_FILE make dist packages @@ -198,7 +198,7 @@ jobs: if: "${{ 
fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -260,7 +260,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -322,7 +322,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -386,7 +386,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -448,7 +448,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -512,7 +512,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -576,7 +576,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -640,7 +640,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -704,7 +704,7 @@ jobs: if: "${{ fromJSON(needs.version.outputs.pr_created) }}" name: "Build and export" timeout-minutes: "${{ fromJSON(env.BUILD_TIMEOUT) }}" - uses: "docker/build-push-action@v5" + uses: "docker/build-push-action@v6" with: build-args: "IMAGE_TAG=${{ needs.version.outputs.version }}" context: "release" @@ -828,4 +828,4 @@ name: "Prepare Patch Release PR" permissions: contents: "write" id-token: "write" - pull-requests: "write" \ No newline at end of file + pull-requests: "write" diff --git a/.github/workflows/promtail-windows-test.yml b/.github/workflows/promtail-windows-test.yml index cb47ae283161..90ccf72ad1be 100644 --- a/.github/workflows/promtail-windows-test.yml +++ b/.github/workflows/promtail-windows-test.yml @@ -10,7 +10,7 @@ jobs: runs-on: windows-latest strategy: matrix: - go-version: [ '1.21.9', '1.22.2' ] + go-version: [ '1.22.2', '1.23.1' ] steps: - uses: actions/checkout@v4 - name: Setup Go ${{ matrix.go-version }} @@ -21,4 +21,4 @@ jobs: - name: Display Go 
version run: go version - name: Run promtail tests - run: go test .\clients\pkg\promtail\targets\windows\... -v \ No newline at end of file + run: go test .\clients\pkg\promtail\targets\windows\... -v diff --git a/.github/workflows/publish-technical-documentation-next.yml b/.github/workflows/publish-technical-documentation-next.yml index b4cf557cc75c..7ff4e49fbc47 100644 --- a/.github/workflows/publish-technical-documentation-next.yml +++ b/.github/workflows/publish-technical-documentation-next.yml @@ -1,37 +1,21 @@ -name: "publish-technical-documentation-next" +name: publish-technical-documentation-next on: push: branches: - - "main" + - main paths: - "docs/sources/**" workflow_dispatch: jobs: sync: - runs-on: "ubuntu-latest" + if: github.repository == 'grafana/loki' + permissions: + contents: read + id-token: write + runs-on: ubuntu-latest steps: - - name: "Check out code" - uses: "actions/checkout@v4" - - - name: "Clone website-sync Action" - # WEBSITE_SYNC_TOKEN is a fine-grained GitHub Personal Access Token that expires. - # It must be regenerated in the grafanabot GitHub account and requires a Grafana organization - # GitHub administrator to update the organization secret. - # The IT helpdesk can update the organization secret. - run: "git clone --single-branch --no-tags --depth 1 -b master https://grafanabot:${{ secrets.WEBSITE_SYNC_TOKEN }}@github.com/grafana/website-sync ./.github/actions/website-sync" - - - name: "Publish to website repository (next)" - uses: "./.github/actions/website-sync" - id: "publish-next" + - uses: actions/checkout@v4 + - uses: grafana/writers-toolkit/publish-technical-documentation@publish-technical-documentation/v1 with: - repository: "grafana/website" - branch: "master" - host: "github.com" - # PUBLISH_TO_WEBSITE_TOKEN is a fine-grained GitHub Personal Access Token that expires. - # It must be regenerated in the grafanabot GitHub account and requires a Grafana organization - # GitHub administrator to update the organization secret. - # The IT helpdesk can update the organization secret. 
- github_pat: "grafanabot:${{ secrets.PUBLISH_TO_WEBSITE_TOKEN }}" - source_folder: "docs/sources" - target_folder: "content/docs/loki/next" + website_directory: content/docs/loki/next diff --git a/.github/workflows/publish-technical-documentation-release.yml b/.github/workflows/publish-technical-documentation-release.yml index d8f17f4d457f..f948468a5d81 100644 --- a/.github/workflows/publish-technical-documentation-release.yml +++ b/.github/workflows/publish-technical-documentation-release.yml @@ -1,4 +1,4 @@ -name: "publish-technical-documentation-release" +name: publish-technical-documentation-release on: push: @@ -11,57 +11,18 @@ on: workflow_dispatch: jobs: sync: - runs-on: "ubuntu-latest" + if: github.repository == 'grafana/loki' + permissions: + contents: read + id-token: write + runs-on: ubuntu-latest steps: - - name: "Checkout code and tags" - uses: "actions/checkout@v4" + - uses: actions/checkout@v4 with: fetch-depth: 0 - - - name: "Checkout Actions library" - uses: "actions/checkout@v4" + - uses: grafana/writers-toolkit/publish-technical-documentation-release@publish-technical-documentation-release/v1 with: - repository: "grafana/grafana-github-actions" - path: "./actions" - - - name: "Install Actions from library" - run: "npm install --production --prefix ./actions" - - - name: "Determine if there is a matching release tag" - id: "has-matching-release-tag" - uses: "./actions/has-matching-release-tag" - with: - ref_name: "${{ github.ref_name }}" release_tag_regexp: "^v(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)$" release_branch_regexp: "^release-(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.x$" - - - name: "Determine technical documentation version" - if: "steps.has-matching-release-tag.outputs.bool == 'true'" - uses: "./actions/docs-target" - id: "target" - with: - ref_name: "${{ github.ref_name }}" - - - name: "Clone website-sync Action" - if: "steps.has-matching-release-tag.outputs.bool == 'true'" - # WEBSITE_SYNC_TOKEN is a fine-grained GitHub Personal Access Token that expires. - # It must be regenerated in the grafanabot GitHub account and requires a Grafana organization - # GitHub administrator to update the organization secret. - # The IT helpdesk can update the organization secret. - run: "git clone --single-branch --no-tags --depth 1 -b master https://grafanabot:${{ secrets.WEBSITE_SYNC_TOKEN }}@github.com/grafana/website-sync ./.github/actions/website-sync" - - - name: "Publish to website repository (release)" - if: "steps.has-matching-release-tag.outputs.bool == 'true'" - uses: "./.github/actions/website-sync" - id: "publish-release" - with: - repository: "grafana/website" - branch: "master" - host: "github.com" - # PUBLISH_TO_WEBSITE_TOKEN is a fine-grained GitHub Personal Access Token that expires. - # It must be regenerated in the grafanabot GitHub account and requires a Grafana organization - # GitHub administrator to update the organization secret. - # The IT helpdesk can update the organization secret. 
- github_pat: "grafanabot:${{ secrets.PUBLISH_TO_WEBSITE_TOKEN }}" - source_folder: "docs/sources" - target_folder: "content/docs/loki/${{ steps.target.outputs.target }}.x" + release_branch_with_patch_regexp: "^release-(0|[1-9]\\d*)\\.(0|[1-9]\\d*)\\.(0|[1-9]\\d*)$" + website_directory: content/docs/loki diff --git a/.github/workflows/release.yml b/.github/workflows/release.yml index 9a87b3791775..1628a0b57dfb 100644 --- a/.github/workflows/release.yml +++ b/.github/workflows/release.yml @@ -6,7 +6,7 @@ env: PUBLISH_TO_GCS: false RELEASE_LIB_REF: "main" RELEASE_REPO: "grafana/loki" - USE_GITHUB_APP_TOKEN: false + USE_GITHUB_APP_TOKEN: true jobs: createRelease: if: "${{ fromJSON(needs.shouldRelease.outputs.shouldRelease) }}" diff --git a/.github/workflows/verify-release-workflow.yaml b/.github/workflows/verify-release-workflow.yaml index 818269c6f0ea..dbd693897eb0 100644 --- a/.github/workflows/verify-release-workflow.yaml +++ b/.github/workflows/verify-release-workflow.yaml @@ -8,11 +8,11 @@ jobs: - name: setup go uses: actions/setup-go@v5 with: - go-version: '1.22.2' + go-version: '1.23.1' - name: setup jsonnet run: | go install github.com/google/go-jsonnet/cmd/jsonnet@v0.20.0 go install github.com/jsonnet-bundler/jsonnet-bundler/cmd/jb@v0.5.1 - name: Check that the release workflows have been updated properly run: | - make BUILD_IN_CONTAINER=false release-workflows-check \ No newline at end of file + make BUILD_IN_CONTAINER=false release-workflows-check diff --git a/.golangci.yml b/.golangci.yml index e6475895ad94..ae10a6ba210b 100644 --- a/.golangci.yml +++ b/.golangci.yml @@ -4,13 +4,13 @@ # options for analysis running run: # define go version - go: "1.20" + go: "1.23" # default concurrency is a available CPU number concurrency: 16 # timeout for analysis, e.g. 30s, 5m, default is 1m - timeout: 5m + timeout: 10m # exit code when at least one issue was found, default is 1 issues-exit-code: 1 @@ -24,28 +24,12 @@ run: - cgo - promtail_journal_enabled - integration - - # which dirs to skip: they won't be analyzed; - # can use regexp here: generated.*, regexp is applied on full path; - # default value is empty list, but next dirs are always skipped independently - # from this option's value: - # vendor$, third_party$, testdata$, examples$, Godeps$, builtin$ - skip-dirs: - - win_eventlog$ - - operator - # which files to skip: they will be analyzed, but issues from them - # won't be reported. Default value is empty list, but there is - # no need to include all autogenerated files, we confidently recognize - # autogenerated files. If it's not please let us know. - skip-files: - - .*.pb.go - - .*.y.go - - .*.rl.go - - .*.deepcopy.go + # output configuration options output: - # colored-line-number|line-number|json|tab|checkstyle, default is "colored-line-number" - format: colored-line-number + formats: + # colored-line-number|line-number|json|tab|checkstyle, default is "colored-line-number" + - format: colored-line-number # print lines of code with issue, default is true print-issued-lines: true @@ -101,3 +85,20 @@ issues: linters: - goconst fix: true + # which dirs to skip: they won't be analyzed; + # can use regexp here: generated.*, regexp is applied on full path; + # default value is empty list, but next dirs are always skipped independently + # from this option's value: + # vendor$, third_party$, testdata$, examples$, Godeps$, builtin$ + exclude-dirs: + - win_eventlog$ + - operator + # which files to skip: they will be analyzed, but issues from them + # won't be reported. 
Default value is empty list, but there is + # no need to include all autogenerated files, we confidently recognize + # autogenerated files. If it's not please let us know. + exclude-files: + - .*.pb.go + - .*.y.go + - .*.rl.go + - .*.deepcopy.go diff --git a/.release-please-manifest.json b/.release-please-manifest.json index 8517b670b8b4..96501106b6f6 100644 --- a/.release-please-manifest.json +++ b/.release-please-manifest.json @@ -1,4 +1,4 @@ { ".": "3.1.1", - "operator": "0.6.1" + "operator": "0.6.2" } diff --git a/Makefile b/Makefile index cd1eb0217486..b75b4f5d5ced 100644 --- a/Makefile +++ b/Makefile @@ -12,7 +12,7 @@ help: .PHONY: fluentd-image, fluentd-push, fluentd-test .PHONY: push-images push-latest save-images load-images promtail-image loki-image build-image build-image-push .PHONY: bigtable-backup, push-bigtable-backup -.PHONY: benchmark-store, drone, check-drone-drift, check-mod +.PHONY: benchmark-store, check-mod .PHONY: migrate migrate-image lint-markdown ragel .PHONY: doc check-doc .PHONY: validate-example-configs generate-example-config-doc check-example-config-doc @@ -36,9 +36,9 @@ DOCKER_IMAGE_DIRS := $(patsubst %/Dockerfile,%,$(DOCKERFILES)) # or you can override this with an environment variable BUILD_IN_CONTAINER ?= true -# ensure you run `make drone` and `make release-workflows` after changing this -BUILD_IMAGE_VERSION ?= 0.33.6 -GO_VERSION := 1.22.6 +# ensure you run `make release-workflows` after changing this +BUILD_IMAGE_VERSION ?= 0.34.0 +GO_VERSION := 1.23.1 # Docker image info IMAGE_PREFIX ?= grafana @@ -699,27 +699,6 @@ benchmark-store: go run ./pkg/storage/hack/main.go $(GOTEST) ./pkg/storage/ -bench=. -benchmem -memprofile memprofile.out -cpuprofile cpuprofile.out -trace trace.out -# regenerate drone yaml -drone: -ifeq ($(BUILD_IN_CONTAINER),true) - @mkdir -p $(shell pwd)/.pkg - @mkdir -p $(shell pwd)/.cache - $(SUDO) docker run $(RM) $(TTY) -i \ - -e DRONE_SERVER -e DRONE_TOKEN \ - -v $(shell pwd)/.cache:/go/cache$(MOUNT_FLAGS) \ - -v $(shell pwd)/.pkg:/go/pkg$(MOUNT_FLAGS) \ - -v $(shell pwd):/src/loki$(MOUNT_FLAGS) \ - $(IMAGE_PREFIX)/loki-build-image:$(BUILD_IMAGE_VERSION) $@; -else - drone jsonnet --stream --format -V __build-image-version=$(BUILD_IMAGE_VERSION) --source .drone/drone.jsonnet --target .drone/drone.yml - drone lint .drone/drone.yml --trusted - drone sign --save grafana/loki .drone/drone.yml || echo "You must set DRONE_SERVER and DRONE_TOKEN. These values can be found on your [drone account](http://drone.grafana.net/account) page." 
-endif - -check-drone-drift: - ./tools/check-drone-drift.sh $(BUILD_IMAGE_VERSION) - - # support go modules check-mod: ifeq ($(BUILD_IN_CONTAINER),true) @@ -915,5 +894,5 @@ ifeq ($(BUILD_IN_CONTAINER),true) else @$(MAKE) release-workflows @echo "Checking diff" - @git diff --exit-code -- ".github/workflows/*release*" || (echo "Please build release workflows by running 'make release-workflows'" && false) + @git diff --exit-code --ignore-space-at-eol -- ".github/workflows/*release*" || (echo "Please build release workflows by running 'make release-workflows'" && false) endif diff --git a/clients/cmd/docker-driver/Dockerfile b/clients/cmd/docker-driver/Dockerfile index 9de291514c9a..672556240f19 100644 --- a/clients/cmd/docker-driver/Dockerfile +++ b/clients/cmd/docker-driver/Dockerfile @@ -1,4 +1,4 @@ -ARG BUILD_IMAGE=grafana/loki-build-image:0.33.6 +ARG BUILD_IMAGE=grafana/loki-build-image:0.34.0 # Directories in this file are referenced from the root of the project not this folder # This file is intended to be called from the root like so: # docker build -t grafana/loki -f cmd/loki/Dockerfile . @@ -9,7 +9,7 @@ COPY . /src/loki WORKDIR /src/loki RUN make clean && make BUILD_IN_CONTAINER=false clients/cmd/docker-driver/docker-driver -FROM alpine:3.20.2 +FROM alpine:3.20.3 RUN apk add --update --no-cache ca-certificates tzdata COPY --from=build /src/loki/clients/cmd/docker-driver/docker-driver /bin/docker-driver WORKDIR /bin/ diff --git a/clients/cmd/fluent-bit/Dockerfile b/clients/cmd/fluent-bit/Dockerfile index aadd28ce83b6..ae361b864429 100644 --- a/clients/cmd/fluent-bit/Dockerfile +++ b/clients/cmd/fluent-bit/Dockerfile @@ -1,4 +1,4 @@ -FROM golang:1.22.6-bullseye AS builder +FROM golang:1.23-bullseye AS builder COPY . /src diff --git a/clients/cmd/promtail/Dockerfile b/clients/cmd/promtail/Dockerfile index 3c9088bb83ba..58e05719ac1c 100644 --- a/clients/cmd/promtail/Dockerfile +++ b/clients/cmd/promtail/Dockerfile @@ -1,4 +1,4 @@ -ARG GO_VERSION=1.22 +ARG GO_VERSION=1.23 FROM golang:${GO_VERSION}-bookworm as build COPY . /src/loki diff --git a/clients/cmd/promtail/Dockerfile.arm32 b/clients/cmd/promtail/Dockerfile.arm32 index b9f4a26dc8f0..cf517308c531 100644 --- a/clients/cmd/promtail/Dockerfile.arm32 +++ b/clients/cmd/promtail/Dockerfile.arm32 @@ -1,4 +1,4 @@ -FROM golang:1.22.6-bookworm as build +FROM golang:1.23-bookworm as build COPY . /src/loki WORKDIR /src/loki diff --git a/clients/cmd/promtail/Dockerfile.cross b/clients/cmd/promtail/Dockerfile.cross index 5bf89e71fa16..8459b7affb58 100644 --- a/clients/cmd/promtail/Dockerfile.cross +++ b/clients/cmd/promtail/Dockerfile.cross @@ -1,5 +1,5 @@ -ARG BUILD_IMAGE=grafana/loki-build-image:0.33.6 -ARG GO_VERSION=1.22 +ARG BUILD_IMAGE=grafana/loki-build-image:0.34.0 +ARG GO_VERSION=1.23 # Directories in this file are referenced from the root of the project not this folder # This file is intended to be called from the root like so: # docker build -t grafana/promtail -f clients/cmd/promtail/Dockerfile . diff --git a/clients/cmd/promtail/Dockerfile.debug b/clients/cmd/promtail/Dockerfile.debug index 24b6060241f4..2d48eb77a035 100644 --- a/clients/cmd/promtail/Dockerfile.debug +++ b/clients/cmd/promtail/Dockerfile.debug @@ -2,14 +2,14 @@ # This file is intended to be called from the root like so: # docker build -t grafana/promtail -f clients/cmd/promtail/Dockerfile.debug . -FROM grafana/loki-build-image:0.33.6 as build +FROM grafana/loki-build-image:0.34.0 AS build ARG GOARCH="amd64" COPY . 
/src/loki WORKDIR /src/loki RUN make clean && make BUILD_IN_CONTAINER=false PROMTAIL_JOURNAL_ENABLED=true promtail-debug -FROM alpine:3.20.2 +FROM alpine:3.20.3 RUN apk add --update --no-cache ca-certificates tzdata COPY --from=build /src/loki/clients/cmd/promtail/promtail-debug /usr/bin/promtail-debug COPY --from=build /usr/bin/dlv /usr/bin/dlv diff --git a/clients/pkg/promtail/config/config.go b/clients/pkg/promtail/config/config.go index 615b8e9abaad..0454a8facf49 100644 --- a/clients/pkg/promtail/config/config.go +++ b/clients/pkg/promtail/config/config.go @@ -40,6 +40,28 @@ type Config struct { WAL wal.Config `yaml:"wal"` } +// UnmarshalYAML implements the yaml.Unmarshaler interface. +func (c *Config) UnmarshalYAML(unmarshal func(interface{}) error) error { + *c = Config{} + // We want to set c to the defaults and then overwrite it with the input. + // To make unmarshal fill the plain data struct rather than calling UnmarshalYAML + // again, we have to hide it using a type indirection. + type plain Config + if err := unmarshal((*plain)(c)); err != nil { + return err + } + + // Validate unique names. + jobNames := map[string]struct{}{} + for _, j := range c.ScrapeConfig { + if _, ok := jobNames[j.JobName]; ok { + return fmt.Errorf("found multiple scrape configs with job name %q", j.JobName) + } + jobNames[j.JobName] = struct{}{} + } + return nil +} + // RegisterFlags with prefix registers flags where every name is prefixed by // prefix. If prefix is a non-empty string, prefix should end with a period. func (c *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { diff --git a/clients/pkg/promtail/config/config_test.go b/clients/pkg/promtail/config/config_test.go index 32bab70501e3..a812dd984abc 100644 --- a/clients/pkg/promtail/config/config_test.go +++ b/clients/pkg/promtail/config/config_test.go @@ -47,12 +47,47 @@ clients: name: value ` +const testDuplicateJobsName = ` +clients: + - external_labels: + cluster: dev1 + url: https://1:shh@example.com/loki/api/v1/push + - external_labels: + cluster: prod1 + url: https://1:shh@example.com/loki/api/v1/push +scrape_configs: + - job_name: kubernetes-pods-name + kubernetes_sd_configs: + - role: pod + - job_name: system + static_configs: + - targets: + - localhost + labels: + job: varlogs + - job_name: system + static_configs: + - targets: + - localhost + labels: + job: varlogs2 +limits_config: + readline_rate: 100 + readline_burst: 200 +` + func Test_Load(t *testing.T) { var dst Config err := yaml.Unmarshal([]byte(testFile), &dst) require.Nil(t, err) } +func Test_Load_DuplicateJobsName(t *testing.T) { + var dst Config + err := yaml.Unmarshal([]byte(testDuplicateJobsName), &dst) + require.ErrorContains(t, err, `found multiple scrape configs with job name "system"`) +} + func TestHeadersConfigLoad(t *testing.T) { var dst Config err := yaml.Unmarshal([]byte(headersTestFile), &dst) diff --git a/clients/pkg/promtail/targets/lokipush/pushtarget.go b/clients/pkg/promtail/targets/lokipush/pushtarget.go index 63630c6e5ac2..1ec021c0b28a 100644 --- a/clients/pkg/promtail/targets/lokipush/pushtarget.go +++ b/clients/pkg/promtail/targets/lokipush/pushtarget.go @@ -153,7 +153,8 @@ func (t *PushTarget) handleLoki(w http.ResponseWriter, r *http.Request) { e := api.Entry{ Labels: filtered.Clone(), Entry: logproto.Entry{ - Line: entry.Line, + Line: entry.Line, + StructuredMetadata: entry.StructuredMetadata, }, } if t.config.KeepTimestamp { diff --git a/clients/pkg/promtail/targets/lokipush/pushtarget_test.go 
b/clients/pkg/promtail/targets/lokipush/pushtarget_test.go index 3fe48b599a5e..d94a34eca397 100644 --- a/clients/pkg/promtail/targets/lokipush/pushtarget_test.go +++ b/clients/pkg/promtail/targets/lokipush/pushtarget_test.go @@ -20,6 +20,8 @@ import ( "github.com/prometheus/prometheus/model/relabel" "github.com/stretchr/testify/require" + "github.com/grafana/loki/pkg/push" + "github.com/grafana/loki/v3/clients/pkg/promtail/api" "github.com/grafana/loki/v3/clients/pkg/promtail/client" "github.com/grafana/loki/v3/clients/pkg/promtail/client/fake" @@ -101,6 +103,10 @@ func TestLokiPushTarget(t *testing.T) { Entry: logproto.Entry{ Timestamp: time.Unix(int64(i), 0), Line: "line" + strconv.Itoa(i), + StructuredMetadata: push.LabelsAdapter{ + {Name: "i", Value: strconv.Itoa(i)}, + {Name: "anotherMetaData", Value: "val"}, + }, }, } } @@ -123,6 +129,13 @@ func TestLokiPushTarget(t *testing.T) { // Spot check the first value in the result to make sure relabel rules were applied properly require.Equal(t, expectedLabels, eh.Received()[0].Labels) + expectedStructuredMetadata := push.LabelsAdapter{ + {Name: "i", Value: strconv.Itoa(0)}, + {Name: "anotherMetaData", Value: "val"}, + } + // Spot check the first value in the result to make sure structured metadata was received properly + require.Equal(t, expectedStructuredMetadata, eh.Received()[0].StructuredMetadata) + // With keep timestamp enabled, verify timestamp require.Equal(t, time.Unix(99, 0).Unix(), eh.Received()[99].Timestamp.Unix()) diff --git a/cmd/logcli/Dockerfile b/cmd/logcli/Dockerfile index 999434d075a8..52a66fea9a0c 100644 --- a/cmd/logcli/Dockerfile +++ b/cmd/logcli/Dockerfile @@ -1,4 +1,4 @@ -ARG GO_VERSION=1.22 +ARG GO_VERSION=1.23 FROM golang:${GO_VERSION} as build COPY . /src/loki diff --git a/cmd/logql-analyzer/Dockerfile b/cmd/logql-analyzer/Dockerfile index 53ba7bee94b4..6cfb7ad795e4 100644 --- a/cmd/logql-analyzer/Dockerfile +++ b/cmd/logql-analyzer/Dockerfile @@ -1,4 +1,4 @@ -ARG GO_VERSION=1.22 +ARG GO_VERSION=1.23 FROM golang:${GO_VERSION} as build COPY . /src/loki diff --git a/cmd/loki-canary-boringcrypto/Dockerfile b/cmd/loki-canary-boringcrypto/Dockerfile index e69be2c0aeb6..48a10e92814b 100644 --- a/cmd/loki-canary-boringcrypto/Dockerfile +++ b/cmd/loki-canary-boringcrypto/Dockerfile @@ -1,4 +1,4 @@ -ARG GO_VERSION=1.22 +ARG GO_VERSION=1.23 FROM golang:${GO_VERSION} as build COPY . /src/loki diff --git a/cmd/loki-canary/Dockerfile b/cmd/loki-canary/Dockerfile index f0dcf02d5d81..2833d60590ca 100644 --- a/cmd/loki-canary/Dockerfile +++ b/cmd/loki-canary/Dockerfile @@ -1,4 +1,4 @@ -ARG GO_VERSION=1.22 +ARG GO_VERSION=1.23 FROM golang:${GO_VERSION} as build COPY . /src/loki diff --git a/cmd/loki-canary/Dockerfile.cross b/cmd/loki-canary/Dockerfile.cross index 078cb62a3726..deaafce0ddba 100644 --- a/cmd/loki-canary/Dockerfile.cross +++ b/cmd/loki-canary/Dockerfile.cross @@ -1,5 +1,5 @@ -ARG BUILD_IMAGE=grafana/loki-build-image:0.33.6 -ARG GO_VERSION=1.22 +ARG BUILD_IMAGE=grafana/loki-build-image:0.34.0 +ARG GO_VERSION=1.23 # Directories in this file are referenced from the root of the project not this folder # This file is intended to be called from the root like so: # docker build -t grafana/promtail -f cmd/promtail/Dockerfile . diff --git a/cmd/loki/Dockerfile b/cmd/loki/Dockerfile index 521c59159573..521a897d645d 100644 --- a/cmd/loki/Dockerfile +++ b/cmd/loki/Dockerfile @@ -1,4 +1,4 @@ -ARG GO_VERSION=1.22 +ARG GO_VERSION=1.23 FROM golang:${GO_VERSION} as build COPY . 
/src/loki diff --git a/cmd/loki/Dockerfile.cross b/cmd/loki/Dockerfile.cross index da9d358d28c5..97fba445ef2f 100644 --- a/cmd/loki/Dockerfile.cross +++ b/cmd/loki/Dockerfile.cross @@ -1,4 +1,4 @@ -ARG GO_VERSION=1.22 +ARG GO_VERSION=1.23 # Directories in this file are referenced from the root of the project not this folder # This file is intended to be called from the root like so: # docker build -t grafana/loki -f cmd/loki/Dockerfile . diff --git a/cmd/loki/Dockerfile.debug b/cmd/loki/Dockerfile.debug index d2a3d7c4dbbe..30edf6416ec3 100644 --- a/cmd/loki/Dockerfile.debug +++ b/cmd/loki/Dockerfile.debug @@ -1,5 +1,5 @@ -ARG BUILD_IMAGE=grafana/loki-build-image:0.33.6 -ARG GO_VERSION=1.22 +ARG BUILD_IMAGE=grafana/loki-build-image:0.34.0 +ARG GO_VERSION=1.23 # Directories in this file are referenced from the root of the project not this folder # This file is intended to be called from the root like so: # docker build -t grafana/loki -f cmd/loki/Dockerfile.debug . diff --git a/cmd/loki/loki-local-config.yaml b/cmd/loki/loki-local-config.yaml index 38efa3f6bf6e..c593b14a252c 100644 --- a/cmd/loki/loki-local-config.yaml +++ b/cmd/loki/loki-local-config.yaml @@ -18,9 +18,6 @@ common: kvstore: store: inmemory -ingester_rf1: - enabled: false - query_range: results_cache: cache: diff --git a/cmd/migrate/Dockerfile b/cmd/migrate/Dockerfile index a24697a719f2..82a78a4782d1 100644 --- a/cmd/migrate/Dockerfile +++ b/cmd/migrate/Dockerfile @@ -1,4 +1,4 @@ -ARG GO_VERSION=1.22 +ARG GO_VERSION=1.23 FROM golang:${GO_VERSION} as build COPY . /src/loki WORKDIR /src/loki diff --git a/cmd/querytee/Dockerfile b/cmd/querytee/Dockerfile index ea86fe0249ee..f2403d8df0e7 100644 --- a/cmd/querytee/Dockerfile +++ b/cmd/querytee/Dockerfile @@ -1,4 +1,4 @@ -ARG GO_VERSION=1.22 +ARG GO_VERSION=1.23 FROM golang:${GO_VERSION} as build COPY . /src/loki diff --git a/cmd/querytee/Dockerfile.cross b/cmd/querytee/Dockerfile.cross index 478f69a67e3b..83795cd3dc28 100644 --- a/cmd/querytee/Dockerfile.cross +++ b/cmd/querytee/Dockerfile.cross @@ -1,8 +1,8 @@ -ARG BUILD_IMAGE=grafana/loki-build-image:0.33.6 +ARG BUILD_IMAGE=grafana/loki-build-image:0.34.0 # Directories in this file are referenced from the root of the project not this folder # This file is intended to be called from the root like so: # docker build -t grafana/promtail -f cmd/promtail/Dockerfile . -ARG GO_VERSION=1.22 +ARG GO_VERSION=1.23 FROM golang:${GO_VERSION} as goenv RUN go env GOARCH > /goarch && \ go env GOARM > /goarm diff --git a/docs/sources/configure/storage.md b/docs/sources/configure/storage.md index 27466dbc6e50..0b98cfc18d84 100644 --- a/docs/sources/configure/storage.md +++ b/docs/sources/configure/storage.md @@ -85,9 +85,9 @@ You may use any substitutable services, such as those that implement the S3 API Cassandra is a popular database and one of the possible chunk stores for Loki and is production safe. -{{< collapse title="Title of hidden content" >}} +{{< admonition type="note" >}} This storage type for chunks is deprecated and may be removed in future major versions of Loki. -{{< /collapse >}} +{{< /admonition >}} ## Index storage @@ -95,25 +95,25 @@ This storage type for chunks is deprecated and may be removed in future major ve Cassandra can also be utilized for the index store and aside from the [boltdb-shipper](https://grafana.com/docs/loki//operations/storage/boltdb-shipper/), it's the only non-cloud offering that can be used for the index that's horizontally scalable and has configurable replication. 
It's a good candidate when you already run Cassandra, are running on-prem, or do not wish to use a managed cloud offering. -{{< collapse title="Title of hidden content" >}} +{{< admonition type="note" >}} This storage type for indexes is deprecated and may be removed in future major versions of Loki. -{{< /collapse >}} +{{< /admonition >}} ### BigTable (deprecated) Bigtable is a cloud database offered by Google. It is a good candidate for a managed index store if you're already using it (due to its heavy fixed costs) or wish to run in GCP. -{{< collapse title="Title of hidden content" >}} +{{< admonition type="note" >}} This storage type for indexes is deprecated and may be removed in future major versions of Loki. -{{< /collapse >}} +{{< /admonition >}} ### DynamoDB (deprecated) DynamoDB is a cloud database offered by AWS. It is a good candidate for a managed index store, especially if you're already running in AWS. -{{< collapse title="Title of hidden content" >}} +{{< admonition type="note" >}} This storage type for indexes is deprecated and may be removed in future major versions of Loki. -{{< /collapse >}} +{{< /admonition >}} #### Rate limiting @@ -123,9 +123,9 @@ DynamoDB is susceptible to rate limiting, particularly due to overconsuming what BoltDB is an embedded database on disk. It is not replicated and thus cannot be used for high availability or clustered Loki deployments, but is commonly paired with a `filesystem` chunk store for proof of concept deployments, trying out Loki, and development. The [boltdb-shipper](https://grafana.com/docs/loki//operations/storage/boltdb-shipper/) aims to support clustered deployments using `boltdb` as an index. -{{< collapse title="Title of hidden content" >}} +{{< admonition type="note" >}} This storage type for indexes is deprecated and may be removed in future major versions of Loki. -{{< /collapse >}} +{{< /admonition >}} ## Schema Config @@ -440,9 +440,9 @@ storage_config: ### On premise deployment (Cassandra+Cassandra) -{{< collapse title="Title of hidden content" >}} +{{< admonition type="note" >}} Cassandra as storage backend for chunks and indexes is deprecated. -{{< /collapse >}} +{{< /admonition >}} **Keeping this for posterity, but this is likely not a common config. Cassandra should work and could be faster in some situations but is likely much more expensive.** diff --git a/docs/sources/get-started/components.md b/docs/sources/get-started/components.md index 641c950ad748..9b6c599782ff 100644 --- a/docs/sources/get-started/components.md +++ b/docs/sources/get-started/components.md @@ -8,6 +8,8 @@ aliases: --- # Loki components +{{< youtube id="_hv4i84Z68s" >}} + Loki is a modular system that contains many components that can either be run together (in "single binary" mode with target `all`), in logical groups (in "simple scalable deployment" mode with targets `read`, `write`, `backend`), or individually (in "microservice" mode). For more information see [Deployment modes]({{< relref "./deployment-modes" >}}). 
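To make the target groupings above concrete, here is a minimal sketch of how the same Loki binary might be started in each mode; the config file path is illustrative:

```bash
# Monolithic mode: run every component in a single process.
loki -config.file=./loki-config.yaml -target=all

# Simple scalable deployment: run the same binary as logical groups.
loki -config.file=./loki-config.yaml -target=write
loki -config.file=./loki-config.yaml -target=read
loki -config.file=./loki-config.yaml -target=backend
```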
@@ -22,7 +24,8 @@ For more information see [Deployment modes]({{< relref "./deployment-modes" >}}) | [Index Gateway](#index-gateway) | x | | | | x | | [Compactor](#compactor) | x | x | | | x | | [Ruler](#ruler) | x | x | | | x | -| [Bloom Compactor (Experimental)](#bloom-compactor) | x | | | | x | +| [Bloom Planner (Experimental)](#bloom-planner) | x | | | | x | +| [Bloom Builder (Experimental)](#bloom-builder) | x | | | | x | | [Bloom Gateway (Experimental)](#bloom-gateway) | x | | | | x | This page describes the responsibilities of each of these components. @@ -56,9 +59,9 @@ Currently the only way the distributor mutates incoming data is by normalizing l The distributor can also rate limit incoming logs based on the maximum data ingest rate per tenant. It does this by checking a per-tenant limit and dividing it by the current number of distributors. This allows the rate limit to be specified per tenant at the cluster level and enables us to scale the distributors up or down and have the per-distributor limit adjust accordingly. For instance, say we have 10 distributors and tenant A has a 10MB rate limit. Each distributor will allow up to 1MB/s before limiting. Now, say another large tenant joins the cluster and we need to spin up 10 more distributors. The now 20 distributors will adjust their rate limits for tenant A to `(10MB / 20 distributors) = 500KB/s`. This is how global limits allow much simpler and safer operation of the Loki cluster. -{{% admonition type="note" %}} +{{< admonition type="note" >}} The distributor uses the `ring` component under the hood to register itself amongst its peers and get the total number of active distributors. This is a different "key" than the ingesters use in the ring and comes from the distributor's own [ring configuration](https://grafana.com/docs/loki//configure/#distributor). -{{% /admonition %}} +{{< /admonition >}} ### Forwarding @@ -68,9 +71,9 @@ Once the distributor has performed all of its validation duties, it forwards dat In order to mitigate the chance of _losing_ data on any single ingester, the distributor will forward writes to a _replication factor_ of them. Generally, the replication factor is `3`. Replication allows for ingester restarts and rollouts without failing writes and adds additional protection from data loss for some scenarios. Loosely, for each label set (called a _stream_) that is pushed to a distributor, it will hash the labels and use the resulting value to look up `replication_factor` ingesters in the `ring` (which is a subcomponent that exposes a [distributed hash table](https://en.wikipedia.org/wiki/Distributed_hash_table)). It will then try to write the same data to all of them. This will generate an error if less than a _quorum_ of writes succeed. A quorum is defined as `floor( replication_factor / 2 ) + 1`. So, for our `replication_factor` of `3`, we require that two writes succeed. If less than two writes succeed, the distributor returns an error and the write operation will be retried. -{{% admonition type="caution" %}} +{{< admonition type="caution" >}} If a write is acknowledged by 2 out of 3 ingesters, we can tolerate the loss of one ingester but not two, as this would result in data loss. -{{% /admonition %}} +{{< /admonition >}} The replication factor is not the only thing that prevents data loss, though, and its main purpose is to allow writes to continue uninterrupted during rollouts and restarts. 
The [ingester component](#ingester) now includes a [write ahead log](https://en.wikipedia.org/wiki/Write-ahead_logging) (WAL) which persists incoming writes to disk to ensure they are not lost as long as the disk isn't corrupted. The complementary nature of replication factor and WAL ensures data isn't lost unless there are significant failures in both mechanisms (that is, multiple ingesters die and lose/corrupt their disks). @@ -129,9 +132,9 @@ the hash ring. Each ingester has a state of either `PENDING`, `JOINING`, 1. `PENDING` is an Ingester's state when it is waiting for a [handoff](#handoff) from another ingester that is `LEAVING`. This only applies for legacy deployment modes. - {{% admonition type="note" %}} + {{< admonition type="note" >}} Handoff is deprecated behavior mainly used in stateless deployments of ingesters, which is discouraged. Instead, it's recommended using a stateful deployment model together with the [write ahead log]({{< relref "../operations/storage/wal" >}}). - {{% /admonition %}} + {{< /admonition >}} 1. `JOINING` is an Ingester's state when it is currently inserting its tokens into the ring and initializing itself. It may receive write requests for @@ -205,9 +208,9 @@ nanosecond timestamps: ### Handoff -{{% admonition type="warning" %}} +{{< admonition type="warning" >}} Handoff is deprecated behavior mainly used in stateless deployments of ingesters, which is discouraged. Instead, it's recommended using a stateful deployment model together with the [write ahead log]({{< relref "../operations/storage/wal" >}}). -{{% /admonition %}} +{{< /admonition >}} By default, when an ingester is shutting down and tries to leave the hash ring, it will wait to see if a new ingester tries to enter before flushing and will @@ -337,28 +340,41 @@ from the query frontend. When running multiple rulers, they use a consistent hash ring to distribute rule groups amongst available ruler instances. -## Bloom Compactor -{{% admonition type="warning" %}} -This feature is an [experimental feature](/docs/release-life-cycle/). Engineering and on-call support is not available. No SLA is provided. -{{% /admonition %}} +## Bloom Planner +{{< admonition type="warning" >}} +This feature is an [experimental feature](/docs/release-life-cycle/). Engineering and on-call support is not available. +No SLA is provided. +{{< /admonition >}} + +The Bloom Planner service is responsible for planning the tasks for bloom creation. It runs as a singleton and provides a queue +from which tasks are pulled by the Bloom Builders. The planning runs periodically and takes into account what blooms have already +been built for a given day and tenant and what series need to be newly added. + +This service is also used to apply bloom retention. + +## Bloom Builder +{{< admonition type="warning" >}} +This feature is an [experimental feature](/docs/release-life-cycle/). Engineering and on-call support is not available. +No SLA is provided. +{{< /admonition >}} -The Bloom Compactor service is responsible for building blooms for chunks in the object store. +The Bloom Builder service is responsible for processing the tasks created by the Bloom Planner. +The Bloom Builder creates bloom blocks from structured metadata of log entries. The resulting blooms are grouped in bloom blocks spanning multiple series and chunks from a given day. This component also builds metadata files to track which blocks are available for each series and TSDB index file. -The service is horizontally scalable.
When running multiple Bloom Compactors, they use a ring to shard tenants and -distribute series fingerprints among the available Bloom Compactor instances. -The ring is also used to decide which compactor should apply blooms retention. +The service is stateless and horizontally scalable. ## Bloom Gateway -{{% admonition type="warning" %}} -This feature is an [experimental feature](/docs/release-life-cycle/). Engineering and on-call support is not available. No SLA is provided. -{{% /admonition %}} +{{< admonition type="warning" >}} +This feature is an [experimental feature](/docs/release-life-cycle/). Engineering and on-call support is not available. +No SLA is provided. +{{< /admonition >}} The Bloom Gateway service is responsible for handling and serving chunks filtering requests. The index gateway queries the Bloom Gateway when computing chunk references, or when computing shards for a given query. The gateway service takes a list of chunks and a filtering expression and matches them against the blooms, -filtering out any chunks that do not match the given filter expression. +filtering out any chunks that do not match the given label filter expression. -The service is horizontally scalable. When running multiple instances, they use a ring to shard tenants and -distribute series fingerprints across instances. +The service is horizontally scalable. When running multiple instances, the client (Index Gateway) shards requests +across instances based on the hash of the bloom blocks that are referenced. diff --git a/docs/sources/operations/query-acceleration-blooms.md b/docs/sources/operations/query-acceleration-blooms.md index 5eccc45cf202..2fec5f292270 100644 --- a/docs/sources/operations/query-acceleration-blooms.md +++ b/docs/sources/operations/query-acceleration-blooms.md @@ -136,7 +136,7 @@ The sharding of the data is performed on the client side using DNS discovery of and the [jumphash](https://arxiv.org/abs/1406.2294) algorithm for consistent hashing and even distribution of the stream fingerprints across Bloom Gateway instances. -You can find all the configuration options for this component in the Configure section for the [Bloom Gateways][gateway-cfg]. +You can find all the configuration options for this component in the Configure section for the [Bloom Gateways][bloom-gateway-cfg]. Refer to the [Enable Query Acceleration with Blooms](#enable-query-acceleration-with-blooms) section below for a configuration snippet enabling this feature. ### Sizing and configuration @@ -152,7 +152,7 @@ Example calculation for storage requirements of blooms for a single tenant. Since reading blooms depends heavily on disk IOPS, Bloom Gateways should make use of multiple, locally attached SSD disks (NVMe) to increase i/o throughput. -Multiple directories on different disk mounts can be specified using the `-bloom.shipper.working-directory` [setting][gateway-cfg] +Multiple directories on different disk mounts can be specified using the `-bloom.shipper.working-directory` [setting][storage-config-cfg] when using a comma separated list of mount points, for example: ``` -bloom.shipper.working-directory="/mnt/data0,/mnt/data1,/mnt/data2,/mnt/data3" @@ -226,7 +226,7 @@ Loki will check blooms for any log filtering expression within a query that sati ## Query sharding Query acceleration does not just happen while processing chunks, but also happens from the query planning phase where the query frontend applies [query sharding](https://lokidex.com/posts/tsdb/#sharding). 
-Loki 3.0 introduces a new {per-tenant configuration][tenant-limits] flag `tsdb_sharding_strategy` which defaults to computing +Loki 3.0 introduces a new [per-tenant configuration][tenant-limits] flag `tsdb_sharding_strategy` which defaults to computing shards as in previous versions of Loki by using the index stats to come up with the closest power of two that would optimistically divide the data to process in shards of roughly the same size. Unfortunately, the amount of data each stream has is often unbalanced with the rest, @@ -239,5 +239,6 @@ as well as evenly distributes the amount of chunks each sharded query will need [tenant-limits]: https://grafana.com/docs/loki//configure/#limits_config [bloom-gateway-cfg]: https://grafana.com/docs/loki//configure/#bloom_gateway [bloom-build-cfg]: https://grafana.com/docs/loki//configure/#bloom_build +[storage-config-cfg]: https://grafana.com/docs/loki//configure/#storage_config [microservices]: https://grafana.com/docs/loki//get-started/deployment-modes/#microservices-mode [ssd]: https://grafana.com/docs/loki//get-started/deployment-modes/#simple-scalable diff --git a/docs/sources/query/bp-query.md b/docs/sources/query/bp-query.md new file mode 100644 index 000000000000..819fdc0a76b0 --- /dev/null +++ b/docs/sources/query/bp-query.md @@ -0,0 +1,80 @@ +--- +title: Query best practices +menuTitle: Query best practices +description: Describes best practices for querying in Grafana Loki. +aliases: +- ../bp-query +weight: 700 +--- +# Query best practices + +The way you write queries in Loki affects how quickly you get results returned from those queries. Understanding the way Loki parses queries can help you write queries that are efficient and performant. + +{{< admonition type="tip" >}} +Before you start optimizing queries, read the [labels best practices](https://grafana.com/docs/loki//get-started/labels/bp-labels/) page to understand what makes a good label. Choosing the right labels is the first step towards writing efficient queries. +{{< /admonition >}} + +Loki evaluates a LogQL query from left to right, in the order that it is written. To get the best possible query performance, eliminate as many potential results as you can earlier in the query and then continue to progressively narrow your search as you continue writing the query. This page describes the recommended order for writing queries that efficiently filter out unwanted results. + +## Narrow down your time range first + +Reduce the number of logs Loki needs to look through by specifying a period of time that you'd like to search through. Loki creates one index file per day, so queries that span multiple days fetch multiple index files. The fewer files Loki has to search, the faster the query results are returned. + +Time ranges are typically not part of the query, but you can set a time range through your visualization tool or through [the Loki API](https://grafana.com/docs/loki//reference/loki-http-api/). + + +### In Grafana + +If you're using Loki with Grafana, you can use the dropdown menu on the upper right hand corner of a dashboard to select a time range, either relative (last X hours) or absolute (a specific date and time).
+ +![Screenshot of time selector on Grafana](../grafana-time-range-picker.png "Grafana time interval selector") + +### Through Loki API + +If you're querying Loki through [the Loki API](https://grafana.com/docs/loki//reference/loki-http-api/), you can use the [`query_range` endpoint](https://grafana.com/docs/loki//reference/loki-http-api/#query-logs-within-a-range-of-time) to add `start` and `end` timestamps for your query as parameters to the HTTP call rather than as part of the query itself. + +```bash +http:///loki/api/v1/query_range?query={job="app"}&start=1633017600000000000&end=1633104000000000000 + +``` + + +## Use precise label selectors + +Next, write your label selectors. Identify the most specific label you can use within the log line and search based on that first. For example, if the logs contain the labels `namespace` and `app_name` and the latter is a smaller subset of data, start your query by selecting based on `app_name`: + +```bash +{app_name="carnivorousgreenhouse"} +``` + +Using the most specific label selector has the added benefit of reducing the length of your query. Since `app_name` is more specific than `namespace`, you don't need to add a selector for `namespace`. Adding more general label selectors has no further effect on the query. + + +## Use simple line filter expressions over regular expressions + +When using [line filter expressions](https://grafana.com/docs/loki//query/log_queries/#line-filter-expression), prefer the simpler filter operators such as: +- `|=` (contains string) and +- `!=` (does not contain string) +over the regular expression filter operators: +- `|~` (matches the regular expression) +- `!~` (does not match the regular expression) + +Loki evaluates the first two filter expressions faster than it can evaluate regular expressions, so always try to rewrite your query in terms of whether a log line contains or does not contain a certain string. Use regular expressions only as a last resort. + +Line filter expressions are more efficient than parser expressions. + +## Avoid using complex text parsers + +Use [parser expressions](https://grafana.com/docs/loki//query/log_queries/#parser-expression) only after line filter expressions. Parser expressions are ways to look through the log line and extract labels in different formats, which can be useful but are also more computationally intensive for Loki than line filter expressions. Using them after line filter expressions means that Loki only needs to evaluate parser expressions for log lines that match the line filter expression, reducing the amount of logs that Loki needs to search through. + +Parser expressions include [JSON](https://grafana.com/docs/loki//query/log_queries/#json), [logfmt](https://grafana.com/docs/loki//query/log_queries/#logfmt), [pattern](https://grafana.com/docs/loki//query/log_queries/#pattern), [regexp](https://grafana.com/docs/loki//query/log_queries/#regular-expression), and [unpack](https://grafana.com/docs/loki//query/log_queries/#unpack) parsers. + +## Use recording rules + +Some queries are sufficiently complex, or some datasets sufficiently large, that there is a limit as to how much query performance can be optimized. If you're following the tips on this page and are still experiencing slow query times, consider creating a [recording rule](https://grafana.com/docs/loki//operations/recording-rules/) for them. A recording rule runs a query at a predetermined time and also precomputes the results of that query, saving those results for faster retrieval later.
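Putting the preceding tips together, a query that narrows by the most specific label first, then applies a cheap line filter, and only then parses might look like the following sketch; the label, the filter string, and the extracted `duration` field are illustrative, not from a real dataset:

```bash
{app_name="carnivorousgreenhouse"} |= "error" | logfmt | duration > 5s
```

The label selector narrows the set of streams, the `|=` filter cheaply discards non-matching lines, and `logfmt` runs only on the lines that survive the filter.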
+ +## Further resources + +- [Watch: 5 tips for improving Grafana Loki query performance](https://grafana.com/blog/2023/01/10/watch-5-tips-for-improving-grafana-loki-query-performance/) +- [Grafana Loki Design Basics with Ed Welch (Grafana Office Hours #27)](https://www.youtube.com/live/3uFMJLufgSo?feature=shared&t=3385) +- [Labels best practices](https://grafana.com/docs/loki//get-started/labels/bp-labels/) \ No newline at end of file diff --git a/docs/sources/query/grafana-time-range-picker.png b/docs/sources/query/grafana-time-range-picker.png new file mode 100644 index 000000000000..f207e5c0ccec Binary files /dev/null and b/docs/sources/query/grafana-time-range-picker.png differ diff --git a/docs/sources/release-notes/_index.md b/docs/sources/release-notes/_index.md index 5831c414321b..d8c5f26f24a4 100644 --- a/docs/sources/release-notes/_index.md +++ b/docs/sources/release-notes/_index.md @@ -8,6 +8,7 @@ weight: 100 Release notes for Loki are in the CHANGELOG for the release and listed here by version number. +- [V3.2 release notes](https://grafana.com/docs/loki//release-notes/v3-2/) - [V3.1 release notes](https://grafana.com/docs/loki//release-notes/v3-1/) - [V3.0 release notes](https://grafana.com/docs/loki//release-notes/v3-0/) - [V2.9 release notes](https://grafana.com/docs/loki//release-notes/v2-9/) diff --git a/docs/sources/release-notes/v3-1.md b/docs/sources/release-notes/v3-1.md index 917f8d9ecfd0..ec63aa2636d5 100644 --- a/docs/sources/release-notes/v3-1.md +++ b/docs/sources/release-notes/v3-1.md @@ -1,6 +1,6 @@ --- title: v3.1 -description: Version 3.0 release notes. +description: Version 3.1 release notes. weight: 20 --- @@ -69,7 +69,7 @@ To learn more about breaking changes in this release, refer to the [Upgrade guid ## Upgrade Considerations -he path from 2.9 to 3.0 included several breaking changes. For important upgrade guidance, refer to the [Upgrade Guide](https://grafana.com/docs/loki//setup/upgrade/) and the separate [Helm Upgrade Guide](https://grafana.com/docs/loki//setup/upgrade/upgrade-to-6x/). +The path from 2.9 to 3.0 included several breaking changes. For important upgrade guidance, refer to the [Upgrade Guide](https://grafana.com/docs/loki//setup/upgrade/) and the separate [Helm Upgrade Guide](https://grafana.com/docs/loki//setup/upgrade/upgrade-to-6x/). - **BREAKING CHANGE** Update Helm chart to support distributed mode and 3.0 ([#12067](https://github.com/grafana/loki/issues/12067)). diff --git a/docs/sources/release-notes/v3-2.md b/docs/sources/release-notes/v3-2.md new file mode 100644 index 000000000000..94655ecbc690 --- /dev/null +++ b/docs/sources/release-notes/v3-2.md @@ -0,0 +1,194 @@ +--- +title: v3.2 +description: Version 3.2 release notes. +weight: 10 +--- + +# v3.2 + +Grafana Labs and the Loki team are excited to announce the release of Loki 3.2. Explore Logs is also now Generally Available. Upgrade to Loki/GEL 3.2 to get the best possible experience with Explore Logs. Here's a summary of new enhancements and important fixes. + +For a full list of all changes and fixes, refer to the [CHANGELOG](https://github.com/grafana/loki/blob/release-3.2.x/CHANGELOG.md). 
+ +## Features and enhancements + +Key features in Loki 3.2.0 include the following: + +- **API:** The Loki APIs have been updated to fail log queries when executed on instant query endpoint ([#13421](https://github.com/grafana/loki/issues/13421)) ([ce71f1c](https://github.com/grafana/loki/commit/ce71f1cf954625cac2af7c2d0c335248b01185a3)), add structured metadata to `/detected_fields` API ([#13604](https://github.com/grafana/loki/issues/13604)) ([ce02cc2](https://github.com/grafana/loki/commit/ce02cc254abc641dc40831b28c81199526581085)), and avoid looking up unnecessary TSDB symbols during Volume API ([#13960](https://github.com/grafana/loki/issues/13960)) ([7c1a849](https://github.com/grafana/loki/commit/7c1a8493b3837396d085547a42d8260271f1d68a)). + +- **Query acceleration with Bloom filters** (experimental): The Bloom filters feature introduced in Loki 3.0 remains experimental. Note that the Bloom Compactor component has been removed ([#13969](https://github.com/grafana/loki/issues/13969)) ([b75eacc](https://github.com/grafana/loki/commit/b75eacc288c52737e41ba9932c06409c643e2e5c)) and replaced by the Bloom Planner and Bloom Builder components ([#13997](https://github.com/grafana/loki/pull/13997)) ([be9eb50](https://github.com/grafana/loki/commit/be9eb5067f57a0a3dfbf60aa785912f490a6396b)). + +- **distributor:** Ignore empty streams in distributor if all entries fail validation ([#13674](https://github.com/grafana/loki/issues/13674)) ([6c4b062](https://github.com/grafana/loki/commit/6c4b0622aa3de44cccb76fe16bb6583bf91cf15c)), and add a limit to block ingestion until a configured date ([#13958](https://github.com/grafana/loki/issues/13958)) ([b5ac6a0](https://github.com/grafana/loki/commit/b5ac6a0258be51a6d6c3a7743e498dc40014b64b)). + +- **Explore Logs** is now Generally Available (GA). For the best experience, you should be on Grafana 11.2 or later and Loki 3.2. This release includes enhancements to add an `_extracted` suffix to detected fields conflicts ([#13993](https://github.com/grafana/loki/issues/13993)) ([ab1caea](https://github.com/grafana/loki/commit/ab1caea12325b5db777101347acf4f277312adf6)), collect and serve pre-aggregated bytes and counts ([#13020](https://github.com/grafana/loki/issues/13020)) ([467eb1b](https://github.com/grafana/loki/commit/467eb1bb1b08fa69e3d5e40a1e0143f65230ad2b)), and remove cardinality filter ([#13652](https://github.com/grafana/loki/issues/13652)) ([4f534d7](https://github.com/grafana/loki/commit/4f534d7317fa0557251f16b76ebf790f079cf98e)). + +- **Helm:** This release includes updates to the Helm charts to make the gateway container port configurable ([#13294](https://github.com/grafana/loki/issues/13294)) ([05176e4](https://github.com/grafana/loki/commit/05176e445b90597379c268e799b0fb86b8629b9e)) and to support Alibaba Cloud OSS in the Helm chart ([#13441](https://github.com/grafana/loki/issues/13441)) ([3ebab6f](https://github.com/grafana/loki/commit/3ebab6f3931841f62ac59e6b09afef98db656c71)). It also includes a **breaking change** to the Helm chart to support distributed mode and 3.0 ([#12067](https://github.com/grafana/loki/issues/12067)). + +- **ingester:** Ingester Stream Limit Improvements: Ingester stream limits now take into account "owned streams" and periodically update when the Ingester ring is changed. Non-owned streams are now also flushed when this update takes place. The stream limit calculation has also been updated for improved accuracy in multi-zone ingester deployments.
([#13532](https://github.com/grafana/loki/issues/13532)) ([ec34aaa](https://github.com/grafana/loki/commit/ec34aaa1ff2e616ef223631657b63f7dffedd3cc)). + +- **lambda-promtail:** Add S3 log parser support for AWS GuardDuty ([#13148](https://github.com/grafana/loki/issues/13148)) ([2d92fff](https://github.com/grafana/loki/commit/2d92fff2aa4dbda5f9f8c18ea19347e1236257af)), build lambda with zip file ([#13787](https://github.com/grafana/loki/issues/13787)) ([9bf08f7](https://github.com/grafana/loki/commit/9bf08f7cc055db1997c439ef8edb11247c4e1d67)), and ensure messages to Kinesis are usable by refactoring parsing of KinesisEvent to match parsing of CWEvents + code cleanup ([#13098](https://github.com/grafana/loki/issues/13098)) ([dbfb19b](https://github.com/grafana/loki/commit/dbfb19be49fb3bc1f2f62613f50370028cbf5552)). + +- **loki:** Add ability to disable AWS S3 dualstack endpoints usage ([#13785](https://github.com/grafana/loki/issues/13785)) ([bb257f5](https://github.com/grafana/loki/commit/bb257f54b33ecb04cbe1786c4efac779d8d28d8c)), stop enforcing max-query-bytes-read and max-querier-bytes-read in limited tripperware ([#13406](https://github.com/grafana/loki/issues/13406)) ([47f6ea5](https://github.com/grafana/loki/commit/47f6ea53fc4816b259bce4ce4efddee377422d3c)), and upgrade Prometheus ([#13671](https://github.com/grafana/loki/issues/13671)) ([b88583d](https://github.com/grafana/loki/commit/b88583da7d3cc840d4b66698de042773422e334d)). + +- **operator:** Add alert for discarded samples ([#13512](https://github.com/grafana/loki/issues/13512)) ([5f2a02f](https://github.com/grafana/loki/commit/5f2a02f14222dab891b7851e8f48052d6c9b594a)), add support for the volume API ([#13369](https://github.com/grafana/loki/issues/13369)) ([d451e23](https://github.com/grafana/loki/commit/d451e23225047a11b4d5d82900cec4a46d6e7b39)), enable leader-election ([#13760](https://github.com/grafana/loki/issues/13760)) ([1ba4bff](https://github.com/grafana/loki/commit/1ba4bff005930b173391df35248e6f58e076fa74)), and update Loki operand to v3.1.0 ([#13422](https://github.com/grafana/loki/issues/13422)) ([cf5f52d](https://github.com/grafana/loki/commit/cf5f52dca0db93847218cdd2c3f4860d983381ae)). + +- **storage:** Convert WalSegmentWriter to io.ReadSeeker ([#13340](https://github.com/grafana/loki/issues/13340)) ([19c0509](https://github.com/grafana/loki/commit/19c050926e75e6dcac6d228b838836367414a5f8)) and instrument failed chunk encoding/decoding ([#13684](https://github.com/grafana/loki/issues/13684)) ([5a87ccb](https://github.com/grafana/loki/commit/5a87ccb648ee3bf48a3704643ae9923d64651aed)). + +- **docs:** Add getting started video for ingesting OTel logs ([#13226](https://github.com/grafana/loki/issues/13226)) ([5e560f9](https://github.com/grafana/loki/commit/5e560f93ecfa399e85878e30998042646ee4e603)). + +Other improvements include the following: + +- **chunks-inspect:** Support structured metadata ([#11506](https://github.com/grafana/loki/issues/11506)) ([1834065](https://github.com/grafana/loki/commit/183406570411a5ad5ceaf32bf07451b8fce608c1)). +- **exporter:** Include boolean values in limit exporter ([#13466](https://github.com/grafana/loki/issues/13466)) ([4220737](https://github.com/grafana/loki/commit/4220737a52da7ab6c9346b12d5a5d7bedbcd641d)). +- **mempool:** Replace `sync.Mutex` with `sync.Once` ([#13293](https://github.com/grafana/loki/issues/13293)) ([61a9854](https://github.com/grafana/loki/commit/61a9854eb189e5d2c91528ced10ecf39071df680)).
+- **metrics:** Collect duplicate log line metrics ([#13084](https://github.com/grafana/loki/issues/13084)) ([40ee766](https://github.com/grafana/loki/commit/40ee7667244f2e094b5a7199705b4f3dacb7ffaf)). + +## Deprecations + +One of the focuses of Loki 3.0 was cleaning up unused code and old features that had been previously deprecated but not removed. Loki 3.0 removed a number of previous deprecations and introduced some new deprecations. Some of the main areas with changes include: + +- [Deprecated storage options](https://grafana.com/docs/loki//storage/) including the deprecation of the BoltDB store. + +- [Deprecated configuration options](https://grafana.com/docs/loki//configure/). + +- [API endpoint deprecations](https://grafana.com/docs/loki//reference/api/#deprecated-endpoints). + +To learn more about breaking changes in this release, refer to the [Upgrade guide](https://grafana.com/docs/loki//setup/upgrade/). + +{{< docs/shared source="alloy" lookup="agent-deprecation.md" version="next" >}} + +## Upgrade Considerations + +For important upgrade guidance, refer to the [Upgrade Guide](https://grafana.com/docs/loki//setup/upgrade/) and the separate [Helm Upgrade Guide](https://grafana.com/docs/loki//setup/upgrade/upgrade-to-6x/). + +- **BREAKING CHANGE - API:** Fail log queries when executed on instant query endpoint ([#13421](https://github.com/grafana/loki/issues/13421)). +- **BREAKING CHANGE - blooms:** Remove bloom compactor component ([#13969](https://github.com/grafana/loki/issues/13969)). +- **BREAKING CHANGE - Helm:** Update Helm chart to support distributed mode and 3.0 ([#12067](https://github.com/grafana/loki/issues/12067)). +- **BREAKING CHANGE - Helm:** Fix how we set imagePullSecrets for enterprise-gateway and admin-api ([#13761](https://github.com/grafana/loki/issues/13761)) ([3be5a45](https://github.com/grafana/loki/commit/3be5a4576fd0f0dca321e017a637f7a3159c00e5)). +- **BREAKING CHANGE - jsonnet:** Convert read statefulset into deployment for loki-simple-scalable ([#13977](https://github.com/grafana/loki/issues/13977)). + +{{< admonition type="important" >}} +Out of an abundance of caution, we advise that users with Loki or Grafana Enterprise Logs (GEL) deployments on AWS upgrade their Helm charts or change the names of their buckets, as outlined in the solutions and mitigations section of this [blog post](https://grafana.com/blog/2024/06/27/grafana-security-update-grafana-loki-and-unintended-data-write-attempts-to-amazon-s3-buckets/). +{{< /admonition >}} + +## Bug fixes + +### 3.2.0 (2024-09-19) + +- **blooms:** Cleanup temp blockdir in bloom compactor ([#13622](https://github.com/grafana/loki/issues/13622)) ([64215e1](https://github.com/grafana/loki/commit/64215e18495b12e6d5565eba6fe54bc381ac7189)). +- **blooms:** Delete outdated metas during planning ([#13363](https://github.com/grafana/loki/issues/13363)) ([11e1976](https://github.com/grafana/loki/commit/11e19763d0ee4e1b1130ab0326ed0f4f605bca8d)). +- **blooms:** Ensure tokenizer cache is reset between series ([#13370](https://github.com/grafana/loki/issues/13370)) ([04bc3a4](https://github.com/grafana/loki/commit/04bc3a423c8ea9e7c945b15dffb83d674bab3a68)). +- **blooms:** Fix eviction of multiple blockcache items ([#13573](https://github.com/grafana/loki/issues/13573)) ([c9950e3](https://github.com/grafana/loki/commit/c9950e394d2bca8bd290f60672a3bc904cd72d7b)).
+- **blooms:** Fix panic in bloom gateway ([#13303](https://github.com/grafana/loki/issues/13303)) ([66f97b2](https://github.com/grafana/loki/commit/66f97b2aec3cbe0d60acd5e13a9fda9000b03bae)). +- **blooms:** Ignores bloom filtering errors in bounded shard query planning ([#13285](https://github.com/grafana/loki/issues/13285)) ([ede6941](https://github.com/grafana/loki/commit/ede6941c6ff0f40d836b288e167a26c34c2a9437)). +- **blooms:** Improve error wrap to make ignoreNotFound work when fetching blocks ([#13656](https://github.com/grafana/loki/issues/13656)) ([bd20171](https://github.com/grafana/loki/commit/bd20171975e913e429048a0a30328811fc4c8a87)). +- **blooms:** Improves mempool metrics ([#13283](https://github.com/grafana/loki/issues/13283)) ([d36e1d5](https://github.com/grafana/loki/commit/d36e1d580af0a64ce0fcb8de57724d27e399c0dd)). +- **blooms:** Minor fixes and improvements for testing in dev ([#13341](https://github.com/grafana/loki/issues/13341)) ([d0f56ee](https://github.com/grafana/loki/commit/d0f56eeb0a585c37e4a9c62b7a200f4d8360bf4d)). +- **blooms:** Remove backoff from notify planner ([#13506](https://github.com/grafana/loki/issues/13506)) ([e506995](https://github.com/grafana/loki/commit/e506995e595bb5c465941f3f1227311b2ea1c8c5)). +- **blooms:** Remove unused arg ([#13343](https://github.com/grafana/loki/issues/13343)) ([fcb9b28](https://github.com/grafana/loki/commit/fcb9b283ba0cf927646d332a68c049718ec1d236)). +- **blooms:** Ship chunkrefs in task payload ([#13677](https://github.com/grafana/loki/issues/13677)) ([450bbce](https://github.com/grafana/loki/commit/450bbce938fd548715104f6a1a4dde76e2e7ff34)). +- **blooms:** Add logging to empty bloom ([#13502](https://github.com/grafana/loki/issues/13502)) ([c263a68](https://github.com/grafana/loki/commit/c263a681f8e19417ea3056a3e2cae7d3015d081a)). +- **blooms:** Skip empty blooms on reads ([#13500](https://github.com/grafana/loki/issues/13500)) ([bfa6955](https://github.com/grafana/loki/commit/bfa69556afda160051cab677ce278aba5ab48448)). +- **blooms:** Suppress error from resolving server addresses for blocks ([#13385](https://github.com/grafana/loki/issues/13385)) ([3ac2317](https://github.com/grafana/loki/commit/3ac231728e6bc9d3166684bcb697c78b4fb56fae)). +- **blooms:** Use correct key to populate blockscache at startup ([#13624](https://github.com/grafana/loki/issues/13624)) ([2624a4b](https://github.com/grafana/loki/commit/2624a4bdd43badcd1159b83e26c1b0ff14479ac0)). +- **blooms:** Fix log line for fingerprint not found ([#13555](https://github.com/grafana/loki/issues/13555)) ([aeb23bb](https://github.com/grafana/loki/commit/aeb23bb7fc3d33327060828ddf97cb7da7b3c8f8)). +- **blooms:** Fix panic in BloomStore initialization ([#13457](https://github.com/grafana/loki/issues/13457)) ([5f4b8fc](https://github.com/grafana/loki/commit/5f4b8fc9e44ac386ef5bfc64dd5f8f47b72f8ef9)). +- **blooms:** Flaky test blockPlansForGaps ([#13743](https://github.com/grafana/loki/issues/13743)) ([37e33d4](https://github.com/grafana/loki/commit/37e33d41b4583626a0384e4eb4c4570d3ef11882)). +- **blooms:** Keep blocks referenced by newer metas ([#13614](https://github.com/grafana/loki/issues/13614)) ([784e7d5](https://github.com/grafana/loki/commit/784e7d562fedec7134c8ed4e2cee8ccb7049e271)). +- **blooms:** Lint issues after merge to main ([#13326](https://github.com/grafana/loki/issues/13326)) ([7e19cc7](https://github.com/grafana/loki/commit/7e19cc7dca8480932b39c87c7c2e296f99318c95)). 
+- **blooms:** Use elements match in gapsBetweenTSDBsAndMetas test ([#13722](https://github.com/grafana/loki/issues/13722)) ([4cbe2a6](https://github.com/grafana/loki/commit/4cbe2a6a85c3095f66610cffd32cb3d3bdd43b3f)). +- **ci:** Add cleanup step into job `dist` ([#13801](https://github.com/grafana/loki/issues/13801)) ([217f928](https://github.com/grafana/loki/commit/217f928f52b3d3fad414a01502c37d143cabf567)). +- **ci:** Fixed release-please manifest ([#13810](https://github.com/grafana/loki/issues/13810)) ([f253db5](https://github.com/grafana/loki/commit/f253db5598156a4461fd1f5ede14443c937e2ac8)). +- **deps:** Update github.com/axiomhq/hyperloglog digest to af9851f ([#13806](https://github.com/grafana/loki/issues/13806)) ([67295e0](https://github.com/grafana/loki/commit/67295e0a16677feabb83284e058926b016993128)). +- **deps:** Update github.com/c2h5oh/datasize digest to aa82cc1 ([#13807](https://github.com/grafana/loki/issues/13807)) ([a93f38c](https://github.com/grafana/loki/commit/a93f38cb055c9a3f22cf07d0bd5888a0596ec5d6)). +- **deps:** Update github.com/docker/go-plugins-helpers digest to 45e2431 ([#13808](https://github.com/grafana/loki/issues/13808)) ([e5a3994](https://github.com/grafana/loki/commit/e5a3994fba37247cf2b81405eb4b19b29af89959)). +- **deps:** Update github.com/grafana/jsonparser digest to ea80629 ([#13814](https://github.com/grafana/loki/issues/13814)) ([d5718eb](https://github.com/grafana/loki/commit/d5718eb111f8f0fbbc43294eb8b72877b250d433)). +- **deps:** Update module github.com/aliyun/aliyun-oss-go-sdk to v2.2.10+incompatible ([#13861](https://github.com/grafana/loki/issues/13861)) ([6f79194](https://github.com/grafana/loki/commit/6f791941ee5a188a658313c12f549d40f8802528)). +- **deps:** Update module github.com/azure/go-autorest/autorest/adal to v0.9.24 ([#13862](https://github.com/grafana/loki/issues/13862)) ([8041bd2](https://github.com/grafana/loki/commit/8041bd29b90a79066f7c6393fef1db5ba29440b0)). +- **deps:** Update module github.com/azure/go-autorest/autorest/azure/auth to v0.5.13 ([#13863](https://github.com/grafana/loki/issues/13863)) ([71c4421](https://github.com/grafana/loki/commit/71c4421e09f30ebd8a1826c976436d3ca3ad603d)). +- **deps:** Update module github.com/baidubce/bce-sdk-go to v0.9.186 ([#13864](https://github.com/grafana/loki/issues/13864)) ([3c0e3e2](https://github.com/grafana/loki/commit/3c0e3e2c13591e3af44ce4826245043c81bb66c3)). +- **deps:** Update module github.com/baidubce/bce-sdk-go to v0.9.187 ([#13933](https://github.com/grafana/loki/issues/13933)) ([56af84d](https://github.com/grafana/loki/commit/56af84d3a638dbe30f1cacffd7d090118720d787)). +- **deps:** Update module github.com/baidubce/bce-sdk-go to v0.9.188 ([#14000](https://github.com/grafana/loki/issues/14000)) ([79039a2](https://github.com/grafana/loki/commit/79039a24a244b06b43018095e29c9ed65b0e1067)). +- **deps:** Update module github.com/cespare/xxhash/v2 to v2.3.0 (main) ([#13615](https://github.com/grafana/loki/issues/13615)) ([cfc7b34](https://github.com/grafana/loki/commit/cfc7b34b9eb94960bd960b7a8a4442a2a1a9ecaf)). +- **deps:** Update module github.com/datadog/sketches-go to v1.4.6 ([#13865](https://github.com/grafana/loki/issues/13865)) ([1f3c467](https://github.com/grafana/loki/commit/1f3c467b412dabf7f330dc71befcdf50596ba517)). +- **deps:** Update module github.com/docker/docker to v25.0.5+incompatible [security] (main) ([#12279](https://github.com/grafana/loki/issues/12279)) ([960c034](https://github.com/grafana/loki/commit/960c03438477435b606cf4dfbb7af43a5b52068d)). 
+- **deps:** Update module github.com/docker/docker to v27.1.1+incompatible [security] (main) ([#13762](https://github.com/grafana/loki/issues/13762)) ([f8bf3bb](https://github.com/grafana/loki/commit/f8bf3bb3786ccf5c3784e5b75e9d030251dcc8fb)). +- **deps:** Update module github.com/docker/docker to v27.1.2+incompatible ([#13872](https://github.com/grafana/loki/issues/13872)) ([8ab4c20](https://github.com/grafana/loki/commit/8ab4c2057256511b5bc25c5f9c9ff870b5b71cb5)). +- **deps:** Update module github.com/efficientgo/core to v1.0.0-rc.3 ([#14001](https://github.com/grafana/loki/issues/14001)) ([90f7e5f](https://github.com/grafana/loki/commit/90f7e5fa67dcf7b05c8aae54bacdf96f98c27faf)). +- **deps:** Update module github.com/felixge/fgprof to v0.9.4 ([#13870](https://github.com/grafana/loki/issues/13870)) ([c68848f](https://github.com/grafana/loki/commit/c68848f8056aca3ebb358dd1fc8adf6e07611e9c)). +- **deps:** Update module github.com/fsouza/fake-gcs-server to v1.47.7 ([#13935](https://github.com/grafana/loki/issues/13935)) ([d43b2de](https://github.com/grafana/loki/commit/d43b2de1b4e0d0a999569900f69755cfe6b17c21)). +- **deps:** Update module github.com/gogo/googleapis to v1.4.1 ([#13871](https://github.com/grafana/loki/issues/13871)) ([6da7eb5](https://github.com/grafana/loki/commit/6da7eb577cac62208801374af71c90d4a06df097)). +- **deps:** Update module github.com/gorilla/mux to v1.8.1 (main) ([#13618](https://github.com/grafana/loki/issues/13618)) ([19b288e](https://github.com/grafana/loki/commit/19b288eee4ad9c25fa58de56c3be02393e63a20e)). +- **deps:** Update module github.com/gorilla/websocket to v1.5.3 ([#13873](https://github.com/grafana/loki/issues/13873)) ([1eb8342](https://github.com/grafana/loki/commit/1eb8342d41a9fdb6c5fcd3e6e5a8c6b98bde4e43)). +- **deps:** Update module github.com/hashicorp/consul/api to v1.29.4 ([#14002](https://github.com/grafana/loki/issues/14002)) ([e11b244](https://github.com/grafana/loki/commit/e11b244a8bcbc69d6829d31fb164dc43d505068e)). +- **deps:** Update module github.com/ibm/go-sdk-core/v5 to v5.17.4 ([#13892](https://github.com/grafana/loki/issues/13892)) ([b6991f2](https://github.com/grafana/loki/commit/b6991f29d232267c1fa0ed8dff55da72240c23f6)). +- **deps:** Update module github.com/ibm/ibm-cos-sdk-go to v1.11.0 ([#13893](https://github.com/grafana/loki/issues/13893)) ([9b7e7e9](https://github.com/grafana/loki/commit/9b7e7e97a41d2ce0abe62b0d920538e9974cef69)). +- **deps:** Update module github.com/klauspost/pgzip to v1.2.6 ([#13874](https://github.com/grafana/loki/issues/13874)) ([fdea7a1](https://github.com/grafana/loki/commit/fdea7a1763618812284a44d6c247c4215d317950)). +- **deps:** Update module github.com/mattn/go-ieproxy to v0.0.12 ([#13876](https://github.com/grafana/loki/issues/13876)) ([775bf8e](https://github.com/grafana/loki/commit/775bf8ebe7893a5b0807984a1c791f211820eed2)). +- **deps:** Update module github.com/ncw/swift to v2 ([#13951](https://github.com/grafana/loki/issues/13951)) ([246a1df](https://github.com/grafana/loki/commit/246a1dfbe24a00b75b03257cb7e75be6cc96a3a8)). +- **deps:** Update module github.com/oschwald/geoip2-golang to v1.11.0 ([#13934](https://github.com/grafana/loki/issues/13934)) ([3bebba5](https://github.com/grafana/loki/commit/3bebba59b5a81da77c6b0d6c499f92f9ce320d46)). +- **deps:** Update module github.com/schollz/progressbar/v3 to v3.14.6 ([#13884](https://github.com/grafana/loki/issues/13884)) ([fb9cae4](https://github.com/grafana/loki/commit/fb9cae4aaa6a12a375fa6199bfcd562833385737)). 
+- **deps:** Update module github.com/tonistiigi/fifo to v1 ([#13952](https://github.com/grafana/loki/issues/13952)) ([96b5c79](https://github.com/grafana/loki/commit/96b5c79e7770f706bdc1d07e306bf225706273a3)). +- **deps:** Update module github.com/workiva/go-datastructures to v1.1.5 ([#13885](https://github.com/grafana/loki/issues/13885)) ([d817aee](https://github.com/grafana/loki/commit/d817aeeab374f414b08598a8784ea708000856d2)). +- **deps:** Update module golang.org/x/text to v0.17.0 (main) ([#13794](https://github.com/grafana/loki/issues/13794)) ([df61482](https://github.com/grafana/loki/commit/df61482207eb8f44f43d9c2ef4f450fc0c9a00ee)). +- **deps:** Update module golang.org/x/time to v0.6.0 ([#13910](https://github.com/grafana/loki/issues/13910)) ([dff00bd](https://github.com/grafana/loki/commit/dff00bd8f26e85ce04edc16a9f43cb32d3691add)). +- **detected fields:** Detected fields incorrect type bug ([#13515](https://github.com/grafana/loki/issues/13515)) ([f6a94d3](https://github.com/grafana/loki/commit/f6a94d303444dbf22cf1198f549c9cde070f1bdc)). +- **detected fields:** Remove query size limit for detected fields ([#13423](https://github.com/grafana/loki/issues/13423)) ([1fa5127](https://github.com/grafana/loki/commit/1fa51277978ead6569e31e908dec7f140dadb90f)). +- **detected labels:** Response when store label values are empty ([#13970](https://github.com/grafana/loki/issues/13970)) ([6f99af6](https://github.com/grafana/loki/commit/6f99af62227f98c7d9de8a5cf480ae792ce6220a)). +- **detected_labels:** Add matchers to get labels from store ([#14012](https://github.com/grafana/loki/issues/14012)) ([25234e8](https://github.com/grafana/loki/commit/25234e83483cb8a974d40b7c80b3d4dd62d6d880)). +- **detected_labels:** Remove limit middleware for `detected_labels` ([#13643](https://github.com/grafana/loki/issues/13643)) ([2642718](https://github.com/grafana/loki/commit/2642718d50569931b71cfc0c9288318ab775ca41)). +- **docs:** Fixed typo in ruler URL ([#13692](https://github.com/grafana/loki/issues/13692)) ([1476498](https://github.com/grafana/loki/commit/14764989a2c6f01803f0313d8151f7aa20affd4a)). +- **docs:** Remove trailing backtick in verify-config for Loki 3.0 ([#13640](https://github.com/grafana/loki/issues/13640)) ([498f29a](https://github.com/grafana/loki/commit/498f29a66b2dbfeff85454f22d0596d20066a635)). +- **Helm:** Fix HPA ingester typo ([#13158](https://github.com/grafana/loki/issues/13158)) ([4ca9785](https://github.com/grafana/loki/commit/4ca97858d9dc33db7abbe20ca01c6735cb9ce34e)). +- **Helm:** Fix extraObjects ([#13107](https://github.com/grafana/loki/issues/13107)) ([b7fcf2b](https://github.com/grafana/loki/commit/b7fcf2bb7ea35206c0015545c93582991f64f581)). +- **Helm:** Fix imagePullSecrets for statefulset-results-cache ([#13051](https://github.com/grafana/loki/issues/13051)) ([8434b2f](https://github.com/grafana/loki/commit/8434b2f6e8e124225aafe6e55ca9c1b6ff6a2c5b)). +- **Helm:** Fixed memcached and provisioner templates ([#13788](https://github.com/grafana/loki/issues/13788)) ([1bf9791](https://github.com/grafana/loki/commit/1bf97912de83200d02689511f48658ce7d9543cf)). +- **Helm:** Removed helm test ([#13651](https://github.com/grafana/loki/issues/13651)) ([ef03476](https://github.com/grafana/loki/commit/ef03476f3dac159e5f58490351223fcdb9ac3469)). +- **Helm:** Update yaml file `./production/helm/loki/Chart.yaml` (+1 other) ([#13392](https://github.com/grafana/loki/issues/13392)) ([b5b861c](https://github.com/grafana/loki/commit/b5b861c348bc768254fd083fb40d2820cf347be6)).
+- **Helm:** Update yaml file `./production/helm/loki/values.yaml` (+1 other) ([#13426](https://github.com/grafana/loki/issues/13426)) ([fc3904e](https://github.com/grafana/loki/commit/fc3904ee69d0824dc681ca5a4280f7aa2ec5563b)). +- **Helm:** Querier address in SingleBinary mode ([#13297](https://github.com/grafana/loki/issues/13297)) ([29f1ea9](https://github.com/grafana/loki/commit/29f1ea91ecd935a6becae2bd425224a913285071)). +- **Helm:** Update Loki v3 Helm statefulset-ingester.yaml template ([#13118](https://github.com/grafana/loki/issues/13118)) ([5b4e576](https://github.com/grafana/loki/commit/5b4e57602f7b7e5f4d73204ad682826d1041f8a8)). +- **index gateways:** Do not retain span logger created with index set initialized at query time ([#14027](https://github.com/grafana/loki/issues/14027)) ([bd25ac2](https://github.com/grafana/loki/commit/bd25ac2503b00812d959c2aaf092bd2618f16a5a)). +- **index gateway:** Fix nil pointer dereference panic when using ruler in ring mode ([#13436](https://github.com/grafana/loki/issues/13436)) ([304db10](https://github.com/grafana/loki/commit/304db100b382f0c1d1d9999dfe8ca77d1ac901c9)). +- **ingester:** Fix panic in ingester.go ([#13557](https://github.com/grafana/loki/issues/13557)) ([dbff69a](https://github.com/grafana/loki/commit/dbff69a2e92f3ce34f7d58a7418cd0456d644be3)). +- **ingester:** Redo ingester profile tagging ([#13239](https://github.com/grafana/loki/issues/13239)) ([32097c8](https://github.com/grafana/loki/commit/32097c84627f5190cfcf6c1f247c9d0531d92865)). +- **ingester:** Remove tenant label tagging from profiles to reduce cardinality ([#13270](https://github.com/grafana/loki/issues/13270)) ([f897758](https://github.com/grafana/loki/commit/f8977587476169197d6da4d7055b97b189808344)). +- **ingester:** Stream ownership check ([#13314](https://github.com/grafana/loki/issues/13314)) ([5ae5b31](https://github.com/grafana/loki/commit/5ae5b31b1f9ffcac9193cfd4ba47a64d911966db)). +- **ingester:** Support multi-zone ingesters when converting global to local limits for streams in limiter.go ([#13321](https://github.com/grafana/loki/issues/13321)) ([e28c15f](https://github.com/grafana/loki/commit/e28c15f56c2aab62eecbaa382055eac99fc3a581)). +- **ingester:** Update fixed limit once streams ownership re-checked ([#13231](https://github.com/grafana/loki/issues/13231)) ([7ac19f0](https://github.com/grafana/loki/commit/7ac19f00b4f5186b0c38a8dad23cf61e14d071de)). +- **LogQL:** AST left circular reference results in out of memory ([#13501](https://github.com/grafana/loki/issues/13501)) ([6dd6b65](https://github.com/grafana/loki/commit/6dd6b65139b3b8d4254f114e99ab8fb3eaa2ae09)). +- **LogQL:** Improve execution speed for queries with label filters ([#13922](https://github.com/grafana/loki/issues/13922)) ([40f4f14](https://github.com/grafana/loki/commit/40f4f1479170a90b39c005292e11a3ec4db4bc34)). +- **LogQL:** Panic when parsing and extracting JSON key values ([#13790](https://github.com/grafana/loki/issues/13790)) ([5ef83a7](https://github.com/grafana/loki/commit/5ef83a741ba515f68343e9dc345fcb8afe921bfd)). +- **LogQL:** Propagate headers/warnings/stats from quantile downstreams ([#13881](https://github.com/grafana/loki/issues/13881)) ([a0c7598](https://github.com/grafana/loki/commit/a0c75987a24d0adc520c60dd5d85df4c34009548)). +- **LogQL:** Record datasample queries as limited query type ([#13930](https://github.com/grafana/loki/issues/13930)) ([ae938d0](https://github.com/grafana/loki/commit/ae938d06d941a386aa839b6717445c2295ce2efa)).
+- **LogQL:** Return empty vector instead of nil for empty evaluator. ([#13485](https://github.com/grafana/loki/issues/13485)) ([08615bf](https://github.com/grafana/loki/commit/08615bf7519b31e825903577427f7407194baf74)). +- **LogQL:** Special case the return values from a sharded first/last_over_time query ([#13578](https://github.com/grafana/loki/issues/13578)) ([29a37d5](https://github.com/grafana/loki/commit/29a37d5dcdab33d62615a79aefe97ea2a80dea03)). +- **log results cache:** Include pipeline wrapper disabled in cache key ([#13328](https://github.com/grafana/loki/issues/13328)) ([221491c](https://github.com/grafana/loki/commit/221491c123adb6cedfabace6fc2cd03a32124655)). +- **loki:** Init internal server log along with loki's server instance ([#13221](https://github.com/grafana/loki/issues/13221)) ([66b8c9b](https://github.com/grafana/loki/commit/66b8c9b7738acd0e0616b88d35cf3ddc0df83e7e)). +- **loki-mixin:** Fix latency panel for Index Gateway ([#13629](https://github.com/grafana/loki/issues/13629)) ([f586c00](https://github.com/grafana/loki/commit/f586c00a9fcfa8bb84781698e141dff928b86c92)). +- **loki-mixin:** Various latency panels in operational dashboard should have ms unit type instead of seconds ([#13260](https://github.com/grafana/loki/issues/13260)) ([f5a9905](https://github.com/grafana/loki/commit/f5a99058036f60f5ae0c190c48cbcf5ce22ea96d)). +- **loki-mixin:** Attribute OTLP route correctly to write path ([#13943](https://github.com/grafana/loki/issues/13943)) ([b91b782](https://github.com/grafana/loki/commit/b91b7829075f9df565d468d9e72191e9f4c5e94e)). +- **loki-mixins:** Incorrect pod matcher for compactor in mixin when using ssd mode ([#12846](https://github.com/grafana/loki/issues/12846)) ([515e13c](https://github.com/grafana/loki/commit/515e13cc6c92b08968bc87e220b8bca64683fd05)). +- **operator:** Allow structured metadata only if V13 schema provided ([#13463](https://github.com/grafana/loki/issues/13463)) ([3ac130b](https://github.com/grafana/loki/commit/3ac130b8a152169766cb173718f2312aeb4f694e)). +- **operator:** Don't overwrite annotations for LokiStack ingress resources ([#13708](https://github.com/grafana/loki/issues/13708)) ([f523530](https://github.com/grafana/loki/commit/f52353060dd936cff587ff2060c8616941695ece)). +- **operator:** Remove duplicate conditions from status ([#13497](https://github.com/grafana/loki/issues/13497)) ([527510d](https://github.com/grafana/loki/commit/527510d1a84a981250047dbabba8d492177b8452)). +- **operator:** Set object storage for delete requests when using retention ([#13562](https://github.com/grafana/loki/issues/13562)) ([46de4c1](https://github.com/grafana/loki/commit/46de4c1bc839ef682798bec5003123f7d5f4404b)). +- **operator:** Skip updating annotations for serviceaccounts ([#13450](https://github.com/grafana/loki/issues/13450)) ([1b9b111](https://github.com/grafana/loki/commit/1b9b11116b48fb37b7015d27104668412fc04937)). +- **operator:** Support v3.1.0 in OpenShift dashboards ([#13430](https://github.com/grafana/loki/issues/13430)) ([8279d59](https://github.com/grafana/loki/commit/8279d59f145df9c9132aeff9e3d46c738650027c)). +- **operator:** Watch for CredentialsRequests on CCOAuthEnv only ([#13299](https://github.com/grafana/loki/issues/13299)) ([7fc926e](https://github.com/grafana/loki/commit/7fc926e36ea8fca7bd8e9955c8994574535dbbae)). 
+- **querier:** Add a retry middleware to all the stats handlers ([#13584](https://github.com/grafana/loki/issues/13584)) ([7232795](https://github.com/grafana/loki/commit/7232795e1f5fb1868c83111f5aab72ca0f3d9891)). +- **querier:** Adjust tailer loop criteria so it is actually re-tested ([#13906](https://github.com/grafana/loki/issues/13906)) ([dabbfd8](https://github.com/grafana/loki/commit/dabbfd81ef5c4f02a255b404ab25edd1eec126cf)). +- **querier:** Fix retry code to handle grpc status codes. updated newer stats retries to be wrapped with spans ([#13592](https://github.com/grafana/loki/issues/13592)) ([d3e1edb](https://github.com/grafana/loki/commit/d3e1edbf1102b2f0f4116c3bb1773000d0368dde)). +- **querier:** Fixes span name of serializeRounTripper ([#13541](https://github.com/grafana/loki/issues/13541)) ([4451d56](https://github.com/grafana/loki/commit/4451d56d6b9a9d2eb54ed75d3d2c8fe0db6908eb)). +- **querier:** Remove retries on the stats handlers because they already retry ([#13608](https://github.com/grafana/loki/issues/13608)) ([1008315](https://github.com/grafana/loki/commit/10083159a7e54df4e41efe2fc2e04e267fee1147)). +- **query engine:** Include lines with ts equal to end timestamp of the query range when executing range aggregations ([#13448](https://github.com/grafana/loki/issues/13448)) ([e0ca67d](https://github.com/grafana/loki/commit/e0ca67dd4563e41c57b2f1409ef235b76b2a1a6e)). +- **retry:** Fix retries when using protobuf encoding ([#13316](https://github.com/grafana/loki/issues/13316)) ([a457c5d](https://github.com/grafana/loki/commit/a457c5d171d5ffa0a7060c98a8bc48abd735911a)). +- **ruler:** Protect ruler remote-write overrides map with a mutex when creating new appenders ([#13676](https://github.com/grafana/loki/issues/13676)) ([e9a9c60](https://github.com/grafana/loki/commit/e9a9c60c22e78b52c0c046d379b4b2b986d91dca)). +- **sharding:** Use without() grouping when merging `avg_over_time` shard results ([#12176](https://github.com/grafana/loki/issues/12176)) ([eb8a363](https://github.com/grafana/loki/commit/eb8a36306674c497d8b0150b482f275e2c00f6c9)). +- **storage:** Handle block offset exceeding chunk length in memchunk.go ([#13661](https://github.com/grafana/loki/issues/13661)) ([d42476a](https://github.com/grafana/loki/commit/d42476aa58fca07b17ee39d388639807624f884a)). +- **storage:** Read "404" as object not exist ([#13901](https://github.com/grafana/loki/issues/13901)) ([3c9c647](https://github.com/grafana/loki/commit/3c9c6479226818229802b97e08d6c9e13e3798a5)). +- **storage:** Separates directory creation from permission checks ([#13248](https://github.com/grafana/loki/issues/13248)) ([1086783](https://github.com/grafana/loki/commit/1086783a1d8886f0e6888289975e771e18d800e6)). +- **storage:** Try reading chunks which have incorrect offset for blocks ([#13720](https://github.com/grafana/loki/issues/13720)) ([7e224d5](https://github.com/grafana/loki/commit/7e224d53de8a5c43448ffd341f0d9c48abb335ef)). +- **structured metadata:** Sanitize structured metadata at query time ([#13983](https://github.com/grafana/loki/issues/13983)) ([3bf7fa9](https://github.com/grafana/loki/commit/3bf7fa9f159a7c76b1bcdd640c765b333766f748)). +- **WAL:** Fix a bug where AppendRequest with no entries triggers flush ([#13672](https://github.com/grafana/loki/issues/13672)) ([8a3ae22](https://github.com/grafana/loki/commit/8a3ae223ba160584d61bd5cb39b546a3c28f46b5)). 
+- **WAL:** Properly reset wal segment writer ([#13468](https://github.com/grafana/loki/issues/13468)) ([6ea83b4](https://github.com/grafana/loki/commit/6ea83b45b5e9f8e1d0f9d7e5574bb5b520ddfefd)). diff --git a/docs/sources/send-data/fluentbit/_index.md b/docs/sources/send-data/fluentbit/_index.md index 2c2845d766d3..ea2af6a4ac4b 100644 --- a/docs/sources/send-data/fluentbit/_index.md +++ b/docs/sources/send-data/fluentbit/_index.md @@ -130,7 +130,7 @@ For more information about this see our [AWS documentation]({{< relref "../promt First, you need to follow the [instructions](https://github.com/grafana/loki/blob/main/clients/cmd/fluent-bit/README.md) in order to build the plugin dynamic library. -The assuming you have Fluent Bit installed in your `$PATH` you can run the plugin using: +Assuming you have Fluent Bit installed in your `$PATH`, you can run the plugin using: ```bash fluent-bit -e /path/to/built/out_grafana_loki.so -c fluent-bit.conf diff --git a/docs/sources/setup/install/helm/install-monolithic/_index.md b/docs/sources/setup/install/helm/install-monolithic/_index.md index fd52c11c2003..4373907dcfbb 100644 --- a/docs/sources/setup/install/helm/install-monolithic/_index.md +++ b/docs/sources/setup/install/helm/install-monolithic/_index.md @@ -12,7 +12,12 @@ weight: 100 This Helm Chart installation runs the Grafana Loki *single binary* within a Kubernetes cluster. -If you set the `singleBinary.replicas` value to 1 and set the deployment mode to `SingleBinary`, this chart configures Loki to run the `all` target in a [monolithic mode](https://grafana.com/docs/loki//get-started/deployment-modes/#monolithic-mode), designed to work with a filesystem storage. It will also configure meta-monitoring of metrics and logs. +If you set the `singleBinary.replicas` value to 1 and set the deployment mode to `SingleBinary`, this chart configures Loki to run the `all` target in a [monolithic mode](https://grafana.com/docs/loki//get-started/deployment-modes/#monolithic-mode), designed to work with the filesystem storage configuration. It will also configure meta-monitoring of metrics and logs. + +{{< admonition type="note" >}} +You must specify `commonConfig.replication_factor: 1` if you are only using 1 replica, otherwise requests will fail. +{{< /admonition >}} + If you set the `singleBinary.replicas` value to 2 or more, this chart configures Loki to run a *single binary* in a replicated, highly available mode. When running replicas of a single binary, you must configure object storage. **Before you begin: Software Requirements** diff --git a/docs/sources/setup/install/helm/reference.md b/docs/sources/setup/install/helm/reference.md index 159fae0646b5..3aee632a03fe 100644 --- a/docs/sources/setup/install/helm/reference.md +++ b/docs/sources/setup/install/helm/reference.md @@ -753,6 +753,7 @@ null "priorityClassName": null, "replicas": 0, "resources": {}, + "serviceAnnotations": {}, "serviceLabels": {}, "terminationGracePeriodSeconds": 30, "tolerations": [] @@ -1030,6 +1031,15 @@ null
 {}
 
+ + + + bloomBuilder.serviceAnnotations + object + Annotations for bloom-builder service +
+{}
+
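For readers scanning the generated reference, here is a minimal values-file sketch of how the new per-component `serviceAnnotations` fields are set; the annotation key and value are arbitrary illustrations, not anything the chart requires. The same shape applies to every component gaining `serviceAnnotations` below (bloom-gateway, compactor, distributor, index-gateway, ingester, and so on).

```yaml
# values.yaml -- sketch only; the annotation shown is an arbitrary example
bloomBuilder:
  serviceAnnotations:
    example.com/owner: "observability-team"
```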
@@ -1109,8 +1119,6 @@ null ], "enableStatefulSetAutoDeletePVC": false, "enabled": false, - "size": "10Gi", - "storageClass": null, "whenDeleted": "Retain", "whenScaled": "Retain" }, @@ -1127,6 +1135,7 @@ null "imagePullSecrets": [], "name": null }, + "serviceAnnotations": {}, "serviceLabels": {}, "terminationGracePeriodSeconds": 30, "tolerations": [] @@ -1295,6 +1304,15 @@ null List of the bloom-gateway PVCs
 
+
+ + + + bloomGateway.persistence.claims[0].size + string + Size of persistent disk +
+"10Gi"
 
@@ -1314,24 +1332,6 @@ false
 false
 
- - - - bloomGateway.persistence.size - string - Size of persistent disk -
-"10Gi"
-
- - - - bloomGateway.persistence.storageClass - string - Storage class to be used. If defined, storageClassName: . If set to "-", storageClassName: "", which disables dynamic provisioning. If empty or set to null, no storageClassName spec is set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS, and OpenStack). -
-null
-
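Because `bloomGateway.persistence.size` and `bloomGateway.persistence.storageClass` are removed in favor of the `claims` list, a hedged migration sketch follows; the claim name `data` matches the chart's new defaults shown later in this diff, and `bloomPlanner.persistence` undergoes the same change:

```yaml
# values.yaml -- migration sketch; persistence.enabled is assumed to gate PVC creation
bloomGateway:
  persistence:
    enabled: true
    claims:
      - name: data           # claim name used by the chart's new defaults
        size: 10Gi           # was bloomGateway.persistence.size
        storageClass: null   # was bloomGateway.persistence.storageClass; null selects the default provisioner
```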
@@ -1422,6 +1422,15 @@ true
 null
 
+ + + + bloomGateway.serviceAnnotations + object + Annotations for bloom-gateway service +
+{}
+
@@ -1492,11 +1501,15 @@ null "nodeSelector": {}, "persistence": { "annotations": {}, - "claims": [], + "claims": [ + { + "name": "data", + "size": "10Gi", + "storageClass": null + } + ], "enableStatefulSetAutoDeletePVC": false, "enabled": false, - "size": "10Gi", - "storageClass": null, "whenDeleted": "Retain", "whenScaled": "Retain" }, @@ -1513,6 +1526,7 @@ null "imagePullSecrets": [], "name": null }, + "serviceAnnotations": {}, "serviceLabels": {}, "terminationGracePeriodSeconds": 30, "tolerations": [] @@ -1680,7 +1694,16 @@ null list List of the bloom-planner PVCs
-[]
+
+
+ + + + bloomPlanner.persistence.claims[0].size + string + Size of persistent disk +
+"10Gi"
 
@@ -1700,24 +1723,6 @@ false
 false
 
- - - - bloomPlanner.persistence.size - string - Size of persistent disk -
-"10Gi"
-
- - - - bloomPlanner.persistence.storageClass - string - Storage class to be used. If defined, storageClassName: . If set to "-", storageClassName: "", which disables dynamic provisioning. If empty or set to null, no storageClassName spec is set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS, and OpenStack). -
-null
-
@@ -1808,6 +1813,15 @@ true
 null
 
+ + + + bloomPlanner.serviceAnnotations + object + Annotations for bloom-planner service +
+{}
+
@@ -2025,7 +2039,7 @@ null chunksCache.persistence.storageSize string - Size of persistent disk + Size of persistent disk, which must be specified in G or Gi
 "10G"
 
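Given the clarified docstring above, a short values sketch; the G/Gi restriction is taken at face value from the doc change, and the same wording is applied to `resultsCache.persistence.storageSize` later in this diff:

```yaml
# values.yaml -- sketch; only G or Gi suffixes, per the clarified docs
chunksCache:
  persistence:
    storageSize: 20Gi   # valid: 20G or 20Gi; other units such as Mi are not accepted
resultsCache:
  persistence:
    storageSize: 10G
```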
@@ -2268,6 +2282,7 @@ null "imagePullSecrets": [], "name": null }, + "serviceAnnotations": {}, "serviceLabels": {}, "terminationGracePeriodSeconds": 30, "tolerations": [] @@ -2563,6 +2578,15 @@ true
 null
 
+ + + + compactor.serviceAnnotations + object + Annotations for compactor service +
+{}
+
@@ -2658,6 +2682,7 @@ null "priorityClassName": null, "replicas": 0, "resources": {}, + "serviceAnnotations": {}, "serviceLabels": {}, "terminationGracePeriodSeconds": 30, "tolerations": [] @@ -2944,6 +2969,15 @@ null
 {}
 
+ + + + distributor.serviceAnnotations + object + Annotations for distributor service +
+{}
+
@@ -4536,6 +4570,7 @@ null "priorityClassName": null, "replicas": 0, "resources": {}, + "serviceAnnotations": {}, "serviceLabels": {}, "terminationGracePeriodSeconds": 300, "tolerations": [] @@ -4786,6 +4821,15 @@ null
 {}
 
+ + + + indexGateway.serviceAnnotations + object + Annotations for index-gateway service +
+{}
+
@@ -4889,6 +4933,7 @@ null "readinessProbe": {}, "replicas": 0, "resources": {}, + "serviceAnnotations": {}, "serviceLabels": {}, "terminationGracePeriodSeconds": 300, "tolerations": [], @@ -5269,6 +5314,15 @@ false
 {}
 
+ + + + ingester.serviceAnnotations + object + Annotations for ingester service +
+{}
+
@@ -6147,6 +6201,9 @@ null Additional storage config
 {
+  "bloom_shipper": {
+    "working_directory": "/var/loki/data/bloomshipper"
+  },
   "boltdb_shipper": {
     "index_gateway_client": {
       "server_address": "{{ include \"loki.indexGatewayAddress\" . }}"
@@ -6429,6 +6486,15 @@ true
   "type": "RollingUpdate"
 }
 
+ + + + memberlist.service.annotations + object + Annotations for memberlist service +
+{}
+
@@ -6489,7 +6555,12 @@ false object The SecurityContext override for memcached pods
-{}
+{
+  "fsGroup": 11211,
+  "runAsGroup": 11211,
+  "runAsNonRoot": true,
+  "runAsUser": 11211
+}
 
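The memcached pods now default to a dedicated non-root user. A sketch of overriding these values, assuming the chart exposes them under a `memcached.podSecurityContext` style key (the exact key path is an assumption here, so check the chart's values):

```yaml
# values.yaml -- sketch; "memcached.podSecurityContext" is an assumed key path
memcached:
  podSecurityContext:
    runAsNonRoot: true
    runAsUser: 11211    # matches the new default UID shipped by the chart
    runAsGroup: 11211
    fsGroup: 11211
```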
@@ -7471,6 +7542,7 @@ false "imagePullSecrets": [], "name": null }, + "serviceAnnotations": {}, "serviceLabels": {}, "terminationGracePeriodSeconds": 30, "tolerations": [] @@ -7766,6 +7838,15 @@ true
 null
 
+ + + + patternIngester.serviceAnnotations + object + Annotations for pattern ingester service +
+{}
+
@@ -7860,6 +7941,7 @@ null "priorityClassName": null, "replicas": 0, "resources": {}, + "serviceAnnotations": {}, "serviceLabels": {}, "terminationGracePeriodSeconds": 30, "tolerations": [], @@ -8212,6 +8294,15 @@ null
 {}
 
+ + + + querier.serviceAnnotations + object + Annotations for querier service +
+{}
+
@@ -8306,6 +8397,7 @@ Defaults to allow skew no more then 1 node "priorityClassName": null, "replicas": 0, "resources": {}, + "serviceAnnotations": {}, "serviceLabels": {}, "terminationGracePeriodSeconds": 30, "tolerations": [] @@ -8583,6 +8675,15 @@ null
 {}
 
+ + + + queryFrontend.serviceAnnotations + object + Annotations for query-frontend service +
+{}
+
@@ -8654,6 +8755,7 @@ null "priorityClassName": null, "replicas": 0, "resources": {}, + "serviceAnnotations": {}, "serviceLabels": {}, "terminationGracePeriodSeconds": 30, "tolerations": [] @@ -8832,6 +8934,15 @@ null
 {}
 
+ + + + queryScheduler.serviceAnnotations + object + Annotations for query-scheduler service +
+{}
+
@@ -9488,7 +9599,7 @@ null resultsCache.persistence.storageSize string - Size of persistent disk + Size of persistent disk, which must be specified in G or Gi
 "10G"
 
@@ -9753,6 +9864,7 @@ null "priorityClassName": null, "replicas": 0, "resources": {}, + "serviceAnnotations": {}, "serviceLabels": {}, "terminationGracePeriodSeconds": 300, "tolerations": [] @@ -10012,6 +10124,15 @@ null
 {}
 
+ + + + ruler.serviceAnnotations + object + Annotations for ruler service +
+{}
+
diff --git a/docs/sources/shared/configuration.md b/docs/sources/shared/configuration.md index a563a24198ad..15426e54d088 100644 --- a/docs/sources/shared/configuration.md +++ b/docs/sources/shared/configuration.md @@ -167,264 +167,6 @@ querier_rf1: # itself to a key value store. [ingester: ] -ingester_rf1: - # Whether the ingester is enabled. - # CLI flag: -ingester-rf1.enabled - [enabled: | default = false] - - # Configures how the lifecycle of the ingester will operate and where it will - # register for discovery. - lifecycler: - ring: - kvstore: - # Backend storage to use for the ring. Supported values are: consul, - # etcd, inmemory, memberlist, multi. - # CLI flag: -ingester-rf1.store - [store: | default = "consul"] - - # The prefix for the keys in the store. Should end with a /. - # CLI flag: -ingester-rf1.prefix - [prefix: | default = "collectors/"] - - # Configuration for a Consul client. Only applies if the selected - # kvstore is consul. - # The CLI flags prefix for this block configuration is: - # ingester-rf1.consul - [consul: ] - - # Configuration for an ETCD v3 client. Only applies if the selected - # kvstore is etcd. - # The CLI flags prefix for this block configuration is: - # ingester-rf1.etcd - [etcd: ] - - multi: - # Primary backend storage used by multi-client. - # CLI flag: -ingester-rf1.multi.primary - [primary: | default = ""] - - # Secondary backend storage used by multi-client. - # CLI flag: -ingester-rf1.multi.secondary - [secondary: | default = ""] - - # Mirror writes to secondary store. - # CLI flag: -ingester-rf1.multi.mirror-enabled - [mirror_enabled: | default = false] - - # Timeout for storing value to secondary store. - # CLI flag: -ingester-rf1.multi.mirror-timeout - [mirror_timeout: | default = 2s] - - # The heartbeat timeout after which ingesters are skipped for - # reads/writes. 0 = never (timeout disabled). - # CLI flag: -ingester-rf1.ring.heartbeat-timeout - [heartbeat_timeout: | default = 1m] - - # The number of ingesters to write to and read from. - # CLI flag: -ingester-rf1.distributor.replication-factor - [replication_factor: | default = 3] - - # True to enable the zone-awareness and replicate ingested samples across - # different availability zones. - # CLI flag: -ingester-rf1.distributor.zone-awareness-enabled - [zone_awareness_enabled: | default = false] - - # Comma-separated list of zones to exclude from the ring. Instances in - # excluded zones will be filtered out from the ring. - # CLI flag: -ingester-rf1.distributor.excluded-zones - [excluded_zones: | default = ""] - - # Number of tokens for each ingester. - # CLI flag: -ingester-rf1.num-tokens - [num_tokens: | default = 128] - - # Period at which to heartbeat to consul. 0 = disabled. - # CLI flag: -ingester-rf1.heartbeat-period - [heartbeat_period: | default = 5s] - - # Heartbeat timeout after which instance is assumed to be unhealthy. 0 = - # disabled. - # CLI flag: -ingester-rf1.heartbeat-timeout - [heartbeat_timeout: | default = 1m] - - # Observe tokens after generating to resolve collisions. Useful when using - # gossiping ring. - # CLI flag: -ingester-rf1.observe-period - [observe_period: | default = 0s] - - # Period to wait for a claim from another member; will join automatically - # after this. - # CLI flag: -ingester-rf1.join-after - [join_after: | default = 0s] - - # Minimum duration to wait after the internal readiness checks have passed - # but before succeeding the readiness endpoint. This is used to slowdown - # deployment controllers (eg. 
Kubernetes) after an instance is ready and - # before they proceed with a rolling update, to give the rest of the cluster - # instances enough time to receive ring updates. - # CLI flag: -ingester-rf1.min-ready-duration - [min_ready_duration: | default = 15s] - - # Name of network interface to read address from. - # CLI flag: -ingester-rf1.lifecycler.interface - [interface_names: | default = []] - - # Enable IPv6 support. Required to make use of IP addresses from IPv6 - # interfaces. - # CLI flag: -ingester-rf1.enable-inet6 - [enable_inet6: | default = false] - - # Duration to sleep for before exiting, to ensure metrics are scraped. - # CLI flag: -ingester-rf1.final-sleep - [final_sleep: | default = 0s] - - # File path where tokens are stored. If empty, tokens are not stored at - # shutdown and restored at startup. - # CLI flag: -ingester-rf1.tokens-file-path - [tokens_file_path: | default = ""] - - # The availability zone where this instance is running. - # CLI flag: -ingester-rf1.availability-zone - [availability_zone: | default = ""] - - # Unregister from the ring upon clean shutdown. It can be useful to disable - # for rolling restarts with consistent naming in conjunction with - # -distributor.extend-writes=false. - # CLI flag: -ingester-rf1.unregister-on-shutdown - [unregister_on_shutdown: | default = true] - - # When enabled the readiness probe succeeds only after all instances are - # ACTIVE and healthy in the ring, otherwise only the instance itself is - # checked. This option should be disabled if in your cluster multiple - # instances can be rolled out simultaneously, otherwise rolling updates may - # be slowed down. - # CLI flag: -ingester-rf1.readiness-check-ring-health - [readiness_check_ring_health: | default = true] - - # IP address to advertise in the ring. - # CLI flag: -ingester-rf1.lifecycler.addr - [address: | default = ""] - - # port to advertise in consul (defaults to server.grpc-listen-port). - # CLI flag: -ingester-rf1.lifecycler.port - [port: | default = 0] - - # ID to register in the ring. - # CLI flag: -ingester-rf1.lifecycler.ID - [id: | default = ""] - - # The maximum age of a segment before it should be flushed. Increasing this - # value allows more time for a segment to grow to max-segment-size, but may - # increase latency if the write volume is too small. - # CLI flag: -ingester-rf1.max-segment-age - [max_segment_age: | default = 500ms] - - # The maximum size of a segment before it should be flushed. It is not a - # strict limit, and segments can exceed the maximum size when individual - # appends are larger than the remaining capacity. - # CLI flag: -ingester-rf1.max-segment-size - [max_segment_size: | default = 8388608] - - # The maximum number of segments to buffer in-memory. Increasing this value - # allows for large bursts of writes to be buffered in memory, but may increase - # latency if the write volume exceeds the rate at which segments can be - # flushed. - # CLI flag: -ingester-rf1.max-segments - [max_segments: | default = 10] - - # How many flushes can happen concurrently from each stream. - # CLI flag: -ingester-rf1.concurrent-flushes - [concurrent_flushes: | default = 32] - - # How often should the ingester see if there are any blocks to flush. The - # first flush check is delayed by a random time up to 0.8x the flush check - # period. Additionally, there is +/- 1% jitter added to the interval. 
- # CLI flag: -ingester-rf1.flush-check-period - [flush_check_period: | default = 500ms] - - flush_op_backoff: - # Minimum backoff period when a flush fails. Each concurrent flush has its - # own backoff, see `ingester.concurrent-flushes`. - # CLI flag: -ingester-rf1.flush-op-backoff-min-period - [min_period: | default = 100ms] - - # Maximum backoff period when a flush fails. Each concurrent flush has its - # own backoff, see `ingester.concurrent-flushes`. - # CLI flag: -ingester-rf1.flush-op-backoff-max-period - [max_period: | default = 1m] - - # Maximum retries for failed flushes. - # CLI flag: -ingester-rf1.flush-op-backoff-retries - [max_retries: | default = 10] - - # The timeout for an individual flush. Will be retried up to - # `flush-op-backoff-retries` times. - # CLI flag: -ingester-rf1.flush-op-timeout - [flush_op_timeout: | default = 10s] - - # Forget about ingesters having heartbeat timestamps older than - # `ring.kvstore.heartbeat_timeout`. This is equivalent to clicking on the - # `/ring` `forget` button in the UI: the ingester is removed from the ring. - # This is a useful setting when you are sure that an unhealthy node won't - # return. An example is when not using stateful sets or the equivalent. Use - # `memberlist.rejoin_interval` > 0 to handle network partition cases when - # using a memberlist. - # CLI flag: -ingester-rf1.autoforget-unhealthy - [autoforget_unhealthy: | default = false] - - # The maximum number of errors a stream will report to the user when a push - # fails. 0 to make unlimited. - # CLI flag: -ingester-rf1.max-ignored-stream-errors - [max_returned_stream_errors: | default = 10] - - # Shard factor used in the ingesters for the in process reverse index. This - # MUST be evenly divisible by ALL schema shard factors or Loki will not start. - # CLI flag: -ingester-rf1.index-shards - [index_shards: | default = 32] - - # Maximum number of dropped streams to keep in memory during tailing. - # CLI flag: -ingester-rf1.tailer.max-dropped-streams - [max_dropped_streams: | default = 10] - - # Path where the shutdown marker file is stored. If not set and - # common.path_prefix is set then common.path_prefix will be used. - # CLI flag: -ingester-rf1.shutdown-marker-path - [shutdown_marker_path: | default = ""] - - # Interval at which the ingester ownedStreamService checks for changes in the - # ring to recalculate owned streams. - # CLI flag: -ingester-rf1.owned-streams-check-interval - [owned_streams_check_interval: | default = 30s] - - # How long stream metadata is retained in memory after it was last seen. - # CLI flag: -ingester-rf1.stream-retain-period - [stream_retain_period: | default = 5m] - - # Configures how the pattern ingester will connect to the ingesters. - client_config: - # Configures how connections are pooled. - pool_config: - # How frequently to clean up clients for ingesters that have gone away. - # CLI flag: -ingester-rf1.client-cleanup-period - [client_cleanup_period: | default = 15s] - - # Run a health check on each ingester client during periodic cleanup. - # CLI flag: -ingester-rf1.health-check-ingesters - [health_check_ingesters: | default = true] - - # Timeout for the health check. - # CLI flag: -ingester-rf1.remote-timeout - [remote_timeout: | default = 1s] - - # The remote request timeout on the client side. - # CLI flag: -ingester-rf1.client.timeout - [remote_timeout: | default = 5s] - - # Configures how the gRPC connection to ingesters work as a client. 
- # The CLI flags prefix for this block configuration is: - # pattern-ingester.client - [grpc_client_config: ] - pattern_ingester: # Whether the pattern ingester is enabled. # CLI flag: -pattern-ingester.enabled @@ -446,14 +188,12 @@ pattern_ingester: # Configuration for a Consul client. Only applies if the selected # kvstore is consul. - # The CLI flags prefix for this block configuration is: - # pattern-ingester.consul + # The CLI flags prefix for this block configuration is: pattern-ingester [consul: ] # Configuration for an ETCD v3 client. Only applies if the selected # kvstore is etcd. - # The CLI flags prefix for this block configuration is: - # pattern-ingester.etcd + # The CLI flags prefix for this block configuration is: pattern-ingester [etcd: ] multi: @@ -1061,6 +801,12 @@ kafka_config: # CLI flag: -kafka.consumer-group [consumer_group: | default = ""] + # How frequently a consumer should commit the consumed offset to Kafka. The + # last committed offset is used at startup to continue the consumption from + # where it was left. + # CLI flag: -kafka.consumer-group-offset-commit-interval + [consumer_group_offset_commit_interval: | default = 1s] + # How long to retry a failed request to get the last produced offset. # CLI flag: -kafka.last-produced-offset-retry-timeout [last_produced_offset_retry_timeout: | default = 10s] @@ -1069,6 +815,17 @@ kafka_config: # CLI flag: -kafka.auto-create-topic-enabled [auto_create_topic_enabled: | default = true] + # When auto-creation of Kafka topic is enabled and this value is positive, + # Kafka's num.partitions configuration option is set on Kafka brokers with + # this value when Loki component that uses Kafka starts. This configuration + # option specifies the default number of partitions that the Kafka broker uses + # for auto-created topics. Note that this is a Kafka-cluster wide setting, and + # applies to any auto-created topic. If the setting of num.partitions fails, + # Loki proceeds anyways, but auto-created topics could have an incorrect + # number of partitions. + # CLI flag: -kafka.auto-create-topic-default-partitions + [auto_create_topic_default_partitions: | default = 1000] + # The maximum size of a Kafka record data that should be generated by the # producer. An incoming write request larger than this size is split into # multiple Kafka records. We strongly recommend to not change this setting @@ -1082,227 +839,6 @@ kafka_config: # CLI flag: -kafka.producer-max-buffered-bytes [producer_max_buffered_bytes: | default = 1073741824] -kafka_ingester: - # Whether the kafka ingester is enabled. - # CLI flag: -kafka-ingester.enabled - [enabled: | default = false] - - # Configures how the lifecycle of the ingester will operate and where it will - # register for discovery. - lifecycler: - ring: - kvstore: - # Backend storage to use for the ring. Supported values are: consul, - # etcd, inmemory, memberlist, multi. - # CLI flag: -kafka-ingesterstore - [store: | default = "consul"] - - # The prefix for the keys in the store. Should end with a /. - # CLI flag: -kafka-ingesterprefix - [prefix: | default = "collectors/"] - - # Configuration for a Consul client. Only applies if the selected - # kvstore is consul. - # The CLI flags prefix for this block configuration is: - # kafka-ingesterconsul - [consul: ] - - # Configuration for an ETCD v3 client. Only applies if the selected - # kvstore is etcd. 
- # The CLI flags prefix for this block configuration is: - # kafka-ingesteretcd - [etcd: ] - - multi: - # Primary backend storage used by multi-client. - # CLI flag: -kafka-ingestermulti.primary - [primary: | default = ""] - - # Secondary backend storage used by multi-client. - # CLI flag: -kafka-ingestermulti.secondary - [secondary: | default = ""] - - # Mirror writes to secondary store. - # CLI flag: -kafka-ingestermulti.mirror-enabled - [mirror_enabled: | default = false] - - # Timeout for storing value to secondary store. - # CLI flag: -kafka-ingestermulti.mirror-timeout - [mirror_timeout: | default = 2s] - - # The heartbeat timeout after which ingesters are skipped for - # reads/writes. 0 = never (timeout disabled). - # CLI flag: -kafka-ingesterring.heartbeat-timeout - [heartbeat_timeout: | default = 1m] - - # The number of ingesters to write to and read from. - # CLI flag: -kafka-ingesterdistributor.replication-factor - [replication_factor: | default = 3] - - # True to enable the zone-awareness and replicate ingested samples across - # different availability zones. - # CLI flag: -kafka-ingesterdistributor.zone-awareness-enabled - [zone_awareness_enabled: | default = false] - - # Comma-separated list of zones to exclude from the ring. Instances in - # excluded zones will be filtered out from the ring. - # CLI flag: -kafka-ingesterdistributor.excluded-zones - [excluded_zones: | default = ""] - - # Number of tokens for each ingester. - # CLI flag: -kafka-ingesternum-tokens - [num_tokens: | default = 128] - - # Period at which to heartbeat to consul. 0 = disabled. - # CLI flag: -kafka-ingesterheartbeat-period - [heartbeat_period: | default = 5s] - - # Heartbeat timeout after which instance is assumed to be unhealthy. 0 = - # disabled. - # CLI flag: -kafka-ingesterheartbeat-timeout - [heartbeat_timeout: | default = 1m] - - # Observe tokens after generating to resolve collisions. Useful when using - # gossiping ring. - # CLI flag: -kafka-ingesterobserve-period - [observe_period: | default = 0s] - - # Period to wait for a claim from another member; will join automatically - # after this. - # CLI flag: -kafka-ingesterjoin-after - [join_after: | default = 0s] - - # Minimum duration to wait after the internal readiness checks have passed - # but before succeeding the readiness endpoint. This is used to slowdown - # deployment controllers (eg. Kubernetes) after an instance is ready and - # before they proceed with a rolling update, to give the rest of the cluster - # instances enough time to receive ring updates. - # CLI flag: -kafka-ingestermin-ready-duration - [min_ready_duration: | default = 15s] - - # Name of network interface to read address from. - # CLI flag: -kafka-ingesterlifecycler.interface - [interface_names: | default = []] - - # Enable IPv6 support. Required to make use of IP addresses from IPv6 - # interfaces. - # CLI flag: -kafka-ingesterenable-inet6 - [enable_inet6: | default = false] - - # Duration to sleep for before exiting, to ensure metrics are scraped. - # CLI flag: -kafka-ingesterfinal-sleep - [final_sleep: | default = 0s] - - # File path where tokens are stored. If empty, tokens are not stored at - # shutdown and restored at startup. - # CLI flag: -kafka-ingestertokens-file-path - [tokens_file_path: | default = ""] - - # The availability zone where this instance is running. - # CLI flag: -kafka-ingesteravailability-zone - [availability_zone: | default = ""] - - # Unregister from the ring upon clean shutdown. 
It can be useful to disable - # for rolling restarts with consistent naming in conjunction with - # -distributor.extend-writes=false. - # CLI flag: -kafka-ingesterunregister-on-shutdown - [unregister_on_shutdown: | default = true] - - # When enabled the readiness probe succeeds only after all instances are - # ACTIVE and healthy in the ring, otherwise only the instance itself is - # checked. This option should be disabled if in your cluster multiple - # instances can be rolled out simultaneously, otherwise rolling updates may - # be slowed down. - # CLI flag: -kafka-ingesterreadiness-check-ring-health - [readiness_check_ring_health: | default = true] - - # IP address to advertise in the ring. - # CLI flag: -kafka-ingesterlifecycler.addr - [address: | default = ""] - - # port to advertise in consul (defaults to server.grpc-listen-port). - # CLI flag: -kafka-ingesterlifecycler.port - [port: | default = 0] - - # ID to register in the ring. - # CLI flag: -kafka-ingesterlifecycler.ID - [id: | default = ""] - - # Path where the shutdown marker file is stored. If not set and - # common.path_prefix is set then common.path_prefix will be used. - # CLI flag: -kafka-ingester.shutdown-marker-path - [shutdown_marker_path: | default = ""] - - # The interval at which the ingester will flush and commit offsets to Kafka. - # If not set, the default flush interval will be used. - # CLI flag: -kafka-ingester.flush-interval - [flush_interval: | default = 15s] - - # The size at which the ingester will flush and commit offsets to Kafka. If - # not set, the default flush size will be used. - # CLI flag: -kafka-ingester.flush-size - [flush_size: | default = 314572800] - - partition_ring: - # The key-value store used to share the hash ring across multiple instances. - # This option needs be set on ingesters, distributors, queriers, and rulers - # when running in microservices mode. - kvstore: - # Backend storage to use for the ring. Supported values are: consul, etcd, - # inmemory, memberlist, multi. - # CLI flag: -ingester.partition-ring.store - [store: | default = "memberlist"] - - # The prefix for the keys in the store. Should end with a /. - # CLI flag: -ingester.partition-ring.prefix - [prefix: | default = "collectors/"] - - # Configuration for a Consul client. Only applies if the selected kvstore - # is consul. - # The CLI flags prefix for this block configuration is: - # ingester.partition-ring.consul - [consul: ] - - # Configuration for an ETCD v3 client. Only applies if the selected - # kvstore is etcd. - # The CLI flags prefix for this block configuration is: - # ingester.partition-ring.etcd - [etcd: ] - - multi: - # Primary backend storage used by multi-client. - # CLI flag: -ingester.partition-ring.multi.primary - [primary: | default = ""] - - # Secondary backend storage used by multi-client. - # CLI flag: -ingester.partition-ring.multi.secondary - [secondary: | default = ""] - - # Mirror writes to secondary store. - # CLI flag: -ingester.partition-ring.multi.mirror-enabled - [mirror_enabled: | default = false] - - # Timeout for storing value to secondary store. - # CLI flag: -ingester.partition-ring.multi.mirror-timeout - [mirror_timeout: | default = 2s] - - # Minimum number of owners to wait before a PENDING partition gets switched - # to ACTIVE. - # CLI flag: -ingester.partition-ring.min-partition-owners-count - [min_partition_owners_count: | default = 1] - - # How long the minimum number of owners are enforced before a PENDING - # partition gets switched to ACTIVE. 
- # CLI flag: -ingester.partition-ring.min-partition-owners-duration - [min_partition_owners_duration: | default = 10s] - - # How long to wait before an INACTIVE partition is eligible for deletion. - # The partition is deleted only if it has been in INACTIVE state for at - # least the configured duration and it has no owners registered. A value of - # 0 disables partitions deletion. - # CLI flag: -ingester.partition-ring.delete-inactive-partition-after - [delete_inactive_partition_after: | default = 13h] - # Configuration for 'runtime config' module, responsible for reloading runtime # configuration file. [runtime_config: ] @@ -2255,14 +1791,12 @@ ring: # Configuration for a Consul client. Only applies if the selected kvstore is # consul. - # The CLI flags prefix for this block configuration is: - # common.storage.ring.consul + # The CLI flags prefix for this block configuration is: common.storage.ring [consul: ] # Configuration for an ETCD v3 client. Only applies if the selected kvstore # is etcd. - # The CLI flags prefix for this block configuration is: - # common.storage.ring.etcd + # The CLI flags prefix for this block configuration is: common.storage.ring [etcd: ] multi: @@ -2383,6 +1917,19 @@ The `compactor` block configures the compactor component, which compacts index s # CLI flag: -compactor.retention-table-timeout [retention_table_timeout: | default = 0s] +retention_backoff_config: + # Minimum delay when backing off. + # CLI flag: -compactor.retention-backoff-config.backoff-min-period + [min_period: | default = 100ms] + + # Maximum delay when backing off. + # CLI flag: -compactor.retention-backoff-config.backoff-max-period + [max_period: | default = 10s] + + # Number of times to backoff and retry before failing. + # CLI flag: -compactor.retention-backoff-config.backoff-retries + [max_retries: | default = 10] + # Store used for managing delete requests. # CLI flag: -compactor.delete-request-store [delete_request_store: | default = ""] @@ -2436,13 +1983,12 @@ compactor_ring: # Configuration for a Consul client. Only applies if the selected kvstore is # consul. - # The CLI flags prefix for this block configuration is: - # compactor.ring.consul + # The CLI flags prefix for this block configuration is: compactor.ring [consul: ] # Configuration for an ETCD v3 client. Only applies if the selected kvstore # is etcd. - # The CLI flags prefix for this block configuration is: compactor.ring.etcd + # The CLI flags prefix for this block configuration is: compactor.ring [etcd: ] multi: @@ -2521,48 +2067,45 @@ compactor_ring: Configuration for a Consul client. Only applies if the selected kvstore is `consul`. The supported CLI flags `` used to reference this configuration block are: -- `common.storage.ring.consul` -- `compactor.ring.consul` -- `consul` -- `distributor.ring.consul` -- `index-gateway.ring.consul` -- `ingester-rf1.consul` -- `ingester.partition-ring.consul` -- `kafka-ingesterconsul` -- `pattern-ingester.consul` -- `query-scheduler.ring.consul` -- `ruler.ring.consul` +- `common.storage.ring` +- `compactor.ring` +- `distributor.ring` +- `index-gateway.ring` +- `ingester.partition-ring` +- `pattern-ingester` +- `query-scheduler.ring` +- `ruler.ring`   ```yaml # Hostname and port of Consul. -# CLI flag: -.hostname +# CLI flag: -.consul.hostname [host: | default = "localhost:8500"] # ACL Token used to interact with Consul. 
-# CLI flag: -.acl-token +# CLI flag: -.consul.acl-token [acl_token: | default = ""] # HTTP timeout when talking to Consul -# CLI flag: -.client-timeout +# CLI flag: -.consul.client-timeout [http_client_timeout: | default = 20s] # Enable consistent reads to Consul. -# CLI flag: -.consistent-reads +# CLI flag: -.consul.consistent-reads [consistent_reads: | default = false] # Rate limit when watching key or prefix in Consul, in requests per second. 0 # disables the rate limit. -# CLI flag: -.watch-rate-limit +# CLI flag: -.consul.watch-rate-limit [watch_rate_limit: | default = 1] # Burst size used in rate limit. Values less than 1 are treated as 1. -# CLI flag: -.watch-burst-size +# CLI flag: -.consul.watch-burst-size [watch_burst_size: | default = 1] # Maximum duration to wait before retrying a Compare And Swap (CAS) operation. -# CLI flag: -.cas-retry-delay +# CLI flag: -.consul.cas-retry-delay [cas_retry_delay: | default = 1s] ``` @@ -2667,14 +2210,12 @@ ring: # Configuration for a Consul client. Only applies if the selected kvstore is # consul. - # The CLI flags prefix for this block configuration is: - # distributor.ring.consul + # The CLI flags prefix for this block configuration is: distributor.ring [consul: ] # Configuration for an ETCD v3 client. Only applies if the selected kvstore # is etcd. - # The CLI flags prefix for this block configuration is: - # distributor.ring.etcd + # The CLI flags prefix for this block configuration is: distributor.ring [etcd: ] multi: @@ -2707,6 +2248,10 @@ ring: # CLI flag: -distributor.ring.instance-interface-names [instance_interface_names: | default = []] +# Number of workers to push batches to ingesters. +# CLI flag: -distributor.push-worker-count +[push_worker_count: | default = 256] + rate_store: # The max number of concurrent requests to make to ingester stream apis # CLI flag: -distributor.rate-store.max-request-parallelism @@ -2740,64 +2285,69 @@ otlp_config: # List of default otlp resource attributes to be picked as index labels # CLI flag: -distributor.otlp.default_resource_attributes_as_index_labels [default_resource_attributes_as_index_labels: | default = [service.name service.namespace service.instance.id deployment.environment cloud.region cloud.availability_zone k8s.cluster.name k8s.namespace.name k8s.pod.name k8s.container.name container.name k8s.replicaset.name k8s.deployment.name k8s.statefulset.name k8s.daemonset.name k8s.cronjob.name k8s.job.name]] + +# Enable writes to Kafka during Push requests. +# CLI flag: -distributor.kafka-writes-enabled +[kafka_writes_enabled: | default = false] + +# Enable writes to Ingesters during Push requests. Defaults to true. +# CLI flag: -distributor.ingester-writes-enabled +[ingester_writes_enabled: | default = true] ``` ### etcd Configuration for an ETCD v3 client. Only applies if the selected kvstore is `etcd`. The supported CLI flags `` used to reference this configuration block are: -- `common.storage.ring.etcd` -- `compactor.ring.etcd` -- `distributor.ring.etcd` -- `etcd` -- `index-gateway.ring.etcd` -- `ingester-rf1.etcd` -- `ingester.partition-ring.etcd` -- `kafka-ingesteretcd` -- `pattern-ingester.etcd` -- `query-scheduler.ring.etcd` -- `ruler.ring.etcd` +- `common.storage.ring` +- `compactor.ring` +- `distributor.ring` +- `index-gateway.ring` +- `ingester.partition-ring` +- `pattern-ingester` +- `query-scheduler.ring` +- `ruler.ring`   ```yaml # The etcd endpoints to connect to. 
-# CLI flag: -.endpoints +# CLI flag: -.etcd.endpoints [endpoints: | default = []] # The dial timeout for the etcd connection. -# CLI flag: -.dial-timeout +# CLI flag: -.etcd.dial-timeout [dial_timeout: | default = 10s] # The maximum number of retries to do for failed ops. -# CLI flag: -.max-retries +# CLI flag: -.etcd.max-retries [max_retries: | default = 10] # Enable TLS. -# CLI flag: -.tls-enabled +# CLI flag: -.etcd.tls-enabled [tls_enabled: | default = false] # Path to the client certificate, which will be used for authenticating with the # server. Also requires the key path to be configured. -# CLI flag: -.tls-cert-path +# CLI flag: -.etcd.tls-cert-path [tls_cert_path: | default = ""] # Path to the key for the client certificate. Also requires the client # certificate to be configured. -# CLI flag: -.tls-key-path +# CLI flag: -.etcd.tls-key-path [tls_key_path: | default = ""] # Path to the CA certificates to validate server certificate against. If not # set, the host's root CA certificates are used. -# CLI flag: -.tls-ca-path +# CLI flag: -.etcd.tls-ca-path [tls_ca_path: | default = ""] # Override the expected name on the server certificate. -# CLI flag: -.tls-server-name +# CLI flag: -.etcd.tls-server-name [tls_server_name: | default = ""] # Skip validating server certificate. -# CLI flag: -.tls-insecure-skip-verify +# CLI flag: -.etcd.tls-insecure-skip-verify [tls_insecure_skip_verify: | default = false] # Override the default cipher suite list (separated by commas). Allowed values: @@ -2830,20 +2380,20 @@ Configuration for an ETCD v3 client. Only applies if the selected kvstore is `et # - TLS_ECDHE_RSA_WITH_3DES_EDE_CBC_SHA # - TLS_ECDHE_ECDSA_WITH_AES_128_CBC_SHA256 # - TLS_ECDHE_RSA_WITH_AES_128_CBC_SHA256 -# CLI flag: -.tls-cipher-suites +# CLI flag: -.etcd.tls-cipher-suites [tls_cipher_suites: | default = ""] # Override the default minimum TLS version. Allowed values: VersionTLS10, # VersionTLS11, VersionTLS12, VersionTLS13 -# CLI flag: -.tls-min-version +# CLI flag: -.etcd.tls-min-version [tls_min_version: | default = ""] # Etcd username. -# CLI flag: -.username +# CLI flag: -.etcd.username [username: | default = ""] # Etcd password. -# CLI flag: -.password +# CLI flag: -.etcd.password [password: | default = ""] ``` @@ -3042,7 +2592,6 @@ The `grpc_client` block configures the gRPC client used to communicate between a - `bloom-gateway-client.grpc` - `boltdb.shipper.index-gateway-client.grpc` - `frontend.grpc-client-config` -- `ingester-rf1.client` - `ingester.client` - `metastore.grpc-client-config` - `pattern-ingester.client` @@ -3220,14 +2769,12 @@ ring: # Configuration for a Consul client. Only applies if the selected kvstore is # consul. - # The CLI flags prefix for this block configuration is: - # index-gateway.ring.consul + # The CLI flags prefix for this block configuration is: index-gateway.ring [consul: ] # Configuration for an ETCD v3 client. Only applies if the selected kvstore # is etcd. - # The CLI flags prefix for this block configuration is: - # index-gateway.ring.etcd + # The CLI flags prefix for this block configuration is: index-gateway.ring [etcd: ] multi: @@ -3319,12 +2866,10 @@ lifecycler: # Configuration for a Consul client. Only applies if the selected kvstore # is consul. - # The CLI flags prefix for this block configuration is: consul [consul: ] # Configuration for an ETCD v3 client. Only applies if the selected # kvstore is etcd. 
- # The CLI flags prefix for this block configuration is: etcd [etcd: ] multi: @@ -3584,6 +3129,71 @@ wal: # ring to recalculate owned streams. # CLI flag: -ingester.owned-streams-check-interval [owned_streams_check_interval: | default = 30s] + +kafka_ingestion: + # Whether the kafka ingester is enabled. + # CLI flag: -ingester.kafka-ingestion-enabled + [enabled: | default = false] + + partition_ring: + # The key-value store used to share the hash ring across multiple instances. + # This option needs be set on ingesters, distributors, queriers, and rulers + # when running in microservices mode. + kvstore: + # Backend storage to use for the ring. Supported values are: consul, etcd, + # inmemory, memberlist, multi. + # CLI flag: -ingester.partition-ring.store + [store: | default = "memberlist"] + + # The prefix for the keys in the store. Should end with a /. + # CLI flag: -ingester.partition-ring.prefix + [prefix: | default = "collectors/"] + + # Configuration for a Consul client. Only applies if the selected kvstore + # is consul. + # The CLI flags prefix for this block configuration is: + # ingester.partition-ring + [consul: ] + + # Configuration for an ETCD v3 client. Only applies if the selected + # kvstore is etcd. + # The CLI flags prefix for this block configuration is: + # ingester.partition-ring + [etcd: ] + + multi: + # Primary backend storage used by multi-client. + # CLI flag: -ingester.partition-ring.multi.primary + [primary: | default = ""] + + # Secondary backend storage used by multi-client. + # CLI flag: -ingester.partition-ring.multi.secondary + [secondary: | default = ""] + + # Mirror writes to secondary store. + # CLI flag: -ingester.partition-ring.multi.mirror-enabled + [mirror_enabled: | default = false] + + # Timeout for storing value to secondary store. + # CLI flag: -ingester.partition-ring.multi.mirror-timeout + [mirror_timeout: | default = 2s] + + # Minimum number of owners to wait before a PENDING partition gets switched + # to ACTIVE. + # CLI flag: -ingester.partition-ring.min-partition-owners-count + [min_partition_owners_count: | default = 1] + + # How long the minimum number of owners are enforced before a PENDING + # partition gets switched to ACTIVE. + # CLI flag: -ingester.partition-ring.min-partition-owners-duration + [min_partition_owners_duration: | default = 10s] + + # How long to wait before an INACTIVE partition is eligible for deletion. + # The partition is deleted only if it has been in INACTIVE state for at + # least the configured duration and it has no owners registered. A value of + # 0 disables partitions deletion. + # CLI flag: -ingester.partition-ring.delete-inactive-partition-after + [delete_inactive_partition_after: | default = 13h] ``` ### ingester_client @@ -3593,16 +3203,26 @@ The `ingester_client` block configures how the distributor will connect to inges ```yaml # Configures how connections are pooled. pool_config: - [client_cleanup_period: ] + # How frequently to clean up clients for ingesters that have gone away. + # CLI flag: -distributor.client-cleanup-period + [client_cleanup_period: | default = 15s] - [health_check_ingesters: ] + # Run a health check on each ingester client during periodic cleanup. + # CLI flag: -distributor.health-check-ingesters + [health_check_ingesters: | default = true] - [remote_timeout: ] + # How quickly a dead client will be removed after it has been detected to + # disappear. Set this to a value to allow time for a secondary health check to + # recover the missing client. 
+ # CLI flag: -ingester.client.healthcheck-timeout + [remote_timeout: | default = 1s] -[remote_timeout: ] +# The remote request timeout on the client side. +# CLI flag: -ingester.client.timeout +[remote_timeout: | default = 5s] # Configures how the gRPC connection to ingesters work as a client. -# The CLI flags prefix for this block configuration is: ingester-rf1.client +# The CLI flags prefix for this block configuration is: ingester.client [grpc_client_config: ] ``` @@ -4135,20 +3755,6 @@ shard_streams: # CLI flag: -bloom-build.split-keyspace-by [bloom_split_series_keyspace_by: | default = 256] -# Experimental. Length of the n-grams created when computing blooms from log -# lines. -# CLI flag: -bloom-build.ngram-length -[bloom_ngram_length: | default = 4] - -# Experimental. Skip factor for the n-grams created when computing blooms from -# log lines. -# CLI flag: -bloom-build.ngram-skip -[bloom_ngram_skip: | default = 1] - -# Experimental. Scalable Bloom Filter desired false-positive rate. -# CLI flag: -bloom-build.false-positive-rate -[bloom_false_positive_rate: | default = 0.01] - # Experimental. Compression algorithm for bloom block pages. # CLI flag: -bloom-build.block-encoding [bloom_block_encoding: | default = "none"] @@ -4799,14 +4405,12 @@ scheduler_ring: # Configuration for a Consul client. Only applies if the selected kvstore is # consul. - # The CLI flags prefix for this block configuration is: - # query-scheduler.ring.consul + # The CLI flags prefix for this block configuration is: query-scheduler.ring [consul: ] # Configuration for an ETCD v3 client. Only applies if the selected kvstore # is etcd. - # The CLI flags prefix for this block configuration is: - # query-scheduler.ring.etcd + # The CLI flags prefix for this block configuration is: query-scheduler.ring [etcd: ] multi: @@ -5107,12 +4711,12 @@ ring: # Configuration for a Consul client. Only applies if the selected kvstore is # consul. - # The CLI flags prefix for this block configuration is: ruler.ring.consul + # The CLI flags prefix for this block configuration is: ruler.ring [consul: ] # Configuration for an ETCD v3 client. Only applies if the selected kvstore # is etcd. - # The CLI flags prefix for this block configuration is: ruler.ring.etcd + # The CLI flags prefix for this block configuration is: ruler.ring [etcd: ] multi: diff --git a/docs/sources/visualize/grafana.md b/docs/sources/visualize/grafana.md index b7da4a6c1bec..c4ba0988770d 100644 --- a/docs/sources/visualize/grafana.md +++ b/docs/sources/visualize/grafana.md @@ -38,11 +38,12 @@ If you are a Grafana Cloud user, you can access Explore Logs in the Grafana Clou 1. Click the **+ Add new data source** button. 1. Search for, or choose Loki from the list. 1. On the **Settings** tab, the **URL** field should be the address of your Loki server. -For example,when running locally or with Docker using port mapping, the address is likely `http://localhost:3100`. +For example, when running locally or with Docker using port mapping, the address is likely `http://localhost:3100`. When running with docker-compose or Kubernetes, the address is likely `http://loki:3100`. When running Grafana (with Docker) and trying to connect to a locally built Loki instance, the address (for the URL field) is: On Mac: `docker.for.mac.localhost` On Windows: `docker.for.win.localhost` +1. If your Loki server has [multi-tenancy](https://grafana.com/docs/loki/latest/operations/multi-tenancy/) enabled, then you must provide your tenant ID in the `X-Scope-OrgID` header. 
Click the **+ Add header** button under **HTTP headers**, enter `X-Scope-OrgID` in the **Header** field, and your tenant ID in the **Value** field. Multi-tenancy is enabled by default when running Loki with Helm on Kubernetes. 1. To view your logs, click **Explore** in the main menu. 1. Select the Loki datasource in the top-left menu. 1. You can click **Kick start your query** to select from a list of common queries, or use the **Label filters** to start choosing labels that you want to query. For more information about the Loki query language, refer to the [LogQL section](https://grafana.com/docs/loki//query/). diff --git a/go.mod b/go.mod index 9ebf69afe05f..3981ee97bf44 100644 --- a/go.mod +++ b/go.mod @@ -49,7 +49,7 @@ require ( github.com/gorilla/mux v1.8.1 github.com/gorilla/websocket v1.5.3 github.com/grafana/cloudflare-go v0.0.0-20230110200409-c627cf6792f2 - github.com/grafana/dskit v0.0.0-20240905221822-931a021fb06b + github.com/grafana/dskit v0.0.0-20240925193654-7c41a4057319 github.com/grafana/go-gelf/v2 v2.0.1 github.com/grafana/gomemcache v0.0.0-20240229205252-cd6a66d6fb56 github.com/grafana/regexp v0.0.0-20240518133315-a468a5bfb3bc @@ -124,7 +124,7 @@ require ( github.com/fsnotify/fsnotify v1.7.0 github.com/gogo/googleapis v1.4.1 github.com/grafana/jsonparser v0.0.0-20240425183733-ea80629e1a32 - github.com/grafana/loki/pkg/push v0.0.0-20231124142027-e52380921608 + github.com/grafana/loki/pkg/push v0.0.0-20240924133635-758364c7775f github.com/hashicorp/golang-lru/v2 v2.0.7 github.com/hashicorp/raft v1.7.1 github.com/hashicorp/raft-wal v0.4.1 diff --git a/go.sum b/go.sum index 28ca7bfdf1dc..fabf4aa4fdca 100644 --- a/go.sum +++ b/go.sum @@ -1042,8 +1042,8 @@ github.com/gorilla/websocket v1.5.3 h1:saDtZ6Pbx/0u+bgYQ3q96pZgCzfhKXGPqt7kZ72aN github.com/gorilla/websocket v1.5.3/go.mod h1:YR8l580nyteQvAITg2hZ9XVh4b55+EU/adAjf1fMHhE= github.com/grafana/cloudflare-go v0.0.0-20230110200409-c627cf6792f2 h1:qhugDMdQ4Vp68H0tp/0iN17DM2ehRo1rLEdOFe/gB8I= github.com/grafana/cloudflare-go v0.0.0-20230110200409-c627cf6792f2/go.mod h1:w/aiO1POVIeXUQyl0VQSZjl5OAGDTL5aX+4v0RA1tcw= -github.com/grafana/dskit v0.0.0-20240905221822-931a021fb06b h1:x2HCzk29I0o5pRPfqWP/qwhXaPGlcz8pohq5kO1NZoE= -github.com/grafana/dskit v0.0.0-20240905221822-931a021fb06b/go.mod h1:SPLNCARd4xdjCkue0O6hvuoveuS1dGJjDnfxYe405YQ= +github.com/grafana/dskit v0.0.0-20240925193654-7c41a4057319 h1:KACpOOTqA4WqyyKF2fFPQFiaSOpZdOT5f5gg0qkPLiU= +github.com/grafana/dskit v0.0.0-20240925193654-7c41a4057319/go.mod h1:SPLNCARd4xdjCkue0O6hvuoveuS1dGJjDnfxYe405YQ= github.com/grafana/go-gelf/v2 v2.0.1 h1:BOChP0h/jLeD+7F9mL7tq10xVkDG15he3T1zHuQaWak= github.com/grafana/go-gelf/v2 v2.0.1/go.mod h1:lexHie0xzYGwCgiRGcvZ723bSNyNI8ZRD4s0CLobh90= github.com/grafana/gocql v0.0.0-20200605141915-ba5dc39ece85 h1:xLuzPoOzdfNb/RF/IENCw+oLVdZB4G21VPhkHBgwSHY= diff --git a/loki-build-image/Dockerfile b/loki-build-image/Dockerfile index 05dc02244160..ec35fbb249f4 100644 --- a/loki-build-image/Dockerfile +++ b/loki-build-image/Dockerfile @@ -4,7 +4,7 @@ # tag of the Docker image in `../.drone/drone.jsonnet` and run `make drone`. # See ../docs/sources/community/maintaining/release-loki-build-image.md for instructions # on how to publish a new build image. -ARG GO_VERSION=1.22 +ARG GO_VERSION=1.23 # Install helm (https://helm.sh/) and helm-docs (https://github.com/norwoodj/helm-docs) for generating Helm Chart reference. 
FROM golang:${GO_VERSION}-bookworm AS helm ARG TARGETARCH @@ -15,7 +15,7 @@ RUN BIN=$([ "$TARGETARCH" = "arm64" ] && echo "helm-docs_Linux_arm64" || echo "h curl -L "https://github.com/norwoodj/helm-docs/releases/download/v1.11.2/$BIN.tar.gz" | tar zx && \ install -t /usr/local/bin helm-docs -FROM alpine:3.20.2 AS lychee +FROM alpine:3.20.3 AS lychee ARG TARGETARCH ARG LYCHEE_VER="0.7.0" RUN apk add --no-cache curl && \ @@ -24,18 +24,18 @@ RUN apk add --no-cache curl && \ mv /tmp/lychee /usr/bin/lychee && \ rm -rf "/tmp/linux-$TARGETARCH" /tmp/lychee-$LYCHEE_VER.tgz -FROM alpine:3.20.2 AS golangci +FROM alpine:3.20.3 AS golangci RUN apk add --no-cache curl && \ cd / && \ - curl -sfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s v1.55.1 + curl -sfL https://raw.githubusercontent.com/golangci/golangci-lint/master/install.sh | sh -s v1.60.3 -FROM alpine:3.20.2 AS buf +FROM alpine:3.20.3 AS buf ARG TARGETOS RUN apk add --no-cache curl && \ curl -sSL "https://github.com/bufbuild/buf/releases/download/v1.4.0/buf-$TARGETOS-$(uname -m)" -o "/usr/bin/buf" && \ chmod +x "/usr/bin/buf" -FROM alpine:3.20.2 AS docker +FROM alpine:3.20.3 AS docker RUN apk add --no-cache docker-cli docker-cli-buildx FROM golang:${GO_VERSION}-bookworm AS drone diff --git a/loki-build-image/README.md b/loki-build-image/README.md index 81bb708871f1..48106cb28c42 100644 --- a/loki-build-image/README.md +++ b/loki-build-image/README.md @@ -2,6 +2,11 @@ ## Versions +### 0.34.0 + +- Update to Go 1.23.1 +- Update to Alpine 3.20.3 + ### 0.33.6 - Update to go 1.22.6 diff --git a/operator/CHANGELOG.md b/operator/CHANGELOG.md index 2651831719e4..7727779251bc 100644 --- a/operator/CHANGELOG.md +++ b/operator/CHANGELOG.md @@ -1,5 +1,32 @@ ## Main +## [0.6.2](https://github.com/grafana/loki/compare/operator/v0.6.1...operator/v0.6.2) (2024-09-11) + + +### Features + +* Ingester Stream Limit Improvements ([#13532](https://github.com/grafana/loki/issues/13532)) ([ec34aaa](https://github.com/grafana/loki/commit/ec34aaa1ff2e616ef223631657b63f7dffedd3cc)) +* **operator:** Add alert for discarded samples ([#13512](https://github.com/grafana/loki/issues/13512)) ([5f2a02f](https://github.com/grafana/loki/commit/5f2a02f14222dab891b7851e8f48052d6c9b594a)) +* **operator:** Add support for Loki OTLP limits config ([#13446](https://github.com/grafana/loki/issues/13446)) ([d02f435](https://github.com/grafana/loki/commit/d02f435d3bf121b19e15de4f139c95a6d010b25c)) +* **operator:** Add support for the volume API ([#13369](https://github.com/grafana/loki/issues/13369)) ([d451e23](https://github.com/grafana/loki/commit/d451e23225047a11b4d5d82900cec4a46d6e7b39)) +* **operator:** Enable leader-election ([#13760](https://github.com/grafana/loki/issues/13760)) ([1ba4bff](https://github.com/grafana/loki/commit/1ba4bff005930b173391df35248e6f58e076fa74)) +* **operator:** Update Loki operand to v3.1.0 ([#13422](https://github.com/grafana/loki/issues/13422)) ([cf5f52d](https://github.com/grafana/loki/commit/cf5f52dca0db93847218cdd2c3f4860d983381ae)) +* **operator:** Update Loki operand to v3.1.1 ([#14042](https://github.com/grafana/loki/issues/14042)) ([7ae1588](https://github.com/grafana/loki/commit/7ae1588200396b73a16fadd2610670a5ce5fd747)) + + +### Bug Fixes + +* **deps:** update k8s.io/utils digest to 702e33f ([#14033](https://github.com/grafana/loki/issues/14033)) ([b7eecc7](https://github.com/grafana/loki/commit/b7eecc7a693e96f4d0fe0dcd7583ecdc4dd7283f)) +* **operator:** add alertmanager client config to ruler template 
([#13182](https://github.com/grafana/loki/issues/13182)) ([6148c37](https://github.com/grafana/loki/commit/6148c3760d701768e442186d4e7d574c7dc16c91)) +* **operator:** Allow structured metadata only if V13 schema provided ([#13463](https://github.com/grafana/loki/issues/13463)) ([3ac130b](https://github.com/grafana/loki/commit/3ac130b8a152169766cb173718f2312aeb4f694e)) +* **operator:** Don't overwrite annotations for LokiStack ingress resources ([#13708](https://github.com/grafana/loki/issues/13708)) ([f523530](https://github.com/grafana/loki/commit/f52353060dd936cff587ff2060c8616941695ece)) +* **operator:** Improve API documentation for schema version ([#13122](https://github.com/grafana/loki/issues/13122)) ([3a9f50f](https://github.com/grafana/loki/commit/3a9f50f5099a02e662b8ac10ddad0b36cd844161)) +* **operator:** Remove duplicate conditions from status ([#13497](https://github.com/grafana/loki/issues/13497)) ([527510d](https://github.com/grafana/loki/commit/527510d1a84a981250047dbabba8d492177b8452)) +* **operator:** Set object storage for delete requests when using retention ([#13562](https://github.com/grafana/loki/issues/13562)) ([46de4c1](https://github.com/grafana/loki/commit/46de4c1bc839ef682798bec5003123f7d5f4404b)) +* **operator:** Skip updating annotations for serviceaccounts ([#13450](https://github.com/grafana/loki/issues/13450)) ([1b9b111](https://github.com/grafana/loki/commit/1b9b11116b48fb37b7015d27104668412fc04937)) +* **operator:** Support v3.1.0 in OpenShift dashboards ([#13430](https://github.com/grafana/loki/issues/13430)) ([8279d59](https://github.com/grafana/loki/commit/8279d59f145df9c9132aeff9e3d46c738650027c)) +* **operator:** Watch for CredentialsRequests on CCOAuthEnv only ([#13299](https://github.com/grafana/loki/issues/13299)) ([7fc926e](https://github.com/grafana/loki/commit/7fc926e36ea8fca7bd8e9955c8994574535dbbae)) + ## [0.6.1](https://github.com/grafana/loki/compare/operator/v0.6.0...operator/v0.6.1) (2024-06-03) diff --git a/operator/Makefile b/operator/Makefile index 3be618f551b1..8dce06bbce79 100644 --- a/operator/Makefile +++ b/operator/Makefile @@ -21,7 +21,7 @@ LOKI_OPERATOR_NS ?= kubernetes-operators # To re-generate a bundle for another specific version without changing the standard setup, you can: # - use the VERSION as arg of the bundle target (e.g make bundle VERSION=0.0.2) # - use environment variables to overwrite this value (e.g export VERSION=0.0.2) -VERSION ?= 0.6.1 +VERSION ?= 0.6.2 CHANNELS ?= "alpha" DEFAULT_CHANNEL ?= "alpha" diff --git a/operator/apis/loki/v1/lokistack_types.go b/operator/apis/loki/v1/lokistack_types.go index 41f4ad95e6a8..5fd953bc71eb 100644 --- a/operator/apis/loki/v1/lokistack_types.go +++ b/operator/apis/loki/v1/lokistack_types.go @@ -887,7 +887,7 @@ type OTLPResourceAttributesSpec struct { // tenants. type GlobalOTLPSpec struct { // IndexedResourceAttributes contains the global configuration for resource attributes - // to store them as index labels or structured metadata or drop them altogether. + // to store them as index labels. // // +optional // +kubebuilder:validation:Optional @@ -909,7 +909,7 @@ type OTLPSpec struct { ResourceAttributes *OTLPResourceAttributesSpec `json:"resourceAttributes,omitempty"` // ScopeAttributes contains the configuration for scope attributes - // to store them as index labels or structured metadata or drop them altogether. + // to store them as structured metadata or drop them altogether. 
// // +optional // +kubebuilder:validation:Optional @@ -917,7 +917,7 @@ type OTLPSpec struct { ScopeAttributes []OTLPAttributesSpec `json:"scopeAttributes,omitempty"` // LogAttributes contains the configuration for log attributes - // to store them as index labels or structured metadata or drop them altogether. + // to store them as structured metadata or drop them altogether. // // +optional // +kubebuilder:validation:Optional diff --git a/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-reader_v1_serviceaccount.yaml b/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-reader_v1_serviceaccount.yaml index d15e930efe88..eca4a2b99b5d 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-reader_v1_serviceaccount.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-reader_v1_serviceaccount.yaml @@ -3,9 +3,9 @@ kind: ServiceAccount metadata: creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: loki-operator-controller-manager-metrics-reader diff --git a/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-service_v1_service.yaml b/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-service_v1_service.yaml index 03115d44fdad..83b327cf24d0 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-service_v1_service.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-service_v1_service.yaml @@ -6,11 +6,11 @@ metadata: creationTimestamp: null labels: app.kubernetes.io/component: metrics - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: loki-operator-controller-manager-metrics-service spec: ports: diff --git a/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-token_v1_secret.yaml b/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-token_v1_secret.yaml index 109b3393f998..2a1160859812 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-token_v1_secret.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-metrics-token_v1_secret.yaml @@ -4,10 +4,10 @@ metadata: annotations: kubernetes.io/service-account.name: loki-operator-controller-manager-metrics-reader labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: loki-operator-controller-manager-metrics-token type: kubernetes.io/service-account-token diff --git 
a/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-read-metrics_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml b/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-read-metrics_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml index 97d00df9c4ef..1af7f354c22e 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-read-metrics_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator-controller-manager-read-metrics_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml @@ -3,11 +3,11 @@ kind: ClusterRoleBinding metadata: creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: loki-operator-controller-manager-read-metrics roleRef: apiGroup: rbac.authorization.k8s.io diff --git a/operator/bundle/community-openshift/manifests/loki-operator-manager-config_v1_configmap.yaml b/operator/bundle/community-openshift/manifests/loki-operator-manager-config_v1_configmap.yaml index 868ea5ffb8a5..33f2b19ed8dc 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator-manager-config_v1_configmap.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator-manager-config_v1_configmap.yaml @@ -60,9 +60,9 @@ data: kind: ConfigMap metadata: labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: loki-operator-manager-config diff --git a/operator/bundle/community-openshift/manifests/loki-operator-metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml b/operator/bundle/community-openshift/manifests/loki-operator-metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml index 53b6d9edb87e..698498c81b6c 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator-metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator-metrics-monitor_monitoring.coreos.com_v1_servicemonitor.yaml @@ -2,11 +2,11 @@ apiVersion: monitoring.coreos.com/v1 kind: ServiceMonitor metadata: labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: loki-operator name: loki-operator-metrics-monitor spec: diff --git a/operator/bundle/community-openshift/manifests/loki-operator-metrics-reader_rbac.authorization.k8s.io_v1_clusterrole.yaml b/operator/bundle/community-openshift/manifests/loki-operator-metrics-reader_rbac.authorization.k8s.io_v1_clusterrole.yaml index 6b4e2e91a201..4455d646e6c8 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator-metrics-reader_rbac.authorization.k8s.io_v1_clusterrole.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator-metrics-reader_rbac.authorization.k8s.io_v1_clusterrole.yaml @@ -3,11 +3,11 @@ 
kind: ClusterRole metadata: creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: loki-operator-metrics-reader rules: - nonResourceURLs: diff --git a/operator/bundle/community-openshift/manifests/loki-operator-prometheus_rbac.authorization.k8s.io_v1_role.yaml b/operator/bundle/community-openshift/manifests/loki-operator-prometheus_rbac.authorization.k8s.io_v1_role.yaml index 79175acf5583..25fa29ce7866 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator-prometheus_rbac.authorization.k8s.io_v1_role.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator-prometheus_rbac.authorization.k8s.io_v1_role.yaml @@ -6,11 +6,11 @@ metadata: include.release.openshift.io/single-node-developer: "true" creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: loki-operator-prometheus rules: - apiGroups: diff --git a/operator/bundle/community-openshift/manifests/loki-operator-prometheus_rbac.authorization.k8s.io_v1_rolebinding.yaml b/operator/bundle/community-openshift/manifests/loki-operator-prometheus_rbac.authorization.k8s.io_v1_rolebinding.yaml index 32cc55393781..939d6973181a 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator-prometheus_rbac.authorization.k8s.io_v1_rolebinding.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator-prometheus_rbac.authorization.k8s.io_v1_rolebinding.yaml @@ -6,11 +6,11 @@ metadata: include.release.openshift.io/single-node-developer: "true" creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: loki-operator-prometheus roleRef: apiGroup: rbac.authorization.k8s.io diff --git a/operator/bundle/community-openshift/manifests/loki-operator-webhook-service_v1_service.yaml b/operator/bundle/community-openshift/manifests/loki-operator-webhook-service_v1_service.yaml index 2fe1edca6fd2..56581d01bbc2 100644 --- a/operator/bundle/community-openshift/manifests/loki-operator-webhook-service_v1_service.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator-webhook-service_v1_service.yaml @@ -3,11 +3,11 @@ kind: Service metadata: creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: loki-operator-webhook-service spec: ports: diff --git a/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml index 9df0d3707f9b..6b3cff962a8a 100644 --- 
a/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/community-openshift/manifests/loki-operator.clusterserviceversion.yaml @@ -149,8 +149,8 @@ metadata: capabilities: Full Lifecycle categories: OpenShift Optional, Logging & Tracing certified: "false" - containerImage: docker.io/grafana/loki-operator:0.6.1 - createdAt: "2024-09-06T17:56:05Z" + containerImage: docker.io/grafana/loki-operator:0.6.2 + createdAt: "2024-09-25T09:56:42Z" description: The Community Loki Operator provides Kubernetes native deployment and management of Loki and related logging components. features.operators.openshift.io/disconnected: "true" @@ -167,7 +167,7 @@ metadata: labels: operatorframework.io/arch.amd64: supported operatorframework.io/arch.arm64: supported - name: loki-operator.v0.6.1 + name: loki-operator.v0.6.2 namespace: placeholder spec: apiservicedefinitions: {} @@ -362,12 +362,11 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:number - description: IndexedResourceAttributes contains the global configuration for - resource attributes to store them as index labels or structured metadata - or drop them altogether. + resource attributes to store them as index labels. displayName: Indexed Resource Attributes path: limits.global.otlp.indexedResourceAttributes - description: LogAttributes contains the configuration for log attributes to - store them as index labels or structured metadata or drop them altogether. + store them as structured metadata or drop them altogether. displayName: Log Attributes path: limits.global.otlp.logAttributes - description: Action defines the indexing action for the selected attributes. @@ -408,7 +407,7 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:booleanSwitch - description: ScopeAttributes contains the configuration for scope attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. displayName: Scope Attributes path: limits.global.otlp.scopeAttributes - description: Action defines the indexing action for the selected attributes. @@ -518,7 +517,7 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:number - description: LogAttributes contains the configuration for log attributes to - store them as index labels or structured metadata or drop them altogether. + store them as structured metadata or drop them altogether. displayName: Log Attributes path: limits.tenants.otlp.logAttributes - description: Action defines the indexing action for the selected attributes. @@ -559,7 +558,7 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:booleanSwitch - description: ScopeAttributes contains the configuration for scope attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. displayName: Scope Attributes path: limits.tenants.otlp.scopeAttributes - description: Action defines the indexing action for the selected attributes. 
@@ -1814,11 +1813,11 @@ spec: serviceAccountName: loki-operator-controller-manager deployments: - label: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 control-plane: controller-manager name: loki-operator-controller-manager spec: @@ -1852,7 +1851,7 @@ spec: value: quay.io/observatorium/api:latest - name: RELATED_IMAGE_OPA value: quay.io/observatorium/opa-openshift:latest - image: docker.io/grafana/loki-operator:0.6.1 + image: docker.io/grafana/loki-operator:0.6.2 imagePullPolicy: IfNotPresent livenessProbe: httpGet: @@ -1977,8 +1976,8 @@ spec: name: gateway - image: quay.io/observatorium/opa-openshift:latest name: opa - replaces: loki-operator.v0.6.0 - version: 0.6.1 + replaces: loki-operator.v0.6.1 + version: 0.6.2 webhookdefinitions: - admissionReviewVersions: - v1 diff --git a/operator/bundle/community-openshift/manifests/loki.grafana.com_alertingrules.yaml b/operator/bundle/community-openshift/manifests/loki.grafana.com_alertingrules.yaml index 64298e8d5062..1ab9360ef7ac 100644 --- a/operator/bundle/community-openshift/manifests/loki.grafana.com_alertingrules.yaml +++ b/operator/bundle/community-openshift/manifests/loki.grafana.com_alertingrules.yaml @@ -5,11 +5,11 @@ metadata: controller-gen.kubebuilder.io/version: v0.14.0 creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: alertingrules.loki.grafana.com spec: conversion: diff --git a/operator/bundle/community-openshift/manifests/loki.grafana.com_lokistacks.yaml b/operator/bundle/community-openshift/manifests/loki.grafana.com_lokistacks.yaml index e58e23492e5a..fec6d40978a4 100644 --- a/operator/bundle/community-openshift/manifests/loki.grafana.com_lokistacks.yaml +++ b/operator/bundle/community-openshift/manifests/loki.grafana.com_lokistacks.yaml @@ -5,11 +5,11 @@ metadata: controller-gen.kubebuilder.io/version: v0.14.0 creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: lokistacks.loki.grafana.com spec: conversion: @@ -173,14 +173,14 @@ spec: indexedResourceAttributes: description: |- IndexedResourceAttributes contains the global configuration for resource attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as index labels. items: type: string type: array logAttributes: description: |- LogAttributes contains the configuration for log attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. 
items: description: |- OTLPAttributesSpec contains the configuration for a set of attributes @@ -256,7 +256,7 @@ spec: scopeAttributes: description: |- ScopeAttributes contains the configuration for scope attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. items: description: |- OTLPAttributesSpec contains the configuration for a set of attributes @@ -437,7 +437,7 @@ spec: logAttributes: description: |- LogAttributes contains the configuration for log attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. items: description: |- OTLPAttributesSpec contains the configuration for a set of attributes @@ -513,7 +513,7 @@ spec: scopeAttributes: description: |- ScopeAttributes contains the configuration for scope attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. items: description: |- OTLPAttributesSpec contains the configuration for a set of attributes diff --git a/operator/bundle/community-openshift/manifests/loki.grafana.com_recordingrules.yaml b/operator/bundle/community-openshift/manifests/loki.grafana.com_recordingrules.yaml index cdef169a4ced..433f205ec820 100644 --- a/operator/bundle/community-openshift/manifests/loki.grafana.com_recordingrules.yaml +++ b/operator/bundle/community-openshift/manifests/loki.grafana.com_recordingrules.yaml @@ -5,11 +5,11 @@ metadata: controller-gen.kubebuilder.io/version: v0.14.0 creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: recordingrules.loki.grafana.com spec: conversion: diff --git a/operator/bundle/community-openshift/manifests/loki.grafana.com_rulerconfigs.yaml b/operator/bundle/community-openshift/manifests/loki.grafana.com_rulerconfigs.yaml index d7af1c24bad2..bb3edbfd454a 100644 --- a/operator/bundle/community-openshift/manifests/loki.grafana.com_rulerconfigs.yaml +++ b/operator/bundle/community-openshift/manifests/loki.grafana.com_rulerconfigs.yaml @@ -5,11 +5,11 @@ metadata: controller-gen.kubebuilder.io/version: v0.14.0 creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: rulerconfigs.loki.grafana.com spec: conversion: diff --git a/operator/bundle/community/manifests/loki-operator-controller-manager-metrics-reader_v1_serviceaccount.yaml b/operator/bundle/community/manifests/loki-operator-controller-manager-metrics-reader_v1_serviceaccount.yaml index d15e930efe88..eca4a2b99b5d 100644 --- a/operator/bundle/community/manifests/loki-operator-controller-manager-metrics-reader_v1_serviceaccount.yaml +++ b/operator/bundle/community/manifests/loki-operator-controller-manager-metrics-reader_v1_serviceaccount.yaml @@ -3,9 +3,9 @@ kind: ServiceAccount metadata: creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: 
loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: loki-operator-controller-manager-metrics-reader diff --git a/operator/bundle/community/manifests/loki-operator-controller-manager-metrics-service_v1_service.yaml b/operator/bundle/community/manifests/loki-operator-controller-manager-metrics-service_v1_service.yaml index 8738be99aa87..e82a7c02ef41 100644 --- a/operator/bundle/community/manifests/loki-operator-controller-manager-metrics-service_v1_service.yaml +++ b/operator/bundle/community/manifests/loki-operator-controller-manager-metrics-service_v1_service.yaml @@ -4,11 +4,11 @@ metadata: creationTimestamp: null labels: app.kubernetes.io/component: metrics - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: loki-operator-controller-manager-metrics-service spec: ports: diff --git a/operator/bundle/community/manifests/loki-operator-controller-manager-read-metrics_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml b/operator/bundle/community/manifests/loki-operator-controller-manager-read-metrics_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml index 242c0978035a..0a1c320c138a 100644 --- a/operator/bundle/community/manifests/loki-operator-controller-manager-read-metrics_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml +++ b/operator/bundle/community/manifests/loki-operator-controller-manager-read-metrics_rbac.authorization.k8s.io_v1_clusterrolebinding.yaml @@ -3,11 +3,11 @@ kind: ClusterRoleBinding metadata: creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: loki-operator-controller-manager-read-metrics roleRef: apiGroup: rbac.authorization.k8s.io diff --git a/operator/bundle/community/manifests/loki-operator-manager-config_v1_configmap.yaml b/operator/bundle/community/manifests/loki-operator-manager-config_v1_configmap.yaml index 98efac218bb4..867906157015 100644 --- a/operator/bundle/community/manifests/loki-operator-manager-config_v1_configmap.yaml +++ b/operator/bundle/community/manifests/loki-operator-manager-config_v1_configmap.yaml @@ -24,9 +24,9 @@ data: kind: ConfigMap metadata: labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: loki-operator-manager-config diff --git a/operator/bundle/community/manifests/loki-operator-metrics-reader_rbac.authorization.k8s.io_v1_clusterrole.yaml b/operator/bundle/community/manifests/loki-operator-metrics-reader_rbac.authorization.k8s.io_v1_clusterrole.yaml index 6b4e2e91a201..4455d646e6c8 100644 --- a/operator/bundle/community/manifests/loki-operator-metrics-reader_rbac.authorization.k8s.io_v1_clusterrole.yaml +++ 
b/operator/bundle/community/manifests/loki-operator-metrics-reader_rbac.authorization.k8s.io_v1_clusterrole.yaml @@ -3,11 +3,11 @@ kind: ClusterRole metadata: creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: loki-operator-metrics-reader rules: - nonResourceURLs: diff --git a/operator/bundle/community/manifests/loki-operator-prometheus_rbac.authorization.k8s.io_v1_role.yaml b/operator/bundle/community/manifests/loki-operator-prometheus_rbac.authorization.k8s.io_v1_role.yaml index 79175acf5583..25fa29ce7866 100644 --- a/operator/bundle/community/manifests/loki-operator-prometheus_rbac.authorization.k8s.io_v1_role.yaml +++ b/operator/bundle/community/manifests/loki-operator-prometheus_rbac.authorization.k8s.io_v1_role.yaml @@ -6,11 +6,11 @@ metadata: include.release.openshift.io/single-node-developer: "true" creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: loki-operator-prometheus rules: - apiGroups: diff --git a/operator/bundle/community/manifests/loki-operator-prometheus_rbac.authorization.k8s.io_v1_rolebinding.yaml b/operator/bundle/community/manifests/loki-operator-prometheus_rbac.authorization.k8s.io_v1_rolebinding.yaml index 32cc55393781..939d6973181a 100644 --- a/operator/bundle/community/manifests/loki-operator-prometheus_rbac.authorization.k8s.io_v1_rolebinding.yaml +++ b/operator/bundle/community/manifests/loki-operator-prometheus_rbac.authorization.k8s.io_v1_rolebinding.yaml @@ -6,11 +6,11 @@ metadata: include.release.openshift.io/single-node-developer: "true" creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: loki-operator-prometheus roleRef: apiGroup: rbac.authorization.k8s.io diff --git a/operator/bundle/community/manifests/loki-operator-webhook-service_v1_service.yaml b/operator/bundle/community/manifests/loki-operator-webhook-service_v1_service.yaml index 2fe1edca6fd2..56581d01bbc2 100644 --- a/operator/bundle/community/manifests/loki-operator-webhook-service_v1_service.yaml +++ b/operator/bundle/community/manifests/loki-operator-webhook-service_v1_service.yaml @@ -3,11 +3,11 @@ kind: Service metadata: creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: loki-operator-webhook-service spec: ports: diff --git a/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml index 305ab8785d31..a52fb9b070cf 100644 --- 
a/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/community/manifests/loki-operator.clusterserviceversion.yaml @@ -149,8 +149,8 @@ metadata: capabilities: Full Lifecycle categories: OpenShift Optional, Logging & Tracing certified: "false" - containerImage: docker.io/grafana/loki-operator:0.6.1 - createdAt: "2024-09-06T17:56:04Z" + containerImage: docker.io/grafana/loki-operator:0.6.2 + createdAt: "2024-09-25T09:56:40Z" description: The Community Loki Operator provides Kubernetes native deployment and management of Loki and related logging components. operators.operatorframework.io/builder: operator-sdk-unknown @@ -160,7 +160,7 @@ metadata: labels: operatorframework.io/arch.amd64: supported operatorframework.io/arch.arm64: supported - name: loki-operator.v0.6.1 + name: loki-operator.v0.6.2 namespace: placeholder spec: apiservicedefinitions: {} @@ -355,12 +355,11 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:number - description: IndexedResourceAttributes contains the global configuration for - resource attributes to store them as index labels or structured metadata - or drop them altogether. + resource attributes to store them as index labels. displayName: Indexed Resource Attributes path: limits.global.otlp.indexedResourceAttributes - description: LogAttributes contains the configuration for log attributes to - store them as index labels or structured metadata or drop them altogether. + store them as structured metadata or drop them altogether. displayName: Log Attributes path: limits.global.otlp.logAttributes - description: Action defines the indexing action for the selected attributes. @@ -401,7 +400,7 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:booleanSwitch - description: ScopeAttributes contains the configuration for scope attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. displayName: Scope Attributes path: limits.global.otlp.scopeAttributes - description: Action defines the indexing action for the selected attributes. @@ -511,7 +510,7 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:number - description: LogAttributes contains the configuration for log attributes to - store them as index labels or structured metadata or drop them altogether. + store them as structured metadata or drop them altogether. displayName: Log Attributes path: limits.tenants.otlp.logAttributes - description: Action defines the indexing action for the selected attributes. @@ -552,7 +551,7 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:booleanSwitch - description: ScopeAttributes contains the configuration for scope attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. displayName: Scope Attributes path: limits.tenants.otlp.scopeAttributes - description: Action defines the indexing action for the selected attributes. 
@@ -1794,11 +1793,11 @@ spec: serviceAccountName: loki-operator-controller-manager deployments: - label: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 control-plane: controller-manager name: loki-operator-controller-manager spec: @@ -1832,7 +1831,7 @@ spec: value: quay.io/observatorium/api:latest - name: RELATED_IMAGE_OPA value: quay.io/observatorium/opa-openshift:latest - image: docker.io/grafana/loki-operator:0.6.1 + image: docker.io/grafana/loki-operator:0.6.2 imagePullPolicy: IfNotPresent livenessProbe: httpGet: @@ -1945,8 +1944,8 @@ spec: name: gateway - image: quay.io/observatorium/opa-openshift:latest name: opa - replaces: loki-operator.v0.6.0 - version: 0.6.1 + replaces: loki-operator.v0.6.1 + version: 0.6.2 webhookdefinitions: - admissionReviewVersions: - v1 diff --git a/operator/bundle/community/manifests/loki.grafana.com_alertingrules.yaml b/operator/bundle/community/manifests/loki.grafana.com_alertingrules.yaml index 6a8d8c78d103..a1761382e4b0 100644 --- a/operator/bundle/community/manifests/loki.grafana.com_alertingrules.yaml +++ b/operator/bundle/community/manifests/loki.grafana.com_alertingrules.yaml @@ -5,11 +5,11 @@ metadata: controller-gen.kubebuilder.io/version: v0.14.0 creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: alertingrules.loki.grafana.com spec: conversion: diff --git a/operator/bundle/community/manifests/loki.grafana.com_lokistacks.yaml b/operator/bundle/community/manifests/loki.grafana.com_lokistacks.yaml index e4f08108b696..69c6fb65d26a 100644 --- a/operator/bundle/community/manifests/loki.grafana.com_lokistacks.yaml +++ b/operator/bundle/community/manifests/loki.grafana.com_lokistacks.yaml @@ -5,11 +5,11 @@ metadata: controller-gen.kubebuilder.io/version: v0.14.0 creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: lokistacks.loki.grafana.com spec: conversion: @@ -173,14 +173,14 @@ spec: indexedResourceAttributes: description: |- IndexedResourceAttributes contains the global configuration for resource attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as index labels. items: type: string type: array logAttributes: description: |- LogAttributes contains the configuration for log attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. items: description: |- OTLPAttributesSpec contains the configuration for a set of attributes @@ -256,7 +256,7 @@ spec: scopeAttributes: description: |- ScopeAttributes contains the configuration for scope attributes - to store them as index labels or structured metadata or drop them altogether. 
+ to store them as structured metadata or drop them altogether. items: description: |- OTLPAttributesSpec contains the configuration for a set of attributes @@ -437,7 +437,7 @@ spec: logAttributes: description: |- LogAttributes contains the configuration for log attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. items: description: |- OTLPAttributesSpec contains the configuration for a set of attributes @@ -513,7 +513,7 @@ spec: scopeAttributes: description: |- ScopeAttributes contains the configuration for scope attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. items: description: |- OTLPAttributesSpec contains the configuration for a set of attributes diff --git a/operator/bundle/community/manifests/loki.grafana.com_recordingrules.yaml b/operator/bundle/community/manifests/loki.grafana.com_recordingrules.yaml index 8018b4a15207..91df47a6c680 100644 --- a/operator/bundle/community/manifests/loki.grafana.com_recordingrules.yaml +++ b/operator/bundle/community/manifests/loki.grafana.com_recordingrules.yaml @@ -5,11 +5,11 @@ metadata: controller-gen.kubebuilder.io/version: v0.14.0 creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: recordingrules.loki.grafana.com spec: conversion: diff --git a/operator/bundle/community/manifests/loki.grafana.com_rulerconfigs.yaml b/operator/bundle/community/manifests/loki.grafana.com_rulerconfigs.yaml index 71b690e14a63..594a2d724d99 100644 --- a/operator/bundle/community/manifests/loki.grafana.com_rulerconfigs.yaml +++ b/operator/bundle/community/manifests/loki.grafana.com_rulerconfigs.yaml @@ -5,11 +5,11 @@ metadata: controller-gen.kubebuilder.io/version: v0.14.0 creationTimestamp: null labels: - app.kubernetes.io/instance: loki-operator-v0.6.1 + app.kubernetes.io/instance: loki-operator-v0.6.2 app.kubernetes.io/managed-by: operator-lifecycle-manager app.kubernetes.io/name: loki-operator app.kubernetes.io/part-of: loki-operator - app.kubernetes.io/version: 0.6.1 + app.kubernetes.io/version: 0.6.2 name: rulerconfigs.loki.grafana.com spec: conversion: diff --git a/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml b/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml index 62548fa8217d..d5bd203a90b8 100644 --- a/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml +++ b/operator/bundle/openshift/manifests/loki-operator.clusterserviceversion.yaml @@ -150,7 +150,7 @@ metadata: categories: OpenShift Optional, Logging & Tracing certified: "false" containerImage: quay.io/openshift-logging/loki-operator:0.1.0 - createdAt: "2024-09-06T17:56:07Z" + createdAt: "2024-09-25T09:56:44Z" description: | The Loki Operator for OCP provides a means for configuring and managing a Loki stack for cluster logging. ## Prerequisites and Requirements @@ -375,12 +375,11 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:number - description: IndexedResourceAttributes contains the global configuration for - resource attributes to store them as index labels or structured metadata - or drop them altogether. 
+ resource attributes to store them as index labels. displayName: Indexed Resource Attributes path: limits.global.otlp.indexedResourceAttributes - description: LogAttributes contains the configuration for log attributes to - store them as index labels or structured metadata or drop them altogether. + store them as structured metadata or drop them altogether. displayName: Log Attributes path: limits.global.otlp.logAttributes - description: Action defines the indexing action for the selected attributes. @@ -421,7 +420,7 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:booleanSwitch - description: ScopeAttributes contains the configuration for scope attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. displayName: Scope Attributes path: limits.global.otlp.scopeAttributes - description: Action defines the indexing action for the selected attributes. @@ -531,7 +530,7 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:number - description: LogAttributes contains the configuration for log attributes to - store them as index labels or structured metadata or drop them altogether. + store them as structured metadata or drop them altogether. displayName: Log Attributes path: limits.tenants.otlp.logAttributes - description: Action defines the indexing action for the selected attributes. @@ -572,7 +571,7 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:booleanSwitch - description: ScopeAttributes contains the configuration for scope attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. displayName: Scope Attributes path: limits.tenants.otlp.scopeAttributes - description: Action defines the indexing action for the selected attributes. diff --git a/operator/bundle/openshift/manifests/loki.grafana.com_lokistacks.yaml b/operator/bundle/openshift/manifests/loki.grafana.com_lokistacks.yaml index 2084694a6db8..36188dfd3e07 100644 --- a/operator/bundle/openshift/manifests/loki.grafana.com_lokistacks.yaml +++ b/operator/bundle/openshift/manifests/loki.grafana.com_lokistacks.yaml @@ -173,14 +173,14 @@ spec: indexedResourceAttributes: description: |- IndexedResourceAttributes contains the global configuration for resource attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as index labels. items: type: string type: array logAttributes: description: |- LogAttributes contains the configuration for log attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. items: description: |- OTLPAttributesSpec contains the configuration for a set of attributes @@ -256,7 +256,7 @@ spec: scopeAttributes: description: |- ScopeAttributes contains the configuration for scope attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. items: description: |- OTLPAttributesSpec contains the configuration for a set of attributes @@ -437,7 +437,7 @@ spec: logAttributes: description: |- LogAttributes contains the configuration for log attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. 
items: description: |- OTLPAttributesSpec contains the configuration for a set of attributes @@ -513,7 +513,7 @@ spec: scopeAttributes: description: |- ScopeAttributes contains the configuration for scope attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. items: description: |- OTLPAttributesSpec contains the configuration for a set of attributes diff --git a/operator/config/crd/bases/loki.grafana.com_lokistacks.yaml b/operator/config/crd/bases/loki.grafana.com_lokistacks.yaml index 0180fe98e7ae..4c87aadac18b 100644 --- a/operator/config/crd/bases/loki.grafana.com_lokistacks.yaml +++ b/operator/config/crd/bases/loki.grafana.com_lokistacks.yaml @@ -155,14 +155,14 @@ spec: indexedResourceAttributes: description: |- IndexedResourceAttributes contains the global configuration for resource attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as index labels. items: type: string type: array logAttributes: description: |- LogAttributes contains the configuration for log attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. items: description: |- OTLPAttributesSpec contains the configuration for a set of attributes @@ -238,7 +238,7 @@ spec: scopeAttributes: description: |- ScopeAttributes contains the configuration for scope attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. items: description: |- OTLPAttributesSpec contains the configuration for a set of attributes @@ -419,7 +419,7 @@ spec: logAttributes: description: |- LogAttributes contains the configuration for log attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. items: description: |- OTLPAttributesSpec contains the configuration for a set of attributes @@ -495,7 +495,7 @@ spec: scopeAttributes: description: |- ScopeAttributes contains the configuration for scope attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. items: description: |- OTLPAttributesSpec contains the configuration for a set of attributes diff --git a/operator/config/manifests/community-openshift/bases/loki-operator.clusterserviceversion.yaml b/operator/config/manifests/community-openshift/bases/loki-operator.clusterserviceversion.yaml index 35946e8e05d9..7e7e67ad51aa 100644 --- a/operator/config/manifests/community-openshift/bases/loki-operator.clusterserviceversion.yaml +++ b/operator/config/manifests/community-openshift/bases/loki-operator.clusterserviceversion.yaml @@ -6,7 +6,7 @@ metadata: capabilities: Full Lifecycle categories: OpenShift Optional, Logging & Tracing certified: "false" - containerImage: docker.io/grafana/loki-operator:0.6.1 + containerImage: docker.io/grafana/loki-operator:0.6.2 createdAt: "2022-12-22T13:28:40+00:00" description: The Community Loki Operator provides Kubernetes native deployment and management of Loki and related logging components. 
@@ -275,12 +275,11 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:number - description: IndexedResourceAttributes contains the global configuration for - resource attributes to store them as index labels or structured metadata - or drop them altogether. + resource attributes to store them as index labels. displayName: Indexed Resource Attributes path: limits.global.otlp.indexedResourceAttributes - description: LogAttributes contains the configuration for log attributes to - store them as index labels or structured metadata or drop them altogether. + store them as structured metadata or drop them altogether. displayName: Log Attributes path: limits.global.otlp.logAttributes - description: Action defines the indexing action for the selected attributes. @@ -321,7 +320,7 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:booleanSwitch - description: ScopeAttributes contains the configuration for scope attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. displayName: Scope Attributes path: limits.global.otlp.scopeAttributes - description: Action defines the indexing action for the selected attributes. @@ -431,7 +430,7 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:number - description: LogAttributes contains the configuration for log attributes to - store them as index labels or structured metadata or drop them altogether. + store them as structured metadata or drop them altogether. displayName: Log Attributes path: limits.tenants.otlp.logAttributes - description: Action defines the indexing action for the selected attributes. @@ -472,7 +471,7 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:booleanSwitch - description: ScopeAttributes contains the configuration for scope attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. displayName: Scope Attributes path: limits.tenants.otlp.scopeAttributes - description: Action defines the indexing action for the selected attributes. @@ -2392,5 +2391,5 @@ spec: minKubeVersion: 1.21.1 provider: name: Grafana Loki SIG Operator - replaces: loki-operator.v0.6.0 + replaces: loki-operator.v0.6.1 version: 0.0.0 diff --git a/operator/config/manifests/community/bases/loki-operator.clusterserviceversion.yaml b/operator/config/manifests/community/bases/loki-operator.clusterserviceversion.yaml index 8e42ee8f4770..c1ee0008faa4 100644 --- a/operator/config/manifests/community/bases/loki-operator.clusterserviceversion.yaml +++ b/operator/config/manifests/community/bases/loki-operator.clusterserviceversion.yaml @@ -6,7 +6,7 @@ metadata: capabilities: Full Lifecycle categories: OpenShift Optional, Logging & Tracing certified: "false" - containerImage: docker.io/grafana/loki-operator:0.6.1 + containerImage: docker.io/grafana/loki-operator:0.6.2 createdAt: "2022-12-22T13:28:40+00:00" description: The Community Loki Operator provides Kubernetes native deployment and management of Loki and related logging components. @@ -268,12 +268,11 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:number - description: IndexedResourceAttributes contains the global configuration for - resource attributes to store them as index labels or structured metadata - or drop them altogether. + resource attributes to store them as index labels. 
displayName: Indexed Resource Attributes path: limits.global.otlp.indexedResourceAttributes - description: LogAttributes contains the configuration for log attributes to - store them as index labels or structured metadata or drop them altogether. + store them as structured metadata or drop them altogether. displayName: Log Attributes path: limits.global.otlp.logAttributes - description: Action defines the indexing action for the selected attributes. @@ -314,7 +313,7 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:booleanSwitch - description: ScopeAttributes contains the configuration for scope attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. displayName: Scope Attributes path: limits.global.otlp.scopeAttributes - description: Action defines the indexing action for the selected attributes. @@ -424,7 +423,7 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:number - description: LogAttributes contains the configuration for log attributes to - store them as index labels or structured metadata or drop them altogether. + store them as structured metadata or drop them altogether. displayName: Log Attributes path: limits.tenants.otlp.logAttributes - description: Action defines the indexing action for the selected attributes. @@ -465,7 +464,7 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:booleanSwitch - description: ScopeAttributes contains the configuration for scope attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. displayName: Scope Attributes path: limits.tenants.otlp.scopeAttributes - description: Action defines the indexing action for the selected attributes. @@ -2372,5 +2371,5 @@ spec: minKubeVersion: 1.21.1 provider: name: Grafana Loki SIG Operator - replaces: loki-operator.v0.6.0 + replaces: loki-operator.v0.6.1 version: 0.0.0 diff --git a/operator/config/manifests/openshift/bases/loki-operator.clusterserviceversion.yaml b/operator/config/manifests/openshift/bases/loki-operator.clusterserviceversion.yaml index 3d25c9f4c244..9361198824da 100644 --- a/operator/config/manifests/openshift/bases/loki-operator.clusterserviceversion.yaml +++ b/operator/config/manifests/openshift/bases/loki-operator.clusterserviceversion.yaml @@ -287,12 +287,11 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:number - description: IndexedResourceAttributes contains the global configuration for - resource attributes to store them as index labels or structured metadata - or drop them altogether. + resource attributes to store them as index labels. displayName: Indexed Resource Attributes path: limits.global.otlp.indexedResourceAttributes - description: LogAttributes contains the configuration for log attributes to - store them as index labels or structured metadata or drop them altogether. + store them as structured metadata or drop them altogether. displayName: Log Attributes path: limits.global.otlp.logAttributes - description: Action defines the indexing action for the selected attributes. @@ -333,7 +332,7 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:booleanSwitch - description: ScopeAttributes contains the configuration for scope attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. 
displayName: Scope Attributes path: limits.global.otlp.scopeAttributes - description: Action defines the indexing action for the selected attributes. @@ -443,7 +442,7 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:number - description: LogAttributes contains the configuration for log attributes to - store them as index labels or structured metadata or drop them altogether. + store them as structured metadata or drop them altogether. displayName: Log Attributes path: limits.tenants.otlp.logAttributes - description: Action defines the indexing action for the selected attributes. @@ -484,7 +483,7 @@ spec: x-descriptors: - urn:alm:descriptor:com.tectonic.ui:booleanSwitch - description: ScopeAttributes contains the configuration for scope attributes - to store them as index labels or structured metadata or drop them altogether. + to store them as structured metadata or drop them altogether. displayName: Scope Attributes path: limits.tenants.otlp.scopeAttributes - description: Action defines the indexing action for the selected attributes. diff --git a/operator/config/overlays/community-openshift/kustomization.yaml b/operator/config/overlays/community-openshift/kustomization.yaml index bf76d87727c8..af5e40aac80b 100644 --- a/operator/config/overlays/community-openshift/kustomization.yaml +++ b/operator/config/overlays/community-openshift/kustomization.yaml @@ -11,8 +11,8 @@ labels: app.kubernetes.io/managed-by: operator-lifecycle-manager includeSelectors: true - pairs: - app.kubernetes.io/instance: loki-operator-v0.6.1 - app.kubernetes.io/version: "0.6.1" + app.kubernetes.io/instance: loki-operator-v0.6.2 + app.kubernetes.io/version: "0.6.2" configMapGenerator: - files: @@ -27,4 +27,4 @@ patchesStrategicMerge: images: - name: controller newName: docker.io/grafana/loki-operator - newTag: 0.6.1 + newTag: 0.6.2 diff --git a/operator/config/overlays/community/kustomization.yaml b/operator/config/overlays/community/kustomization.yaml index c6db762d38a5..ed910555da2c 100644 --- a/operator/config/overlays/community/kustomization.yaml +++ b/operator/config/overlays/community/kustomization.yaml @@ -22,8 +22,8 @@ labels: app.kubernetes.io/managed-by: operator-lifecycle-manager includeSelectors: true - pairs: - app.kubernetes.io/instance: loki-operator-v0.6.1 - app.kubernetes.io/version: "0.6.1" + app.kubernetes.io/instance: loki-operator-v0.6.2 + app.kubernetes.io/version: "0.6.2" generatorOptions: disableNameSuffixHash: true @@ -43,7 +43,7 @@ patchesStrategicMerge: images: - name: controller newName: docker.io/grafana/loki-operator - newTag: 0.6.1 + newTag: 0.6.2 # the following config is for teaching kustomize how to do var substitution vars: diff --git a/operator/docs/operator/api.md b/operator/docs/operator/api.md index ee4ee5c5a62b..ca80b117347e 100644 --- a/operator/docs/operator/api.md +++ b/operator/docs/operator/api.md @@ -1173,7 +1173,7 @@ tenants.

 (Optional)
 IndexedResourceAttributes contains the global configuration for resource attributes
-to store them as index labels or structured metadata or drop them altogether.
+to store them as index labels.

@@ -2897,7 +2897,7 @@ to store them as index labels or structured metadata or drop them altogether.

 (Optional)
 ScopeAttributes contains the configuration for scope attributes
-to store them as index labels or structured metadata or drop them altogether.
+to store them as structured metadata or drop them altogether.

@@ -2912,7 +2912,7 @@ to store them as index labels or structured metadata or drop them altogether.

 (Optional)
 LogAttributes contains the configuration for log attributes
-to store them as index labels or structured metadata or drop them altogether.
+to store them as structured metadata or drop them altogether.

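The api.md hunks above mirror the doc-comment changes in lokistack_types.go: indexed resource attributes can only become index labels, while scope and log attributes can only be kept as structured metadata or dropped. A minimal sketch of what that means against the operator API, assuming the import path follows the operator module layout and using a hypothetical attribute name:

```go
package main

import (
	"fmt"

	lokiv1 "github.com/grafana/loki/operator/apis/loki/v1" // assumed module path
)

func main() {
	// Per the updated doc comments, entries listed here are stored as
	// index labels only. Keeping attributes as structured metadata or
	// dropping them is configured separately via the ScopeAttributes and
	// LogAttributes fields ([]OTLPAttributesSpec) shown in the diff.
	global := lokiv1.GlobalOTLPSpec{
		IndexedResourceAttributes: []string{"k8s.namespace.name"}, // hypothetical example attribute
	}
	fmt.Println(global.IndexedResourceAttributes)
}
```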
diff --git a/operator/release-please-config.json b/operator/release-please-config.json index c12258d1b063..18f4af8125f5 100644 --- a/operator/release-please-config.json +++ b/operator/release-please-config.json @@ -1,5 +1,5 @@ { - "bootstrap-sha": "c8cc0fb3bc0d72c06fc8df8dc66df09043a8f66e", + "bootstrap-sha": "d4353fa63d9283a941b10b6c90537901e557a9f1", "bump-minor-pre-major": true, "bump-patch-for-minor-pre-major": true, "include-component-in-tag": true, diff --git a/pkg/bloombuild/builder/builder.go b/pkg/bloombuild/builder/builder.go index 6cc2ecfa32f6..fdeab9cf92c7 100644 --- a/pkg/bloombuild/builder/builder.go +++ b/pkg/bloombuild/builder/builder.go @@ -21,7 +21,7 @@ import ( "github.com/grafana/loki/v3/pkg/bloombuild/common" "github.com/grafana/loki/v3/pkg/bloombuild/protos" - "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" iter "github.com/grafana/loki/v3/pkg/iter/v2" "github.com/grafana/loki/v3/pkg/storage" v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1" @@ -33,6 +33,9 @@ import ( "github.com/grafana/loki/v3/pkg/util/ring" ) +// TODO(chaudum): Make configurable via (per-tenant?) setting. +var blockCompressionAlgo = compression.EncNone + type Builder struct { services.Service @@ -333,18 +336,16 @@ func (b *Builder) processTask( return nil, fmt.Errorf("failed to get client: %w", err) } - blockEnc, err := chunkenc.ParseEncoding(b.limits.BloomBlockEncoding(task.Tenant)) + blockEnc, err := compression.ParseEncoding(b.limits.BloomBlockEncoding(task.Tenant)) if err != nil { return nil, fmt.Errorf("failed to parse block encoding: %w", err) } var ( blockCt int - nGramSize = uint64(b.limits.BloomNGramLength(tenant)) - nGramSkip = uint64(b.limits.BloomNGramSkip(tenant)) maxBlockSize = uint64(b.limits.BloomMaxBlockSize(tenant)) maxBloomSize = uint64(b.limits.BloomMaxBloomSize(tenant)) - blockOpts = v1.NewBlockOptions(blockEnc, nGramSize, nGramSkip, maxBlockSize, maxBloomSize) + blockOpts = v1.NewBlockOptions(blockEnc, maxBlockSize, maxBloomSize) created []bloomshipper.Meta totalSeries int bytesAdded int @@ -406,7 +407,7 @@ func (b *Builder) processTask( blockCt++ blk := newBlocks.At() - built, err := bloomshipper.BlockFrom(tenant, task.Table.Addr(), blk) + built, err := bloomshipper.BlockFrom(blockCompressionAlgo, tenant, task.Table.Addr(), blk) if err != nil { level.Error(logger).Log("msg", "failed to build block", "err", err) if err = blk.Reader().Cleanup(); err != nil { diff --git a/pkg/bloombuild/builder/config.go b/pkg/bloombuild/builder/config.go index ddacfd884e10..dcb44c55b5f3 100644 --- a/pkg/bloombuild/builder/config.go +++ b/pkg/bloombuild/builder/config.go @@ -38,8 +38,6 @@ func (cfg *Config) Validate() error { type Limits interface { BloomBlockEncoding(tenantID string) string - BloomNGramLength(tenantID string) int - BloomNGramSkip(tenantID string) int BloomMaxBlockSize(tenantID string) int BloomMaxBloomSize(tenantID string) int } diff --git a/pkg/bloombuild/builder/spec.go b/pkg/bloombuild/builder/spec.go index 82457cf92b84..180c2fc32cb0 100644 --- a/pkg/bloombuild/builder/spec.go +++ b/pkg/bloombuild/builder/spec.go @@ -90,8 +90,6 @@ func NewSimpleBloomGenerator( reporter: reporter, tokenizer: v1.NewBloomTokenizer( - opts.Schema.NGramLen(), - opts.Schema.NGramSkip(), int(opts.UnencodedBlockOptions.MaxBloomSizeBytes), metrics, log.With( diff --git a/pkg/bloombuild/builder/spec_test.go b/pkg/bloombuild/builder/spec_test.go index 5be2a0e1c61b..330c0552b657 100644 --- a/pkg/bloombuild/builder/spec_test.go +++ 
b/pkg/bloombuild/builder/spec_test.go @@ -10,7 +10,7 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/require" - "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" v2 "github.com/grafana/loki/v3/pkg/iter/v2" v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1" "github.com/grafana/loki/v3/pkg/storage/stores/shipper/bloomshipper" @@ -115,7 +115,7 @@ func dummyBloomGen(t *testing.T, opts v1.BlockOptions, store v2.Iterator[*v1.Ser func TestSimpleBloomGenerator(t *testing.T) { const maxBlockSize = 100 << 20 // 100MB - for _, enc := range []chunkenc.Encoding{chunkenc.EncNone, chunkenc.EncGZIP, chunkenc.EncSnappy} { + for _, enc := range []compression.Encoding{compression.EncNone, compression.EncGZIP, compression.EncSnappy} { for _, tc := range []struct { desc string fromSchema, toSchema v1.BlockOptions @@ -123,13 +123,13 @@ func TestSimpleBloomGenerator(t *testing.T) { }{ { desc: "SkipsIncompatibleSchemas", - fromSchema: v1.NewBlockOptions(enc, 3, 0, maxBlockSize, 0), - toSchema: v1.NewBlockOptions(enc, 4, 0, maxBlockSize, 0), + fromSchema: v1.NewBlockOptions(enc, maxBlockSize, 0), + toSchema: v1.NewBlockOptions(enc, maxBlockSize, 0), }, { desc: "CombinesBlocks", - fromSchema: v1.NewBlockOptions(enc, 4, 0, maxBlockSize, 0), - toSchema: v1.NewBlockOptions(enc, 4, 0, maxBlockSize, 0), + fromSchema: v1.NewBlockOptions(enc, maxBlockSize, 0), + toSchema: v1.NewBlockOptions(enc, maxBlockSize, 0), }, } { t.Run(fmt.Sprintf("%s/%s", tc.desc, enc), func(t *testing.T) { @@ -137,7 +137,7 @@ func TestSimpleBloomGenerator(t *testing.T) { storeItr := v2.NewMapIter[v1.SeriesWithBlooms, *v1.Series]( v2.NewSliceIter[v1.SeriesWithBlooms](data), func(swb v1.SeriesWithBlooms) *v1.Series { - return swb.Series + return &swb.Series.Series }, ) @@ -161,7 +161,9 @@ func TestSimpleBloomGenerator(t *testing.T) { } require.Equal(t, len(expectedRefs), len(outputRefs)) for i := range expectedRefs { - require.Equal(t, expectedRefs[i].Series, outputRefs[i].Series) + // TODO(chaudum): For now we only compare the series + // but we should also compare meta. 
+ require.Equal(t, expectedRefs[i].Series.Series, outputRefs[i].Series.Series) } }) } diff --git a/pkg/bloombuild/common/tsdb.go b/pkg/bloombuild/common/tsdb.go index a2e22529523b..ea31767cca0b 100644 --- a/pkg/bloombuild/common/tsdb.go +++ b/pkg/bloombuild/common/tsdb.go @@ -13,7 +13,7 @@ import ( "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" - "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" iter "github.com/grafana/loki/v3/pkg/iter/v2" baseStore "github.com/grafana/loki/v3/pkg/storage" v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1" @@ -102,7 +102,7 @@ func (b *BloomTSDBStore) LoadTSDB( } defer data.Close() - decompressorPool := chunkenc.GetReaderPool(chunkenc.EncGZIP) + decompressorPool := compression.GetReaderPool(compression.EncGZIP) decompressor, err := decompressorPool.GetReader(data) if err != nil { return nil, errors.Wrap(err, "failed to get decompressor") diff --git a/pkg/bloombuild/planner/planner_test.go b/pkg/bloombuild/planner/planner_test.go index 32f8d5798a7f..ea780c98e8ee 100644 --- a/pkg/bloombuild/planner/planner_test.go +++ b/pkg/bloombuild/planner/planner_test.go @@ -23,7 +23,7 @@ import ( "github.com/grafana/loki/v3/pkg/bloombuild/common" "github.com/grafana/loki/v3/pkg/bloombuild/protos" - "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" iter "github.com/grafana/loki/v3/pkg/iter/v2" "github.com/grafana/loki/v3/pkg/storage" v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1" @@ -188,7 +188,7 @@ func genBlock(ref bloomshipper.BlockRef) (bloomshipper.Block, error) { writer := v1.NewMemoryBlockWriter(indexBuf, bloomsBuf) reader := v1.NewByteReader(indexBuf, bloomsBuf) - blockOpts := v1.NewBlockOptions(chunkenc.EncNone, 4, 1, 0, 0) + blockOpts := v1.NewBlockOptions(compression.EncNone, 0, 0) builder, err := v1.NewBlockBuilder(blockOpts, writer) if err != nil { @@ -202,7 +202,7 @@ func genBlock(ref bloomshipper.BlockRef) (bloomshipper.Block, error) { block := v1.NewBlock(reader, v1.NewMetrics(nil)) buf := bytes.NewBuffer(nil) - if err := v1.TarGz(buf, block.Reader()); err != nil { + if err := v1.TarCompress(ref.Encoding, buf, block.Reader()); err != nil { return bloomshipper.Block{}, err } @@ -1019,7 +1019,7 @@ func Test_deleteOutdatedMetas(t *testing.T) { } { t.Run(tc.name, func(t *testing.T) { logger := log.NewNopLogger() - //logger := log.NewLogfmtLogger(os.Stdout) + // logger := log.NewLogfmtLogger(os.Stdout) cfg := Config{ PlanningInterval: 1 * time.Hour, diff --git a/pkg/bloomgateway/bloomgateway.go b/pkg/bloomgateway/bloomgateway.go index cdc7c96f065b..3c42f68ef0dd 100644 --- a/pkg/bloomgateway/bloomgateway.go +++ b/pkg/bloomgateway/bloomgateway.go @@ -193,12 +193,12 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk return nil, errors.New("from time must not be after through time") } - filters := v1.ExtractTestableLineFilters(req.Plan.AST) - stats.NumFilters = len(filters) - g.metrics.receivedFilters.Observe(float64(len(filters))) + matchers := v1.ExtractTestableLabelMatchers(req.Plan.AST) + stats.NumMatchers = len(matchers) + g.metrics.receivedMatchers.Observe(float64(len(matchers))) // Shortcut if request does not contain filters - if len(filters) == 0 { + if len(matchers) == 0 { stats.Status = labelSuccess return &logproto.FilterChunkRefResponse{ ChunkRefs: req.Refs, @@ -227,7 +227,7 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk stats.NumTasks = len(seriesByDay) 
sp.LogKV( - "filters", len(filters), + "matchers", len(matchers), "days", len(seriesByDay), "blocks", len(req.Blocks), "series_requested", len(req.Refs), @@ -239,7 +239,7 @@ func (g *Gateway) FilterChunkRefs(ctx context.Context, req *logproto.FilterChunk } series := seriesByDay[0] - task := newTask(ctx, tenantID, series, filters, blocks) + task := newTask(ctx, tenantID, series, matchers, blocks) // TODO(owen-d): include capacity in constructor? task.responses = responsesPool.Get(len(series.series)) diff --git a/pkg/bloomgateway/bloomgateway_test.go b/pkg/bloomgateway/bloomgateway_test.go index 67bb59e460ad..698b1ecf40e4 100644 --- a/pkg/bloomgateway/bloomgateway_test.go +++ b/pkg/bloomgateway/bloomgateway_test.go @@ -157,7 +157,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { chunkRefs := createQueryInputFromBlockData(t, tenantID, data, 100) - expr, err := syntax.ParseExpr(`{foo="bar"} |= "does not match"`) + expr, err := syntax.ParseExpr(`{foo="bar"} | trace_id="nomatch"`) require.NoError(t, err) req := &logproto.FilterChunkRefRequest{ @@ -165,7 +165,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { Through: now, Refs: groupRefs(t, chunkRefs), Plan: plan.QueryPlan{AST: expr}, - Blocks: []string{"bloom/invalid/block.tar.gz"}, + Blocks: []string{"bloom/invalid/block.tar"}, } ctx := user.InjectOrgID(context.Background(), tenantID) @@ -196,7 +196,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { // saturate workers // then send additional request for i := 0; i < gw.cfg.WorkerConcurrency+1; i++ { - expr, err := syntax.ParseExpr(`{foo="bar"} |= "does not match"`) + expr, err := syntax.ParseExpr(`{foo="bar"} | trace_id="nomatch"`) require.NoError(t, err) req := &logproto.FilterChunkRefRequest{ @@ -240,7 +240,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { // saturate workers // then send additional request for i := 0; i < gw.cfg.WorkerConcurrency+1; i++ { - expr, err := syntax.ParseExpr(`{foo="bar"} |= "does not match"`) + expr, err := syntax.ParseExpr(`{foo="bar"} | trace_id="nomatch"`) require.NoError(t, err) req := &logproto.FilterChunkRefRequest{ @@ -341,7 +341,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { Checksum: uint32(idx), }, } - expr, err := syntax.ParseExpr(`{foo="bar"} |= "foo"`) + expr, err := syntax.ParseExpr(`{foo="bar"} | trace_id="nomatch"`) require.NoError(t, err) req := &logproto.FilterChunkRefRequest{ From: now.Add(-4 * time.Hour), @@ -380,7 +380,7 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { t.Run("no match - return empty response", func(t *testing.T) { inputChunkRefs := groupRefs(t, chunkRefs) - expr, err := syntax.ParseExpr(`{foo="bar"} |= "does not match"`) + expr, err := syntax.ParseExpr(`{foo="bar"} | trace_id="nomatch"`) require.NoError(t, err) req := &logproto.FilterChunkRefRequest{ From: now.Add(-8 * time.Hour), @@ -403,16 +403,14 @@ func TestBloomGateway_FilterChunkRefs(t *testing.T) { inputChunkRefs := groupRefs(t, chunkRefs) // Hack to get search string for a specific series // see MkBasicSeriesWithBlooms() in pkg/storage/bloom/v1/test_util.go - // each series has 1 chunk - // each chunk has multiple strings, from int(fp) to int(nextFp)-1 - x := rand.Intn(len(inputChunkRefs)) - fp := inputChunkRefs[x].Fingerprint - chks := inputChunkRefs[x].Refs - line := fmt.Sprintf("%04x:%04x", int(fp), 0) // first line + rnd := rand.Intn(len(inputChunkRefs)) + fp := inputChunkRefs[rnd].Fingerprint + chks := inputChunkRefs[rnd].Refs + key := fmt.Sprintf("%s:%04x", model.Fingerprint(fp), 0) - t.Log("x=", x, "fp=", 
fp, "line=", line) + t.Log("rnd=", rnd, "fp=", fp, "key=", key) - expr, err := syntax.ParseExpr(fmt.Sprintf(`{foo="bar"} |= "%s"`, line)) + expr, err := syntax.ParseExpr(fmt.Sprintf(`{foo="bar"} | trace_id="%s"`, key)) require.NoError(t, err) req := &logproto.FilterChunkRefRequest{ diff --git a/pkg/bloomgateway/metrics.go b/pkg/bloomgateway/metrics.go index 5c046d3147c3..9fe096eec2ac 100644 --- a/pkg/bloomgateway/metrics.go +++ b/pkg/bloomgateway/metrics.go @@ -56,7 +56,7 @@ type serverMetrics struct { filteredSeries prometheus.Histogram requestedChunks prometheus.Histogram filteredChunks prometheus.Histogram - receivedFilters prometheus.Histogram + receivedMatchers prometheus.Histogram } func newMetrics(registerer prometheus.Registerer, namespace, subsystem string) *metrics { @@ -105,11 +105,11 @@ func newServerMetrics(registerer prometheus.Registerer, namespace, subsystem str Help: "Total amount of chunk refs filtered by bloom-gateway", Buckets: prometheus.ExponentialBucketsRange(1, 100e3, 10), }), - receivedFilters: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{ + receivedMatchers: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{ Namespace: namespace, Subsystem: subsystem, - Name: "request_filters", - Help: "Number of filters per request.", + Name: "request_matchers", + Help: "Number of matchers per request.", Buckets: prometheus.ExponentialBuckets(1, 2, 9), // 1 -> 256 }), } diff --git a/pkg/bloomgateway/multiplexing.go b/pkg/bloomgateway/multiplexing.go index b814ae23a5a5..2aee9dc32c48 100644 --- a/pkg/bloomgateway/multiplexing.go +++ b/pkg/bloomgateway/multiplexing.go @@ -9,7 +9,6 @@ import ( iter "github.com/grafana/loki/v3/pkg/iter/v2" "github.com/grafana/loki/v3/pkg/logproto" - "github.com/grafana/loki/v3/pkg/logql/syntax" v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1" "github.com/grafana/loki/v3/pkg/storage/config" "github.com/grafana/loki/v3/pkg/storage/stores/shipper/bloomshipper" @@ -56,8 +55,8 @@ type Task struct { // series of the original request series []*logproto.GroupedChunkRefs - // filters of the original request - filters []syntax.LineFilterExpr + // matchers to check against + matchers []v1.LabelMatcher // blocks that were resolved on the index gateway and sent with the request blocks []bloomshipper.BlockRef // from..through date of the task's chunks @@ -75,13 +74,13 @@ type Task struct { recorder *v1.BloomRecorder } -func newTask(ctx context.Context, tenantID string, refs seriesWithInterval, filters []syntax.LineFilterExpr, blocks []bloomshipper.BlockRef) Task { +func newTask(ctx context.Context, tenantID string, refs seriesWithInterval, matchers []v1.LabelMatcher, blocks []bloomshipper.BlockRef) Task { return Task{ tenant: tenantID, recorder: v1.NewBloomRecorder(ctx, "task"), err: new(wrappedError), resCh: make(chan v1.Output), - filters: filters, + matchers: matchers, blocks: blocks, series: refs.series, interval: refs.interval, @@ -122,7 +121,7 @@ func (t Task) Copy(series []*logproto.GroupedChunkRefs) Task { tenant: t.tenant, err: t.err, resCh: t.resCh, - filters: t.filters, + matchers: t.matchers, blocks: t.blocks, series: series, interval: t.interval, @@ -132,13 +131,11 @@ func (t Task) Copy(series []*logproto.GroupedChunkRefs) Task { } } -func (t Task) RequestIter( - tokenizer *v1.NGramTokenizer, -) iter.Iterator[v1.Request] { +func (t Task) RequestIter() iter.Iterator[v1.Request] { return &requestIterator{ recorder: t.recorder, series: iter.NewSliceIter(t.series), - search: v1.FiltersToBloomTest(tokenizer, 
t.filters...), + search: v1.LabelMatchersToBloomTest(t.matchers...), channel: t.resCh, curr: v1.Request{}, } diff --git a/pkg/bloomgateway/multiplexing_test.go b/pkg/bloomgateway/multiplexing_test.go index d395d2a315cb..e6b97679e1ef 100644 --- a/pkg/bloomgateway/multiplexing_test.go +++ b/pkg/bloomgateway/multiplexing_test.go @@ -11,7 +11,6 @@ import ( v2 "github.com/grafana/loki/v3/pkg/iter/v2" "github.com/grafana/loki/v3/pkg/logproto" - "github.com/grafana/loki/v3/pkg/logql/syntax" v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1" "github.com/grafana/loki/v3/pkg/storage/stores/shipper/bloomshipper" ) @@ -55,15 +54,14 @@ func createTasksForRequests(t *testing.T, tenant string, requests ...*logproto.F func TestTask_RequestIterator(t *testing.T) { ts := mktime("2024-01-24 12:00") tenant := "fake" - tokenizer := v1.NewNGramTokenizer(4, 0) t.Run("empty request yields empty iterator", func(t *testing.T) { swb := seriesWithInterval{ interval: bloomshipper.Interval{Start: 0, End: math.MaxInt64}, series: []*logproto.GroupedChunkRefs{}, } - task := newTask(context.Background(), tenant, swb, []syntax.LineFilterExpr{}, nil) - it := task.RequestIter(tokenizer) + task := newTask(context.Background(), tenant, swb, nil, nil) + it := task.RequestIter() // nothing to iterate over require.False(t, it.Next()) }) @@ -106,7 +104,7 @@ func TestTask_RequestIterator(t *testing.T) { iters := make([]v2.PeekIterator[v1.Request], 0, len(tasks)) for _, task := range tasks { - iters = append(iters, v2.NewPeekIter(task.RequestIter(tokenizer))) + iters = append(iters, v2.NewPeekIter(task.RequestIter())) } // merge the request iterators using the heap sort iterator diff --git a/pkg/bloomgateway/processor.go b/pkg/bloomgateway/processor.go index f422bfd56392..ad804555a3ff 100644 --- a/pkg/bloomgateway/processor.go +++ b/pkg/bloomgateway/processor.go @@ -150,7 +150,6 @@ func (p *processor) processBlock(_ context.Context, bq *bloomshipper.CloseableBl return v1.ErrUnsupportedSchemaVersion } - tokenizer := v1.NewNGramTokenizer(schema.NGramLen(), schema.NGramSkip()) iters := make([]iter.PeekIterator[v1.Request], 0, len(tasks)) for _, task := range tasks { @@ -164,7 +163,7 @@ func (p *processor) processBlock(_ context.Context, bq *bloomshipper.CloseableBl // sp.LogKV("process block", blockID, "series", len(task.series)) // } - it := iter.NewPeekIter(task.RequestIter(tokenizer)) + it := iter.NewPeekIter(task.RequestIter()) iters = append(iters, it) } diff --git a/pkg/bloomgateway/processor_test.go b/pkg/bloomgateway/processor_test.go index 8ce78e7bdb76..f1120fe530a4 100644 --- a/pkg/bloomgateway/processor_test.go +++ b/pkg/bloomgateway/processor_test.go @@ -14,7 +14,6 @@ import ( "github.com/stretchr/testify/require" "go.uber.org/atomic" - "github.com/grafana/loki/v3/pkg/logql/syntax" v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1" "github.com/grafana/loki/v3/pkg/storage/chunk/client" "github.com/grafana/loki/v3/pkg/storage/config" @@ -140,17 +139,16 @@ func TestProcessor(t *testing.T) { }, day: config.NewDayTime(truncateDay(now)), } - filters := []syntax.LineFilterExpr{ - { - LineFilter: syntax.LineFilter{ - Ty: 0, - Match: "no match", - }, + + matchers := []v1.LabelMatcher{ + v1.PlainLabelMatcher{ + Key: "trace_id", + Value: "nomatch", }, } t.Log("series", len(swb.series)) - task := newTask(ctx, "fake", swb, filters, nil) + task := newTask(ctx, "fake", swb, matchers, nil) tasks := []Task{task} results := atomic.NewInt64(0) @@ -192,17 +190,15 @@ func TestProcessor(t *testing.T) { }, day: 
config.NewDayTime(truncateDay(now)), } - filters := []syntax.LineFilterExpr{ - { - LineFilter: syntax.LineFilter{ - Ty: 0, - Match: "no match", - }, + matchers := []v1.LabelMatcher{ + v1.PlainLabelMatcher{ + Key: "trace_id", + Value: "nomatch", }, } t.Log("series", len(swb.series)) - task := newTask(ctx, "fake", swb, filters, blocks) + task := newTask(ctx, "fake", swb, matchers, blocks) tasks := []Task{task} results := atomic.NewInt64(0) @@ -241,17 +237,15 @@ func TestProcessor(t *testing.T) { }, day: config.NewDayTime(truncateDay(now)), } - filters := []syntax.LineFilterExpr{ - { - LineFilter: syntax.LineFilter{ - Ty: 0, - Match: "no match", - }, + matchers := []v1.LabelMatcher{ + v1.PlainLabelMatcher{ + Key: "trace_id", + Value: "nomatch", }, } t.Log("series", len(swb.series)) - task := newTask(ctx, "fake", swb, filters, nil) + task := newTask(ctx, "fake", swb, matchers, nil) tasks := []Task{task} results := atomic.NewInt64(0) diff --git a/pkg/bloomgateway/querier.go b/pkg/bloomgateway/querier.go index 23de7a15e2be..dfc3746380ab 100644 --- a/pkg/bloomgateway/querier.go +++ b/pkg/bloomgateway/querier.go @@ -103,7 +103,7 @@ func convertToShortRef(ref *logproto.ChunkRef) *logproto.ShortRef { func (bq *BloomQuerier) FilterChunkRefs(ctx context.Context, tenant string, from, through model.Time, chunkRefs []*logproto.ChunkRef, queryPlan plan.QueryPlan) ([]*logproto.ChunkRef, error) { // Shortcut that does not require any filtering - if !bq.limits.BloomGatewayEnabled(tenant) || len(chunkRefs) == 0 || len(v1.ExtractTestableLineFilters(queryPlan.AST)) == 0 { + if !bq.limits.BloomGatewayEnabled(tenant) || len(chunkRefs) == 0 || len(v1.ExtractTestableLabelMatchers(queryPlan.AST)) == 0 { return chunkRefs, nil } diff --git a/pkg/bloomgateway/querier_test.go b/pkg/bloomgateway/querier_test.go index d4b24447ae12..ca4036d266ed 100644 --- a/pkg/bloomgateway/querier_test.go +++ b/pkg/bloomgateway/querier_test.go @@ -93,7 +93,7 @@ func TestBloomQuerier(t *testing.T) { through := model.Now() from := through.Add(-12 * time.Hour) chunkRefs := []*logproto.ChunkRef{} - expr, err := syntax.ParseExpr(`{foo="bar"} |= "uuid"`) + expr, err := syntax.ParseExpr(`{foo="bar"} | trace_id="exists"`) require.NoError(t, err) res, err := bq.FilterChunkRefs(ctx, tenant, from, through, chunkRefs, plan.QueryPlan{AST: expr}) require.NoError(t, err) @@ -113,7 +113,7 @@ func TestBloomQuerier(t *testing.T) { {Fingerprint: 1000, UserID: tenant, From: from, Through: through, Checksum: 2}, {Fingerprint: 2000, UserID: tenant, From: from, Through: through, Checksum: 3}, } - expr, err := syntax.ParseExpr(`{foo="bar"} |= "uuid"`) + expr, err := syntax.ParseExpr(`{foo="bar"} | trace_id="exists"`) require.NoError(t, err) res, err := bq.FilterChunkRefs(ctx, tenant, from, through, chunkRefs, plan.QueryPlan{AST: expr}) require.Error(t, err) @@ -132,7 +132,7 @@ func TestBloomQuerier(t *testing.T) { {Fingerprint: 2000, UserID: tenant, From: mktime("2024-04-16 23:30"), Through: mktime("2024-04-17 00:30"), Checksum: 2}, // day 1 {Fingerprint: 3000, UserID: tenant, From: mktime("2024-04-17 00:30"), Through: mktime("2024-04-17 01:30"), Checksum: 3}, // day 2 } - expr, err := syntax.ParseExpr(`{foo="bar"} |= "uuid"`) + expr, err := syntax.ParseExpr(`{foo="bar"} | trace_id="exists"`) require.NoError(t, err) res, err := bq.FilterChunkRefs(ctx, tenant, from, through, chunkRefs, plan.QueryPlan{AST: expr}) require.NoError(t, err) diff --git a/pkg/bloomgateway/stats.go b/pkg/bloomgateway/stats.go index 09f78841e544..59dd9d25287d 100644 --- 
a/pkg/bloomgateway/stats.go +++ b/pkg/bloomgateway/stats.go @@ -9,7 +9,7 @@ import ( type Stats struct { Status string - NumTasks, NumFilters int + NumTasks, NumMatchers int ChunksRequested, ChunksFiltered int SeriesRequested, SeriesFiltered int QueueTime *atomic.Duration @@ -70,7 +70,7 @@ func (s *Stats) KVArgs() []any { "msg", "stats-report", "status", s.Status, "tasks", s.NumTasks, - "filters", s.NumFilters, + "matchers", s.NumMatchers, "blocks_processed", s.ProcessedBlocks.Load(), "series_requested", s.SeriesRequested, "series_filtered", s.SeriesFiltered, diff --git a/pkg/canary/reader/reader.go b/pkg/canary/reader/reader.go index 88af34ce8e75..c98a7cab8fd7 100644 --- a/pkg/canary/reader/reader.go +++ b/pkg/canary/reader/reader.go @@ -390,6 +390,14 @@ func (r *Reader) run() { // or times out based on the above SetReadDeadline call. err := unmarshal.ReadTailResponseJSON(tailResponse, r.conn) if err != nil { + var e *websocket.CloseError + if errors.As(err, &e) && e.Text == "reached tail max duration limit" { + fmt.Fprintf(r.w, "tail max duration limit exceeded, will retry immediately: %s\n", err) + + r.closeAndReconnect() + continue + } + reason := "error reading websocket" if e, ok := err.(net.Error); ok && e.Timeout() { reason = fmt.Sprintf("timeout tailing new logs (timeout period: %.2fs)", timeoutInterval.Seconds()) diff --git a/pkg/chunkenc/dumb_chunk.go b/pkg/chunkenc/dumb_chunk.go index 33df4501927b..e28298605118 100644 --- a/pkg/chunkenc/dumb_chunk.go +++ b/pkg/chunkenc/dumb_chunk.go @@ -6,6 +6,7 @@ import ( "sort" "time" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/iter" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/logql/log" @@ -69,7 +70,7 @@ func (c *dumbChunk) Utilization() float64 { return float64(len(c.entries)) / float64(tmpNumEntries) } -func (c *dumbChunk) Encoding() Encoding { return EncNone } +func (c *dumbChunk) Encoding() compression.Encoding { return compression.EncNone } // Returns an iterator that goes from _most_ recent to _least_ recent (ie, // backwards). diff --git a/pkg/chunkenc/interface.go b/pkg/chunkenc/interface.go index f0b17c7750f3..057fc8b985ad 100644 --- a/pkg/chunkenc/interface.go +++ b/pkg/chunkenc/interface.go @@ -5,9 +5,9 @@ import ( "errors" "fmt" "io" - "strings" "time" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/iter" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/logql/log" @@ -48,86 +48,6 @@ func IsOutOfOrderErr(err error) bool { return err == ErrOutOfOrder || IsErrTooFarBehind(err) } -// Encoding is the identifier for a chunk encoding. -type Encoding byte - -// The different available encodings. -// Make sure to preserve the order, as these numeric values are written to the chunks! 
-const ( - EncNone Encoding = iota - EncGZIP - EncDumb - EncLZ4_64k - EncSnappy - EncLZ4_256k - EncLZ4_1M - EncLZ4_4M - EncFlate - EncZstd -) - -var supportedEncoding = []Encoding{ - EncNone, - EncGZIP, - EncLZ4_64k, - EncSnappy, - EncLZ4_256k, - EncLZ4_1M, - EncLZ4_4M, - EncFlate, - EncZstd, -} - -func (e Encoding) String() string { - switch e { - case EncGZIP: - return "gzip" - case EncNone: - return "none" - case EncDumb: - return "dumb" - case EncLZ4_64k: - return "lz4-64k" - case EncLZ4_256k: - return "lz4-256k" - case EncLZ4_1M: - return "lz4-1M" - case EncLZ4_4M: - return "lz4" - case EncSnappy: - return "snappy" - case EncFlate: - return "flate" - case EncZstd: - return "zstd" - default: - return "unknown" - } -} - -// ParseEncoding parses an chunk encoding (compression algorithm) by its name. -func ParseEncoding(enc string) (Encoding, error) { - for _, e := range supportedEncoding { - if strings.EqualFold(e.String(), enc) { - return e, nil - } - } - return 0, fmt.Errorf("invalid encoding: %s, supported: %s", enc, SupportedEncoding()) - -} - -// SupportedEncoding returns the list of supported Encoding. -func SupportedEncoding() string { - var sb strings.Builder - for i := range supportedEncoding { - sb.WriteString(supportedEncoding[i].String()) - if i != len(supportedEncoding)-1 { - sb.WriteString(", ") - } - } - return sb.String() -} - // Chunk is the interface for the compressed logs chunk format. type Chunk interface { Bounds() (time.Time, time.Time) @@ -148,7 +68,7 @@ type Chunk interface { UncompressedSize() int CompressedSize() int Close() error - Encoding() Encoding + Encoding() compression.Encoding Rebound(start, end time.Time, filter filter.Func) (Chunk, error) } diff --git a/pkg/chunkenc/interface_test.go b/pkg/chunkenc/interface_test.go index ed81c4d3604e..8faed8e2c43f 100644 --- a/pkg/chunkenc/interface_test.go +++ b/pkg/chunkenc/interface_test.go @@ -7,29 +7,6 @@ import ( "github.com/stretchr/testify/require" ) -func TestParseEncoding(t *testing.T) { - tests := []struct { - enc string - want Encoding - wantErr bool - }{ - {"gzip", EncGZIP, false}, - {"bad", 0, true}, - } - for _, tt := range tests { - t.Run(tt.enc, func(t *testing.T) { - got, err := ParseEncoding(tt.enc) - if (err != nil) != tt.wantErr { - t.Errorf("ParseEncoding() error = %v, wantErr %v", err, tt.wantErr) - return - } - if got != tt.want { - t.Errorf("ParseEncoding() = %v, want %v", got, tt.want) - } - }) - } -} - func TestIsOutOfOrderErr(t *testing.T) { now := time.Now() diff --git a/pkg/chunkenc/memchunk.go b/pkg/chunkenc/memchunk.go index 328e91c94deb..03f33b817672 100644 --- a/pkg/chunkenc/memchunk.go +++ b/pkg/chunkenc/memchunk.go @@ -16,6 +16,7 @@ import ( "github.com/pkg/errors" "github.com/prometheus/prometheus/model/labels" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/iter" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/logql/log" @@ -131,7 +132,7 @@ type MemChunk struct { head HeadBlock format byte - encoding Encoding + encoding compression.Encoding headFmt HeadBlockFmt // compressed size of chunk. Set when chunk is cut or while decoding chunk from storage. 
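
Aside, for context (not part of the patch): the Encoding type, ParseEncoding, and SupportedEncoding removed from pkg/chunkenc above now live in pkg/compression, and call sites switch to the new import path. A minimal sketch of the relocated API, mirroring the parse the bloom builder now performs on the per-tenant block-encoding limit:

    package main

    import (
        "fmt"

        "github.com/grafana/loki/v3/pkg/compression"
    )

    func main() {
        enc, err := compression.ParseEncoding("snappy")
        if err != nil {
            panic(err) // the error names the supported encodings
        }
        fmt.Println(enc == compression.EncSnappy, enc) // true snappy
    }
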
@@ -196,7 +197,7 @@ func (hb *headBlock) Append(ts int64, line string, _ labels.Labels) (bool, error return false, nil } -func (hb *headBlock) Serialise(pool WriterPool) ([]byte, error) { +func (hb *headBlock) Serialise(pool compression.WriterPool) ([]byte, error) { inBuf := serializeBytesBufferPool.Get().(*bytes.Buffer) defer func() { inBuf.Reset() @@ -354,7 +355,7 @@ type entry struct { } // NewMemChunk returns a new in-mem chunk. -func NewMemChunk(chunkFormat byte, enc Encoding, head HeadBlockFmt, blockSize, targetSize int) *MemChunk { +func NewMemChunk(chunkFormat byte, enc compression.Encoding, head HeadBlockFmt, blockSize, targetSize int) *MemChunk { return newMemChunkWithFormat(chunkFormat, enc, head, blockSize, targetSize) } @@ -369,7 +370,7 @@ func panicIfInvalidFormat(chunkFmt byte, head HeadBlockFmt) { } // NewMemChunk returns a new in-mem chunk. -func newMemChunkWithFormat(format byte, enc Encoding, head HeadBlockFmt, blockSize, targetSize int) *MemChunk { +func newMemChunkWithFormat(format byte, enc compression.Encoding, head HeadBlockFmt, blockSize, targetSize int) *MemChunk { panicIfInvalidFormat(format, head) symbolizer := newSymbolizer() @@ -413,10 +414,10 @@ func newByteChunk(b []byte, blockSize, targetSize int, fromCheckpoint bool) (*Me bc.format = version switch version { case ChunkFormatV1: - bc.encoding = EncGZIP + bc.encoding = compression.EncGZIP case ChunkFormatV2, ChunkFormatV3, ChunkFormatV4: // format v2+ has a byte for block encoding. - enc := Encoding(db.byte()) + enc := compression.Encoding(db.byte()) if db.err() != nil { return nil, errors.Wrap(db.err(), "verifying encoding") } @@ -535,7 +536,7 @@ func newByteChunk(b []byte, blockSize, targetSize int, fromCheckpoint bool) (*Me if fromCheckpoint { bc.symbolizer = symbolizerFromCheckpoint(lb) } else { - symbolizer, err := symbolizerFromEnc(lb, GetReaderPool(bc.encoding)) + symbolizer, err := symbolizerFromEnc(lb, compression.GetReaderPool(bc.encoding)) if err != nil { return nil, err } @@ -653,7 +654,7 @@ func (c *MemChunk) writeTo(w io.Writer, forCheckpoint bool) (int64, error) { } } else { var err error - n, crcHash, err = c.symbolizer.SerializeTo(w, GetWriterPool(c.encoding)) + n, crcHash, err = c.symbolizer.SerializeTo(w, compression.GetWriterPool(c.encoding)) if err != nil { return offset, errors.Wrap(err, "write structured metadata") } @@ -776,7 +777,7 @@ func MemchunkFromCheckpoint(chk, head []byte, desiredIfNotUnordered HeadBlockFmt } // Encoding implements Chunk. -func (c *MemChunk) Encoding() Encoding { +func (c *MemChunk) Encoding() compression.Encoding { return c.encoding } @@ -941,7 +942,7 @@ func (c *MemChunk) cut() error { return nil } - b, err := c.head.Serialise(GetWriterPool(c.encoding)) + b, err := c.head.Serialise(compression.GetWriterPool(c.encoding)) if err != nil { return err } @@ -1172,7 +1173,7 @@ func (c *MemChunk) Rebound(start, end time.Time, filter filter.Func) (Chunk, err // then allows us to bind a decoding context to a block when requested, but otherwise helps reduce the // chances of chunk<>block encoding drift in the codebase as the latter is parameterized by the former. 
type encBlock struct { - enc Encoding + enc compression.Encoding format byte symbolizer *symbolizer block @@ -1182,14 +1183,14 @@ func (b encBlock) Iterator(ctx context.Context, pipeline log.StreamPipeline) ite if len(b.b) == 0 { return iter.NoopEntryIterator } - return newEntryIterator(ctx, GetReaderPool(b.enc), b.b, pipeline, b.format, b.symbolizer) + return newEntryIterator(ctx, compression.GetReaderPool(b.enc), b.b, pipeline, b.format, b.symbolizer) } func (b encBlock) SampleIterator(ctx context.Context, extractor log.StreamSampleExtractor) iter.SampleIterator { if len(b.b) == 0 { return iter.NoopSampleIterator } - return newSampleIterator(ctx, GetReaderPool(b.enc), b.b, b.format, extractor, b.symbolizer) + return newSampleIterator(ctx, compression.GetReaderPool(b.enc), b.b, b.format, extractor, b.symbolizer) } func (b block) Offset() int { @@ -1339,7 +1340,7 @@ type bufferedIterator struct { stats *stats.Context reader io.Reader - pool ReaderPool + pool compression.ReaderPool symbolizer *symbolizer err error @@ -1358,7 +1359,7 @@ type bufferedIterator struct { closed bool } -func newBufferedIterator(ctx context.Context, pool ReaderPool, b []byte, format byte, symbolizer *symbolizer) *bufferedIterator { +func newBufferedIterator(ctx context.Context, pool compression.ReaderPool, b []byte, format byte, symbolizer *symbolizer) *bufferedIterator { stats := stats.FromContext(ctx) stats.AddCompressedBytes(int64(len(b))) return &bufferedIterator{ @@ -1619,7 +1620,7 @@ func (si *bufferedIterator) close() { si.origBytes = nil } -func newEntryIterator(ctx context.Context, pool ReaderPool, b []byte, pipeline log.StreamPipeline, format byte, symbolizer *symbolizer) iter.EntryIterator { +func newEntryIterator(ctx context.Context, pool compression.ReaderPool, b []byte, pipeline log.StreamPipeline, format byte, symbolizer *symbolizer) iter.EntryIterator { return &entryBufferedIterator{ bufferedIterator: newBufferedIterator(ctx, pool, b, format, symbolizer), pipeline: pipeline, @@ -1671,7 +1672,7 @@ func (e *entryBufferedIterator) Close() error { return e.bufferedIterator.Close() } -func newSampleIterator(ctx context.Context, pool ReaderPool, b []byte, format byte, extractor log.StreamSampleExtractor, symbolizer *symbolizer) iter.SampleIterator { +func newSampleIterator(ctx context.Context, pool compression.ReaderPool, b []byte, format byte, extractor log.StreamSampleExtractor, symbolizer *symbolizer) iter.SampleIterator { return &sampleBufferedIterator{ bufferedIterator: newBufferedIterator(ctx, pool, b, format, symbolizer), extractor: extractor, diff --git a/pkg/chunkenc/memchunk_test.go b/pkg/chunkenc/memchunk_test.go index 85cccd743cfb..987a5d88b286 100644 --- a/pkg/chunkenc/memchunk_test.go +++ b/pkg/chunkenc/memchunk_test.go @@ -22,6 +22,7 @@ import ( "github.com/grafana/loki/pkg/push" "github.com/grafana/loki/v3/pkg/chunkenc/testdata" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/iter" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/logql/log" @@ -31,16 +32,16 @@ import ( "github.com/grafana/loki/v3/pkg/util/filter" ) -var testEncoding = []Encoding{ - EncNone, - EncGZIP, - EncLZ4_64k, - EncLZ4_256k, - EncLZ4_1M, - EncLZ4_4M, - EncSnappy, - EncFlate, - EncZstd, +var testEncodings = []compression.Encoding{ + compression.EncNone, + compression.EncGZIP, + compression.EncLZ4_64k, + compression.EncLZ4_256k, + compression.EncLZ4_1M, + compression.EncLZ4_4M, + compression.EncSnappy, + compression.EncFlate, + compression.EncZstd, } var ( @@ 
-84,7 +85,7 @@ const ( ) func TestBlocksInclusive(t *testing.T) { - for _, enc := range testEncoding { + for _, enc := range testEncodings { enc := enc for _, format := range allPossibleFormats { chunkfmt, headfmt := format.chunkFormat, format.headBlockFmt @@ -103,7 +104,7 @@ func TestBlocksInclusive(t *testing.T) { } func TestBlock(t *testing.T) { - for _, enc := range testEncoding { + for _, enc := range testEncodings { enc := enc for _, format := range allPossibleFormats { chunkFormat, headBlockFmt := format.chunkFormat, format.headBlockFmt @@ -258,7 +259,7 @@ func TestBlock(t *testing.T) { } func TestCorruptChunk(t *testing.T) { - for _, enc := range testEncoding { + for _, enc := range testEncodings { enc := enc for _, format := range allPossibleFormats { chunkfmt, headfmt := format.chunkFormat, format.headBlockFmt @@ -298,7 +299,7 @@ func TestCorruptChunk(t *testing.T) { func TestReadFormatV1(t *testing.T) { t.Parallel() - c := NewMemChunk(ChunkFormatV3, EncGZIP, DefaultTestHeadBlockFmt, testBlockSize, testTargetSize) + c := NewMemChunk(ChunkFormatV3, compression.EncGZIP, DefaultTestHeadBlockFmt, testBlockSize, testTargetSize) fillChunk(c) // overrides to v1 for testing that specific version. c.format = ChunkFormatV1 @@ -335,7 +336,7 @@ func TestReadFormatV1(t *testing.T) { // 2) []byte loaded chunks <-> []byte loaded chunks func TestRoundtripV2(t *testing.T) { for _, testData := range allPossibleFormats { - for _, enc := range testEncoding { + for _, enc := range testEncodings { enc := enc t.Run(testNameWithFormats(enc, testData.chunkFormat, testData.headBlockFmt), func(t *testing.T) { t.Parallel() @@ -390,12 +391,12 @@ func TestRoundtripV2(t *testing.T) { } } -func testNameWithFormats(enc Encoding, chunkFormat byte, headBlockFmt HeadBlockFmt) string { +func testNameWithFormats(enc compression.Encoding, chunkFormat byte, headBlockFmt HeadBlockFmt) string { return fmt.Sprintf("encoding:%v chunkFormat:%v headBlockFmt:%v", enc, chunkFormat, headBlockFmt) } func TestRoundtripV3(t *testing.T) { - for _, enc := range testEncoding { + for _, enc := range testEncodings { enc := enc for _, format := range allPossibleFormats { chunkfmt, headfmt := format.chunkFormat, format.headBlockFmt @@ -420,7 +421,7 @@ func TestRoundtripV3(t *testing.T) { func TestSerialization(t *testing.T) { for _, testData := range allPossibleFormats { - for _, enc := range testEncoding { + for _, enc := range testEncodings { enc := enc // run tests with and without structured metadata since it is optional for _, appendWithStructuredMetadata := range []bool{false, true} { @@ -509,7 +510,7 @@ func TestSerialization(t *testing.T) { func TestChunkFilling(t *testing.T) { for _, testData := range allPossibleFormats { - for _, enc := range testEncoding { + for _, enc := range testEncodings { enc := enc t.Run(testNameWithFormats(enc, testData.chunkFormat, testData.headBlockFmt), func(t *testing.T) { t.Parallel() @@ -557,7 +558,7 @@ func TestChunkFilling(t *testing.T) { func TestGZIPChunkTargetSize(t *testing.T) { t.Parallel() - chk := NewMemChunk(ChunkFormatV3, EncGZIP, DefaultTestHeadBlockFmt, testBlockSize, testTargetSize) + chk := NewMemChunk(ChunkFormatV3, compression.EncGZIP, DefaultTestHeadBlockFmt, testBlockSize, testTargetSize) lineSize := 512 entry := &logproto.Entry{ @@ -680,7 +681,7 @@ func TestMemChunk_AppendOutOfOrder(t *testing.T) { t.Run(testName, func(t *testing.T) { t.Parallel() - tester(t, NewMemChunk(ChunkFormatV3, EncGZIP, f, testBlockSize, testTargetSize)) + tester(t, NewMemChunk(ChunkFormatV3, 
compression.EncGZIP, f, testBlockSize, testTargetSize)) }) } } @@ -696,7 +697,7 @@ func TestChunkSize(t *testing.T) { var result []res for _, bs := range testBlockSizes { for _, f := range allPossibleFormats { - for _, enc := range testEncoding { + for _, enc := range testEncodings { name := fmt.Sprintf("%s_%s", enc.String(), humanize.Bytes(uint64(bs))) t.Run(name, func(t *testing.T) { c := newMemChunkWithFormat(f.chunkFormat, enc, f.headBlockFmt, bs, testTargetSize) @@ -725,7 +726,7 @@ func TestChunkSize(t *testing.T) { } func TestChunkStats(t *testing.T) { - c := NewMemChunk(ChunkFormatV4, EncSnappy, DefaultTestHeadBlockFmt, testBlockSize, 0) + c := NewMemChunk(ChunkFormatV4, compression.EncSnappy, DefaultTestHeadBlockFmt, testBlockSize, 0) first := time.Now() entry := &logproto.Entry{ Timestamp: first, @@ -797,7 +798,7 @@ func TestChunkStats(t *testing.T) { func TestIteratorClose(t *testing.T) { for _, f := range allPossibleFormats { - for _, enc := range testEncoding { + for _, enc := range testEncodings { t.Run(enc.String(), func(t *testing.T) { for _, test := range []func(iter iter.EntryIterator, t *testing.T){ func(iter iter.EntryIterator, t *testing.T) { @@ -846,7 +847,7 @@ func BenchmarkWrite(b *testing.B) { i := int64(0) for _, f := range HeadBlockFmts { - for _, enc := range testEncoding { + for _, enc := range testEncodings { for _, withStructuredMetadata := range []bool{false, true} { name := fmt.Sprintf("%v-%v", f, enc) if withStructuredMetadata { @@ -896,7 +897,7 @@ func (nomatchPipeline) ReferencedStructuredMetadata() bool { func BenchmarkRead(b *testing.B) { for _, bs := range testBlockSizes { - for _, enc := range testEncoding { + for _, enc := range testEncodings { name := fmt.Sprintf("%s_%s", enc.String(), humanize.Bytes(uint64(bs))) b.Run(name, func(b *testing.B) { chunks, size := generateData(enc, 5, bs, testTargetSize) @@ -923,7 +924,7 @@ func BenchmarkRead(b *testing.B) { } for _, bs := range testBlockSizes { - for _, enc := range testEncoding { + for _, enc := range testEncodings { name := fmt.Sprintf("sample_%s_%s", enc.String(), humanize.Bytes(uint64(bs))) b.Run(name, func(b *testing.B) { chunks, size := generateData(enc, 5, bs, testTargetSize) @@ -967,7 +968,7 @@ func BenchmarkBackwardIterator(b *testing.B) { for _, bs := range testBlockSizes { b.Run(humanize.Bytes(uint64(bs)), func(b *testing.B) { b.ReportAllocs() - c := NewMemChunk(ChunkFormatV4, EncSnappy, DefaultTestHeadBlockFmt, bs, testTargetSize) + c := NewMemChunk(ChunkFormatV4, compression.EncSnappy, DefaultTestHeadBlockFmt, bs, testTargetSize) _ = fillChunk(c) b.ResetTimer() for n := 0; n < b.N; n++ { @@ -988,7 +989,7 @@ func BenchmarkBackwardIterator(b *testing.B) { } func TestGenerateDataSize(t *testing.T) { - for _, enc := range testEncoding { + for _, enc := range testEncodings { t.Run(enc.String(), func(t *testing.T) { chunks, size := generateData(enc, 50, testBlockSize, testTargetSize) @@ -1081,7 +1082,7 @@ func BenchmarkHeadBlockSampleIterator(b *testing.B) { func TestMemChunk_IteratorBounds(t *testing.T) { createChunk := func() *MemChunk { t.Helper() - c := NewMemChunk(ChunkFormatV3, EncNone, DefaultTestHeadBlockFmt, 1e6, 1e6) + c := NewMemChunk(ChunkFormatV3, compression.EncNone, DefaultTestHeadBlockFmt, 1e6, 1e6) if _, err := c.Append(&logproto.Entry{ Timestamp: time.Unix(0, 1), @@ -1141,7 +1142,7 @@ func TestMemChunk_IteratorBounds(t *testing.T) { } func TestMemchunkLongLine(t *testing.T) { - for _, enc := range testEncoding { + for _, enc := range testEncodings { enc := enc 
t.Run(enc.String(), func(t *testing.T) { t.Parallel() @@ -1167,9 +1168,9 @@ func TestMemchunkLongLine(t *testing.T) { func TestBytesWith(t *testing.T) { t.Parallel() - exp, err := NewMemChunk(ChunkFormatV3, EncNone, DefaultTestHeadBlockFmt, testBlockSize, testTargetSize).BytesWith(nil) + exp, err := NewMemChunk(ChunkFormatV3, compression.EncNone, DefaultTestHeadBlockFmt, testBlockSize, testTargetSize).BytesWith(nil) require.Nil(t, err) - out, err := NewMemChunk(ChunkFormatV3, EncNone, DefaultTestHeadBlockFmt, testBlockSize, testTargetSize).BytesWith([]byte{1, 2, 3}) + out, err := NewMemChunk(ChunkFormatV3, compression.EncNone, DefaultTestHeadBlockFmt, testBlockSize, testTargetSize).BytesWith([]byte{1, 2, 3}) require.Nil(t, err) require.Equal(t, exp, out) @@ -1180,8 +1181,8 @@ func TestCheckpointEncoding(t *testing.T) { blockSize, targetSize := 256*1024, 1500*1024 for _, f := range allPossibleFormats { - t.Run(testNameWithFormats(EncSnappy, f.chunkFormat, f.headBlockFmt), func(t *testing.T) { - c := newMemChunkWithFormat(f.chunkFormat, EncSnappy, f.headBlockFmt, blockSize, targetSize) + t.Run(testNameWithFormats(compression.EncSnappy, f.chunkFormat, f.headBlockFmt), func(t *testing.T) { + c := newMemChunkWithFormat(f.chunkFormat, compression.EncSnappy, f.headBlockFmt, blockSize, targetSize) // add a few entries for i := 0; i < 5; i++ { @@ -1266,7 +1267,7 @@ var ( func BenchmarkBufferedIteratorLabels(b *testing.B) { for _, f := range HeadBlockFmts { b.Run(f.String(), func(b *testing.B) { - c := NewMemChunk(ChunkFormatV3, EncSnappy, f, testBlockSize, testTargetSize) + c := NewMemChunk(ChunkFormatV3, compression.EncSnappy, f, testBlockSize, testTargetSize) _ = fillChunk(c) labelsSet := []labels.Labels{ @@ -1366,8 +1367,8 @@ func BenchmarkBufferedIteratorLabels(b *testing.B) { func Test_HeadIteratorReverse(t *testing.T) { for _, testData := range allPossibleFormats { - t.Run(testNameWithFormats(EncSnappy, testData.chunkFormat, testData.headBlockFmt), func(t *testing.T) { - c := newMemChunkWithFormat(testData.chunkFormat, EncSnappy, testData.headBlockFmt, testBlockSize, testTargetSize) + t.Run(testNameWithFormats(compression.EncSnappy, testData.chunkFormat, testData.headBlockFmt), func(t *testing.T) { + c := newMemChunkWithFormat(testData.chunkFormat, compression.EncSnappy, testData.headBlockFmt, testBlockSize, testTargetSize) genEntry := func(i int64) *logproto.Entry { return &logproto.Entry{ Timestamp: time.Unix(0, i), @@ -1482,7 +1483,7 @@ func TestMemChunk_Rebound(t *testing.T) { } func buildTestMemChunk(t *testing.T, from, through time.Time) *MemChunk { - chk := NewMemChunk(ChunkFormatV3, EncGZIP, DefaultTestHeadBlockFmt, defaultBlockSize, 0) + chk := NewMemChunk(ChunkFormatV3, compression.EncGZIP, DefaultTestHeadBlockFmt, defaultBlockSize, 0) for ; from.Before(through); from = from.Add(time.Second) { _, err := chk.Append(&logproto.Entry{ Line: from.String(), @@ -1603,7 +1604,7 @@ func TestMemChunk_ReboundAndFilter_with_filter(t *testing.T) { } func buildFilterableTestMemChunk(t *testing.T, from, through time.Time, matchingFrom, matchingTo *time.Time, withStructuredMetadata bool) *MemChunk { - chk := NewMemChunk(ChunkFormatV4, EncGZIP, DefaultTestHeadBlockFmt, defaultBlockSize, 0) + chk := NewMemChunk(ChunkFormatV4, compression.EncGZIP, DefaultTestHeadBlockFmt, defaultBlockSize, 0) t.Logf("from : %v", from.String()) t.Logf("through: %v", through.String()) var structuredMetadata push.LabelsAdapter @@ -1752,7 +1753,7 @@ func TestMemChunk_SpaceFor(t *testing.T) { t.Run(tc.desc, func(t 
*testing.T) { for _, format := range allPossibleFormats { t.Run(fmt.Sprintf("chunk_v%d_head_%s", format.chunkFormat, format.headBlockFmt), func(t *testing.T) { - chk := newMemChunkWithFormat(format.chunkFormat, EncNone, format.headBlockFmt, 1024, tc.targetSize) + chk := newMemChunkWithFormat(format.chunkFormat, compression.EncNone, format.headBlockFmt, 1024, tc.targetSize) chk.blocks = make([]block, tc.nBlocks) chk.cutBlockSize = tc.cutBlockSize @@ -1775,7 +1776,7 @@ func TestMemChunk_SpaceFor(t *testing.T) { } func TestMemChunk_IteratorWithStructuredMetadata(t *testing.T) { - for _, enc := range testEncoding { + for _, enc := range testEncodings { enc := enc t.Run(enc.String(), func(t *testing.T) { streamLabels := labels.Labels{ @@ -2054,7 +2055,7 @@ func TestDecodeChunkIncorrectBlockOffset(t *testing.T) { t.Run(fmt.Sprintf("chunkFormat:%v headBlockFmt:%v", format.chunkFormat, format.headBlockFmt), func(t *testing.T) { for incorrectOffsetBlockNum := 0; incorrectOffsetBlockNum < 3; incorrectOffsetBlockNum++ { t.Run(fmt.Sprintf("inorrect offset block: %d", incorrectOffsetBlockNum), func(t *testing.T) { - chk := NewMemChunk(format.chunkFormat, EncNone, format.headBlockFmt, blockSize, testTargetSize) + chk := NewMemChunk(format.chunkFormat, compression.EncNone, format.headBlockFmt, blockSize, testTargetSize) ts := time.Now().Unix() for i := 0; i < 3; i++ { dup, err := chk.Append(&logproto.Entry{ diff --git a/pkg/chunkenc/pool.go b/pkg/chunkenc/pool.go index 486bef44b3da..8c640149a78f 100644 --- a/pkg/chunkenc/pool.go +++ b/pkg/chunkenc/pool.go @@ -1,49 +1,15 @@ package chunkenc import ( - "bufio" "bytes" - "io" - "runtime" "sync" - "github.com/golang/snappy" - "github.com/klauspost/compress/flate" - "github.com/klauspost/compress/gzip" - "github.com/klauspost/compress/zstd" - "github.com/pierrec/lz4/v4" "github.com/prometheus/prometheus/util/pool" "github.com/grafana/loki/v3/pkg/logproto" ) -// WriterPool is a pool of io.Writer -// This is used by every chunk to avoid unnecessary allocations. -type WriterPool interface { - GetWriter(io.Writer) io.WriteCloser - PutWriter(io.WriteCloser) -} - -// ReaderPool similar to WriterPool but for reading chunks. -type ReaderPool interface { - GetReader(io.Reader) (io.Reader, error) - PutReader(io.Reader) -} - var ( - // Gzip is the gnu zip compression pool - Gzip = GzipPool{level: gzip.DefaultCompression} - Lz4_64k = LZ4Pool{bufferSize: 1 << 16} // Lz4_64k is the l4z compression pool, with 64k buffer size - Lz4_256k = LZ4Pool{bufferSize: 1 << 18} // Lz4_256k uses 256k buffer - Lz4_1M = LZ4Pool{bufferSize: 1 << 20} // Lz4_1M uses 1M buffer - Lz4_4M = LZ4Pool{bufferSize: 1 << 22} // Lz4_4M uses 4M buffer - Flate = FlatePool{} - Zstd = ZstdPool{} - // Snappy is the snappy compression pool - Snappy SnappyPool - // Noop is the no compression pool - Noop NoopPool - // BytesBufferPool is a bytes buffer used for lines decompressed. 
// Buckets [0.5KB,1KB,2KB,4KB,8KB] BytesBufferPool = pool.New(1<<9, 1<<13, 2, func(size int) interface{} { return make([]byte, 0, size) }) @@ -81,315 +47,3 @@ var ( }, } ) - -func GetWriterPool(enc Encoding) WriterPool { - return GetReaderPool(enc).(WriterPool) -} - -func GetReaderPool(enc Encoding) ReaderPool { - switch enc { - case EncGZIP: - return &Gzip - case EncLZ4_64k: - return &Lz4_64k - case EncLZ4_256k: - return &Lz4_256k - case EncLZ4_1M: - return &Lz4_1M - case EncLZ4_4M: - return &Lz4_4M - case EncSnappy: - return &Snappy - case EncNone: - return &Noop - case EncFlate: - return &Flate - case EncZstd: - return &Zstd - default: - panic("unknown encoding") - } -} - -// GzipPool is a gun zip compression pool -type GzipPool struct { - readers sync.Pool - writers sync.Pool - level int -} - -// Gzip needs buffering to read efficiently. -// We need to be able to see the underlying gzip.Reader to Reset it. -type gzipBufferedReader struct { - *bufio.Reader - gzipReader *gzip.Reader -} - -// GetReader gets or creates a new CompressionReader and reset it to read from src -func (pool *GzipPool) GetReader(src io.Reader) (io.Reader, error) { - if r := pool.readers.Get(); r != nil { - reader := r.(*gzipBufferedReader) - err := reader.gzipReader.Reset(src) - if err != nil { - return nil, err - } - reader.Reader.Reset(reader.gzipReader) - return reader, nil - } - gzipReader, err := gzip.NewReader(src) - if err != nil { - return nil, err - } - return &gzipBufferedReader{ - gzipReader: gzipReader, - Reader: bufio.NewReaderSize(gzipReader, 4*1024), - }, nil -} - -// PutReader places back in the pool a CompressionReader -func (pool *GzipPool) PutReader(reader io.Reader) { - pool.readers.Put(reader) -} - -// GetWriter gets or creates a new CompressionWriter and reset it to write to dst -func (pool *GzipPool) GetWriter(dst io.Writer) io.WriteCloser { - if w := pool.writers.Get(); w != nil { - writer := w.(*gzip.Writer) - writer.Reset(dst) - return writer - } - - level := pool.level - if level == 0 { - level = gzip.DefaultCompression - } - w, err := gzip.NewWriterLevel(dst, level) - if err != nil { - panic(err) // never happens, error is only returned on wrong compression level. - } - return w -} - -// PutWriter places back in the pool a CompressionWriter -func (pool *GzipPool) PutWriter(writer io.WriteCloser) { - pool.writers.Put(writer) -} - -// FlatePool is a flate compression pool -type FlatePool struct { - readers sync.Pool - writers sync.Pool - level int -} - -// GetReader gets or creates a new CompressionReader and reset it to read from src -func (pool *FlatePool) GetReader(src io.Reader) (io.Reader, error) { - if r := pool.readers.Get(); r != nil { - reader := r.(flate.Resetter) - err := reader.Reset(src, nil) - if err != nil { - panic(err) - } - return reader.(io.Reader), nil - } - return flate.NewReader(src), nil -} - -// PutReader places back in the pool a CompressionReader -func (pool *FlatePool) PutReader(reader io.Reader) { - pool.readers.Put(reader) -} - -// GetWriter gets or creates a new CompressionWriter and reset it to write to dst -func (pool *FlatePool) GetWriter(dst io.Writer) io.WriteCloser { - if w := pool.writers.Get(); w != nil { - writer := w.(*flate.Writer) - writer.Reset(dst) - return writer - } - - level := pool.level - if level == 0 { - level = flate.DefaultCompression - } - w, err := flate.NewWriter(dst, level) - if err != nil { - panic(err) // never happens, error is only returned on wrong compression level. 
- } - return w -} - -// PutWriter places back in the pool a CompressionWriter -func (pool *FlatePool) PutWriter(writer io.WriteCloser) { - pool.writers.Put(writer) -} - -// GzipPool is a gun zip compression pool -type ZstdPool struct { - readers sync.Pool - writers sync.Pool -} - -// GetReader gets or creates a new CompressionReader and reset it to read from src -func (pool *ZstdPool) GetReader(src io.Reader) (io.Reader, error) { - if r := pool.readers.Get(); r != nil { - reader := r.(*zstd.Decoder) - err := reader.Reset(src) - if err != nil { - return nil, err - } - return reader, nil - } - reader, err := zstd.NewReader(src) - if err != nil { - return nil, err - } - runtime.SetFinalizer(reader, (*zstd.Decoder).Close) - return reader, nil -} - -// PutReader places back in the pool a CompressionReader -func (pool *ZstdPool) PutReader(reader io.Reader) { - pool.readers.Put(reader) -} - -// GetWriter gets or creates a new CompressionWriter and reset it to write to dst -func (pool *ZstdPool) GetWriter(dst io.Writer) io.WriteCloser { - if w := pool.writers.Get(); w != nil { - writer := w.(*zstd.Encoder) - writer.Reset(dst) - return writer - } - - w, err := zstd.NewWriter(dst) - if err != nil { - panic(err) // never happens, error is only returned on wrong compression level. - } - return w -} - -// PutWriter places back in the pool a CompressionWriter -func (pool *ZstdPool) PutWriter(writer io.WriteCloser) { - pool.writers.Put(writer) -} - -type LZ4Pool struct { - readers sync.Pool - writers sync.Pool - bufferSize uint32 // available values: 1<<16 (64k), 1<<18 (256k), 1<<20 (1M), 1<<22 (4M). Defaults to 4MB, if not set. -} - -// We need to be able to see the underlying lz4.Reader to Reset it. -type lz4BufferedReader struct { - *bufio.Reader - lz4Reader *lz4.Reader -} - -// GetReader gets or creates a new CompressionReader and reset it to read from src -func (pool *LZ4Pool) GetReader(src io.Reader) (io.Reader, error) { - var r *lz4BufferedReader - if pooled := pool.readers.Get(); pooled != nil { - r = pooled.(*lz4BufferedReader) - r.lz4Reader.Reset(src) - r.Reader.Reset(r.lz4Reader) - } else { - lz4Reader := lz4.NewReader(src) - r = &lz4BufferedReader{ - lz4Reader: lz4Reader, - Reader: bufio.NewReaderSize(lz4Reader, 4*1024), - } - } - return r, nil -} - -// PutReader places back in the pool a CompressionReader -func (pool *LZ4Pool) PutReader(reader io.Reader) { - pool.readers.Put(reader) -} - -// GetWriter gets or creates a new CompressionWriter and reset it to write to dst -func (pool *LZ4Pool) GetWriter(dst io.Writer) io.WriteCloser { - var w *lz4.Writer - if fromPool := pool.writers.Get(); fromPool != nil { - w = fromPool.(*lz4.Writer) - w.Reset(dst) - } else { - w = lz4.NewWriter(dst) - } - err := w.Apply( - lz4.ChecksumOption(false), - lz4.BlockSizeOption(lz4.BlockSize(pool.bufferSize)), - lz4.CompressionLevelOption(lz4.Fast), - ) - if err != nil { - panic(err) - } - return w -} - -// PutWriter places back in the pool a CompressionWriter -func (pool *LZ4Pool) PutWriter(writer io.WriteCloser) { - pool.writers.Put(writer) -} - -type SnappyPool struct { - readers sync.Pool - writers sync.Pool -} - -// GetReader gets or creates a new CompressionReader and reset it to read from src -func (pool *SnappyPool) GetReader(src io.Reader) (io.Reader, error) { - if r := pool.readers.Get(); r != nil { - reader := r.(*snappy.Reader) - reader.Reset(src) - return reader, nil - } - return snappy.NewReader(src), nil -} - -// PutReader places back in the pool a CompressionReader -func (pool *SnappyPool) 
PutReader(reader io.Reader) { - r := reader.(*snappy.Reader) - // Reset to free reference to the underlying reader - r.Reset(nil) - pool.readers.Put(reader) -} - -// GetWriter gets or creates a new CompressionWriter and reset it to write to dst -func (pool *SnappyPool) GetWriter(dst io.Writer) io.WriteCloser { - if w := pool.writers.Get(); w != nil { - writer := w.(*snappy.Writer) - writer.Reset(dst) - return writer - } - return snappy.NewBufferedWriter(dst) -} - -// PutWriter places back in the pool a CompressionWriter -func (pool *SnappyPool) PutWriter(writer io.WriteCloser) { - pool.writers.Put(writer) -} - -type NoopPool struct{} - -// GetReader gets or creates a new CompressionReader and reset it to read from src -func (pool *NoopPool) GetReader(src io.Reader) (io.Reader, error) { - return src, nil -} - -// PutReader places back in the pool a CompressionReader -func (pool *NoopPool) PutReader(_ io.Reader) {} - -type noopCloser struct { - io.Writer -} - -func (noopCloser) Close() error { return nil } - -// GetWriter gets or creates a new CompressionWriter and reset it to write to dst -func (pool *NoopPool) GetWriter(dst io.Writer) io.WriteCloser { - return noopCloser{dst} -} - -// PutWriter places back in the pool a CompressionWriter -func (pool *NoopPool) PutWriter(_ io.WriteCloser) {} diff --git a/pkg/chunkenc/pool_test.go b/pkg/chunkenc/pool_test.go deleted file mode 100644 index 04ecaadf9295..000000000000 --- a/pkg/chunkenc/pool_test.go +++ /dev/null @@ -1,60 +0,0 @@ -package chunkenc - -import ( - "bytes" - "io" - "os" - "runtime" - "runtime/pprof" - "sync" - "testing" - "time" - - "github.com/stretchr/testify/assert" - "github.com/stretchr/testify/require" -) - -func TestPool(t *testing.T) { - var wg sync.WaitGroup - for _, enc := range supportedEncoding { - enc := enc - for i := 0; i < 200; i++ { - wg.Add(1) - go func() { - defer wg.Done() - var ( - buf = bytes.NewBuffer(nil) - res = make([]byte, 1024) - wpool = GetWriterPool(enc) - rpool = GetReaderPool(enc) - ) - - w := wpool.GetWriter(buf) - defer wpool.PutWriter(w) - _, err := w.Write([]byte("test")) - require.NoError(t, err) - require.NoError(t, w.Close()) - - require.True(t, buf.Len() != 0, enc) - r, err := rpool.GetReader(bytes.NewBuffer(buf.Bytes())) - require.NoError(t, err) - defer rpool.PutReader(r) - n, err := r.Read(res) - if err != nil { - require.Error(t, err, io.EOF) - } - require.Equal(t, 4, n, enc.String()) - require.Equal(t, []byte("test"), res[:n], enc) - }() - } - } - - wg.Wait() - - if !assert.Eventually(t, func() bool { - runtime.GC() - return runtime.NumGoroutine() <= 50 - }, 5*time.Second, 10*time.Millisecond) { - _ = pprof.Lookup("goroutine").WriteTo(os.Stdout, 1) - } -} diff --git a/pkg/chunkenc/symbols.go b/pkg/chunkenc/symbols.go index e9f0b4952968..f5d3310921ab 100644 --- a/pkg/chunkenc/symbols.go +++ b/pkg/chunkenc/symbols.go @@ -12,6 +12,7 @@ import ( "github.com/pkg/errors" "github.com/prometheus/prometheus/model/labels" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/util" ) @@ -163,7 +164,7 @@ func (s *symbolizer) CheckpointSize() int { // SerializeTo serializes all the labels and writes to the writer in compressed format. // It returns back the number of bytes written and a checksum of the data written. 
-func (s *symbolizer) SerializeTo(w io.Writer, pool WriterPool) (int, []byte, error) { +func (s *symbolizer) SerializeTo(w io.Writer, pool compression.WriterPool) (int, []byte, error) { crc32Hash := crc32HashPool.Get().(hash.Hash32) defer crc32HashPool.Put(crc32Hash) @@ -324,7 +325,7 @@ func symbolizerFromCheckpoint(b []byte) *symbolizer { } // symbolizerFromEnc builds symbolizer from the bytes generated during serialization. -func symbolizerFromEnc(b []byte, pool ReaderPool) (*symbolizer, error) { +func symbolizerFromEnc(b []byte, pool compression.ReaderPool) (*symbolizer, error) { db := decbuf{b: b} numLabels := db.uvarint() diff --git a/pkg/chunkenc/symbols_test.go b/pkg/chunkenc/symbols_test.go index 7882001c75dd..1f286d7b56d5 100644 --- a/pkg/chunkenc/symbols_test.go +++ b/pkg/chunkenc/symbols_test.go @@ -8,6 +8,8 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/stretchr/testify/require" + + "github.com/grafana/loki/v3/pkg/compression" ) func TestSymbolizer(t *testing.T) { @@ -125,7 +127,7 @@ func TestSymbolizer(t *testing.T) { expectedUncompressedSize: 22, }, } { - for _, encoding := range testEncoding { + for _, encoding := range testEncodings { t.Run(fmt.Sprintf("%s - %s", tc.name, encoding), func(t *testing.T) { s := newSymbolizer() for i, labels := range tc.labelsToAdd { @@ -161,10 +163,10 @@ func TestSymbolizer(t *testing.T) { } buf.Reset() - _, _, err = s.SerializeTo(buf, GetWriterPool(encoding)) + _, _, err = s.SerializeTo(buf, compression.GetWriterPool(encoding)) require.NoError(t, err) - loaded, err = symbolizerFromEnc(buf.Bytes(), GetReaderPool(encoding)) + loaded, err = symbolizerFromEnc(buf.Bytes(), compression.GetReaderPool(encoding)) require.NoError(t, err) for i, symbols := range tc.expectedSymbols { require.Equal(t, tc.labelsToAdd[i], loaded.Lookup(symbols, nil)) diff --git a/pkg/chunkenc/unordered.go b/pkg/chunkenc/unordered.go index aed6606c7c6d..3132c77206ab 100644 --- a/pkg/chunkenc/unordered.go +++ b/pkg/chunkenc/unordered.go @@ -14,6 +14,7 @@ import ( "github.com/pkg/errors" "github.com/prometheus/prometheus/model/labels" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/iter" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/logql/log" @@ -28,7 +29,7 @@ type HeadBlock interface { CheckpointBytes(b []byte) ([]byte, error) CheckpointSize() int LoadBytes(b []byte) error - Serialise(pool WriterPool) ([]byte, error) + Serialise(pool compression.WriterPool) ([]byte, error) Reset() Bounds() (mint, maxt int64) Entries() int @@ -373,7 +374,7 @@ func (hb *unorderedHeadBlock) SampleIterator( // nolint:unused // serialise is used in creating an ordered, compressed block from an unorderedHeadBlock -func (hb *unorderedHeadBlock) Serialise(pool WriterPool) ([]byte, error) { +func (hb *unorderedHeadBlock) Serialise(pool compression.WriterPool) ([]byte, error) { inBuf := serializeBytesBufferPool.Get().(*bytes.Buffer) defer func() { inBuf.Reset() diff --git a/pkg/chunkenc/unordered_test.go b/pkg/chunkenc/unordered_test.go index 8a3420965bdb..fb341aaa8db9 100644 --- a/pkg/chunkenc/unordered_test.go +++ b/pkg/chunkenc/unordered_test.go @@ -12,6 +12,7 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/stretchr/testify/require" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/iter" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/logql/log" @@ -450,7 +451,7 @@ func BenchmarkHeadBlockWrites(b *testing.B) { } func 
TestUnorderedChunkIterators(t *testing.T) { - c := NewMemChunk(ChunkFormatV4, EncSnappy, UnorderedWithStructuredMetadataHeadBlockFmt, testBlockSize, testTargetSize) + c := NewMemChunk(ChunkFormatV4, compression.EncSnappy, UnorderedWithStructuredMetadataHeadBlockFmt, testBlockSize, testTargetSize) for i := 0; i < 100; i++ { // push in reverse order dup, err := c.Append(&logproto.Entry{ @@ -496,11 +497,11 @@ func TestUnorderedChunkIterators(t *testing.T) { } func BenchmarkUnorderedRead(b *testing.B) { - legacy := NewMemChunk(ChunkFormatV3, EncSnappy, OrderedHeadBlockFmt, testBlockSize, testTargetSize) + legacy := NewMemChunk(ChunkFormatV3, compression.EncSnappy, OrderedHeadBlockFmt, testBlockSize, testTargetSize) fillChunkClose(legacy, false) - ordered := NewMemChunk(ChunkFormatV3, EncSnappy, UnorderedHeadBlockFmt, testBlockSize, testTargetSize) + ordered := NewMemChunk(ChunkFormatV3, compression.EncSnappy, UnorderedHeadBlockFmt, testBlockSize, testTargetSize) fillChunkClose(ordered, false) - unordered := NewMemChunk(ChunkFormatV3, EncSnappy, UnorderedHeadBlockFmt, testBlockSize, testTargetSize) + unordered := NewMemChunk(ChunkFormatV3, compression.EncSnappy, UnorderedHeadBlockFmt, testBlockSize, testTargetSize) fillChunkRandomOrder(unordered, false) tcs := []struct { @@ -558,7 +559,7 @@ func BenchmarkUnorderedRead(b *testing.B) { } func TestUnorderedIteratorCountsAllEntries(t *testing.T) { - c := NewMemChunk(ChunkFormatV4, EncSnappy, UnorderedWithStructuredMetadataHeadBlockFmt, testBlockSize, testTargetSize) + c := NewMemChunk(ChunkFormatV4, compression.EncSnappy, UnorderedWithStructuredMetadataHeadBlockFmt, testBlockSize, testTargetSize) fillChunkRandomOrder(c, false) ct := 0 @@ -595,7 +596,7 @@ func TestUnorderedIteratorCountsAllEntries(t *testing.T) { } func chunkFrom(xs []logproto.Entry) ([]byte, error) { - c := NewMemChunk(ChunkFormatV4, EncSnappy, UnorderedWithStructuredMetadataHeadBlockFmt, testBlockSize, testTargetSize) + c := NewMemChunk(ChunkFormatV4, compression.EncSnappy, UnorderedWithStructuredMetadataHeadBlockFmt, testBlockSize, testTargetSize) for _, x := range xs { if _, err := c.Append(&x); err != nil { return nil, err @@ -655,7 +656,7 @@ func TestReorder(t *testing.T) { }, } { t.Run(tc.desc, func(t *testing.T) { - c := NewMemChunk(ChunkFormatV4, EncSnappy, UnorderedWithStructuredMetadataHeadBlockFmt, testBlockSize, testTargetSize) + c := NewMemChunk(ChunkFormatV4, compression.EncSnappy, UnorderedWithStructuredMetadataHeadBlockFmt, testBlockSize, testTargetSize) for _, x := range tc.input { dup, err := c.Append(&x) require.False(t, dup) @@ -674,7 +675,7 @@ func TestReorder(t *testing.T) { } func TestReorderAcrossBlocks(t *testing.T) { - c := NewMemChunk(ChunkFormatV4, EncSnappy, UnorderedWithStructuredMetadataHeadBlockFmt, testBlockSize, testTargetSize) + c := NewMemChunk(ChunkFormatV4, compression.EncSnappy, UnorderedWithStructuredMetadataHeadBlockFmt, testBlockSize, testTargetSize) for _, batch := range [][]int{ // ensure our blocks have overlapping bounds and must be reordered // before closing. 
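The chunkenc call sites above now take their encoding from the new pkg/compression package rather than package-local constants. A minimal sketch of the new wiring from an external caller's point of view, assuming only the identifiers shown in this diff (the helper name and the block/target sizes are illustrative, not Loki defaults):

package example

import (
	"time"

	"github.com/grafana/loki/v3/pkg/chunkenc"
	"github.com/grafana/loki/v3/pkg/compression"
	"github.com/grafana/loki/v3/pkg/logproto"
)

// newSnappyChunk builds a chunk the same way the tests above do, except that
// the encoding constant now lives in pkg/compression (it was chunkenc.EncSnappy).
func newSnappyChunk() (*chunkenc.MemChunk, error) {
	c := chunkenc.NewMemChunk(
		chunkenc.ChunkFormatV4,
		compression.EncSnappy,
		chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt,
		256*1024,  // block size (illustrative)
		1536*1024, // target size (illustrative)
	)
	// Append reports whether the entry was a duplicate, plus any error.
	if _, err := c.Append(&logproto.Entry{Timestamp: time.Now(), Line: "hello"}); err != nil {
		return nil, err
	}
	return c, nil
}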
diff --git a/pkg/chunkenc/util_test.go b/pkg/chunkenc/util_test.go index 3da8f9e6d5cb..0d75273d6c81 100644 --- a/pkg/chunkenc/util_test.go +++ b/pkg/chunkenc/util_test.go @@ -5,6 +5,7 @@ import ( "time" "github.com/grafana/loki/v3/pkg/chunkenc/testdata" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/logproto" ) @@ -23,7 +24,7 @@ func logprotoEntryWithStructuredMetadata(ts int64, line string, structuredMetada } } -func generateData(enc Encoding, chunksCount, blockSize, targetSize int) ([]Chunk, uint64) { +func generateData(enc compression.Encoding, chunksCount, blockSize, targetSize int) ([]Chunk, uint64) { chunks := []Chunk{} i := int64(0) size := uint64(0) diff --git a/pkg/compactor/compactor.go b/pkg/compactor/compactor.go index 88637ca8b4f6..908152a3edbb 100644 --- a/pkg/compactor/compactor.go +++ b/pkg/compactor/compactor.go @@ -12,13 +12,15 @@ import ( "time" "github.com/go-kit/log/level" - "github.com/grafana/dskit/kv" - "github.com/grafana/dskit/ring" - "github.com/grafana/dskit/services" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "github.com/grafana/dskit/backoff" + "github.com/grafana/dskit/kv" + "github.com/grafana/dskit/ring" + "github.com/grafana/dskit/services" + "github.com/grafana/loki/v3/pkg/analytics" "github.com/grafana/loki/v3/pkg/compactor/deletion" "github.com/grafana/loki/v3/pkg/compactor/retention" @@ -77,6 +79,7 @@ type Config struct { RetentionDeleteDelay time.Duration `yaml:"retention_delete_delay"` RetentionDeleteWorkCount int `yaml:"retention_delete_worker_count"` RetentionTableTimeout time.Duration `yaml:"retention_table_timeout"` + RetentionBackoffConfig backoff.Config `yaml:"retention_backoff_config"` DeleteRequestStore string `yaml:"delete_request_store"` DeleteRequestStoreKeyPrefix string `yaml:"delete_request_store_key_prefix"` DeleteBatchSize int `yaml:"delete_batch_size"` @@ -110,6 +113,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.IntVar(&cfg.TablesToCompact, "compactor.tables-to-compact", 0, "Number of tables that compactor will try to compact. Newer tables are chosen when this is less than the number of tables available.") f.IntVar(&cfg.SkipLatestNTables, "compactor.skip-latest-n-tables", 0, "Do not compact N latest tables. 
Together with -compactor.run-once and -compactor.tables-to-compact, this is useful when clearing compactor backlogs.") + cfg.RetentionBackoffConfig.RegisterFlagsWithPrefix("compactor.retention-backoff-config", f) // Ring skipFlags := []string{ "compactor.ring.num-tokens", @@ -323,7 +327,7 @@ func (c *Compactor) init(objectStoreClients map[config.DayTime]client.ObjectClie } chunkClient := client.NewClient(objectClient, encoder, schemaConfig) - sc.sweeper, err = retention.NewSweeper(retentionWorkDir, chunkClient, c.cfg.RetentionDeleteWorkCount, c.cfg.RetentionDeleteDelay, r) + sc.sweeper, err = retention.NewSweeper(retentionWorkDir, chunkClient, c.cfg.RetentionDeleteWorkCount, c.cfg.RetentionDeleteDelay, c.cfg.RetentionBackoffConfig, r) if err != nil { return fmt.Errorf("failed to init sweeper: %w", err) } diff --git a/pkg/compactor/deletion/delete_requests_table.go b/pkg/compactor/deletion/delete_requests_table.go index 80a47a5e6435..7d4c5cf4d254 100644 --- a/pkg/compactor/deletion/delete_requests_table.go +++ b/pkg/compactor/deletion/delete_requests_table.go @@ -13,7 +13,7 @@ import ( "github.com/go-kit/log/level" "go.etcd.io/bbolt" - "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/storage/chunk/client/local" "github.com/grafana/loki/v3/pkg/storage/stores/series/index" "github.com/grafana/loki/v3/pkg/storage/stores/shipper/indexshipper/storage" @@ -117,8 +117,9 @@ func (t *deleteRequestsTable) uploadFile() error { }() err = t.db.View(func(tx *bbolt.Tx) (err error) { - compressedWriter := chunkenc.Gzip.GetWriter(f) - defer chunkenc.Gzip.PutWriter(compressedWriter) + gzipPool := compression.GetWriterPool(compression.EncGZIP) + compressedWriter := gzipPool.GetWriter(f) + defer gzipPool.PutWriter(compressedWriter) defer func() { cerr := compressedWriter.Close() diff --git a/pkg/compactor/index_set.go b/pkg/compactor/index_set.go index 7102aef56425..76b5546a9628 100644 --- a/pkg/compactor/index_set.go +++ b/pkg/compactor/index_set.go @@ -12,8 +12,8 @@ import ( "github.com/go-kit/log/level" "github.com/pkg/errors" - "github.com/grafana/loki/v3/pkg/chunkenc" "github.com/grafana/loki/v3/pkg/compactor/retention" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/storage/chunk/client/util" "github.com/grafana/loki/v3/pkg/storage/stores/shipper/indexshipper/index" "github.com/grafana/loki/v3/pkg/storage/stores/shipper/indexshipper/storage" @@ -229,8 +229,9 @@ func (is *indexSet) upload() error { } }() - compressedWriter := chunkenc.Gzip.GetWriter(f) - defer chunkenc.Gzip.PutWriter(compressedWriter) + gzipPool := compression.GetWriterPool(compression.EncGZIP) + compressedWriter := gzipPool.GetWriter(f) + defer gzipPool.PutWriter(compressedWriter) idxReader, err := idx.Reader() if err != nil { diff --git a/pkg/compactor/retention/retention.go b/pkg/compactor/retention/retention.go index 0a4aba59be47..96eafcc2a7d5 100644 --- a/pkg/compactor/retention/retention.go +++ b/pkg/compactor/retention/retention.go @@ -11,6 +11,7 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" + "github.com/grafana/dskit/backoff" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" @@ -272,9 +273,17 @@ type Sweeper struct { markerProcessor MarkerProcessor chunkClient ChunkClient sweeperMetrics *sweeperMetrics + backoffConfig backoff.Config } -func NewSweeper(workingDir string, deleteClient ChunkClient, deleteWorkerCount int, 
minAgeDelete time.Duration, r prometheus.Registerer) (*Sweeper, error) { +func NewSweeper( + workingDir string, + deleteClient ChunkClient, + deleteWorkerCount int, + minAgeDelete time.Duration, + backoffConfig backoff.Config, + r prometheus.Registerer, +) (*Sweeper, error) { m := newSweeperMetrics(r) p, err := newMarkerStorageReader(workingDir, deleteWorkerCount, minAgeDelete, m) @@ -285,34 +294,43 @@ func NewSweeper(workingDir string, deleteClient ChunkClient, deleteWorkerCount i markerProcessor: p, chunkClient: deleteClient, sweeperMetrics: m, + backoffConfig: backoffConfig, }, nil } func (s *Sweeper) Start() { - s.markerProcessor.Start(func(ctx context.Context, chunkId []byte) error { - status := statusSuccess - start := time.Now() - defer func() { - s.sweeperMetrics.deleteChunkDurationSeconds.WithLabelValues(status).Observe(time.Since(start).Seconds()) - }() - chunkIDString := unsafeGetString(chunkId) - userID, err := getUserIDFromChunkID(chunkId) - if err != nil { - return err - } + s.markerProcessor.Start(s.deleteChunk) +} +func (s *Sweeper) deleteChunk(ctx context.Context, chunkID []byte) error { + status := statusSuccess + start := time.Now() + defer func() { + s.sweeperMetrics.deleteChunkDurationSeconds.WithLabelValues(status).Observe(time.Since(start).Seconds()) + }() + chunkIDString := unsafeGetString(chunkID) + userID, err := getUserIDFromChunkID(chunkID) + if err != nil { + return err + } + + retry := backoff.New(ctx, s.backoffConfig) + for retry.Ongoing() { err = s.chunkClient.DeleteChunk(ctx, unsafeGetString(userID), chunkIDString) + if err == nil { + return nil + } if s.chunkClient.IsChunkNotFoundErr(err) { status = statusNotFound level.Debug(util_log.Logger).Log("msg", "delete on not found chunk", "chunkID", chunkIDString) return nil } - if err != nil { - level.Error(util_log.Logger).Log("msg", "error deleting chunk", "chunkID", chunkIDString, "err", err) - status = statusFailure - } - return err - }) + retry.Wait() + } + + level.Error(util_log.Logger).Log("msg", "error deleting chunk", "chunkID", chunkIDString, "err", err) + status = statusFailure + return err } func getUserIDFromChunkID(chunkID []byte) ([]byte, error) { diff --git a/pkg/compactor/retention/retention_test.go b/pkg/compactor/retention/retention_test.go index 4885c835003c..32dac3293a09 100644 --- a/pkg/compactor/retention/retention_test.go +++ b/pkg/compactor/retention/retention_test.go @@ -4,6 +4,7 @@ import ( "context" "crypto/sha256" "encoding/base64" + "fmt" "os" "path" "path/filepath" @@ -14,6 +15,7 @@ import ( "testing" "time" + "github.com/grafana/dskit/backoff" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" "github.com/prometheus/prometheus/model/labels" @@ -21,6 +23,7 @@ import ( "github.com/stretchr/testify/require" "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" ingesterclient "github.com/grafana/loki/v3/pkg/ingester/client" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/logql/log" @@ -31,14 +34,37 @@ import ( ) type mockChunkClient struct { - mtx sync.Mutex - deletedChunks map[string]struct{} + mtx sync.Mutex + deletedChunks map[string]struct{} + unstableDeletion bool + perObjectCounter map[string]uint32 +}
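The deleteChunk loop above wraps every DeleteChunk call in a dskit backoff, returning early on success or not-found and surfacing the last error once retries are exhausted. A minimal standalone sketch of the same pattern, assuming only the dskit backoff API (the function name and config values here are illustrative; the real values come from the compactor's retention_backoff_config):

package example

import (
	"context"
	"time"

	"github.com/grafana/dskit/backoff"
)

// retryDelete keeps calling del while the backoff allows it, returning nil on
// the first success and the last error once retries are exhausted. It mirrors
// the deleteChunk loop above, minus the not-found short-circuit and metrics.
func retryDelete(ctx context.Context, del func(context.Context) error) error {
	cfg := backoff.Config{
		MinBackoff: 100 * time.Millisecond, // illustrative values,
		MaxBackoff: 10 * time.Second,       // not Loki defaults
		MaxRetries: 2,
	}
	retry := backoff.New(ctx, cfg)
	var err error
	for retry.Ongoing() {
		if err = del(ctx); err == nil {
			return nil
		}
		retry.Wait()
	}
	return err
}

+// newMockChunkClient creates a client that, when `unstableDeletion` is true, fails every other call to DeleteChunk for a given chunk, starting with the first.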
+func newMockChunkClient(unstableDeletion bool) *mockChunkClient { + return &mockChunkClient{ + deletedChunks: map[string]struct{}{}, + unstableDeletion: unstableDeletion, + perObjectCounter: map[string]uint32{}, + } +} + +// shouldFail returns true for every other call per object, starting with the first +func (m *mockChunkClient) shouldFail(objectKey string) bool { + if !m.unstableDeletion { + return false + } + shouldFail := m.perObjectCounter[objectKey]%2 == 0 + m.perObjectCounter[objectKey]++ + return shouldFail } func (m *mockChunkClient) DeleteChunk(_ context.Context, _, chunkID string) error { m.mtx.Lock() defer m.mtx.Unlock() - + if m.shouldFail(chunkID) { + return fmt.Errorf("chunk deletion for chunkID:%s failed by mockChunkClient", chunkID) + } m.deletedChunks[string([]byte(chunkID))] = struct{}{} // forces a copy, because this string is only valid within the delete fn. return nil } @@ -143,8 +169,9 @@ func Test_Retention(t *testing.T) { // marks and sweep expiration := NewExpirationChecker(tt.limits) workDir := filepath.Join(t.TempDir(), "retention") - chunkClient := &mockChunkClient{deletedChunks: map[string]struct{}{}} - sweep, err := NewSweeper(workDir, chunkClient, 10, 0, nil) + // the flaky client must not fail the test, because deletions are retried + chunkClient := newMockChunkClient(true) + sweep, err := NewSweeper(workDir, chunkClient, 10, 0, backoff.Config{MaxRetries: 2}, nil) require.NoError(t, err) sweep.Start() defer sweep.Stop() @@ -175,6 +202,38 @@ func Test_Retention(t *testing.T) { } } +func Test_Sweeper_deleteChunk(t *testing.T) { + chunkID := "1/3fff2c2d7595e046:1916fa8c4bd:1916fdfb33d:bd55fc5" + tests := map[string]struct { + maxRetries int + expectedError error + }{ + "expected error if chunk is not deleted and retry is disabled": { + maxRetries: 1, + expectedError: fmt.Errorf("chunk deletion for chunkID:%s failed by mockChunkClient", chunkID), + }, + "expected no error if chunk is not deleted at the first attempt but retried": { + maxRetries: 2, + }, + } + for name, data := range tests { + t.Run(name, func(t *testing.T) { + workDir := filepath.Join(t.TempDir(), "retention") + chunkClient := newMockChunkClient(true) + sweep, err := NewSweeper(workDir, chunkClient, 10, 0, backoff.Config{MaxRetries: data.maxRetries}, nil) + require.NoError(t, err) + + err = sweep.deleteChunk(context.Background(), []byte(chunkID)) + if data.expectedError != nil { + require.Equal(t, data.expectedError, err) + } else { + require.NoError(t, err) + } + }) + } +} + type noopWriter struct { count int64 } @@ -220,7 +279,7 @@ func createChunk(t testing.TB, userID string, lbs labels.Labels, from model.Time labelsBuilder.Set(labels.MetricName, "logs") metric := labelsBuilder.Labels() fp := ingesterclient.Fingerprint(lbs) - chunkEnc := chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, chunkenc.EncSnappy, chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt, blockSize, targetSize) + chunkEnc := chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, compression.EncSnappy, chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt, blockSize, targetSize) for ts := from; !ts.After(through); ts = ts.Add(1 * time.Minute) { dup, err := chunkEnc.Append(&logproto.Entry{ diff --git a/pkg/compression/encoding.go b/pkg/compression/encoding.go new file mode 100644 index 000000000000..ecef31f09325 --- /dev/null +++ b/pkg/compression/encoding.go @@ -0,0 +1,83 @@ +package compression + +import ( + "fmt" + "strings" +) + +// Encoding identifies an available compression type. +type Encoding byte + +// The different available encodings.
+// Make sure to preserve the order, as the numeric values are serialized! +const ( + EncNone Encoding = iota + EncGZIP + EncDumb // not supported + EncLZ4_64k + EncSnappy + EncLZ4_256k + EncLZ4_1M + EncLZ4_4M + EncFlate + EncZstd +) + +var supportedEncoding = []Encoding{ + EncNone, + EncGZIP, + EncLZ4_64k, + EncSnappy, + EncLZ4_256k, + EncLZ4_1M, + EncLZ4_4M, + EncFlate, + EncZstd, +} + +func (e Encoding) String() string { + switch e { + case EncGZIP: + return "gzip" + case EncNone: + return "none" + case EncLZ4_64k: + return "lz4-64k" + case EncLZ4_256k: + return "lz4-256k" + case EncLZ4_1M: + return "lz4-1M" + case EncLZ4_4M: + return "lz4" + case EncSnappy: + return "snappy" + case EncFlate: + return "flate" + case EncZstd: + return "zstd" + default: + return "unknown" + } +} + +// ParseEncoding parses a chunk encoding (compression algorithm) by its name. +func ParseEncoding(enc string) (Encoding, error) { + for _, e := range supportedEncoding { + if strings.EqualFold(e.String(), enc) { + return e, nil + } + } + return 0, fmt.Errorf("invalid encoding: %s, supported: %s", enc, SupportedEncoding()) +} + +// SupportedEncoding returns the list of supported encodings as a comma-separated string. +func SupportedEncoding() string { + var sb strings.Builder + for i := range supportedEncoding { + sb.WriteString(supportedEncoding[i].String()) + if i != len(supportedEncoding)-1 { + sb.WriteString(", ") + } + } + return sb.String() +} diff --git a/pkg/compression/encoding_test.go b/pkg/compression/encoding_test.go new file mode 100644 index 000000000000..d67323ebb2d4 --- /dev/null +++ b/pkg/compression/encoding_test.go @@ -0,0 +1,26 @@ +package compression + +import "testing" + +func TestParseEncoding(t *testing.T) { + tests := []struct { + enc string + want Encoding + wantErr bool + }{ + {"gzip", EncGZIP, false}, + {"bad", 0, true}, + } + for _, tt := range tests { + t.Run(tt.enc, func(t *testing.T) { + got, err := ParseEncoding(tt.enc) + if (err != nil) != tt.wantErr { + t.Errorf("ParseEncoding() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got != tt.want { + t.Errorf("ParseEncoding() = %v, want %v", got, tt.want) + } + }) + } +} diff --git a/pkg/compression/fileext.go b/pkg/compression/fileext.go new file mode 100644 index 000000000000..8cd09c392d08 --- /dev/null +++ b/pkg/compression/fileext.go @@ -0,0 +1,50 @@ +package compression + +import "fmt" + +const ( + ExtNone = "" + ExtGZIP = ".gz" + ExtSnappy = ".sz" + ExtLZ4 = ".lz4" + ExtFlate = ".zz" + ExtZstd = ".zst" +) + +func ToFileExtension(e Encoding) string { + switch e { + case EncNone: + return ExtNone + case EncGZIP: + return ExtGZIP + case EncLZ4_64k, EncLZ4_256k, EncLZ4_1M, EncLZ4_4M: + return ExtLZ4 + case EncSnappy: + return ExtSnappy + case EncFlate: + return ExtFlate + case EncZstd: + return ExtZstd + default: + panic(fmt.Sprintf("invalid encoding: %d, supported: %s", e, SupportedEncoding())) + } +} + +func FromFileExtension(ext string) Encoding { + switch ext { + case ExtNone: + return EncNone + case ExtGZIP: + return EncGZIP + case ExtLZ4: + return EncLZ4_4M + case ExtSnappy: + return EncSnappy + case ExtFlate: + return EncFlate + case ExtZstd: + return EncZstd + default: + panic(fmt.Sprintf("invalid file extension: %s", ext)) + } +}
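Taken together, encoding.go and fileext.go give callers a round trip between config strings, Encoding values, and object-store file extensions. A short usage sketch, assuming only the API shown above (package and example values are illustrative):

package main

import (
	"fmt"

	"github.com/grafana/loki/v3/pkg/compression"
)

func main() {
	// Parse a user-facing name; matching is case-insensitive.
	enc, err := compression.ParseEncoding("snappy")
	if err != nil {
		panic(err)
	}
	fmt.Println(enc)                              // "snappy"
	fmt.Println(compression.ToFileExtension(enc)) // ".sz"

	// The four lz4 variants share one extension, so the reverse mapping is
	// lossy: ".lz4" always yields EncLZ4_4M.
	fmt.Println(compression.FromFileExtension(".lz4") == compression.EncLZ4_4M) // true
}

diff --git a/pkg/compression/pool.go b/pkg/compression/pool.go new file mode 100644 index 000000000000..b68ff7de47b1 --- /dev/null +++ b/pkg/compression/pool.go @@ -0,0 +1,368 @@ +package compression + +import ( + "bufio" + "io" + "runtime" + "sync" + + snappylib "github.com/golang/snappy" + flatelib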
"github.com/klauspost/compress/flate" + gziplib "github.com/klauspost/compress/gzip" + zstdlib "github.com/klauspost/compress/zstd" + lz4lib "github.com/pierrec/lz4/v4" +) + +// WriterPool is a pool of io.Writer +// This is used by every chunk to avoid unnecessary allocations. +type WriterPool interface { + GetWriter(io.Writer) io.WriteCloser + PutWriter(io.WriteCloser) +} + +// ReaderPool is a pool of io.Reader +// ReaderPool similar to WriterPool but for reading chunks. +type ReaderPool interface { + GetReader(io.Reader) (io.Reader, error) + PutReader(io.Reader) +} + +// ReaderPool is a pool of io.Reader and io.Writer +type ReaderWriterPool interface { + ReaderPool + WriterPool +} + +var ( + // gzip is the gnu zip compression pool + gzip = GzipPool{level: gziplib.DefaultCompression} + // lz4_* are the lz4 compression pools + lz4_64k = LZ4Pool{bufferSize: 1 << 16} // lz4_64k is the l4z compression pool, with 64k buffer size + lz4_256k = LZ4Pool{bufferSize: 1 << 18} // lz4_256k uses 256k buffer + lz4_1M = LZ4Pool{bufferSize: 1 << 20} // lz4_1M uses 1M buffer + lz4_4M = LZ4Pool{bufferSize: 1 << 22} // lz4_4M uses 4M buffer + // flate is the flate compression pool + flate = FlatePool{} + // zstd is the zstd compression pool + zstd = ZstdPool{} + // snappy is the snappy compression pool + snappy = SnappyPool{} + // noop is the no compression pool + noop = NoopPool{} +) + +func GetWriterPool(enc Encoding) WriterPool { + return GetPool(enc).(WriterPool) +} + +func GetReaderPool(enc Encoding) ReaderPool { + return GetPool(enc).(ReaderPool) +} + +func GetPool(enc Encoding) ReaderWriterPool { + switch enc { + case EncGZIP: + return &gzip + case EncLZ4_64k: + return &lz4_64k + case EncLZ4_256k: + return &lz4_256k + case EncLZ4_1M: + return &lz4_1M + case EncLZ4_4M: + return &lz4_4M + case EncSnappy: + return &snappy + case EncNone: + return &noop + case EncFlate: + return &flate + case EncZstd: + return &zstd + default: + panic("unknown encoding") + } +} + +// GzipPool is a gnu zip compression pool +type GzipPool struct { + readers sync.Pool + writers sync.Pool + level int +} + +// Gzip needs buffering to read efficiently. +// We need to be able to see the underlying gzip.Reader to Reset it. +type gzipBufferedReader struct { + *bufio.Reader + gzipReader *gziplib.Reader +} + +// GetReader gets or creates a new CompressionReader and reset it to read from src +func (pool *GzipPool) GetReader(src io.Reader) (io.Reader, error) { + if r := pool.readers.Get(); r != nil { + reader := r.(*gzipBufferedReader) + err := reader.gzipReader.Reset(src) + if err != nil { + return nil, err + } + reader.Reader.Reset(reader.gzipReader) + return reader, nil + } + gzipReader, err := gziplib.NewReader(src) + if err != nil { + return nil, err + } + return &gzipBufferedReader{ + gzipReader: gzipReader, + Reader: bufio.NewReaderSize(gzipReader, 4*1024), + }, nil +} + +// PutReader places back in the pool a CompressionReader +func (pool *GzipPool) PutReader(reader io.Reader) { + pool.readers.Put(reader) +} + +// GetWriter gets or creates a new CompressionWriter and reset it to write to dst +func (pool *GzipPool) GetWriter(dst io.Writer) io.WriteCloser { + if w := pool.writers.Get(); w != nil { + writer := w.(*gziplib.Writer) + writer.Reset(dst) + return writer + } + + level := pool.level + if level == 0 { + level = gziplib.DefaultCompression + } + w, err := gziplib.NewWriterLevel(dst, level) + if err != nil { + panic(err) // never happens, error is only returned on wrong compression level. 
+ } + return w +} + +// PutWriter places back in the pool a CompressionWriter +func (pool *GzipPool) PutWriter(writer io.WriteCloser) { + pool.writers.Put(writer) +} + +// FlatePool is a flate compression pool +type FlatePool struct { + readers sync.Pool + writers sync.Pool + level int +} + +// GetReader gets or creates a new CompressionReader and resets it to read from src +func (pool *FlatePool) GetReader(src io.Reader) (io.Reader, error) { + if r := pool.readers.Get(); r != nil { + reader := r.(flatelib.Resetter) + err := reader.Reset(src, nil) + if err != nil { + panic(err) + } + return reader.(io.Reader), nil + } + return flatelib.NewReader(src), nil +} + +// PutReader places back in the pool a CompressionReader +func (pool *FlatePool) PutReader(reader io.Reader) { + pool.readers.Put(reader) +} + +// GetWriter gets or creates a new CompressionWriter and resets it to write to dst +func (pool *FlatePool) GetWriter(dst io.Writer) io.WriteCloser { + if w := pool.writers.Get(); w != nil { + writer := w.(*flatelib.Writer) + writer.Reset(dst) + return writer + } + + level := pool.level + if level == 0 { + level = flatelib.DefaultCompression + } + w, err := flatelib.NewWriter(dst, level) + if err != nil { + panic(err) // never happens, error is only returned on wrong compression level. + } + return w +} + +// PutWriter places back in the pool a CompressionWriter +func (pool *FlatePool) PutWriter(writer io.WriteCloser) { + pool.writers.Put(writer) +} + +// ZstdPool is a zstd compression pool +type ZstdPool struct { + readers sync.Pool + writers sync.Pool +} + +// GetReader gets or creates a new CompressionReader and resets it to read from src +func (pool *ZstdPool) GetReader(src io.Reader) (io.Reader, error) { + if r := pool.readers.Get(); r != nil { + reader := r.(*zstdlib.Decoder) + err := reader.Reset(src) + if err != nil { + return nil, err + } + return reader, nil + } + reader, err := zstdlib.NewReader(src) + if err != nil { + return nil, err + } + runtime.SetFinalizer(reader, (*zstdlib.Decoder).Close) + return reader, nil +} + +// PutReader places back in the pool a CompressionReader +func (pool *ZstdPool) PutReader(reader io.Reader) { + pool.readers.Put(reader) +} + +// GetWriter gets or creates a new CompressionWriter and resets it to write to dst +func (pool *ZstdPool) GetWriter(dst io.Writer) io.WriteCloser { + if w := pool.writers.Get(); w != nil { + writer := w.(*zstdlib.Encoder) + writer.Reset(dst) + return writer + } + + w, err := zstdlib.NewWriter(dst) + if err != nil { + panic(err) // never happens, error is only returned on wrong compression level. + } + return w +} + +// PutWriter places back in the pool a CompressionWriter +func (pool *ZstdPool) PutWriter(writer io.WriteCloser) { + pool.writers.Put(writer) +} + +type LZ4Pool struct { + readers sync.Pool + writers sync.Pool + bufferSize uint32 // available values: 1<<16 (64k), 1<<18 (256k), 1<<20 (1M), 1<<22 (4M). Defaults to 4MB, if not set. +} + +// We need to be able to see the underlying lz4.Reader to Reset it.
+type lz4BufferedReader struct { + *bufio.Reader + lz4Reader *lz4lib.Reader +} + +// GetReader gets or creates a new CompressionReader and resets it to read from src +func (pool *LZ4Pool) GetReader(src io.Reader) (io.Reader, error) { + var r *lz4BufferedReader + if pooled := pool.readers.Get(); pooled != nil { + r = pooled.(*lz4BufferedReader) + r.lz4Reader.Reset(src) + r.Reader.Reset(r.lz4Reader) + } else { + lz4Reader := lz4lib.NewReader(src) + r = &lz4BufferedReader{ + lz4Reader: lz4Reader, + Reader: bufio.NewReaderSize(lz4Reader, 4*1024), + } + } + return r, nil +} + +// PutReader places back in the pool a CompressionReader +func (pool *LZ4Pool) PutReader(reader io.Reader) { + pool.readers.Put(reader) +} + +// GetWriter gets or creates a new CompressionWriter and resets it to write to dst +func (pool *LZ4Pool) GetWriter(dst io.Writer) io.WriteCloser { + var w *lz4lib.Writer + if fromPool := pool.writers.Get(); fromPool != nil { + w = fromPool.(*lz4lib.Writer) + w.Reset(dst) + } else { + w = lz4lib.NewWriter(dst) + } + err := w.Apply( + lz4lib.ChecksumOption(false), + lz4lib.BlockSizeOption(lz4lib.BlockSize(pool.bufferSize)), + lz4lib.CompressionLevelOption(lz4lib.Fast), + ) + if err != nil { + panic(err) + } + return w +} + +// PutWriter places back in the pool a CompressionWriter +func (pool *LZ4Pool) PutWriter(writer io.WriteCloser) { + pool.writers.Put(writer) +} + +type SnappyPool struct { + readers sync.Pool + writers sync.Pool +} + +// GetReader gets or creates a new CompressionReader and resets it to read from src +func (pool *SnappyPool) GetReader(src io.Reader) (io.Reader, error) { + if r := pool.readers.Get(); r != nil { + reader := r.(*snappylib.Reader) + reader.Reset(src) + return reader, nil + } + return snappylib.NewReader(src), nil +} + +// PutReader places back in the pool a CompressionReader +func (pool *SnappyPool) PutReader(reader io.Reader) { + r := reader.(*snappylib.Reader) + // Reset to free reference to the underlying reader + r.Reset(nil) + pool.readers.Put(reader) +} + +// GetWriter gets or creates a new CompressionWriter and resets it to write to dst +func (pool *SnappyPool) GetWriter(dst io.Writer) io.WriteCloser { + if w := pool.writers.Get(); w != nil { + writer := w.(*snappylib.Writer) + writer.Reset(dst) + return writer + } + return snappylib.NewBufferedWriter(dst) +} + +// PutWriter places back in the pool a CompressionWriter +func (pool *SnappyPool) PutWriter(writer io.WriteCloser) { + pool.writers.Put(writer) +} + +type NoopPool struct{} + +// GetReader gets or creates a new CompressionReader and resets it to read from src +func (pool *NoopPool) GetReader(src io.Reader) (io.Reader, error) { + return src, nil +} + +// PutReader places back in the pool a CompressionReader +func (pool *NoopPool) PutReader(_ io.Reader) {} + +type noopCloser struct { + io.Writer +} + +func (noopCloser) Close() error { return nil } + +// GetWriter gets or creates a new CompressionWriter and resets it to write to dst +func (pool *NoopPool) GetWriter(dst io.Writer) io.WriteCloser { + return noopCloser{dst} +} + +// PutWriter places back in the pool a CompressionWriter +func (pool *NoopPool) PutWriter(_ io.WriteCloser) {}
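These pools are the package's workhorse API; the delete_requests_table.go and index_set.go hunks earlier in this diff use exactly this get/write/close/put sequence with the gzip pool. A minimal round trip, assuming only the API shown above:

package main

import (
	"bytes"
	"fmt"
	"io"

	"github.com/grafana/loki/v3/pkg/compression"
)

func main() {
	enc := compression.EncSnappy
	wpool := compression.GetWriterPool(enc)
	rpool := compression.GetReaderPool(enc)

	// Compress: always Close before reading so buffered frames are flushed,
	// then return the writer to its pool.
	var buf bytes.Buffer
	w := wpool.GetWriter(&buf)
	if _, err := w.Write([]byte("hello loki")); err != nil {
		panic(err)
	}
	if err := w.Close(); err != nil {
		panic(err)
	}
	wpool.PutWriter(w)

	// Decompress and hand the reader back to the pool.
	r, err := rpool.GetReader(&buf)
	if err != nil {
		panic(err)
	}
	out, err := io.ReadAll(r)
	if err != nil {
		panic(err)
	}
	rpool.PutReader(r)
	fmt.Println(string(out)) // "hello loki"
}

diff --git a/pkg/compression/pool_test.go b/pkg/compression/pool_test.go new file mode 100644 index 000000000000..b39bbe0ad6f4 --- /dev/null +++ b/pkg/compression/pool_test.go @@ -0,0 +1,64 @@ +package compression + +import ( + "bytes" + "io" + "os" + "runtime" + "runtime/pprof" + "sync" + "testing" + "time" + + "github.com/stretchr/testify/assert" +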
"github.com/stretchr/testify/require" +) + +func TestPool(t *testing.T) { + for _, enc := range supportedEncoding { + enc := enc + t.Run(enc.String(), func(t *testing.T) { + var wg sync.WaitGroup + + for i := 0; i < 200; i++ { + wg.Add(1) + go func() { + defer wg.Done() + var ( + buf = bytes.NewBuffer(nil) + res = make([]byte, 1024) + wpool = GetWriterPool(enc) + rpool = GetReaderPool(enc) + ) + + w := wpool.GetWriter(buf) + defer wpool.PutWriter(w) + _, err := w.Write([]byte("test")) + require.NoError(t, err) + require.NoError(t, w.Close()) + + require.True(t, buf.Len() != 0, enc) + r, err := rpool.GetReader(bytes.NewBuffer(buf.Bytes())) + require.NoError(t, err) + defer rpool.PutReader(r) + n, err := r.Read(res) + if err != nil { + require.Error(t, err, io.EOF) + } + require.Equal(t, 4, n, enc.String()) + require.Equal(t, []byte("test"), res[:n], enc) + }() + } + + wg.Wait() + + if !assert.Eventually(t, func() bool { + runtime.GC() + return runtime.NumGoroutine() <= 50 + }, 5*time.Second, 10*time.Millisecond) { + _ = pprof.Lookup("goroutine").WriteTo(os.Stdout, 1) + } + + }) + } +} diff --git a/pkg/distributor/distributor.go b/pkg/distributor/distributor.go index f6ae454e1482..476bad507ea0 100644 --- a/pkg/distributor/distributor.go +++ b/pkg/distributor/distributor.go @@ -10,6 +10,7 @@ import ( "sort" "strconv" "strings" + "sync" "time" "unicode" "unsafe" @@ -19,6 +20,7 @@ import ( "github.com/go-kit/log/level" "github.com/gogo/status" "github.com/prometheus/prometheus/model/labels" + "github.com/twmb/franz-go/pkg/kgo" "go.opentelemetry.io/collector/pdata/plog" "google.golang.org/grpc/codes" @@ -44,6 +46,7 @@ import ( "github.com/grafana/loki/v3/pkg/distributor/writefailures" "github.com/grafana/loki/v3/pkg/ingester" "github.com/grafana/loki/v3/pkg/ingester/client" + "github.com/grafana/loki/v3/pkg/kafka" "github.com/grafana/loki/v3/pkg/loghttp/push" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/logql/log/logfmt" @@ -77,6 +80,7 @@ var allowedLabelsForLevel = map[string]struct{}{ type Config struct { // Distributors ring DistributorRing RingConfig `yaml:"ring,omitempty"` + PushWorkerCount int `yaml:"push_worker_count"` // For testing. factory ring_client.PoolFactory `yaml:"-"` @@ -88,6 +92,10 @@ type Config struct { WriteFailuresLogging writefailures.Cfg `yaml:"write_failures_logging" doc:"description=Customize the logging of write failures."` OTLPConfig push.GlobalOTLPConfig `yaml:"otlp_config"` + + KafkaEnabled bool `yaml:"kafka_writes_enabled"` + IngesterEnabled bool `yaml:"ingester_writes_enabled"` + KafkaConfig kafka.Config `yaml:"-"` } // RegisterFlags registers distributor-related flags. @@ -96,6 +104,16 @@ func (cfg *Config) RegisterFlags(fs *flag.FlagSet) { cfg.DistributorRing.RegisterFlags(fs) cfg.RateStore.RegisterFlagsWithPrefix("distributor.rate-store", fs) cfg.WriteFailuresLogging.RegisterFlagsWithPrefix("distributor.write-failures-logging", fs) + fs.IntVar(&cfg.PushWorkerCount, "distributor.push-worker-count", 256, "Number of workers to push batches to ingesters.") + fs.BoolVar(&cfg.KafkaEnabled, "distributor.kafka-writes-enabled", false, "Enable writes to Kafka during Push requests.") + fs.BoolVar(&cfg.IngesterEnabled, "distributor.ingester-writes-enabled", true, "Enable writes to Ingesters during Push requests. 
Defaults to true.") +} + +func (cfg *Config) Validate() error { + if !cfg.KafkaEnabled && !cfg.IngesterEnabled { + return fmt.Errorf("at least one of kafka and ingestor writes must be enabled") + } + return nil } // RateStore manages the ingestion rate of streams, populated by data fetched from ingesters. @@ -103,6 +121,11 @@ type RateStore interface { RateFor(tenantID string, streamHash uint64) (int64, float64) } +type KafkaProducer interface { + ProduceSync(ctx context.Context, records []*kgo.Record) kgo.ProduceResults + Close() +} + // Distributor coordinates replicates and distribution of log streams. type Distributor struct { services.Service @@ -145,7 +168,19 @@ type Distributor struct { replicationFactor prometheus.Gauge streamShardCount prometheus.Counter - usageTracker push.UsageTracker + usageTracker push.UsageTracker + ingesterTasks chan pushIngesterTask + ingesterTaskWg sync.WaitGroup + + // kafka + kafkaWriter KafkaProducer + partitionRing ring.PartitionRingReader + + // kafka metrics + kafkaAppends *prometheus.CounterVec + kafkaWriteBytesTotal prometheus.Counter + kafkaWriteLatency prometheus.Histogram + kafkaRecordsPerRequest prometheus.Histogram } // New a distributor creates. @@ -154,6 +189,7 @@ func New( clientCfg client.Config, configs *runtime.TenantConfigs, ingestersRing ring.ReadRing, + partitionRing ring.PartitionRingReader, overrides Limits, registerer prometheus.Registerer, metricsNamespace string, @@ -192,6 +228,20 @@ func New( return nil, err } + if partitionRing == nil && cfg.KafkaEnabled { + return nil, fmt.Errorf("partition ring is required for kafka writes") + } + + var kafkaWriter KafkaProducer + if cfg.KafkaEnabled { + kafkaClient, err := kafka.NewWriterClient(cfg.KafkaConfig, 20, logger, registerer) + if err != nil { + return nil, fmt.Errorf("failed to start kafka client: %w", err) + } + kafkaWriter = kafka.NewProducer(kafkaClient, cfg.KafkaConfig.ProducerMaxBufferedBytes, + prometheus.WrapRegistererWithPrefix("_kafka_", registerer)) + } + d := &Distributor{ cfg: cfg, logger: logger, @@ -207,6 +257,7 @@ func New( rateLimitStrat: rateLimitStrat, tee: tee, usageTracker: usageTracker, + ingesterTasks: make(chan pushIngesterTask), ingesterAppends: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ Namespace: constants.Loki, Name: "distributor_ingester_appends_total", @@ -227,7 +278,30 @@ func New( Name: "stream_sharding_count", Help: "Total number of times the distributor has sharded streams", }), + kafkaAppends: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ + Name: "kafka_appends_total", + Help: "The total number of appends sent to kafka ingest path.", + }, []string{"partition", "status"}), + kafkaWriteLatency: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{ + Name: "kafka_latency_seconds", + Help: "Latency to write an incoming request to the ingest storage.", + NativeHistogramBucketFactor: 1.1, + NativeHistogramMinResetDuration: 1 * time.Hour, + NativeHistogramMaxBucketNumber: 100, + Buckets: prometheus.DefBuckets, + }), + kafkaWriteBytesTotal: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ + Name: "kafka_sent_bytes_total", + Help: "Total number of bytes sent to the ingest storage.", + }), + kafkaRecordsPerRequest: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{ + Name: "kafka_records_per_write_request", + Help: "The number of records a single per-partition write request has been split into.", + Buckets: prometheus.ExponentialBuckets(1, 2, 8), + }), writeFailuresManager: 
writefailures.NewManager(logger, registerer, cfg.WriteFailuresLogging, configs, "distributor"), + kafkaWriter: kafkaWriter, + partitionRing: partitionRing, } if overrides.IngestionRateStrategy() == validation.GlobalIngestionRateStrategy { @@ -285,6 +359,15 @@ func (d *Distributor) starting(ctx context.Context) error { } func (d *Distributor) running(ctx context.Context) error { + ctx, cancel := context.WithCancel(ctx) + defer func() { + cancel() + d.ingesterTaskWg.Wait() + }() + d.ingesterTaskWg.Add(d.cfg.PushWorkerCount) + for i := 0; i < d.cfg.PushWorkerCount; i++ { + go d.pushIngesterWorker(ctx) + } select { case <-ctx.Done(): return nil @@ -294,6 +377,9 @@ func (d *Distributor) running(ctx context.Context) error { } func (d *Distributor) stopping(_ error) error { + if d.kafkaWriter != nil { + d.kafkaWriter.Close() + } return services.StopManagerAndAwaitStopped(context.Background(), d.subservices) } @@ -319,6 +405,21 @@ type pushTracker struct { err chan error } +// doneWithResult records the result of a stream push. +// If err is nil, the stream push is considered successful. +// If err is not nil, the stream push is considered failed. +func (p *pushTracker) doneWithResult(err error) { + if err == nil { + if p.streamsPending.Dec() == 0 { + p.done <- struct{}{} + } + } else { + if p.streamsFailed.Inc() == 1 { + p.err <- err + } + } +} + // Push a set of streams. // The returned error is the last one seen. func (d *Distributor) Push(ctx context.Context, req *logproto.PushRequest) (*logproto.PushResponse, error) { @@ -488,57 +589,85 @@ func (d *Distributor) Push(ctx context.Context, req *logproto.PushRequest) (*log const maxExpectedReplicationSet = 5 // typical replication factor 3 plus one for inactive plus one for luck var descs [maxExpectedReplicationSet]ring.InstanceDesc - streamTrackers := make([]streamTracker, len(streams)) - streamsByIngester := map[string][]*streamTracker{} - ingesterDescs := map[string]ring.InstanceDesc{} + tracker := pushTracker{ + done: make(chan struct{}, 1), // buffer avoids blocking if caller terminates - sendStreams() only sends once on each + err: make(chan error, 1), + } + streamsToWrite := 0 + if d.cfg.IngesterEnabled { + streamsToWrite += len(streams) + } + if d.cfg.KafkaEnabled { + streamsToWrite += len(streams) + } + // We must set streamsPending correctly before beginning any writes, to avoid a race where one path finishes completely before the other has started. + tracker.streamsPending.Store(int32(streamsToWrite)) - if err := func() error { - sp := opentracing.SpanFromContext(ctx) - if sp != nil { - sp.LogKV("event", "started to query ingesters ring") - defer func() { - sp.LogKV("event", "finished to query ingesters ring") - }() - }
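+ // With both paths enabled, streamsPending starts at 2*len(streams): each path calls doneWithResult exactly once per stream, the final decrement signals done, and the first failure on either path surfaces its error to the caller. + if d.cfg.KafkaEnabled { + // We don't need to create a new context as we do for the ingester writes below, because we don't return unless all kafka writes have succeeded.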
+ d.sendStreamsToKafka(ctx, streams, tenantID, &tracker) + } - for i, stream := range streams { - replicationSet, err := d.ingestersRing.Get(stream.HashKey, ring.WriteNoExtend, descs[:0], nil, nil) - if err != nil { - return err + if d.cfg.IngesterEnabled { + streamTrackers := make([]streamTracker, len(streams)) + streamsByIngester := map[string][]*streamTracker{} + ingesterDescs := map[string]ring.InstanceDesc{} + + if err := func() error { + sp := opentracing.SpanFromContext(ctx) + if sp != nil { + sp.LogKV("event", "started to query ingesters ring") + defer func() { + sp.LogKV("event", "finished to query ingesters ring") + }() } - streamTrackers[i] = streamTracker{ - KeyedStream: stream, - minSuccess: len(replicationSet.Instances) - replicationSet.MaxErrors, - maxFailures: replicationSet.MaxErrors, - } - for _, ingester := range replicationSet.Instances { - streamsByIngester[ingester.Addr] = append(streamsByIngester[ingester.Addr], &streamTrackers[i]) - ingesterDescs[ingester.Addr] = ingester + for i, stream := range streams { + replicationSet, err := d.ingestersRing.Get(stream.HashKey, ring.WriteNoExtend, descs[:0], nil, nil) + if err != nil { + return err + } + + streamTrackers[i] = streamTracker{ + KeyedStream: stream, + minSuccess: len(replicationSet.Instances) - replicationSet.MaxErrors, + maxFailures: replicationSet.MaxErrors, + } + for _, ingester := range replicationSet.Instances { + streamsByIngester[ingester.Addr] = append(streamsByIngester[ingester.Addr], &streamTrackers[i]) + ingesterDescs[ingester.Addr] = ingester + } } + return nil + }(); err != nil { + return nil, err } - return nil - }(); err != nil { - return nil, err - } - tracker := pushTracker{ - done: make(chan struct{}, 1), // buffer avoids blocking if caller terminates - sendSamples() only sends once on each - err: make(chan error, 1), - } - tracker.streamsPending.Store(int32(len(streams))) - for ingester, streams := range streamsByIngester { - go func(ingester ring.InstanceDesc, samples []*streamTracker) { - // Use a background context to make sure all ingesters get samples even if we return early - localCtx, cancel := context.WithTimeout(context.Background(), d.clientCfg.RemoteTimeout) - defer cancel() - localCtx = user.InjectOrgID(localCtx, tenantID) - if sp := opentracing.SpanFromContext(ctx); sp != nil { - localCtx = opentracing.ContextWithSpan(localCtx, sp) - } - d.sendStreams(localCtx, ingester, samples, &tracker) - }(ingesterDescs[ingester], streams) + for ingester, streams := range streamsByIngester { + func(ingester ring.InstanceDesc, samples []*streamTracker) { + // Use a background context to make sure all ingesters get samples even if we return early + localCtx, cancel := context.WithTimeout(context.Background(), d.clientCfg.RemoteTimeout) + localCtx = user.InjectOrgID(localCtx, tenantID) + if sp := opentracing.SpanFromContext(ctx); sp != nil { + localCtx = opentracing.ContextWithSpan(localCtx, sp) + } + select { + case <-ctx.Done(): + cancel() + return + case d.ingesterTasks <- pushIngesterTask{ + ingester: ingester, + streamTracker: samples, + pushTracker: &tracker, + ctx: localCtx, + cancel: cancel, + }: + return + } + }(ingesterDescs[ingester], streams) + } } + select { case err := <-tracker.err: return nil, err @@ -726,9 +855,30 @@ func (d *Distributor) truncateLines(vContext validationContext, stream *logproto validation.MutatedBytes.WithLabelValues(validation.LineTooLong, vContext.userID).Add(float64(truncatedBytes)) } +type pushIngesterTask struct { + streamTracker []*streamTracker + 
pushTracker *pushTracker + ingester ring.InstanceDesc + ctx context.Context + cancel context.CancelFunc +} + +func (d *Distributor) pushIngesterWorker(ctx context.Context) { + defer d.ingesterTaskWg.Done() + for { + select { + case <-ctx.Done(): + return + case task := <-d.ingesterTasks: + d.sendStreams(task) + } + } +} + // TODO taken from Cortex, see if we can refactor out a usable interface. -func (d *Distributor) sendStreams(ctx context.Context, ingester ring.InstanceDesc, streamTrackers []*streamTracker, pushTracker *pushTracker) { - err := d.sendStreamsErr(ctx, ingester, streamTrackers) +func (d *Distributor) sendStreams(task pushIngesterTask) { + defer task.cancel() + err := d.sendStreamsErr(task.ctx, task.ingester, task.streamTracker) // If we succeed, decrement each stream's pending count by one. // If we reach the required number of successful puts on this stream, then // // The use of atomic increments here guarantees only a single sendStreams // goroutine will write to either channel. - for i := range streamTrackers { + for i := range task.streamTracker { if err != nil { - if streamTrackers[i].failed.Inc() <= int32(streamTrackers[i].maxFailures) { + if task.streamTracker[i].failed.Inc() <= int32(task.streamTracker[i].maxFailures) { continue } - if pushTracker.streamsFailed.Inc() == 1 { - pushTracker.err <- err - } + task.pushTracker.doneWithResult(err) } else { - if streamTrackers[i].succeeded.Inc() != int32(streamTrackers[i].minSuccess) { + if task.streamTracker[i].succeeded.Inc() != int32(task.streamTracker[i].minSuccess) { continue } - if pushTracker.streamsPending.Dec() == 0 { - pushTracker.done <- struct{}{} - } + task.pushTracker.doneWithResult(nil) } } } @@ -785,6 +931,69 @@ func (d *Distributor) sendStreamsErr(ctx context.Context, ingester ring.Instance return err } +func (d *Distributor) sendStreamsToKafka(ctx context.Context, streams []KeyedStream, tenant string, tracker *pushTracker) { + for _, s := range streams { + go func(s KeyedStream) { + err := d.sendStreamToKafka(ctx, s, tenant) + if err != nil { + err = fmt.Errorf("failed to write stream to kafka: %w", err) + } + tracker.doneWithResult(err) + }(s) + } +} + +func (d *Distributor) sendStreamToKafka(ctx context.Context, stream KeyedStream, tenant string) error { + if len(stream.Stream.Entries) == 0 { + return nil + } + partitionID, err := d.partitionRing.PartitionRing().ActivePartitionForKey(stream.HashKey) + if err != nil { + d.kafkaAppends.WithLabelValues("kafka", "fail").Inc() + return fmt.Errorf("failed to find active partition for stream: %w", err) + } + + startTime := time.Now() + + records, err := kafka.Encode(partitionID, tenant, stream.Stream, d.cfg.KafkaConfig.ProducerMaxRecordSizeBytes) + if err != nil { + d.kafkaAppends.WithLabelValues(fmt.Sprintf("partition_%d", partitionID), "fail").Inc() + return fmt.Errorf("failed to marshal write request to records: %w", err) + } + + d.kafkaRecordsPerRequest.Observe(float64(len(records))) + + produceResults := d.kafkaWriter.ProduceSync(ctx, records) + + if count, sizeBytes := successfulProduceRecordsStats(produceResults); count > 0 { + d.kafkaWriteLatency.Observe(time.Since(startTime).Seconds()) + d.kafkaWriteBytesTotal.Add(float64(sizeBytes)) + } + + var finalErr error + for _, result := range produceResults { + if result.Err != nil { + d.kafkaAppends.WithLabelValues(fmt.Sprintf("partition_%d", partitionID), "fail").Inc() + finalErr = result.Err + } else {
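+ // Count the per-partition success; a single failed record above still fails the whole stream via finalErr.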
d.kafkaAppends.WithLabelValues(fmt.Sprintf("partition_%d", partitionID), "success").Inc() + } + } + + return finalErr +} + +func successfulProduceRecordsStats(results kgo.ProduceResults) (count, sizeBytes int) { + for _, res := range results { + if res.Err == nil && res.Record != nil { + count++ + sizeBytes += len(res.Record.Value) + } + } + + return +} + type labelData struct { ls labels.Labels hash uint64 diff --git a/pkg/distributor/distributor_test.go b/pkg/distributor/distributor_test.go index 2e8f7b895e0f..cebd46858e17 100644 --- a/pkg/distributor/distributor_test.go +++ b/pkg/distributor/distributor_test.go @@ -26,6 +26,7 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/require" + "github.com/twmb/franz-go/pkg/kgo" "go.opentelemetry.io/collector/pdata/plog" "google.golang.org/grpc" "google.golang.org/grpc/health/grpc_health_v1" @@ -505,6 +506,76 @@ func TestDistributorPushErrors(t *testing.T) { }) } +func TestDistributorPushToKafka(t *testing.T) { + limits := &validation.Limits{} + flagext.DefaultValues(limits) + + t.Run("with kafka, any failure fails the request", func(t *testing.T) { + kafkaWriter := &mockKafkaWriter{ + failOnWrite: true, + } + distributors, _ := prepare(t, 1, 0, limits, nil) + for _, d := range distributors { + d.cfg.KafkaEnabled = true + d.cfg.IngesterEnabled = false + d.cfg.KafkaConfig.ProducerMaxRecordSizeBytes = 1000 + d.kafkaWriter = kafkaWriter + } + + request := makeWriteRequest(10, 64) + _, err := distributors[0].Push(ctx, request) + require.Error(t, err) + }) + + t.Run("with kafka, no failures is successful", func(t *testing.T) { + kafkaWriter := &mockKafkaWriter{ + failOnWrite: false, + } + distributors, _ := prepare(t, 1, 0, limits, nil) + for _, d := range distributors { + d.cfg.KafkaEnabled = true + d.cfg.IngesterEnabled = false + d.cfg.KafkaConfig.ProducerMaxRecordSizeBytes = 1000 + d.kafkaWriter = kafkaWriter + } + + request := makeWriteRequest(10, 64) + _, err := distributors[0].Push(ctx, request) + require.NoError(t, err) + + require.Equal(t, 1, kafkaWriter.pushed) + }) + + t.Run("with kafka and ingesters, both must complete", func(t *testing.T) { + kafkaWriter := &mockKafkaWriter{ + failOnWrite: false, + } + distributors, ingesters := prepare(t, 1, 3, limits, nil) + ingesters[0].succeedAfter = 5 * time.Millisecond + ingesters[1].succeedAfter = 10 * time.Millisecond + ingesters[2].succeedAfter = 15 * time.Millisecond + + for _, d := range distributors { + d.cfg.KafkaEnabled = true + d.cfg.IngesterEnabled = true + d.cfg.KafkaConfig.ProducerMaxRecordSizeBytes = 1000 + d.kafkaWriter = kafkaWriter + } + + request := makeWriteRequest(10, 64) + _, err := distributors[0].Push(ctx, request) + require.NoError(t, err) + + require.Equal(t, 1, kafkaWriter.pushed) + + require.Equal(t, 1, len(ingesters[0].pushed)) + require.Equal(t, 1, len(ingesters[1].pushed)) + require.Eventually(t, func() bool { + return len(ingesters[2].pushed) == 1 + }, time.Second, 10*time.Millisecond) + }) +} + func Test_SortLabelsOnPush(t *testing.T) { t.Run("with service_name already present in labels", func(t *testing.T) { limits := &validation.Limits{} @@ -1270,9 +1341,26 @@ func prepare(t *testing.T, numDistributors, numIngesters int, limits *validation require.NoError(t, err) require.NoError(t, services.StartAndAwaitRunning(context.Background(), ingestersRing)) - test.Poll(t, time.Second, numIngesters, func() interface{} { - return ingestersRing.InstancesCount() + partitionRing := 
ring.NewPartitionRing(ring.PartitionRingDesc{ + Partitions: map[int32]ring.PartitionDesc{ + 1: { + Id: 1, + Tokens: []uint32{1}, + State: ring.PartitionActive, + StateTimestamp: time.Now().Unix(), + }, + }, + Owners: map[string]ring.OwnerDesc{ + "test": { + OwnedPartition: 1, + State: ring.OwnerActive, + UpdatedTimestamp: time.Now().Unix(), + }, + }, }) + partitionRingReader := mockPartitionRingReader{ + ring: partitionRing, + } loopbackName, err := loki_net.LoopbackInterfaceName() require.NoError(t, err) @@ -1299,7 +1387,7 @@ func prepare(t *testing.T, numDistributors, numIngesters int, limits *validation overrides, err := validation.NewOverrides(*limits, nil) require.NoError(t, err) - d, err := New(distributorConfig, clientConfig, runtime.DefaultTenantConfigs(), ingestersRing, overrides, prometheus.NewPedanticRegistry(), constants.Loki, nil, nil, log.NewNopLogger()) + d, err := New(distributorConfig, clientConfig, runtime.DefaultTenantConfigs(), ingestersRing, partitionRingReader, overrides, prometheus.NewPedanticRegistry(), constants.Loki, nil, nil, log.NewNopLogger()) require.NoError(t, err) require.NoError(t, services.StartAndAwaitRunning(context.Background(), d)) distributors[i] = d @@ -1373,6 +1461,37 @@ func makeWriteRequest(lines, size int) *logproto.PushRequest { return makeWriteRequestWithLabels(lines, size, []string{`{foo="bar"}`}) } +type mockKafkaWriter struct { + failOnWrite bool + pushed int +} + +func (m *mockKafkaWriter) ProduceSync(_ context.Context, _ []*kgo.Record) kgo.ProduceResults { + if m.failOnWrite { + return kgo.ProduceResults{ + { + Err: kgo.ErrRecordTimeout, + }, + } + } + m.pushed++ + return kgo.ProduceResults{ + { + Err: nil, + }, + } +} + +func (m *mockKafkaWriter) Close() {} + +type mockPartitionRingReader struct { + ring *ring.PartitionRing +} + +func (m mockPartitionRingReader) PartitionRing() *ring.PartitionRing { + return m.ring +} + type mockIngester struct { grpc_health_v1.HealthClient logproto.PusherClient diff --git a/pkg/indexgateway/gateway.go b/pkg/indexgateway/gateway.go index 052575647951..92d476d49667 100644 --- a/pkg/indexgateway/gateway.go +++ b/pkg/indexgateway/gateway.go @@ -246,9 +246,10 @@ func (g *Gateway) GetChunkRef(ctx context.Context, req *logproto.GetChunkRefRequ return result, nil } - // Extract LineFiltersExpr from the plan. If there is none, we can short-circuit and return before making a req - // to the bloom-gateway (through the g.bloomQuerier) - if len(v1.ExtractTestableLineFilters(req.Plan.AST)) == 0 { + // Extract testable LabelFilters from the plan. If there is none, we can + // short-circuit and return before making a req to the bloom-gateway (through + // the g.bloomQuerier) + if len(v1.ExtractTestableLabelMatchers(req.Plan.AST)) == 0 { return result, nil } @@ -412,7 +413,7 @@ func (g *Gateway) GetShards(request *logproto.ShardsRequest, server logproto.Ind return g.boundedShards(ctx, request, server, instanceID, p, forSeries) } -// boundedShards handles bounded shard requests, optionally using blooms and/or returning precomputed chunks. +// boundedShards handles bounded shard requests, optionally returning precomputed chunks. 
func (g *Gateway) boundedShards( ctx context.Context, req *logproto.ShardsRequest, @@ -464,19 +465,21 @@ func (g *Gateway) boundedShards( filtered := refs // 2) filter via blooms if enabled - filters := syntax.ExtractLineFilters(p.Plan().AST) - if g.bloomQuerier != nil && len(filters) > 0 { - xs, err := g.bloomQuerier.FilterChunkRefs(ctx, instanceID, req.From, req.Through, refs, p.Plan()) - if err != nil { - level.Error(logger).Log("msg", "failed to filter chunk refs", "err", err) - } else { - filtered = xs - } - sp.LogKV( - "stage", "queried bloom gateway", - "err", err, - ) - } + filters := v1.ExtractTestableLabelMatchers(p.Plan().AST) + // NOTE(chaudum): Temporarily disable bloom filtering of chunk refs, + // as this doubles the load on bloom gateways. + // if g.bloomQuerier != nil && len(filters) > 0 { + // xs, err := g.bloomQuerier.FilterChunkRefs(ctx, instanceID, req.From, req.Through, refs, p.Plan()) + // if err != nil { + // level.Error(logger).Log("msg", "failed to filter chunk refs", "err", err) + // } else { + // filtered = xs + // } + // sp.LogKV( + // "stage", "queried bloom gateway", + // "err", err, + // ) + // } g.metrics.preFilterChunks.WithLabelValues(routeShards).Observe(float64(ct)) g.metrics.postFilterChunks.WithLabelValues(routeShards).Observe(float64(len(filtered))) diff --git a/pkg/ingester-kafka/kafka/kafka_tee.go b/pkg/ingester-kafka/kafka/kafka_tee.go deleted file mode 100644 index 6aeaad9724e6..000000000000 --- a/pkg/ingester-kafka/kafka/kafka_tee.go +++ /dev/null @@ -1,209 +0,0 @@ -package kafka - -import ( - "context" - "errors" - "flag" - "fmt" - "math" - "time" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/grafana/dskit/ring" - "github.com/grafana/dskit/user" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/twmb/franz-go/pkg/kgo" - - "github.com/twmb/franz-go/plugin/kprom" - - "github.com/grafana/loki/v3/pkg/distributor" - "github.com/grafana/loki/v3/pkg/logproto" -) - -const writeTimeout = time.Minute - -type Config struct { - Address string `yaml:"address" docs:"the kafka endpoint to connect to"` - Topic string `yaml:"topic" docs:"the kafka topic to write to"` -} - -func (cfg *Config) RegisterFlags(f *flag.FlagSet) { - cfg.RegisterFlagsWithPrefix("", f) -} - -func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { - f.StringVar(&cfg.Address, prefix+"address", "localhost:9092", "the kafka endpoint to connect to") - f.StringVar(&cfg.Topic, prefix+".topic", "loki.push", "The Kafka topic name.") -} - -type Tee struct { - logger log.Logger - kafkaClient *kgo.Client - partitionRing *ring.PartitionInstanceRing - - ingesterAppends *prometheus.CounterVec -} - -func NewTee( - cfg Config, - metricsNamespace string, - registerer prometheus.Registerer, - logger log.Logger, - partitionRing *ring.PartitionInstanceRing, -) (*Tee, error) { - registerer = prometheus.WrapRegistererWithPrefix(metricsNamespace+"_", registerer) - - metrics := kprom.NewMetrics( - "", // No prefix. We expect the input prometheus.Registered to be wrapped with a prefix. 
- kprom.Registerer(registerer), - kprom.FetchAndProduceDetail(kprom.Batches, kprom.Records, kprom.CompressedBytes, kprom.UncompressedBytes)) - - opts := append([]kgo.Opt{}, - kgo.SeedBrokers(cfg.Address), - - kgo.WithHooks(metrics), - // commonKafkaClientOptions(kafkaCfg, metrics, logger), - kgo.RequiredAcks(kgo.AllISRAcks()), - kgo.DefaultProduceTopic(cfg.Topic), - - kgo.AllowAutoTopicCreation(), - // We set the partition field in each record. - kgo.RecordPartitioner(kgo.ManualPartitioner()), - - // Set the upper bounds the size of a record batch. - kgo.ProducerBatchMaxBytes(1024*1024*1), - - // By default, the Kafka client allows 1 Produce in-flight request per broker. Disabling write idempotency - // (which we don't need), we can increase the max number of in-flight Produce requests per broker. A higher - // number of in-flight requests, in addition to short buffering ("linger") in client side before firing the - // next Produce request allows us to reduce the end-to-end latency. - // - // The result of the multiplication of producer linger and max in-flight requests should match the maximum - // Produce latency expected by the Kafka backend in a steady state. For example, 50ms * 20 requests = 1s, - // which means the Kafka client will keep issuing a Produce request every 50ms as far as the Kafka backend - // doesn't take longer than 1s to process them (if it takes longer, the client will buffer data and stop - // issuing new Produce requests until some previous ones complete). - kgo.DisableIdempotentWrite(), - kgo.ProducerLinger(50*time.Millisecond), - kgo.MaxProduceRequestsInflightPerBroker(20), - - // Unlimited number of Produce retries but a deadline on the max time a record can take to be delivered. - // With the default config it would retry infinitely. - // - // Details of the involved timeouts: - // - RecordDeliveryTimeout: how long a Kafka client Produce() call can take for a given record. The overhead - // timeout is NOT applied. - // - ProduceRequestTimeout: how long to wait for the response to the Produce request (the Kafka protocol message) - // after being sent on the network. The actual timeout is increased by the configured overhead. - // - // When a Produce request to Kafka fail, the client will retry up until the RecordDeliveryTimeout is reached. - // Once the timeout is reached, the Produce request will fail and all other buffered requests in the client - // (for the same partition) will fail too. See kgo.RecordDeliveryTimeout() documentation for more info. - kgo.RecordRetries(math.MaxInt), - kgo.RecordDeliveryTimeout(time.Minute), - kgo.ProduceRequestTimeout(time.Minute), - kgo.RequestTimeoutOverhead(time.Minute), - - // Unlimited number of buffered records because we limit on bytes in Writer. The reason why we don't use - // kgo.MaxBufferedBytes() is because it suffers a deadlock issue: - // https://github.com/twmb/franz-go/issues/777 - kgo.MaxBufferedRecords(math.MaxInt), // Use a high value to set it as unlimited, because the client doesn't support "0 as unlimited". - kgo.MaxBufferedBytes(0), - ) - - kafkaClient, err := kgo.NewClient(opts...) 
- if err != nil { - panic("failed to start kafka client") - } - - t := &Tee{ - logger: log.With(logger, "component", "kafka-tee"), - ingesterAppends: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ - Name: "kafka_ingester_appends_total", - Help: "The total number of appends sent to kafka ingest path.", - }, []string{"partition", "status"}), - kafkaClient: kafkaClient, - partitionRing: partitionRing, - } - - return t, nil -} - -// Duplicate Implements distributor.Tee which is used to tee distributor requests to pattern ingesters. -func (t *Tee) Duplicate(tenant string, streams []distributor.KeyedStream) { - for idx := range streams { - go func(stream distributor.KeyedStream) { - if err := t.sendStream(tenant, stream); err != nil { - level.Error(t.logger).Log("msg", "failed to send stream to kafka", "err", err) - } - }(streams[idx]) - } -} - -func (t *Tee) sendStream(tenant string, stream distributor.KeyedStream) error { - partitionID, err := t.partitionRing.PartitionRing().ActivePartitionForKey(stream.HashKey) - if err != nil { - t.ingesterAppends.WithLabelValues("partition_unknown", "fail").Inc() - return fmt.Errorf("failed to find active partition for stream: %w", err) - } - records, err := marshalWriteRequestToRecords(partitionID, tenant, stream.Stream, 1024*1024) - - ctx, cancel := context.WithTimeout(user.InjectOrgID(context.Background(), tenant), writeTimeout) - defer cancel() - produceResults := t.kafkaClient.ProduceSync(ctx, records...) - - var finalErr error - for _, result := range produceResults { - if result.Err != nil { - t.ingesterAppends.WithLabelValues(fmt.Sprintf("partition_%d", partitionID), "fail").Inc() - finalErr = err - } else { - t.ingesterAppends.WithLabelValues(fmt.Sprintf("partition_%d", partitionID), "success").Inc() - } - } - - return finalErr -} - -// marshalWriteRequestToRecords marshals a mimirpb.WriteRequest to one or more Kafka records. -// The request may be split to multiple records to get that each single Kafka record -// data size is not bigger than maxSize. -// -// This function is a best-effort. The returned Kafka records are not strictly guaranteed to -// have their data size limited to maxSize. The reason is that the WriteRequest is split -// by each individual Timeseries and Metadata: if a single Timeseries or Metadata is bigger than -// maxSize, than the resulting record will be bigger than the limit as well. -func marshalWriteRequestToRecords(partitionID int32, tenantID string, stream logproto.Stream, maxSize int) ([]*kgo.Record, error) { - reqSize := stream.Size() - - if reqSize <= maxSize { - // No need to split the request. We can take a fast path. - rec, err := marshalWriteRequestToRecord(partitionID, tenantID, stream, reqSize) - if err != nil { - return nil, err - } - - return []*kgo.Record{rec}, nil - } - return nil, errors.New("large write requests are not supported yet") - - // return marshalWriteRequestsToRecords(partitionID, tenantID, mimirpb.SplitWriteRequestByMaxMarshalSize(req, reqSize, maxSize)) -} - -func marshalWriteRequestToRecord(partitionID int32, tenantID string, stream logproto.Stream, reqSize int) (*kgo.Record, error) { - // Marshal the request. - data := make([]byte, reqSize) - n, err := stream.MarshalToSizedBuffer(data) - if err != nil { - return nil, fmt.Errorf("failed to serialise write request: %w", err) - } - data = data[:n] - - return &kgo.Record{ - Key: []byte(tenantID), // We don't partition based on the key, so the value here doesn't make any difference. 
- Value: data, - Partition: partitionID, - }, nil -} diff --git a/pkg/ingester-rf1/objstore/storage.go b/pkg/ingester-rf1/objstore/storage.go index ec0d734b316b..5a8c61fd5117 100644 --- a/pkg/ingester-rf1/objstore/storage.go +++ b/pkg/ingester-rf1/objstore/storage.go @@ -70,6 +70,14 @@ func (m *Multi) GetStoreFor(ts model.Time) (client.ObjectClient, error) { return nil, fmt.Errorf("no store found for timestamp %s", ts) } +func (m *Multi) ObjectExistsWithSize(ctx context.Context, objectKey string) (bool, int64, error) { + s, err := m.GetStoreFor(model.Now()) + if err != nil { + return false, 0, err + } + return s.ObjectExistsWithSize(ctx, objectKey) +} + func (m *Multi) ObjectExists(ctx context.Context, objectKey string) (bool, error) { s, err := m.GetStoreFor(model.Now()) if err != nil { diff --git a/pkg/ingester/checkpoint_test.go b/pkg/ingester/checkpoint_test.go index 5a816a3b779d..1639125390a0 100644 --- a/pkg/ingester/checkpoint_test.go +++ b/pkg/ingester/checkpoint_test.go @@ -16,6 +16,7 @@ import ( "github.com/stretchr/testify/require" "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/distributor/writefailures" "github.com/grafana/loki/v3/pkg/ingester/client" "github.com/grafana/loki/v3/pkg/logproto" @@ -72,7 +73,7 @@ func TestIngesterWAL(t *testing.T) { readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -115,7 +116,7 @@ func TestIngesterWAL(t *testing.T) { expectCheckpoint(t, walDir, false, time.Second) // restart the ingester - i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) @@ -129,7 +130,7 @@ func TestIngesterWAL(t *testing.T) { require.Nil(t, services.StopAndAwaitTerminated(context.Background(), i)) // restart the ingester - i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) @@ -154,7 +155,7 @@ func TestIngesterWALIgnoresStreamLimits(t *testing.T) { readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, newStore(), limits, 
runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -200,7 +201,7 @@ func TestIngesterWALIgnoresStreamLimits(t *testing.T) { require.NoError(t, err) // restart the ingester - i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) @@ -259,7 +260,7 @@ func TestIngesterWALBackpressureSegments(t *testing.T) { readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -280,7 +281,7 @@ func TestIngesterWALBackpressureSegments(t *testing.T) { expectCheckpoint(t, walDir, false, time.Second) // restart the ingester, ensuring we replayed from WAL. - i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) @@ -303,7 +304,7 @@ func TestIngesterWALBackpressureCheckpoint(t *testing.T) { readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -324,7 +325,7 @@ func TestIngesterWALBackpressureCheckpoint(t *testing.T) { require.Nil(t, services.StopAndAwaitTerminated(context.Background(), i)) // restart the ingester, ensuring we can replay from the checkpoint as well. 
- i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) @@ -565,7 +566,7 @@ func buildChunks(t testing.TB, size int) []Chunk { for i := 0; i < size; i++ { // build chunks of 256k blocks, 1.5MB target size. Same as default config. - c := chunkenc.NewMemChunk(chunkenc.ChunkFormatV3, chunkenc.EncGZIP, chunkenc.UnorderedHeadBlockFmt, 256*1024, 1500*1024) + c := chunkenc.NewMemChunk(chunkenc.ChunkFormatV3, compression.EncGZIP, chunkenc.UnorderedHeadBlockFmt, 256*1024, 1500*1024) fillChunk(t, c) descs = append(descs, chunkDesc{ chunk: c, @@ -601,7 +602,7 @@ func TestIngesterWALReplaysUnorderedToOrdered(t *testing.T) { readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -673,7 +674,7 @@ func TestIngesterWALReplaysUnorderedToOrdered(t *testing.T) { require.NoError(t, err) // restart the ingester - i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock) + i, err = New(ingesterConfig, client.Config{}, newStore(), limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokit_log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck require.Nil(t, services.StartAndAwaitRunning(context.Background(), i)) diff --git a/pkg/ingester/chunk_test.go b/pkg/ingester/chunk_test.go index f6a16731e6d4..961b256ea58c 100644 --- a/pkg/ingester/chunk_test.go +++ b/pkg/ingester/chunk_test.go @@ -11,6 +11,7 @@ import ( "github.com/stretchr/testify/require" "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/iter" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/logql/log" @@ -49,7 +50,7 @@ func TestIterator(t *testing.T) { }{ {"dumbChunk", chunkenc.NewDumbChunk}, {"gzipChunk", func() chunkenc.Chunk { - return chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, chunkenc.EncGZIP, chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt, 256*1024, 0) + return chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, compression.EncGZIP, chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt, 256*1024, 0) }}, } { t.Run(chk.name, func(t *testing.T) { diff --git a/pkg/ingester/downscale.go b/pkg/ingester/downscale.go new file mode 100644 index 000000000000..55b3ee2d0ae9 --- /dev/null +++ b/pkg/ingester/downscale.go @@ -0,0 +1,104 @@ +package ingester + +import ( + "net/http" + + 
"github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/grafana/dskit/ring" + "github.com/grafana/dskit/services" + + "github.com/grafana/loki/v3/pkg/util" +) + +// PreparePartitionDownscaleHandler prepares the ingester's partition downscaling. The partition owned by the +// ingester will switch to INACTIVE state (read-only). +// +// Following methods are supported: +// +// - GET +// Returns timestamp when partition was switched to INACTIVE state, or 0, if partition is not in INACTIVE state. +// +// - POST +// Switches the partition to INACTIVE state (if not yet), and returns the timestamp when the switch to +// INACTIVE state happened. +// +// - DELETE +// Sets partition back from INACTIVE to ACTIVE state, and returns 0 signalling the partition is not in INACTIVE state +func (i *Ingester) PreparePartitionDownscaleHandler(w http.ResponseWriter, r *http.Request) { + logger := log.With(i.logger, "partition", i.ingestPartitionID) + + // Don't allow callers to change the shutdown configuration while we're in the middle + // of starting or shutting down. + if i.State() != services.Running { + w.WriteHeader(http.StatusServiceUnavailable) + return + } + + if !i.cfg.KafkaIngestion.Enabled { + w.WriteHeader(http.StatusMethodNotAllowed) + return + } + + switch r.Method { + case http.MethodPost: + // It's not allowed to prepare the downscale while in PENDING state. Why? Because if the downscale + // will be later cancelled, we don't know if it was requested in PENDING or ACTIVE state, so we + // don't know to which state reverting back. Given a partition is expected to stay in PENDING state + // for a short period, we simply don't allow this case. + state, _, err := i.partitionRingLifecycler.GetPartitionState(r.Context()) + if err != nil { + level.Error(logger).Log("msg", "failed to check partition state in the ring", "err", err) + w.WriteHeader(http.StatusInternalServerError) + return + } + + if state == ring.PartitionPending { + level.Warn(logger).Log("msg", "received a request to prepare partition for shutdown, but the request can't be satisfied because the partition is in PENDING state") + w.WriteHeader(http.StatusConflict) + return + } + + if err := i.partitionRingLifecycler.ChangePartitionState(r.Context(), ring.PartitionInactive); err != nil { + level.Error(logger).Log("msg", "failed to change partition state to inactive", "err", err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + + case http.MethodDelete: + state, _, err := i.partitionRingLifecycler.GetPartitionState(r.Context()) + if err != nil { + level.Error(logger).Log("msg", "failed to check partition state in the ring", "err", err) + w.WriteHeader(http.StatusInternalServerError) + return + } + + // If partition is inactive, make it active. We ignore other states Active and especially Pending. + if state == ring.PartitionInactive { + // We don't switch it back to PENDING state if there are not enough owners because we want to guarantee consistency + // in the read path. If the partition is within the lookback period we need to guarantee that partition will be queried. + // Moving back to PENDING will cause us loosing consistency, because PENDING partitions are not queried by design. + // We could move back to PENDING if there are not enough owners and the partition moved to INACTIVE more than + // "lookback period" ago, but since we delete inactive partitions with no owners that moved to inactive since longer + // than "lookback period" ago, it looks to be an edge case not worth to address. 
+ if err := i.partitionRingLifecycler.ChangePartitionState(r.Context(), ring.PartitionActive); err != nil { + level.Error(logger).Log("msg", "failed to change partition state to active", "err", err) + http.Error(w, err.Error(), http.StatusInternalServerError) + return + } + } + } + + state, stateTimestamp, err := i.partitionRingLifecycler.GetPartitionState(r.Context()) + if err != nil { + level.Error(logger).Log("msg", "failed to check partition state in the ring", "err", err) + w.WriteHeader(http.StatusInternalServerError) + return + } + + if state == ring.PartitionInactive { + util.WriteJSONResponse(w, map[string]any{"timestamp": stateTimestamp.Unix()}) + } else { + util.WriteJSONResponse(w, map[string]any{"timestamp": 0}) + } +} diff --git a/pkg/ingester/encoding_test.go b/pkg/ingester/encoding_test.go index 458da1132c96..ee2ad1d8f681 100644 --- a/pkg/ingester/encoding_test.go +++ b/pkg/ingester/encoding_test.go @@ -9,6 +9,7 @@ import ( "github.com/stretchr/testify/require" "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/ingester/wal" "github.com/grafana/loki/v3/pkg/logproto" ) @@ -58,7 +59,7 @@ func Test_EncodingChunks(t *testing.T) { t.Run(fmt.Sprintf("%v-%s", close, tc.desc), func(t *testing.T) { conf := tc.conf - c := chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, chunkenc.EncGZIP, chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt, conf.BlockSize, conf.TargetChunkSize) + c := chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, compression.EncGZIP, chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt, conf.BlockSize, conf.TargetChunkSize) fillChunk(t, c) if close { require.Nil(t, c.Close()) @@ -121,7 +122,7 @@ func Test_EncodingChunks(t *testing.T) { func Test_EncodingCheckpoint(t *testing.T) { conf := dummyConf() - c := chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, chunkenc.EncGZIP, chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt, conf.BlockSize, conf.TargetChunkSize) + c := chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, compression.EncGZIP, chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt, conf.BlockSize, conf.TargetChunkSize) dup, err := c.Append(&logproto.Entry{ Timestamp: time.Unix(1, 0), Line: "hi there", diff --git a/pkg/ingester/flush_test.go b/pkg/ingester/flush_test.go index 69462a3d352a..f01fb02e8730 100644 --- a/pkg/ingester/flush_test.go +++ b/pkg/ingester/flush_test.go @@ -24,6 +24,7 @@ import ( "github.com/grafana/dskit/tenant" "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/distributor/writefailures" "github.com/grafana/loki/v3/pkg/ingester/client" "github.com/grafana/loki/v3/pkg/ingester/wal" @@ -188,7 +189,7 @@ func buildChunkDecs(t testing.TB) []*chunkDesc { for i := range res { res[i] = &chunkDesc{ closed: true, - chunk: chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, chunkenc.EncSnappy, chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt, dummyConf().BlockSize, dummyConf().TargetChunkSize), + chunk: chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, compression.EncSnappy, chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt, dummyConf().BlockSize, dummyConf().TargetChunkSize), } fillChunk(t, res[i].chunk) require.NoError(t, res[i].chunk.Close()) @@ -392,7 +393,7 @@ func newTestStore(t require.TestingT, cfg Config, walOverride WAL) (*testStore, limits, err := validation.NewOverrides(defaultLimitsTestConfig(), nil) require.NoError(t, err) - ing, err := New(cfg, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, 
writefailures.Cfg{}, constants.Loki, gokitlog.NewNopLogger(), nil, readRingMock) + ing, err := New(cfg, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, gokitlog.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) require.NoError(t, services.StartAndAwaitRunning(context.Background(), ing)) diff --git a/pkg/ingester/ingester.go b/pkg/ingester/ingester.go index da0021c11f3f..7776b9097f08 100644 --- a/pkg/ingester/ingester.go +++ b/pkg/ingester/ingester.go @@ -14,16 +14,11 @@ import ( "sync" "time" - "github.com/grafana/loki/v3/pkg/loghttp/push" - "github.com/grafana/loki/v3/pkg/logqlmodel/metadata" - "github.com/grafana/loki/v3/pkg/storage/types" - - lokilog "github.com/grafana/loki/v3/pkg/logql/log" - "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/grafana/dskit/backoff" "github.com/grafana/dskit/concurrency" + "github.com/grafana/dskit/kv" "github.com/grafana/dskit/modules" "github.com/grafana/dskit/multierror" "github.com/grafana/dskit/ring" @@ -36,17 +31,21 @@ import ( "golang.org/x/time/rate" "google.golang.org/grpc/health/grpc_health_v1" - server_util "github.com/grafana/loki/v3/pkg/util/server" - "github.com/grafana/loki/v3/pkg/analytics" - "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/distributor/writefailures" "github.com/grafana/loki/v3/pkg/ingester/client" "github.com/grafana/loki/v3/pkg/ingester/index" "github.com/grafana/loki/v3/pkg/iter" + "github.com/grafana/loki/v3/pkg/kafka" + "github.com/grafana/loki/v3/pkg/kafka/partition" + "github.com/grafana/loki/v3/pkg/kafka/partitionring" + "github.com/grafana/loki/v3/pkg/loghttp/push" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/logql" + lokilog "github.com/grafana/loki/v3/pkg/logql/log" "github.com/grafana/loki/v3/pkg/logql/syntax" + "github.com/grafana/loki/v3/pkg/logqlmodel/metadata" "github.com/grafana/loki/v3/pkg/logqlmodel/stats" "github.com/grafana/loki/v3/pkg/querier/plan" "github.com/grafana/loki/v3/pkg/runtime" @@ -57,8 +56,10 @@ import ( indexstore "github.com/grafana/loki/v3/pkg/storage/stores/index" "github.com/grafana/loki/v3/pkg/storage/stores/index/seriesvolume" index_stats "github.com/grafana/loki/v3/pkg/storage/stores/index/stats" + "github.com/grafana/loki/v3/pkg/storage/types" "github.com/grafana/loki/v3/pkg/util" util_log "github.com/grafana/loki/v3/pkg/util/log" + server_util "github.com/grafana/loki/v3/pkg/util/server" "github.com/grafana/loki/v3/pkg/util/wal" ) @@ -67,6 +68,10 @@ const ( RingKey = "ring" shutdownMarkerFilename = "shutdown-requested.txt" + + // PartitionRingKey is the key under which we store the partitions ring used by the "ingest storage". 
+ PartitionRingKey = "ingester-partitions-key" + PartitionRingName = "ingester-partitions" ) // ErrReadOnly is returned when the ingester is shutting down and a push was @@ -84,18 +89,18 @@ var ( type Config struct { LifecyclerConfig ring.LifecyclerConfig `yaml:"lifecycler,omitempty" doc:"description=Configures how the lifecycle of the ingester will operate and where it will register for discovery."` - ConcurrentFlushes int `yaml:"concurrent_flushes"` - FlushCheckPeriod time.Duration `yaml:"flush_check_period"` - FlushOpBackoff backoff.Config `yaml:"flush_op_backoff"` - FlushOpTimeout time.Duration `yaml:"flush_op_timeout"` - RetainPeriod time.Duration `yaml:"chunk_retain_period"` - MaxChunkIdle time.Duration `yaml:"chunk_idle_period"` - BlockSize int `yaml:"chunk_block_size"` - TargetChunkSize int `yaml:"chunk_target_size"` - ChunkEncoding string `yaml:"chunk_encoding"` - parsedEncoding chunkenc.Encoding `yaml:"-"` // placeholder for validated encoding - MaxChunkAge time.Duration `yaml:"max_chunk_age"` - AutoForgetUnhealthy bool `yaml:"autoforget_unhealthy"` + ConcurrentFlushes int `yaml:"concurrent_flushes"` + FlushCheckPeriod time.Duration `yaml:"flush_check_period"` + FlushOpBackoff backoff.Config `yaml:"flush_op_backoff"` + FlushOpTimeout time.Duration `yaml:"flush_op_timeout"` + RetainPeriod time.Duration `yaml:"chunk_retain_period"` + MaxChunkIdle time.Duration `yaml:"chunk_idle_period"` + BlockSize int `yaml:"chunk_block_size"` + TargetChunkSize int `yaml:"chunk_target_size"` + ChunkEncoding string `yaml:"chunk_encoding"` + parsedEncoding compression.Encoding `yaml:"-"` // placeholder for validated encoding + MaxChunkAge time.Duration `yaml:"max_chunk_age"` + AutoForgetUnhealthy bool `yaml:"autoforget_unhealthy"` // Synchronization settings. Used to make sure that ingesters cut their chunks at the same moments. SyncPeriod time.Duration `yaml:"sync_period"` @@ -125,12 +130,15 @@ type Config struct { ShutdownMarkerPath string `yaml:"shutdown_marker_path"` OwnedStreamsCheckInterval time.Duration `yaml:"owned_streams_check_interval" doc:"description=Interval at which the ingester ownedStreamService checks for changes in the ring to recalculate owned streams."` + + KafkaIngestion KafkaIngestionConfig `yaml:"kafka_ingestion,omitempty"` } // RegisterFlags registers the flags. func (cfg *Config) RegisterFlags(f *flag.FlagSet) { cfg.LifecyclerConfig.RegisterFlags(f, util_log.Logger) cfg.WAL.RegisterFlags(f) + cfg.KafkaIngestion.RegisterFlags(f) f.IntVar(&cfg.ConcurrentFlushes, "ingester.concurrent-flushes", 32, "How many flushes can happen concurrently from each stream.") f.DurationVar(&cfg.FlushCheckPeriod, "ingester.flush-check-period", 30*time.Second, "How often should the ingester see if there are any blocks to flush. The first flush check is delayed by a random time up to 0.8x the flush check period. Additionally, there is +/- 1% jitter added to the interval.") @@ -142,7 +150,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { f.DurationVar(&cfg.MaxChunkIdle, "ingester.chunks-idle-period", 30*time.Minute, "How long chunks should sit in-memory with no updates before being flushed if they don't hit the max block size. 
This means that half-empty chunks will still be flushed after a certain period as long as they receive no further activity.") f.IntVar(&cfg.BlockSize, "ingester.chunks-block-size", 256*1024, "The targeted _uncompressed_ size in bytes of a chunk block When this threshold is exceeded the head block will be cut and compressed inside the chunk.") f.IntVar(&cfg.TargetChunkSize, "ingester.chunk-target-size", 1572864, "A target _compressed_ size in bytes for chunks. This is a desired size not an exact size, chunks may be slightly bigger or significantly smaller if they get flushed for other reasons (e.g. chunk_idle_period). A value of 0 creates chunks with a fixed 10 blocks, a non zero value will create chunks with a variable number of blocks to meet the target size.") // 1.5 MB - f.StringVar(&cfg.ChunkEncoding, "ingester.chunk-encoding", chunkenc.EncGZIP.String(), fmt.Sprintf("The algorithm to use for compressing chunk. (%s)", chunkenc.SupportedEncoding())) + f.StringVar(&cfg.ChunkEncoding, "ingester.chunk-encoding", compression.EncGZIP.String(), fmt.Sprintf("The algorithm to use for compressing chunk. (%s)", compression.SupportedEncoding())) f.DurationVar(&cfg.SyncPeriod, "ingester.sync-period", 1*time.Hour, "Parameters used to synchronize ingesters to cut chunks at the same moment. Sync period is used to roll over incoming entry to a new chunk. If chunk's utilization isn't high enough (eg. less than 50% when sync_min_utilization is set to 0.5), then this chunk rollover doesn't happen.") f.Float64Var(&cfg.SyncMinUtilization, "ingester.sync-min-utilization", 0.1, "Minimum utilization of chunk when doing synchronization.") f.IntVar(&cfg.MaxReturnedErrors, "ingester.max-ignored-stream-errors", 10, "The maximum number of errors a stream will report to the user when a push fails. 0 to make unlimited.") @@ -156,7 +164,7 @@ func (cfg *Config) RegisterFlags(f *flag.FlagSet) { } func (cfg *Config) Validate() error { - enc, err := chunkenc.ParseEncoding(cfg.ChunkEncoding) + enc, err := compression.ParseEncoding(cfg.ChunkEncoding) if err != nil { return err } @@ -182,6 +190,17 @@ func (cfg *Config) Validate() error { return nil } +type KafkaIngestionConfig struct { + Enabled bool `yaml:"enabled" doc:"description=Whether the kafka ingester is enabled."` + PartitionRingConfig partitionring.Config `yaml:"partition_ring" category:"experimental"` + KafkaConfig kafka.Config `yaml:"-"` +} + +func (cfg *KafkaIngestionConfig) RegisterFlags(f *flag.FlagSet) { + cfg.PartitionRingConfig.RegisterFlagsWithPrefix("ingester.", f) + f.BoolVar(&cfg.Enabled, "ingester.kafka-ingestion-enabled", false, "Whether the ingester will consume data from kafka.") +} + type Wrapper interface { Wrap(wrapped Interface) Interface } @@ -208,6 +227,7 @@ type Interface interface { GetOrCreateInstance(instanceID string) (*instance, error) ShutdownHandler(w http.ResponseWriter, r *http.Request) PrepareShutdown(w http.ResponseWriter, r *http.Request) + PreparePartitionDownscaleHandler(w http.ResponseWriter, r *http.Request) } // Ingester builds chunks for incoming log streams. @@ -272,11 +292,15 @@ type Ingester struct { // recalculateOwnedStreams periodically checks the ring for changes and recalculates owned streams for each instance. readRing ring.ReadRing - recalculateOwnedStreams *recalculateOwnedStreams + recalculateOwnedStreams *recalculateOwnedStreamsSvc + + ingestPartitionID int32 + partitionRingLifecycler *ring.PartitionInstanceLifecycler + partitionReader *partition.Reader } // New makes a new Ingester. 
-func New(cfg Config, clientConfig client.Config, store Store, limits Limits, configs *runtime.TenantConfigs, registerer prometheus.Registerer, writeFailuresCfg writefailures.Cfg, metricsNamespace string, logger log.Logger, customStreamsTracker push.UsageTracker, readRing ring.ReadRing) (*Ingester, error) { +func New(cfg Config, clientConfig client.Config, store Store, limits Limits, configs *runtime.TenantConfigs, registerer prometheus.Registerer, writeFailuresCfg writefailures.Cfg, metricsNamespace string, logger log.Logger, customStreamsTracker push.UsageTracker, readRing ring.ReadRing, partitionRingWatcher *ring.PartitionRingWatcher) (*Ingester, error) { if cfg.ingesterClientFactory == nil { cfg.ingesterClientFactory = client.New } @@ -336,6 +360,34 @@ func New(cfg Config, clientConfig client.Config, store Store, limits Limits, con i.lifecyclerWatcher = services.NewFailureWatcher() i.lifecyclerWatcher.WatchService(i.lifecycler) + if i.cfg.KafkaIngestion.Enabled { + i.ingestPartitionID, err = partitionring.ExtractIngesterPartitionID(cfg.LifecyclerConfig.ID) + if err != nil { + return nil, fmt.Errorf("calculating ingester partition ID: %w", err) + } + partitionRingKV := cfg.KafkaIngestion.PartitionRingConfig.KVStore.Mock + if partitionRingKV == nil { + partitionRingKV, err = kv.NewClient(cfg.KafkaIngestion.PartitionRingConfig.KVStore, ring.GetPartitionRingCodec(), kv.RegistererWithKVName(registerer, PartitionRingName+"-lifecycler"), logger) + if err != nil { + return nil, fmt.Errorf("creating KV store for ingester partition ring: %w", err) + } + } + i.partitionRingLifecycler = ring.NewPartitionInstanceLifecycler( + i.cfg.KafkaIngestion.PartitionRingConfig.ToLifecyclerConfig(i.ingestPartitionID, cfg.LifecyclerConfig.ID), + PartitionRingName, + PartitionRingKey, + partitionRingKV, + logger, + prometheus.WrapRegistererWithPrefix("loki_", registerer)) + + i.partitionReader, err = partition.NewReader(cfg.KafkaIngestion.KafkaConfig, i.ingestPartitionID, cfg.LifecyclerConfig.ID, NewKafkaConsumerFactory(i, logger, registerer), logger, registerer) + if err != nil { + return nil, err + } + i.lifecyclerWatcher.WatchService(i.partitionRingLifecycler) + i.lifecyclerWatcher.WatchService(i.partitionReader) + } + // Now that the lifecycler has been created, we can create the limiter // which depends on it. i.limiter = NewLimiter(limits, metrics, i.lifecycler, cfg.LifecyclerConfig.RingConfig.ReplicationFactor) @@ -356,7 +408,13 @@ func New(cfg Config, clientConfig client.Config, store Store, limits Limits, con i.SetExtractorWrapper(i.cfg.SampleExtractorWrapper) } - i.recalculateOwnedStreams = newRecalculateOwnedStreams(i.getInstances, i.lifecycler.ID, i.readRing, cfg.OwnedStreamsCheckInterval, util_log.Logger) + var ownedStreamsStrategy ownershipStrategy + if i.cfg.KafkaIngestion.Enabled { + ownedStreamsStrategy = newOwnedStreamsPartitionStrategy(i.ingestPartitionID, partitionRingWatcher, util_log.Logger) + } else { + ownedStreamsStrategy = newOwnedStreamsIngesterStrategy(i.lifecycler.ID, i.readRing, util_log.Logger) + } + i.recalculateOwnedStreams = newRecalculateOwnedStreamsSvc(i.getInstances, ownedStreamsStrategy, cfg.OwnedStreamsCheckInterval, util_log.Logger) return i, nil } @@ -444,7 +502,15 @@ func (i *Ingester) setupAutoForget() { }() } -func (i *Ingester) starting(ctx context.Context) error { +func (i *Ingester) starting(ctx context.Context) (err error) { + defer func() { + if err != nil { + // if starting() fails for any reason (e.g., context canceled), + // the lifecycler must be stopped. 
+			_ = services.StopAndAwaitTerminated(context.Background(), i.lifecycler)
+		}
+	}()
+
 	if i.cfg.WAL.Enabled {
 		start := time.Now()
@@ -529,17 +595,6 @@ func (i *Ingester) starting(ctx context.Context) error {
 	i.InitFlushQueues()
-	// pass new context to lifecycler, so that it doesn't stop automatically when Ingester's service context is done
-	err := i.lifecycler.StartAsync(context.Background())
-	if err != nil {
-		return err
-	}
-
-	err = i.lifecycler.AwaitRunning(ctx)
-	if err != nil {
-		return err
-	}
-
 	shutdownMarkerPath := path.Join(i.cfg.ShutdownMarkerPath, shutdownMarkerFilename)
 	shutdownMarker, err := shutdownMarkerExists(shutdownMarkerPath)
 	if err != nil {
@@ -551,16 +606,41 @@ func (i *Ingester) starting(ctx context.Context) error {
 		i.setPrepareShutdown()
 	}
+
+	// When kafka ingestion is enabled, we have to make sure that the reader catches up replaying the partition
+	// BEFORE the ingester ring lifecycler is started, because once the ingester ring lifecycler starts,
+	// it will switch the ingester state in the ring to ACTIVE.
+	if i.partitionReader != nil {
+		if err := services.StartAndAwaitRunning(ctx, i.partitionReader); err != nil {
+			return fmt.Errorf("failed to start partition reader: %w", err)
+		}
+	}
+
+	// pass a new context to the lifecycler, so that it doesn't stop automatically when the Ingester's service context is done
+	err = i.lifecycler.StartAsync(context.Background())
+	if err != nil {
+		return err
+	}
+
+	err = i.lifecycler.AwaitRunning(ctx)
+	if err != nil {
+		return err
+	}
+
+	err = i.recalculateOwnedStreams.StartAsync(ctx)
+	if err != nil {
+		return fmt.Errorf("can not start recalculate owned streams service: %w", err)
+	}
-	err = i.lifecycler.AwaitRunning(ctx)
+	err = i.recalculateOwnedStreams.AwaitRunning(ctx)
 	if err != nil {
 		return fmt.Errorf("can not ensure recalculate owned streams service is running: %w", err)
 	}
+	if i.partitionRingLifecycler != nil {
+		if err := services.StartAndAwaitRunning(ctx, i.partitionRingLifecycler); err != nil {
+			return fmt.Errorf("failed to start partition ring lifecycler: %w", err)
+		}
+	}
 	// start our loop
 	i.loopDone.Add(1)
 	go i.loop()
@@ -593,6 +673,19 @@ func (i *Ingester) running(ctx context.Context) error {
 // At this point, loop no longer runs, but flushers are still running.
 func (i *Ingester) stopping(_ error) error {
 	i.stopIncomingRequests()
+
+	if i.partitionReader != nil {
+		if err := services.StopAndAwaitTerminated(context.Background(), i.partitionReader); err != nil {
+			level.Warn(i.logger).Log("msg", "failed to stop partition reader", "err", err)
+		}
+	}
+
+	if i.partitionRingLifecycler != nil {
+		if err := services.StopAndAwaitTerminated(context.Background(), i.partitionRingLifecycler); err != nil {
+			level.Warn(i.logger).Log("msg", "failed to stop partition ring lifecycler", "err", err)
+		}
+	}
+
 	var errs util.MultiError
 	errs.Add(i.wal.Stop())
@@ -749,6 +842,18 @@ func (i *Ingester) setPrepareShutdown() {
 	i.lifecycler.SetUnregisterOnShutdown(true)
 	i.terminateOnShutdown = true
 	i.metrics.shutdownMarker.Set(1)
+
+	if i.partitionRingLifecycler != nil {
+		// When the prepare shutdown endpoint is called, there are two changes in the partition ring behavior:
+		//
+		// 1. If setPrepareShutdown() is called at startup, because of the shutdown marker found on disk,
+		//    the ingester shouldn't create the partition if it doesn't exist, because we expect the ingester to
+		//    be scaled down shortly after.
+		// 2. When the ingester shuts down, we'll have to remove it from the partition owners,
+		//    because we expect the ingester to be scaled down.
+		i.partitionRingLifecycler.SetCreatePartitionOnStartup(false)
+		i.partitionRingLifecycler.SetRemoveOwnerOnShutdown(true)
+	}
 }
 func (i *Ingester) unsetPrepareShutdown() {
@@ -785,7 +890,7 @@ func createShutdownMarker(p string) error {
 		return err
 	}
-	dir, err := os.OpenFile(path.Dir(p), os.O_RDONLY, 0777)
+	dir, err := os.OpenFile(path.Dir(p), os.O_RDONLY, 0o777)
 	if err != nil {
 		return err
 	}
@@ -802,7 +907,7 @@ func removeShutdownMarker(p string) error {
 		return err
 	}
-	dir, err := os.OpenFile(path.Dir(p), os.O_RDONLY, 0777)
+	dir, err := os.OpenFile(path.Dir(p), os.O_RDONLY, 0o777)
 	if err != nil {
 		return err
 	}
@@ -1388,6 +1493,7 @@ func (i *Ingester) Tail(req *logproto.TailRequest, queryServer logproto.Querier_
 	err = server_util.ClientGrpcStatusAndError(err)
 	return err
 }
+
 func (i *Ingester) tail(req *logproto.TailRequest, queryServer logproto.Querier_TailServer) error {
 	select {
 	case <-i.tailersQuit:
@@ -1524,7 +1630,6 @@ func (i *Ingester) getDetectedLabels(ctx context.Context, req *logproto.Detected
 	}
 	labelMap, err := instance.LabelsWithValues(ctx, req.Start, matchers...)
-
 	if err != nil {
 		return nil, err
 	}
diff --git a/pkg/ingester/ingester_test.go b/pkg/ingester/ingester_test.go
index 17d34b57dc54..a9108c52c2a1 100644
--- a/pkg/ingester/ingester_test.go
+++ b/pkg/ingester/ingester_test.go
@@ -32,7 +32,7 @@ import (
 	"github.com/grafana/dskit/tenant"
-	"github.com/grafana/loki/v3/pkg/chunkenc"
+	"github.com/grafana/loki/v3/pkg/compression"
 	"github.com/grafana/loki/v3/pkg/distributor/writefailures"
 	"github.com/grafana/loki/v3/pkg/ingester/client"
 	"github.com/grafana/loki/v3/pkg/ingester/index"
@@ -63,7 +63,7 @@ func TestPrepareShutdownMarkerPathNotSet(t *testing.T) {
 	mockRing := mockReadRingWithOneActiveIngester()
-	i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, mockRing)
+	i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, mockRing, nil)
 	require.NoError(t, err)
 	defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck
@@ -88,7 +88,7 @@ func TestPrepareShutdown(t *testing.T) {
 	readRingMock := mockReadRingWithOneActiveIngester()
-	i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock)
+	i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil)
 	require.NoError(t, err)
 	defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck
@@ -151,7 +151,7 @@ func TestIngester_GetStreamRates_Correctness(t *testing.T) {
 	readRingMock := mockReadRingWithOneActiveIngester()
-	i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock)
+	i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil)
 	require.NoError(t, err)
 	defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck
@@ -184,7 +184,7 @@ func
BenchmarkGetStreamRatesAllocs(b *testing.B) { } readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(b, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -210,7 +210,7 @@ func TestIngester(t *testing.T) { readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -397,7 +397,7 @@ func TestIngesterStreamLimitExceeded(t *testing.T) { readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, store, overrides, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, store, overrides, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -697,7 +697,7 @@ func TestValidate(t *testing.T) { }{ { in: Config{ - ChunkEncoding: chunkenc.EncGZIP.String(), + ChunkEncoding: compression.EncGZIP.String(), FlushOpBackoff: backoff.Config{ MinBackoff: 100 * time.Millisecond, MaxBackoff: 10 * time.Second, @@ -708,7 +708,7 @@ func TestValidate(t *testing.T) { MaxChunkAge: time.Minute, }, expected: Config{ - ChunkEncoding: chunkenc.EncGZIP.String(), + ChunkEncoding: compression.EncGZIP.String(), FlushOpBackoff: backoff.Config{ MinBackoff: 100 * time.Millisecond, MaxBackoff: 10 * time.Second, @@ -717,12 +717,12 @@ func TestValidate(t *testing.T) { FlushOpTimeout: 15 * time.Second, IndexShards: index.DefaultIndexShards, MaxChunkAge: time.Minute, - parsedEncoding: chunkenc.EncGZIP, + parsedEncoding: compression.EncGZIP, }, }, { in: Config{ - ChunkEncoding: chunkenc.EncSnappy.String(), + ChunkEncoding: compression.EncSnappy.String(), FlushOpBackoff: backoff.Config{ MinBackoff: 100 * time.Millisecond, MaxBackoff: 10 * time.Second, @@ -732,7 +732,7 @@ func TestValidate(t *testing.T) { IndexShards: index.DefaultIndexShards, }, expected: Config{ - ChunkEncoding: chunkenc.EncSnappy.String(), + ChunkEncoding: compression.EncSnappy.String(), FlushOpBackoff: backoff.Config{ MinBackoff: 100 * time.Millisecond, MaxBackoff: 10 * time.Second, @@ -740,7 +740,7 @@ func TestValidate(t *testing.T) { }, FlushOpTimeout: 15 * time.Second, IndexShards: index.DefaultIndexShards, - parsedEncoding: chunkenc.EncSnappy, + parsedEncoding: compression.EncSnappy, }, }, { @@ -758,7 +758,7 @@ func TestValidate(t *testing.T) { }, { in: Config{ - ChunkEncoding: chunkenc.EncGZIP.String(), + ChunkEncoding: compression.EncGZIP.String(), FlushOpBackoff: backoff.Config{ MinBackoff: 100 * time.Millisecond, MaxBackoff: 10 * time.Second, @@ -771,7 +771,7 @@ func TestValidate(t 
*testing.T) { }, { in: Config{ - ChunkEncoding: chunkenc.EncGZIP.String(), + ChunkEncoding: compression.EncGZIP.String(), FlushOpBackoff: backoff.Config{ MinBackoff: 100 * time.Millisecond, MaxBackoff: 10 * time.Second, @@ -784,7 +784,7 @@ func TestValidate(t *testing.T) { }, { in: Config{ - ChunkEncoding: chunkenc.EncGZIP.String(), + ChunkEncoding: compression.EncGZIP.String(), FlushOpBackoff: backoff.Config{ MinBackoff: 100 * time.Millisecond, MaxBackoff: 10 * time.Second, @@ -819,7 +819,7 @@ func Test_InMemoryLabels(t *testing.T) { readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -874,7 +874,7 @@ func TestIngester_GetDetectedLabels(t *testing.T) { } readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -938,7 +938,7 @@ func TestIngester_GetDetectedLabelsWithQuery(t *testing.T) { } readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, store, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) defer services.StopAndAwaitTerminated(context.Background(), i) //nolint:errcheck @@ -1306,7 +1306,7 @@ func TestStats(t *testing.T) { require.NoError(t, err) readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) i.instances["test"] = defaultInstance(t) @@ -1334,7 +1334,7 @@ func TestVolume(t *testing.T) { require.NoError(t, err) readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) i.instances["test"] = defaultInstance(t) @@ -1414,7 +1414,7 @@ func createIngesterServer(t *testing.T, ingesterConfig Config) (ingesterClient, require.NoError(t, err) readRingMock := 
mockReadRingWithOneActiveIngester() - ing, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + ing, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) listener := bufconn.Listen(1024 * 1024) @@ -1631,7 +1631,7 @@ func TestUpdateOwnedStreams(t *testing.T) { require.NoError(t, err) readRingMock := mockReadRingWithOneActiveIngester() - i, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock) + i, err := New(ingesterConfig, client.Config{}, &mockStore{}, limits, runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil) require.NoError(t, err) i.instances["test"] = defaultInstance(t) diff --git a/pkg/ingester/instance.go b/pkg/ingester/instance.go index e2fd472656a9..72ed01793ce7 100644 --- a/pkg/ingester/instance.go +++ b/pkg/ingester/instance.go @@ -13,7 +13,6 @@ import ( "github.com/go-kit/log/level" "github.com/grafana/dskit/httpgrpc" - "github.com/grafana/dskit/ring" "github.com/grafana/dskit/tenant" "github.com/opentracing/opentracing-go" "github.com/prometheus/client_golang/prometheus" @@ -47,7 +46,6 @@ import ( "github.com/grafana/loki/v3/pkg/util/httpreq" util_log "github.com/grafana/loki/v3/pkg/util/log" mathutil "github.com/grafana/loki/v3/pkg/util/math" - lokiring "github.com/grafana/loki/v3/pkg/util/ring" server_util "github.com/grafana/loki/v3/pkg/util/server" "github.com/grafana/loki/v3/pkg/validation" ) @@ -1175,35 +1173,24 @@ func minTs(stream *logproto.Stream) model.Time { } // For each stream, we check if the stream is owned by the ingester or not and increment/decrement the owned stream count. 
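// ---- editor's aside (not part of this patch) ----
// After this refactor the ring lookup below is no longer hard-wired into the
// instance: the caller injects the ownership check. A hypothetical call site,
// assuming a value named strategy implementing the ownershipStrategy interface
// introduced later in this diff (its isOwnedStream method has the matching
// func(*stream) (bool, error) signature, so the method value can be passed
// directly):
//
//	if err := inst.updateOwnedStreams(strategy.isOwnedStream); err != nil {
//		level.Error(logger).Log("msg", "failed to recalculate owned streams", "err", err)
//	}
// --------------------------------------------------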
-func (i *instance) updateOwnedStreams(ingesterRing ring.ReadRing, ingesterID string) error { +func (i *instance) updateOwnedStreams(isOwnedStream func(*stream) (bool, error)) error { start := time.Now() defer func() { i.metrics.streamsOwnershipCheck.Observe(float64(time.Since(start).Milliseconds())) }() - var descsBuf = make([]ring.InstanceDesc, ingesterRing.ReplicationFactor()+1) - var hostsBuf = make([]string, ingesterRing.ReplicationFactor()+1) - var zoneBuf = make([]string, ingesterRing.ZonesCount()+1) + var err error i.streams.WithLock(func() { i.ownedStreamsSvc.resetStreamCounts() err = i.streams.ForEach(func(s *stream) (bool, error) { - replicationSet, err := ingesterRing.Get(lokiring.TokenFor(i.instanceID, s.labelsString), ring.WriteNoExtend, descsBuf, hostsBuf, zoneBuf) + ownedStream, err := isOwnedStream(s) if err != nil { - return false, fmt.Errorf("error getting replication set for stream %s: %v", s.labelsString, err) + return false, err } - ownedStream := i.isOwnedStream(replicationSet, ingesterID) + i.ownedStreamsSvc.trackStreamOwnership(s.fp, ownedStream) return true, nil }) }) return err } - -func (i *instance) isOwnedStream(replicationSet ring.ReplicationSet, ingesterID string) bool { - for _, instanceDesc := range replicationSet.Instances { - if instanceDesc.Id == ingesterID { - return true - } - } - return false -} diff --git a/pkg/ingester/kafka_consumer.go b/pkg/ingester/kafka_consumer.go new file mode 100644 index 000000000000..c2fe90ee052f --- /dev/null +++ b/pkg/ingester/kafka_consumer.go @@ -0,0 +1,156 @@ +package ingester + +import ( + "context" + "errors" + math "math" + "sync" + "time" + + "github.com/go-kit/log" + "github.com/go-kit/log/level" + "github.com/grafana/dskit/backoff" + "github.com/grafana/dskit/user" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/client_golang/prometheus/promauto" + + "github.com/grafana/loki/v3/pkg/kafka" + "github.com/grafana/loki/v3/pkg/kafka/partition" + "github.com/grafana/loki/v3/pkg/logproto" +) + +type consumerMetrics struct { + consumeLatency prometheus.Histogram + currentOffset prometheus.Gauge +} + +// newConsumerMetrics initializes and returns a new consumerMetrics instance +func newConsumerMetrics(reg prometheus.Registerer) *consumerMetrics { + return &consumerMetrics{ + consumeLatency: promauto.With(reg).NewHistogram(prometheus.HistogramOpts{ + Name: "loki_ingester_partition_records_batch_process_duration_seconds", + Help: "How long a kafka ingester consumer spent processing a batch of records from Kafka.", + NativeHistogramBucketFactor: 1.1, + }), + currentOffset: promauto.With(reg).NewGauge(prometheus.GaugeOpts{ + Name: "loki_ingester_partition_current_offset", + Help: "The current offset of the Kafka ingester consumer.", + }), + } +} + +func NewKafkaConsumerFactory(pusher logproto.PusherServer, logger log.Logger, reg prometheus.Registerer) partition.ConsumerFactory { + metrics := newConsumerMetrics(reg) + return func(committer partition.Committer) (partition.Consumer, error) { + decoder, err := kafka.NewDecoder() + if err != nil { + return nil, err + } + return &kafkaConsumer{ + pusher: pusher, + logger: logger, + decoder: decoder, + metrics: metrics, + committer: committer, + }, nil + } +} + +type kafkaConsumer struct { + pusher logproto.PusherServer + logger log.Logger + decoder *kafka.Decoder + committer partition.Committer + + metrics *consumerMetrics +} + +func (kc *kafkaConsumer) Start(ctx context.Context, recordsChan <-chan []partition.Record) func() { + var wg sync.WaitGroup + 
wg.Add(1) + go func() { + defer wg.Done() + for { + select { + case <-ctx.Done(): + level.Info(kc.logger).Log("msg", "shutting down kafka consumer") + return + case records := <-recordsChan: + kc.consume(ctx, records) + } + } + }() + return wg.Wait +} + +func (kc *kafkaConsumer) consume(ctx context.Context, records []partition.Record) { + if len(records) == 0 { + return + } + var ( + minOffset = int64(math.MaxInt64) + maxOffset = int64(0) + consumeStart = time.Now() + ) + + for _, record := range records { + minOffset = min(minOffset, record.Offset) + maxOffset = max(maxOffset, record.Offset) + } + + level.Debug(kc.logger).Log("msg", "consuming records", "min_offset", minOffset, "max_offset", maxOffset) + for _, record := range records { + stream, err := kc.decoder.DecodeWithoutLabels(record.Content) + if err != nil { + level.Error(kc.logger).Log("msg", "failed to decode record", "error", err) + continue + } + recordCtx := user.InjectOrgID(record.Ctx, record.TenantID) + req := &logproto.PushRequest{ + Streams: []logproto.Stream{stream}, + } + if err := retryWithBackoff(ctx, func(attempts int) error { + if _, err := kc.pusher.Push(recordCtx, req); err != nil { + level.Warn(kc.logger).Log("msg", "failed to push records", "err", err, "offset", record.Offset, "attempts", attempts) + return err + } + return nil + }); err != nil { + level.Error(kc.logger).Log("msg", "exhausted all retry attempts, failed to push records", "err", err, "offset", record.Offset) + } + kc.committer.EnqueueOffset(record.Offset) + } + kc.metrics.consumeLatency.Observe(time.Since(consumeStart).Seconds()) + kc.metrics.currentOffset.Set(float64(maxOffset)) +} + +func canRetry(err error) bool { + return errors.Is(err, ErrReadOnly) +} + +func retryWithBackoff(ctx context.Context, fn func(attempts int) error) error { + err := fn(0) + if err == nil { + return nil + } + if !canRetry(err) { + return err + } + backoff := backoff.New(ctx, backoff.Config{ + MinBackoff: 100 * time.Millisecond, + MaxBackoff: 5 * time.Second, + MaxRetries: 0, // Retry infinitely + }) + backoff.Wait() + for backoff.Ongoing() { + err = fn(backoff.NumRetries()) + if err == nil { + return nil + } + if !canRetry(err) { + return err + } + backoff.Wait() + } + return backoff.Err() +} diff --git a/pkg/ingester/kafka_consumer_test.go b/pkg/ingester/kafka_consumer_test.go new file mode 100644 index 000000000000..f9ac98c513e5 --- /dev/null +++ b/pkg/ingester/kafka_consumer_test.go @@ -0,0 +1,132 @@ +package ingester + +import ( + "context" + "os" + "testing" + "time" + + "github.com/go-kit/log" + "github.com/grafana/dskit/tenant" + "github.com/prometheus/client_golang/prometheus" + "github.com/prometheus/prometheus/model/labels" + "github.com/stretchr/testify/require" + + "github.com/grafana/loki/v3/pkg/kafka" + "github.com/grafana/loki/v3/pkg/kafka/partition" + "github.com/grafana/loki/v3/pkg/logproto" + + "github.com/grafana/loki/pkg/push" +) + +var ( + tenantID = "foo" + streamBar = logproto.Stream{ + Labels: labels.Labels{labels.Label{Name: "stream", Value: "1"}}.String(), + Entries: []logproto.Entry{ + { + Timestamp: time.Unix(0, 1).UTC(), + Line: "1", + }, + { + Timestamp: time.Unix(0, 2).UTC(), + Line: "2", + }, + }, + } + streamFoo = logproto.Stream{ + Labels: labels.Labels{labels.Label{Name: "stream", Value: "2"}}.String(), + Entries: []logproto.Entry{ + { + Timestamp: time.Unix(0, 1).UTC(), + Line: "3", + }, + { + Timestamp: time.Unix(0, 2).UTC(), + Line: "4", + }, + }, + } +) + +type fakePusher struct { + pushes []*logproto.PushRequest + t *testing.T 
+}
+
+func (f *fakePusher) Push(ctx context.Context, in *logproto.PushRequest) (*logproto.PushResponse, error) {
+	tenant, err := tenant.TenantID(ctx)
+	require.NoError(f.t, err)
+	require.Equal(f.t, tenantID, tenant)
+	// we need to copy the incoming request, as it will be reused by the decoder.
+	req := &logproto.PushRequest{}
+	for _, s := range in.Streams {
+		newStream := push.Stream{
+			Labels:  s.Labels,
+			Entries: make([]push.Entry, len(s.Entries)),
+		}
+		copy(newStream.Entries, s.Entries)
+		req.Streams = append(req.Streams, newStream)
+	}
+	f.pushes = append(f.pushes, req)
+	return nil, nil
+}
+
+type noopCommitter struct{}
+
+func (nc *noopCommitter) EnqueueOffset(_ int64) {}
+
+func (noopCommitter) Commit(_ context.Context, _ int64) error { return nil }
+
+func TestConsumer(t *testing.T) {
+	var (
+		toPush []partition.Record
+		offset = int64(0)
+		pusher = &fakePusher{t: t}
+	)
+
+	consumer, err := NewKafkaConsumerFactory(pusher, log.NewLogfmtLogger(os.Stdout), prometheus.NewRegistry())(&noopCommitter{})
+	require.NoError(t, err)
+
+	records, err := kafka.Encode(0, tenantID, streamBar, 10000)
+	require.NoError(t, err)
+
+	for _, record := range records {
+		toPush = append(toPush, partition.Record{
+			Ctx:      context.Background(),
+			TenantID: tenantID,
+			Content:  record.Value,
+			Offset:   offset,
+		})
+		offset++
+	}
+	records, err = kafka.Encode(0, tenantID, streamFoo, 10000)
+	require.NoError(t, err)
+	for _, record := range records {
+		toPush = append(toPush, partition.Record{
+			Ctx:      context.Background(),
+			TenantID: tenantID,
+			Content:  record.Value,
+			Offset:   offset,
+		})
+		offset++
+	}
+
+	ctx, cancel := context.WithCancel(context.Background())
+	recordChan := make(chan []partition.Record)
+	wait := consumer.Start(ctx, recordChan)
+
+	recordChan <- toPush
+
+	cancel()
+	wait()
+
+	require.Equal(t, []*logproto.PushRequest{
+		{
+			Streams: []logproto.Stream{streamBar},
+		},
+		{
+			Streams: []logproto.Stream{streamFoo},
+		},
+	}, pusher.pushes)
+}
diff --git a/pkg/ingester/recalculate_owned_streams.go b/pkg/ingester/recalculate_owned_streams.go
index d3bf79d29f74..b1f6bd62ebfc 100644
--- a/pkg/ingester/recalculate_owned_streams.go
+++ b/pkg/ingester/recalculate_owned_streams.go
@@ -2,49 +2,56 @@ package ingester
 
 import (
 	"context"
+	"fmt"
+	"sync"
 	"time"
 
 	"github.com/go-kit/log"
 	"github.com/go-kit/log/level"
 	"github.com/grafana/dskit/ring"
 	"github.com/grafana/dskit/services"
+	"golang.org/x/exp/slices"
+
+	lokiring "github.com/grafana/loki/v3/pkg/util/ring"
 )
 
-type recalculateOwnedStreams struct {
+type ownershipStrategy interface {
+	checkRingForChanges() (bool, error)
+	isOwnedStream(*stream) (bool, error)
+}
+
+type recalculateOwnedStreamsSvc struct {
 	services.Service
 
 	logger log.Logger
 
+	ownershipStrategy ownershipStrategy
 	instancesSupplier func() []*instance
-	ingesterID        string
-	previousRing      ring.ReplicationSet
-	ingestersRing     ring.ReadRing
 	ticker            *time.Ticker
 }
 
-func newRecalculateOwnedStreams(instancesSupplier func() []*instance, ingesterID string, ring ring.ReadRing, ringPollInterval time.Duration, logger log.Logger) *recalculateOwnedStreams {
-	svc := &recalculateOwnedStreams{
-		ingestersRing:     ring,
+func newRecalculateOwnedStreamsSvc(instancesSupplier func() []*instance, ownershipStrategy ownershipStrategy, ringPollInterval time.Duration, logger log.Logger) *recalculateOwnedStreamsSvc {
+	svc := &recalculateOwnedStreamsSvc{
 		instancesSupplier: instancesSupplier,
-		ingesterID:        ingesterID,
 		logger:            logger,
+		ownershipStrategy: ownershipStrategy,
 	}
 	svc.Service = services.NewTimerService(ringPollInterval, nil,
svc.iteration, nil) return svc } -func (s *recalculateOwnedStreams) iteration(_ context.Context) error { +func (s *recalculateOwnedStreamsSvc) iteration(_ context.Context) error { s.recalculate() return nil } -func (s *recalculateOwnedStreams) recalculate() { +func (s *recalculateOwnedStreamsSvc) recalculate() { level.Info(s.logger).Log("msg", "starting recalculate owned streams job") defer func() { s.updateFixedLimitForAll() level.Info(s.logger).Log("msg", "completed recalculate owned streams job") }() - ringChanged, err := s.checkRingForChanges() + ringChanged, err := s.ownershipStrategy.checkRingForChanges() if err != nil { level.Error(s.logger).Log("msg", "failed to check ring for changes", "err", err) return @@ -61,14 +68,14 @@ func (s *recalculateOwnedStreams) recalculate() { } level.Info(s.logger).Log("msg", "updating streams ownership", "tenant", instance.instanceID) - err := instance.updateOwnedStreams(s.ingestersRing, s.ingesterID) + err := instance.updateOwnedStreams(s.ownershipStrategy.isOwnedStream) if err != nil { level.Error(s.logger).Log("msg", "failed to re-evaluate streams ownership", "tenant", instance.instanceID, "err", err) } } } -func (s *recalculateOwnedStreams) updateFixedLimitForAll() { +func (s *recalculateOwnedStreamsSvc) updateFixedLimitForAll() { for _, instance := range s.instancesSupplier() { oldLimit, newLimit := instance.ownedStreamsSvc.updateFixedLimit() if oldLimit != newLimit { @@ -77,7 +84,36 @@ func (s *recalculateOwnedStreams) updateFixedLimitForAll() { } } -func (s *recalculateOwnedStreams) checkRingForChanges() (bool, error) { +type ownedStreamsIngesterStrategy struct { + logger log.Logger + + ingesterID string + previousRing ring.ReplicationSet + ingestersRing ring.ReadRing + + descsBufPool sync.Pool + hostsBufPool sync.Pool + zoneBufPool sync.Pool +} + +func newOwnedStreamsIngesterStrategy(ingesterID string, ingestersRing ring.ReadRing, logger log.Logger) *ownedStreamsIngesterStrategy { + return &ownedStreamsIngesterStrategy{ + ingesterID: ingesterID, + ingestersRing: ingestersRing, + logger: logger, + descsBufPool: sync.Pool{New: func() interface{} { + return make([]ring.InstanceDesc, ingestersRing.ReplicationFactor()+1) + }}, + hostsBufPool: sync.Pool{New: func() interface{} { + return make([]string, ingestersRing.ReplicationFactor()+1) + }}, + zoneBufPool: sync.Pool{New: func() interface{} { + return make([]string, ingestersRing.ZonesCount()+1) + }}, + } +} + +func (s *ownedStreamsIngesterStrategy) checkRingForChanges() (bool, error) { rs, err := s.ingestersRing.GetAllHealthy(ring.WriteNoExtend) if err != nil { return false, err @@ -87,3 +123,69 @@ func (s *recalculateOwnedStreams) checkRingForChanges() (bool, error) { s.previousRing = rs return ringChanged, nil } + +//nolint:staticcheck +func (s *ownedStreamsIngesterStrategy) isOwnedStream(str *stream) (bool, error) { + descsBuf := s.descsBufPool.Get().([]ring.InstanceDesc) + hostsBuf := s.hostsBufPool.Get().([]string) + zoneBuf := s.zoneBufPool.Get().([]string) + defer func() { + s.descsBufPool.Put(descsBuf[:0]) + s.hostsBufPool.Put(hostsBuf[:0]) + s.zoneBufPool.Put(zoneBuf[:0]) + }() + + replicationSet, err := s.ingestersRing.Get(lokiring.TokenFor(str.tenant, str.labelsString), ring.WriteNoExtend, descsBuf, hostsBuf, zoneBuf) + if err != nil { + return false, fmt.Errorf("error getting replication set for stream %s: %v", str.labelsString, err) + } + return s.isOwnedStreamInner(replicationSet, s.ingesterID), nil +} + +func (s *ownedStreamsIngesterStrategy) isOwnedStreamInner(replicationSet 
ring.ReplicationSet, ingesterID string) bool {
+	for _, instanceDesc := range replicationSet.Instances {
+		if instanceDesc.Id == ingesterID {
+			return true
+		}
+	}
+	return false
+}
+
+type ownedStreamsPartitionStrategy struct {
+	logger log.Logger
+
+	partitionID              int32
+	partitionRingWatcher     ring.PartitionRingReader
+	previousActivePartitions []int32
+	getPartitionShardSize    func(user string) int
+}
+
+func newOwnedStreamsPartitionStrategy(partitionID int32, ring ring.PartitionRingReader, logger log.Logger) *ownedStreamsPartitionStrategy {
+	return &ownedStreamsPartitionStrategy{
+		partitionID:          partitionID,
+		partitionRingWatcher: ring,
+		logger:               logger,
+	}
+}
+
+func (s *ownedStreamsPartitionStrategy) checkRingForChanges() (bool, error) {
+	// When using the partition ring, we consider the ring changed if the set of active partitions has changed.
+	r := s.partitionRingWatcher.PartitionRing()
+	if r.PartitionsCount() == 0 {
+		return false, ring.ErrEmptyRing
+	}
+
+	activePartitions := r.ActivePartitionIDs()
+	ringChanged := !slices.Equal(s.previousActivePartitions, activePartitions)
+	s.previousActivePartitions = activePartitions
+	return ringChanged, nil
+}
+
+func (s *ownedStreamsPartitionStrategy) isOwnedStream(str *stream) (bool, error) {
+	partitionForStream, err := s.partitionRingWatcher.PartitionRing().ActivePartitionForKey(lokiring.TokenFor(str.tenant, str.labelsString))
+	if err != nil {
+		return false, fmt.Errorf("failed to find active partition for stream: %w", err)
+	}
+
+	return partitionForStream == s.partitionID, nil
+}
diff --git a/pkg/ingester/recalculate_owned_streams_test.go b/pkg/ingester/recalculate_owned_streams_test.go
index 91b32baef820..82a733e593d6 100644
--- a/pkg/ingester/recalculate_owned_streams_test.go
+++ b/pkg/ingester/recalculate_owned_streams_test.go
@@ -18,12 +18,13 @@ import (
 	"github.com/grafana/loki/v3/pkg/validation"
 )
 
-func Test_recalculateOwnedStreams_newRecalculateOwnedStreams(t *testing.T) {
+func Test_recalculateOwnedStreams_newRecalculateOwnedStreamsIngester(t *testing.T) {
 	mockInstancesSupplier := &mockTenantsSuplier{tenants: []*instance{}}
 	mockRing := newReadRingMock([]ring.InstanceDesc{
 		{Addr: "test", Timestamp: time.Now().UnixNano(), State: ring.ACTIVE, Tokens: []uint32{1, 2, 3}},
 	}, 0)
-	service := newRecalculateOwnedStreams(mockInstancesSupplier.get, "test", mockRing, 50*time.Millisecond, log.NewNopLogger())
+	strategy := newOwnedStreamsIngesterStrategy("test", mockRing, log.NewNopLogger())
+	service := newRecalculateOwnedStreamsSvc(mockInstancesSupplier.get, strategy, 50*time.Millisecond, log.NewNopLogger())
 	require.Equal(t, 0, mockRing.getAllHealthyCallsCount, "ring must be called only after service's start up")
 	ctx := context.Background()
 	require.NoError(t, service.StartAsync(ctx))
@@ -33,7 +34,7 @@ func Test_recalculateOwnedStreams_newRecalculateOwnedStreams(t *testing.T) {
 	}, 1*time.Second, 50*time.Millisecond, "expected at least two runs of the iteration")
 }
 
-func Test_recalculateOwnedStreams_recalculate(t *testing.T) {
+func Test_recalculateOwnedStreams_recalculateWithIngesterStrategy(t *testing.T) {
 	tests := map[string]struct {
 		featureEnabled            bool
 		expectedOwnedStreamCount  int
@@ -105,7 +106,8 @@ func Test_recalculateOwnedStreams_recalculate(t *testing.T) {
 
 			mockTenantsSupplier := &mockTenantsSuplier{tenants: []*instance{tenant}}
 
-			service := newRecalculateOwnedStreams(mockTenantsSupplier.get, currentIngesterName, mockRing, 50*time.Millisecond, log.NewNopLogger())
+			strategy := newOwnedStreamsIngesterStrategy(currentIngesterName, mockRing,
log.NewNopLogger())
+			service := newRecalculateOwnedStreamsSvc(mockTenantsSupplier.get, strategy, 50*time.Millisecond, log.NewNopLogger())
 
 			//change the limit to assert that fixed limit is updated after the recalculation
 			limits.DefaultLimits().MaxGlobalStreamsPerUser = 50
@@ -153,14 +155,13 @@ func (r *mockStreamsOwnershipRing) Get(streamToken uint32, _ ring.Operation, _ [
 	return set, nil
 }
 
-func Test_recalculateOwnedStreams_checkRingForChanges(t *testing.T) {
+func Test_ownedStreamsIngesterStrategy_checkRingForChanges(t *testing.T) {
 	mockRing := &readRingMock{
 		replicationSet: ring.ReplicationSet{
 			Instances: []ring.InstanceDesc{{Addr: "ingester-0", Timestamp: time.Now().UnixNano(), State: ring.ACTIVE, Tokens: []uint32{100, 200, 300}}},
 		},
 	}
-	mockTenantsSupplier := &mockTenantsSuplier{tenants: []*instance{{}}}
-	service := newRecalculateOwnedStreams(mockTenantsSupplier.get, "ingester-0", mockRing, 50*time.Millisecond, log.NewNopLogger())
+	service := newOwnedStreamsIngesterStrategy("ingester-0", mockRing, log.NewNopLogger())
 
 	ringChanged, err := service.checkRingForChanges()
 	require.NoError(t, err)
@@ -178,6 +179,64 @@ func Test_recalculateOwnedStreams_checkRingForChanges(t *testing.T) {
 	require.True(t, ringChanged)
 }
 
+func newMockPartitionRingWithActivePartitions(activePartitions ...int32) *ring.PartitionRing {
+	partitionRing := ring.PartitionRingDesc{
+		Partitions: map[int32]ring.PartitionDesc{},
+		Owners:     map[string]ring.OwnerDesc{},
+	}
+
+	for _, id := range activePartitions {
+		partitionRing.Partitions[id] = ring.PartitionDesc{
+			Id:     id,
+			Tokens: []uint32{uint32(id)},
+			State:  ring.PartitionActive,
+		}
+		partitionRing.Owners[fmt.Sprintf("test%d", id)] = ring.OwnerDesc{
+			OwnedPartition: id,
+			State:          ring.OwnerActive,
+		}
+	}
+	return ring.NewPartitionRing(partitionRing)
+}
+
+func Test_ownedStreamsPartitionStrategy_checkRingForChanges(t *testing.T) {
+	ringReader := &mockPartitionRingReader{
+		ring: newMockPartitionRingWithActivePartitions(1),
+	}
+	service := newOwnedStreamsPartitionStrategy(1, ringReader, log.NewNopLogger())
+
+	ringChanged, err := service.checkRingForChanges()
+	require.NoError(t, err)
+	require.True(t, ringChanged, "expected ring to be changed because it was not initialized yet")
+
+	ringChanged, err = service.checkRingForChanges()
+	require.NoError(t, err)
+	require.False(t, ringChanged, "expected ring not to be changed because the set of active partitions has not changed")
+
+	ringReader.ring = newMockPartitionRingWithActivePartitions(1, 2)
+
+	ringChanged, err = service.checkRingForChanges()
+	require.NoError(t, err)
+	require.True(t, ringChanged)
+}
+
+func Test_ownedStreamsPartitionStrategy_isOwnedStream(t *testing.T) {
+	ringReader := &mockPartitionRingReader{
+		ring: newMockPartitionRingWithActivePartitions(1, 2, 3),
+	}
+	stream := &stream{tenant: "test1", labelsString: "mock=1"} // has a hash key mapping to partition 1
+
+	service1 := newOwnedStreamsPartitionStrategy(1, ringReader, log.NewNopLogger())
+	owned, err := service1.isOwnedStream(stream)
+	require.NoError(t, err)
+	require.True(t, owned)
+
+	service2 := newOwnedStreamsPartitionStrategy(2, ringReader, log.NewNopLogger())
+	owned, err = service2.isOwnedStream(stream)
+	require.NoError(t, err)
+	require.False(t, owned)
+}
+
 func createStream(t *testing.T, inst *instance, fingerprint int) *stream {
 	lbls := labels.Labels{labels.Label{Name: "mock", Value: strconv.Itoa(fingerprint)}}
 
@@ -195,3 +254,11 @@ type mockTenantsSuplier struct {
 
 func (m *mockTenantsSuplier) get() []*instance {
 	return m.tenants
 }
+
+type
mockPartitionRingReader struct {
+	ring *ring.PartitionRing
+}
+
+func (m mockPartitionRingReader) PartitionRing() *ring.PartitionRing {
+	return m.ring
+}
diff --git a/pkg/ingester/recovery_test.go b/pkg/ingester/recovery_test.go
index 97c55d3da9f8..180d02e954e6 100644
--- a/pkg/ingester/recovery_test.go
+++ b/pkg/ingester/recovery_test.go
@@ -229,7 +229,7 @@ func TestSeriesRecoveryNoDuplicates(t *testing.T) {
 
 	readRingMock := mockReadRingWithOneActiveIngester()
 
-	i, err := New(ingesterConfig, client.Config{}, store, limits, loki_runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock)
+	i, err := New(ingesterConfig, client.Config{}, store, limits, loki_runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil)
 	require.NoError(t, err)
 
 	mkSample := func(i int) *logproto.PushRequest {
@@ -263,7 +263,7 @@ func TestSeriesRecoveryNoDuplicates(t *testing.T) {
 	require.Equal(t, false, iter.Next())
 
 	// create a new ingester now
-	i, err = New(ingesterConfig, client.Config{}, store, limits, loki_runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock)
+	i, err = New(ingesterConfig, client.Config{}, store, limits, loki_runtime.DefaultTenantConfigs(), nil, writefailures.Cfg{}, constants.Loki, log.NewNopLogger(), nil, readRingMock, nil)
 	require.NoError(t, err)
 
 	// recover the checkpointed series
diff --git a/pkg/ingester/stream.go b/pkg/ingester/stream.go
index fe4a644c7110..d1577b1d2fff 100644
--- a/pkg/ingester/stream.go
+++ b/pkg/ingester/stream.go
@@ -454,6 +454,9 @@ func (s *stream) validateEntries(ctx context.Context, entries []logproto.Entry,
 			failedEntriesWithError = append(failedEntriesWithError, entryWithError{&toStore[i], &validation.ErrStreamRateLimit{RateLimit: flagext.ByteSize(limit), Labels: s.labelsString, Bytes: flagext.ByteSize(len(toStore[i].Line))}})
 			rateLimitedBytes += len(toStore[i].Line)
 		}
+
+		// Log only the last error to the write failures manager.
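+		// Entries rejected by this branch all failed for the same reason (the
+		// per-stream rate limit), so one representative error per push keeps the
+		// write failures log informative without flooding it on large batches.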
+		s.writeFailures.Log(s.tenant, failedEntriesWithError[len(failedEntriesWithError)-1].e)
 	}
 
 	s.streamRateCalculator.Record(s.tenant, s.labelHash, s.labelHashNoShard, totalBytes)
diff --git a/pkg/ingester/stream_test.go b/pkg/ingester/stream_test.go
index 6dbd521f1abc..9ac86fbd3015 100644
--- a/pkg/ingester/stream_test.go
+++ b/pkg/ingester/stream_test.go
@@ -14,6 +14,7 @@ import (
 	gokitlog "github.com/go-kit/log"
 	"github.com/prometheus/client_golang/prometheus"
 
+	"github.com/grafana/loki/v3/pkg/compression"
 	"github.com/grafana/loki/v3/pkg/runtime"
 
 	"github.com/grafana/dskit/httpgrpc"
@@ -276,7 +277,7 @@ func TestStreamIterator(t *testing.T) {
 		{"gzipChunk", func() *chunkenc.MemChunk {
 			chunkfmt, headfmt := defaultChunkFormat(t)
 
-			return chunkenc.NewMemChunk(chunkfmt, chunkenc.EncGZIP, headfmt, 256*1024, 0)
+			return chunkenc.NewMemChunk(chunkfmt, compression.EncGZIP, headfmt, 256*1024, 0)
 		}},
 	} {
 		t.Run(chk.name, func(t *testing.T) {
diff --git a/pkg/kafka/config.go b/pkg/kafka/config.go
index f916b145f008..7f981b7b5e73 100644
--- a/pkg/kafka/config.go
+++ b/pkg/kafka/config.go
@@ -1,12 +1,18 @@
 package kafka
 
 import (
+	"context"
 	"errors"
 	"flag"
 	"fmt"
 	"strconv"
 	"strings"
 	"time"
+
+	"github.com/go-kit/log"
+	"github.com/go-kit/log/level"
+	"github.com/twmb/franz-go/pkg/kadm"
+	"github.com/twmb/franz-go/pkg/kgo"
 )
 
 const (
@@ -52,12 +58,13 @@ type Config struct {
 	DialTimeout  time.Duration `yaml:"dial_timeout"`
 	WriteTimeout time.Duration `yaml:"write_timeout"`
 
-	ConsumerGroup string `yaml:"consumer_group"`
+	ConsumerGroup                     string        `yaml:"consumer_group"`
+	ConsumerGroupOffsetCommitInterval time.Duration `yaml:"consumer_group_offset_commit_interval"`
 
 	LastProducedOffsetRetryTimeout time.Duration `yaml:"last_produced_offset_retry_timeout"`
 
-	AutoCreateTopicEnabled bool `yaml:"auto_create_topic_enabled"`
-	// AutoCreateTopicDefaultPartitions int  `yaml:"auto_create_topic_default_partitions"`
+	AutoCreateTopicEnabled           bool `yaml:"auto_create_topic_enabled"`
+	AutoCreateTopicDefaultPartitions int  `yaml:"auto_create_topic_default_partitions"`
 
 	ProducerMaxRecordSizeBytes int   `yaml:"producer_max_record_size_bytes"`
 	ProducerMaxBufferedBytes   int64 `yaml:"producer_max_buffered_bytes"`
@@ -75,11 +82,12 @@ func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
 	f.DurationVar(&cfg.WriteTimeout, prefix+".write-timeout", 10*time.Second, "How long to wait for an incoming write request to be successfully committed to the Kafka backend.")
 
 	f.StringVar(&cfg.ConsumerGroup, prefix+".consumer-group", "", "The consumer group used by the consumer to track the last consumed offset. The consumer group must be different for each ingester. If the configured consumer group contains the '<partition>' placeholder, it is replaced with the actual partition ID owned by the ingester. When empty (recommended), Mimir uses the ingester instance ID to guarantee uniqueness.")
+	f.DurationVar(&cfg.ConsumerGroupOffsetCommitInterval, prefix+".consumer-group-offset-commit-interval", time.Second, "How frequently a consumer should commit the consumed offset to Kafka.
The last committed offset is used at startup to continue consumption from where it left off.")
 	f.DurationVar(&cfg.LastProducedOffsetRetryTimeout, prefix+".last-produced-offset-retry-timeout", 10*time.Second, "How long to retry a failed request to get the last produced offset.")
 
 	f.BoolVar(&cfg.AutoCreateTopicEnabled, prefix+".auto-create-topic-enabled", true, "Enable auto-creation of Kafka topic if it doesn't exist.")
-	// f.IntVar(&cfg.AutoCreateTopicDefaultPartitions, prefix+".auto-create-topic-default-partitions", 0, "When auto-creation of Kafka topic is enabled and this value is positive, Kafka's num.partitions configuration option is set on Kafka brokers with this value when Mimir component that uses Kafka starts. This configuration option specifies the default number of partitions that the Kafka broker uses for auto-created topics. Note that this is a Kafka-cluster wide setting, and applies to any auto-created topic. If the setting of num.partitions fails, Mimir proceeds anyways, but auto-created topics could have an incorrect number of partitions.")
+	f.IntVar(&cfg.AutoCreateTopicDefaultPartitions, prefix+".auto-create-topic-default-partitions", 1000, "When auto-creation of Kafka topic is enabled and this value is positive, Kafka's num.partitions configuration option is set on Kafka brokers with this value when the Loki component that uses Kafka starts. This configuration option specifies the default number of partitions that the Kafka broker uses for auto-created topics. Note that this is a Kafka-cluster wide setting, and applies to any auto-created topic. If the setting of num.partitions fails, Loki proceeds anyway, but auto-created topics could have an incorrect number of partitions.")
 
 	f.IntVar(&cfg.ProducerMaxRecordSizeBytes, prefix+".producer-max-record-size-bytes", maxProducerRecordDataBytesLimit, "The maximum size of a Kafka record data that should be generated by the producer. An incoming write request larger than this size is split into multiple Kafka records. We strongly recommend to not change this setting unless for testing purposes.")
 	f.Int64Var(&cfg.ProducerMaxBufferedBytes, prefix+".producer-max-buffered-bytes", 1024*1024*1024, "The maximum size of (uncompressed) buffered and unacknowledged produced records sent to Kafka. The produce request fails once this limit is reached. This limit is per Kafka client. 0 to disable the limit.")
@@ -107,3 +115,35 @@ func (cfg *Config) GetConsumerGroup(instanceID string, partitionID int32) string
 
 	return strings.ReplaceAll(cfg.ConsumerGroup, "<partition>", strconv.Itoa(int(partitionID)))
 }
+
+// SetDefaultNumberOfPartitionsForAutocreatedTopics tries to set the num.partitions config option on brokers.
+// This is best-effort: if setting the option fails, the error is logged but not returned.
+func (cfg Config) SetDefaultNumberOfPartitionsForAutocreatedTopics(logger log.Logger) {
+	if cfg.AutoCreateTopicDefaultPartitions <= 0 {
+		return
+	}
+
+	cl, err := kgo.NewClient(commonKafkaClientOptions(cfg, nil, logger)...)
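+	// Everything below is best-effort by design: errors are logged and swallowed,
+	// so a broker that rejects AlterConfigs (or a transient client failure) can
+	// never prevent the component from starting.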
+	if err != nil {
+		level.Error(logger).Log("msg", "failed to create kafka client", "err", err)
+		return
+	}
+
+	adm := kadm.NewClient(cl)
+	defer adm.Close()
+
+	defaultNumberOfPartitions := fmt.Sprintf("%d", cfg.AutoCreateTopicDefaultPartitions)
+	_, err = adm.AlterBrokerConfigsState(context.Background(), []kadm.AlterConfig{
+		{
+			Op:    kadm.SetConfig,
+			Name:  "num.partitions",
+			Value: &defaultNumberOfPartitions,
+		},
+	})
+	if err != nil {
+		level.Error(logger).Log("msg", "failed to alter default number of partitions", "err", err)
+		return
+	}
+
+	level.Info(logger).Log("msg", "configured Kafka-wide default number of partitions for auto-created topics (num.partitions)", "value", cfg.AutoCreateTopicDefaultPartitions)
+}
diff --git a/pkg/kafka/config_test.go b/pkg/kafka/config_test.go
new file mode 100644
index 000000000000..7c21e38fd141
--- /dev/null
+++ b/pkg/kafka/config_test.go
@@ -0,0 +1,41 @@
+package kafka
+
+import (
+	"testing"
+
+	"github.com/go-kit/log"
+	"github.com/stretchr/testify/require"
+	"github.com/twmb/franz-go/pkg/kfake"
+	"github.com/twmb/franz-go/pkg/kmsg"
+)
+
+func TestSetDefaultNumberOfPartitionsForAutocreatedTopics(t *testing.T) {
+	cluster, err := kfake.NewCluster(kfake.NumBrokers(1))
+	require.NoError(t, err)
+	t.Cleanup(cluster.Close)
+
+	addrs := cluster.ListenAddrs()
+	require.Len(t, addrs, 1)
+
+	cfg := Config{
+		Address:                          addrs[0],
+		AutoCreateTopicDefaultPartitions: 100,
+	}
+
+	cluster.ControlKey(kmsg.AlterConfigs.Int16(), func(request kmsg.Request) (kmsg.Response, error, bool) {
+		r := request.(*kmsg.AlterConfigsRequest)
+
+		require.Len(t, r.Resources, 1)
+		res := r.Resources[0]
+		require.Equal(t, kmsg.ConfigResourceTypeBroker, res.ResourceType)
+		require.Len(t, res.Configs, 1)
+		cfg := res.Configs[0]
+		require.Equal(t, "num.partitions", cfg.Name)
+		require.NotNil(t, cfg.Value)
+		require.Equal(t, "100", *cfg.Value)
+
+		return &kmsg.AlterConfigsResponse{}, nil, true
+	})
+
+	cfg.SetDefaultNumberOfPartitionsForAutocreatedTopics(log.NewNopLogger())
+}
diff --git a/pkg/kafka/encoding.go b/pkg/kafka/encoding.go
index c4977054f32f..65daf59c25e7 100644
--- a/pkg/kafka/encoding.go
+++ b/pkg/kafka/encoding.go
@@ -167,6 +167,15 @@ func (d *Decoder) Decode(data []byte) (logproto.Stream, labels.Labels, error) {
 	return *d.stream, ls, nil
 }
 
+// DecodeWithoutLabels converts a Kafka record's byte data back into a logproto.Stream without parsing labels.
+func (d *Decoder) DecodeWithoutLabels(data []byte) (logproto.Stream, error) {
+	d.stream.Entries = d.stream.Entries[:0]
+	if err := d.stream.Unmarshal(data); err != nil {
+		return logproto.Stream{}, fmt.Errorf("failed to unmarshal stream: %w", err)
+	}
+	return *d.stream, nil
+}
+
 // sovPush calculates the size of varint-encoded uint64.
 // It is used to determine the number of bytes needed to encode a uint64 value
 // in Protocol Buffers' variable-length integer format.
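For context on the DecodeWithoutLabels helper added above, here is a minimal, hypothetical usage sketch in the spirit of the new pkg/ingester consume path (it assumes kafka.NewDecoder is the Decoder constructor in pkg/kafka/encoding.go; error handling is trimmed to essentials). The point of the variant is that callers which only forward the stream to a Pusher never need the parsed labels.Labels value, so skipping the label parse saves work on the per-record hot path:

    package main

    import (
        "fmt"
        "log"

        "github.com/grafana/loki/v3/pkg/kafka"
        "github.com/grafana/loki/v3/pkg/logproto"
    )

    // buildPushRequest turns one Kafka record payload into a push request.
    // Decode would also parse and return the stream's labels; DecodeWithoutLabels
    // skips that allocation for callers that only forward the raw stream.
    func buildPushRequest(decoder *kafka.Decoder, content []byte) (*logproto.PushRequest, error) {
        stream, err := decoder.DecodeWithoutLabels(content)
        if err != nil {
            return nil, fmt.Errorf("failed to decode record: %w", err)
        }
        return &logproto.PushRequest{Streams: []logproto.Stream{stream}}, nil
    }

    func main() {
        // Assumption: kafka.NewDecoder is the Decoder constructor from pkg/kafka/encoding.go.
        decoder, err := kafka.NewDecoder()
        if err != nil {
            log.Fatal(err)
        }
        // An empty payload unmarshals to an empty stream; this shows the call shape only.
        req, err := buildPushRequest(decoder, nil)
        if err != nil {
            log.Fatal(err)
        }
        fmt.Println("decoded streams:", len(req.Streams))
    }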
diff --git a/pkg/kafka/ingester/consumer.go b/pkg/kafka/ingester/consumer.go index 352916838c40..57abb2b00ff3 100644 --- a/pkg/kafka/ingester/consumer.go +++ b/pkg/kafka/ingester/consumer.go @@ -22,6 +22,7 @@ import ( "github.com/grafana/loki/v3/pkg/ingester-rf1/metastore/metastorepb" "github.com/grafana/loki/v3/pkg/kafka" + "github.com/grafana/loki/v3/pkg/kafka/partition" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/storage/wal" ) @@ -36,17 +37,12 @@ type MetadataStore interface { AddBlock(ctx context.Context, in *metastorepb.AddBlockRequest, opts ...grpc.CallOption) (*metastorepb.AddBlockResponse, error) } -// Committer defines an interface for committing offsets -type Committer interface { - Commit(ctx context.Context, offset int64) error -} - // consumer represents a Kafka consumer that processes and stores log entries type consumer struct { metastoreClient MetadataStore storage ObjectStorage writer *wal.SegmentWriter - committer Committer + committer partition.Committer flushInterval time.Duration maxFlushSize int64 lastOffset int64 @@ -67,8 +63,8 @@ func NewConsumerFactory( maxFlushSize int64, logger log.Logger, reg prometheus.Registerer, -) ConsumerFactory { - return func(committer Committer) (Consumer, error) { +) partition.ConsumerFactory { + return func(committer partition.Committer) (partition.Consumer, error) { writer, err := wal.NewWalSegmentWriter() if err != nil { return nil, err @@ -95,7 +91,7 @@ func NewConsumerFactory( // Start starts the consumer and returns a function to wait for it to finish // It consumes records from the recordsChan, and flushes them to storage periodically. -func (c *consumer) Start(ctx context.Context, recordsChan <-chan []record) func() { +func (c *consumer) Start(ctx context.Context, recordsChan <-chan []partition.Record) func() { var wg sync.WaitGroup wg.Add(1) go func() { @@ -127,7 +123,7 @@ func (c *consumer) Start(ctx context.Context, recordsChan <-chan []record) func( } // consume processes a batch of Kafka records, decoding and storing them -func (c *consumer) consume(records []record) error { +func (c *consumer) consume(records []partition.Record) error { if len(records) == 0 { return nil } @@ -136,8 +132,8 @@ func (c *consumer) consume(records []record) error { maxOffset = int64(0) ) for _, record := range records { - minOffset = min(minOffset, record.offset) - maxOffset = max(maxOffset, record.offset) + minOffset = min(minOffset, record.Offset) + maxOffset = max(maxOffset, record.Offset) } level.Debug(c.logger).Log("msg", "consuming records", "min_offset", minOffset, "max_offset", maxOffset) return c.retryWithBackoff(context.Background(), backoff.Config{ @@ -163,9 +159,9 @@ func (c *consumer) consume(records []record) error { }) } -func (c *consumer) appendRecords(records []record) error { +func (c *consumer) appendRecords(records []partition.Record) error { for _, record := range records { - stream, labels, err := c.decoder.Decode(record.content) + stream, labels, err := c.decoder.Decode(record.Content) if err != nil { return fmt.Errorf("failed to decode record: %w", err) } @@ -184,7 +180,7 @@ func (c *consumer) appendRecords(records []record) error { Parsed: entry.Parsed, }) } - c.writer.Append(record.tenantID, stream.Labels, labels, c.toStore, time.Now()) + c.writer.Append(record.TenantID, stream.Labels, labels, c.toStore, time.Now()) } return nil } @@ -249,7 +245,7 @@ func (c *consumer) flush(ctx context.Context) error { wal.ReportSegmentStats(stats, c.metrics.segmentMetrics) id := 
ulid.MustNew(ulid.Timestamp(time.Now()), rand.Reader).String()
-	if err := c.storage.PutObject(ctx, fmt.Sprintf(wal.Dir+id), c.flushBuf); err != nil {
+	if err := c.storage.PutObject(ctx, wal.Dir+id, c.flushBuf); err != nil {
 		return fmt.Errorf("failed to put object to object storage: %w", err)
 	}
 
diff --git a/pkg/kafka/ingester/consumer_test.go b/pkg/kafka/ingester/consumer_test.go
index 3f0adcce6247..a0baa92ba86a 100644
--- a/pkg/kafka/ingester/consumer_test.go
+++ b/pkg/kafka/ingester/consumer_test.go
@@ -14,6 +14,7 @@ import (
 	"github.com/grafana/loki/v3/pkg/ingester-rf1/metastore/metastorepb"
 	"github.com/grafana/loki/v3/pkg/ingester-rf1/objstore"
 	"github.com/grafana/loki/v3/pkg/kafka"
+	"github.com/grafana/loki/v3/pkg/kafka/partition"
 	"github.com/grafana/loki/v3/pkg/logproto"
 )
 
@@ -32,6 +33,11 @@ func (m *mockCommitter) Commit(_ context.Context, offset int64) error {
 	return nil
 }
 
+func (m *mockCommitter) EnqueueOffset(offset int64) {
+	// For testing purposes, we'll just set the committed offset directly
+	m.committed = offset
+}
+
+// newMockCommitter is referenced by the tests below; the zero value is sufficient.
+func newMockCommitter() *mockCommitter {
+	return &mockCommitter{}
+}
+
 func TestConsumer_PeriodicFlush(t *testing.T) {
 	ctx, cancel := context.WithCancel(context.Background())
 	defer cancel()
@@ -45,12 +51,12 @@ func TestConsumer_PeriodicFlush(t *testing.T) {
 	flushInterval := 100 * time.Millisecond
 	maxFlushSize := int64(1000)
 
-	committer := &mockCommitter{}
+	committer := newMockCommitter()
 	consumerFactory := NewConsumerFactory(metastore, storage, flushInterval, maxFlushSize, log.NewLogfmtLogger(os.Stdout), reg)
 	consumer, err := consumerFactory(committer)
 	require.NoError(t, err)
 
-	recordsChan := make(chan []record)
+	recordsChan := make(chan []partition.Record)
 	_ = consumer.Start(ctx, recordsChan)
 
 	stream := logproto.Stream{
@@ -63,10 +69,10 @@ func TestConsumer_PeriodicFlush(t *testing.T) {
 	encodedRecords, err := kafka.Encode(0, "tenant1", stream, 10<<20)
 	require.NoError(t, err)
 
-	records := []record{{
-		tenantID: "tenant1",
-		content:  encodedRecords[0].Value,
-		offset:   0,
+	records := []partition.Record{{
+		TenantID: "tenant1",
+		Content:  encodedRecords[0].Value,
+		Offset:   0,
 	}}
 
 	recordsChan <- records
@@ -98,12 +104,12 @@ func TestConsumer_ShutdownFlush(t *testing.T) {
 	flushInterval := 1 * time.Hour
 	maxFlushSize := int64(1000)
 
-	committer := &mockCommitter{}
+	committer := newMockCommitter()
 	consumerFactory := NewConsumerFactory(metastore, storage, flushInterval, maxFlushSize, log.NewLogfmtLogger(os.Stdout), reg)
 	consumer, err := consumerFactory(committer)
 	require.NoError(t, err)
 
-	recordsChan := make(chan []record)
+	recordsChan := make(chan []partition.Record)
 	wait := consumer.Start(ctx, recordsChan)
 
 	stream := logproto.Stream{
@@ -116,10 +122,10 @@ func TestConsumer_ShutdownFlush(t *testing.T) {
 	encodedRecords, err := kafka.Encode(0, "tenant1", stream, 10<<20)
 	require.NoError(t, err)
 
-	records := []record{{
-		tenantID: "tenant1",
-		content:  encodedRecords[0].Value,
-		offset:   0,
+	records := []partition.Record{{
+		TenantID: "tenant1",
+		Content:  encodedRecords[0].Value,
+		Offset:   0,
 	}}
 
 	recordsChan <- records
@@ -152,12 +158,12 @@ func TestConsumer_MaxFlushSize(t *testing.T) {
 	flushInterval := 1 * time.Hour
 	maxFlushSize := int64(10)
 
-	committer := &mockCommitter{}
+	committer := newMockCommitter()
 	consumerFactory := NewConsumerFactory(metastore, storage, flushInterval, maxFlushSize, log.NewLogfmtLogger(os.Stdout), reg)
 	consumer, err := consumerFactory(committer)
 	require.NoError(t, err)
 
-	recordsChan := make(chan []record)
+	recordsChan := make(chan []partition.Record)
 	_ = consumer.Start(ctx, recordsChan)
 
 	stream :=
logproto.Stream{ @@ -170,10 +176,10 @@ func TestConsumer_MaxFlushSize(t *testing.T) { encodedRecords, err := kafka.Encode(0, "tenant1", stream, 10<<20) require.NoError(t, err) - records := []record{{ - tenantID: "tenant1", - content: encodedRecords[0].Value, - offset: 0, + records := []partition.Record{{ + TenantID: "tenant1", + Content: encodedRecords[0].Value, + Offset: 0, }} recordsChan <- records diff --git a/pkg/kafka/ingester/ingester.go b/pkg/kafka/ingester/ingester.go index 56421b1b712d..39595df142ba 100644 --- a/pkg/kafka/ingester/ingester.go +++ b/pkg/kafka/ingester/ingester.go @@ -6,9 +6,6 @@ import ( "flag" "fmt" "net/http" - "regexp" - "strconv" - "strings" "time" "github.com/go-kit/log" @@ -21,6 +18,7 @@ import ( "github.com/grafana/loki/v3/pkg/kafka" "github.com/grafana/loki/v3/pkg/kafka/ingester/shutdownmarker" + "github.com/grafana/loki/v3/pkg/kafka/partition" "github.com/grafana/loki/v3/pkg/kafka/partitionring" util_log "github.com/grafana/loki/v3/pkg/util/log" @@ -33,7 +31,6 @@ const ( ) var ( - ingesterIDRegexp = regexp.MustCompile("-([0-9]+)$") defaultFlushInterval = 15 * time.Second defaultFlushSize int64 = 300 << 20 // 300 MB ) @@ -51,8 +48,8 @@ type Config struct { // RegisterFlags registers the flags. func (cfg *Config) RegisterFlags(f *flag.FlagSet) { - cfg.LifecyclerConfig.RegisterFlagsWithPrefix("kafka-ingester", f, util_log.Logger) - cfg.PartitionRingConfig.RegisterFlags(f) + cfg.LifecyclerConfig.RegisterFlagsWithPrefix("kafka-ingester.", f, util_log.Logger) + cfg.PartitionRingConfig.RegisterFlagsWithPrefix("kafka-ingester.", f) f.StringVar(&cfg.ShutdownMarkerPath, "kafka-ingester.shutdown-marker-path", "", "Path where the shutdown marker file is stored. If not set and common.path_prefix is set then common.path_prefix will be used.") f.BoolVar(&cfg.Enabled, "kafka-ingester.enabled", false, "Whether the Kafka-based ingester path is enabled") f.DurationVar(&cfg.FlushInterval, "kafka-ingester.flush-interval", defaultFlushInterval, "The interval at which the ingester will flush and commit offsets to Kafka. If not set, the default flush interval will be used.") @@ -98,19 +95,19 @@ type Ingester struct { lifecyclerWatcher *services.FailureWatcher ingesterPartitionID int32 partitionRingLifecycler *ring.PartitionInstanceLifecycler - partitionReader *PartitionReader + partitionReader *partition.Reader } // New makes a new Ingester. func New(cfg Config, - consumerFactory ConsumerFactory, + consumerFactory partition.ConsumerFactory, logger log.Logger, metricsNamespace string, registerer prometheus.Registerer, ) (*Ingester, error) { metrics := newIngesterMetrics(registerer) - ingesterPartitionID, err := extractIngesterPartitionID(cfg.LifecyclerConfig.ID) + ingesterPartitionID, err := partitionring.ExtractIngesterPartitionID(cfg.LifecyclerConfig.ID) if err != nil { return nil, fmt.Errorf("calculating ingester partition ID: %w", err) } @@ -142,7 +139,7 @@ func New(cfg Config, if err != nil { return nil, err } - i.partitionReader, err = NewPartitionReader(cfg.KafkaConfig, ingesterPartitionID, cfg.LifecyclerConfig.ID, consumerFactory, logger, registerer) + i.partitionReader, err = partition.NewReader(cfg.KafkaConfig, ingesterPartitionID, cfg.LifecyclerConfig.ID, consumerFactory, logger, registerer) if err != nil { return nil, err } @@ -157,25 +154,6 @@ func New(cfg Config, return i, nil } -// ingesterPartitionID returns the partition ID owner the the given ingester. 
-func extractIngesterPartitionID(ingesterID string) (int32, error) { - if strings.Contains(ingesterID, "local") { - return 0, nil - } - - match := ingesterIDRegexp.FindStringSubmatch(ingesterID) - if len(match) == 0 { - return 0, fmt.Errorf("ingester ID %s doesn't match regular expression %q", ingesterID, ingesterIDRegexp.String()) - } - // Parse the ingester sequence number. - ingesterSeq, err := strconv.Atoi(match[1]) - if err != nil { - return 0, fmt.Errorf("no ingester sequence number in ingester ID %s", ingesterID) - } - - return int32(ingesterSeq), nil -} - // ServeHTTP implements the pattern ring status page. func (i *Ingester) ServeHTTP(w http.ResponseWriter, r *http.Request) { i.lifecycler.ServeHTTP(w, r) diff --git a/pkg/kafka/ingester/ingester_test.go b/pkg/kafka/ingester/ingester_test.go index a3bcca72ca3d..c7d62b9593a4 100644 --- a/pkg/kafka/ingester/ingester_test.go +++ b/pkg/kafka/ingester/ingester_test.go @@ -8,7 +8,6 @@ import ( "time" "github.com/go-kit/log" - gokitlog "github.com/go-kit/log" "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/kv/consul" "github.com/grafana/dskit/ring" @@ -28,8 +27,8 @@ func TestPreparePartitionDownscaleHandler(t *testing.T) { storage, err := objstore.NewTestStorage(t) require.NoError(t, err) ing, err := New(cfg, - NewConsumerFactory(NewTestMetastore(), storage, cfg.FlushInterval, cfg.FlushSize, gokitlog.NewNopLogger(), prometheus.NewRegistry()), - gokitlog.NewNopLogger(), "test", prometheus.NewRegistry()) + NewConsumerFactory(NewTestMetastore(), storage, cfg.FlushInterval, cfg.FlushSize, log.NewNopLogger(), prometheus.NewRegistry()), + log.NewNopLogger(), "test", prometheus.NewRegistry()) require.NoError(t, err) err = services.StartAndAwaitRunning(context.Background(), ing) require.NoError(t, err) @@ -99,53 +98,6 @@ func defaultIngesterTestConfig(t testing.TB) Config { return cfg } -func TestExtractIngesterPartitionID(t *testing.T) { - tests := []struct { - name string - ingesterID string - want int32 - wantErr bool - }{ - { - name: "Valid ingester ID", - ingesterID: "ingester-5", - want: 5, - wantErr: false, - }, - { - name: "Local ingester ID", - ingesterID: "ingester-local", - want: 0, - wantErr: false, - }, - { - name: "Invalid ingester ID format", - ingesterID: "invalid-format", - want: 0, - wantErr: true, - }, - { - name: "Invalid sequence number", - ingesterID: "ingester-abc", - want: 0, - wantErr: true, - }, - } - - for _, tt := range tests { - t.Run(tt.name, func(t *testing.T) { - got, err := extractIngesterPartitionID(tt.ingesterID) - if (err != nil) != tt.wantErr { - t.Errorf("extractIngesterPartitionID() error = %v, wantErr %v", err, tt.wantErr) - return - } - if got != tt.want { - t.Errorf("extractIngesterPartitionID() = %v, want %v", got, tt.want) - } - }) - } -} - // TestMetastore is a simple in-memory metastore for testing type TestMetastore struct { blocks map[string][]*metastorepb.BlockMeta diff --git a/pkg/kafka/ingester/partition_committer.go b/pkg/kafka/partition/committer.go similarity index 70% rename from pkg/kafka/ingester/partition_committer.go rename to pkg/kafka/partition/committer.go index a76e363a64e4..f9aeda3f0fc5 100644 --- a/pkg/kafka/ingester/partition_committer.go +++ b/pkg/kafka/partition/committer.go @@ -1,8 +1,9 @@ -package ingester +package partition import ( "context" "strconv" + "sync" "time" "github.com/go-kit/log" @@ -10,10 +11,17 @@ import ( "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/client_golang/prometheus/promauto" "github.com/twmb/franz-go/pkg/kadm" + 
"go.uber.org/atomic" "github.com/grafana/loki/v3/pkg/kafka" ) +// Committer defines an interface for committing offsets +type Committer interface { + Commit(ctx context.Context, offset int64) error + EnqueueOffset(offset int64) +} + // partitionCommitter is responsible for committing offsets for a specific Kafka partition // to the Kafka broker. It also tracks metrics related to the commit process. type partitionCommitter struct { @@ -28,11 +36,15 @@ type partitionCommitter struct { kafkaCfg kafka.Config partitionID int32 consumerGroup string + + toCommit *atomic.Int64 + wg sync.WaitGroup + cancel context.CancelFunc } -// newPartitionCommitter creates and initializes a new partitionCommitter. +// newCommitter creates and initializes a new Committer. // It sets up the necessary metrics and initializes the committer with the provided configuration. -func newPartitionCommitter(kafkaCfg kafka.Config, admClient *kadm.Client, partitionID int32, consumerGroup string, logger log.Logger, reg prometheus.Registerer) *partitionCommitter { +func newCommitter(kafkaCfg kafka.Config, admClient *kadm.Client, partitionID int32, consumerGroup string, logger log.Logger, reg prometheus.Registerer) *partitionCommitter { c := &partitionCommitter{ logger: logger, kafkaCfg: kafkaCfg, @@ -63,14 +75,51 @@ func newPartitionCommitter(kafkaCfg kafka.Config, admClient *kadm.Client, partit Help: "The last consumed offset successfully committed by the partition reader. Set to -1 if not offset has been committed yet.", ConstLabels: prometheus.Labels{"partition": strconv.Itoa(int(partitionID))}, }), + toCommit: atomic.NewInt64(-1), } // Initialise the last committed offset metric to -1 to signal no offset has been committed yet (0 is a valid offset). c.lastCommittedOffset.Set(-1) + if kafkaCfg.ConsumerGroupOffsetCommitInterval > 0 { + c.wg.Add(1) + ctx, cancel := context.WithCancel(context.Background()) + c.cancel = cancel + go c.autoCommitLoop(ctx) + } + return c } +func (r *partitionCommitter) autoCommitLoop(ctx context.Context) { + defer r.wg.Done() + commitTicker := time.NewTicker(r.kafkaCfg.ConsumerGroupOffsetCommitInterval) + defer commitTicker.Stop() + + previousOffset := r.toCommit.Load() + for { + select { + case <-ctx.Done(): + return + case <-commitTicker.C: + currOffset := r.toCommit.Load() + if currOffset == previousOffset { + continue + } + + if err := r.Commit(ctx, currOffset); err == nil { + previousOffset = currOffset + } + } + } +} + +func (r *partitionCommitter) EnqueueOffset(o int64) { + if r.kafkaCfg.ConsumerGroupOffsetCommitInterval > 0 { + r.toCommit.Store(o) + } +} + // commit attempts to commit the given offset to Kafka for the partition this committer is responsible for. // It updates relevant metrics and logs the result of the commit operation. func (r *partitionCommitter) Commit(ctx context.Context, offset int64) (returnErr error) { @@ -101,3 +150,18 @@ func (r *partitionCommitter) Commit(ctx context.Context, offset int64) (returnEr r.lastCommittedOffset.Set(float64(committedOffset.At)) return nil } + +func (r *partitionCommitter) Stop() { + if r.kafkaCfg.ConsumerGroupOffsetCommitInterval <= 0 { + return + } + r.cancel() + r.wg.Wait() + + offset := r.toCommit.Load() + if offset < 0 { + return + } + // Commit has internal timeouts, so this call shouldn't block for too long. 
+	_ = r.Commit(context.Background(), offset)
+}
diff --git a/pkg/kafka/ingester/partition_committer_test.go b/pkg/kafka/partition/committer_test.go
similarity index 95%
rename from pkg/kafka/ingester/partition_committer_test.go
rename to pkg/kafka/partition/committer_test.go
index 8fb823e3f2ed..9ef02f910e5d 100644
--- a/pkg/kafka/ingester/partition_committer_test.go
+++ b/pkg/kafka/partition/committer_test.go
@@ -1,4 +1,4 @@
-package ingester
+package partition
 
 import (
 	"context"
@@ -36,7 +36,7 @@ func TestPartitionCommitter(t *testing.T) {
 	reg := prometheus.NewRegistry()
 	partitionID := int32(1)
 	consumerGroup := "test-consumer-group"
-	committer := newPartitionCommitter(kafkaCfg, admClient, partitionID, consumerGroup, logger, reg)
+	committer := newCommitter(kafkaCfg, admClient, partitionID, consumerGroup, logger, reg)
 
 	// Test committing an offset
 	ctx, cancel := context.WithTimeout(context.Background(), 5*time.Second)
diff --git a/pkg/kafka/ingester/partition_reader.go b/pkg/kafka/partition/reader.go
similarity index 85%
rename from pkg/kafka/ingester/partition_reader.go
rename to pkg/kafka/partition/reader.go
index 5ed70412d9e0..9972d13307e8 100644
--- a/pkg/kafka/ingester/partition_reader.go
+++ b/pkg/kafka/partition/reader.go
@@ -1,4 +1,4 @@
-package ingester
+package partition
 
 import (
 	"context"
@@ -20,10 +20,10 @@ import (
 	"github.com/grafana/loki/v3/pkg/kafka"
 )
 
-// PartitionReader is responsible for reading data from a specific Kafka partition
+// Reader is responsible for reading data from a specific Kafka partition
 // and passing it to the consumer for processing. It is a core component of the
 // Loki ingester's Kafka-based ingestion pipeline.
-type PartitionReader struct {
+type Reader struct {
 	services.Service
 
 	kafkaCfg      kafka.Config
@@ -39,31 +39,31 @@ type PartitionReader struct {
 	reg           prometheus.Registerer
 }
 
-type record struct {
+type Record struct {
 	// Context holds the tracing (and potentially other) info, that the record was enriched with on fetch from Kafka.
-	ctx      context.Context
-	tenantID string
-	content  []byte
-	offset   int64
+	Ctx      context.Context
+	TenantID string
+	Content  []byte
+	Offset   int64
 }
 
 type ConsumerFactory func(committer Committer) (Consumer, error)
 
 type Consumer interface {
-	Start(ctx context.Context, recordsChan <-chan []record) func()
+	Start(ctx context.Context, recordsChan <-chan []Record) func()
 }
 
-// NewPartitionReader creates and initializes a new PartitionReader.
+// NewReader creates and initializes a new Reader.
 // It sets up the basic service and initializes the reader with the provided configuration.
-func NewPartitionReader(
+func NewReader(
 	kafkaCfg kafka.Config,
 	partitionID int32,
 	instanceID string,
 	consumerFactory ConsumerFactory,
 	logger log.Logger,
 	reg prometheus.Registerer,
-) (*PartitionReader, error) {
-	r := &PartitionReader{
+) (*Reader, error) {
+	r := &Reader{
 		kafkaCfg:      kafkaCfg,
 		partitionID:   partitionID,
 		consumerGroup: kafkaCfg.GetConsumerGroup(instanceID, partitionID),
@@ -79,7 +79,7 @@ func NewPartitionReader(
 
 // start initializes the Kafka client and committer for the PartitionReader.
 // This method is called when the PartitionReader service starts.
-func (p *PartitionReader) start(_ context.Context) error {
+func (p *Reader) start(_ context.Context) error {
 	var err error
 	p.client, err = kafka.NewReaderClient(p.kafkaCfg, p.metrics.kprom, p.logger,
 		kgo.ConsumePartitions(map[string]map[int32]kgo.Offset{
@@ -89,14 +89,14 @@ func (p *PartitionReader) start(_ context.Context) error {
 	if err != nil {
 		return errors.Wrap(err, "creating kafka reader client")
 	}
-	p.committer = newPartitionCommitter(p.kafkaCfg, kadm.NewClient(p.client), p.partitionID, p.consumerGroup, p.logger, p.reg)
-
+	p.committer = newCommitter(p.kafkaCfg, kadm.NewClient(p.client), p.partitionID, p.consumerGroup, p.logger, p.reg)
+	// todo: attempt to ensure max lag timestamp on startup.
 	return nil
 }
 
 // run is the main loop of the PartitionReader. It continuously fetches and processes
 // data from Kafka, and sends it to the consumer.
-func (p *PartitionReader) run(ctx context.Context) error {
+func (p *Reader) run(ctx context.Context) error {
 	level.Info(p.logger).Log("msg", "starting partition reader", "partition", p.partitionID, "consumer_group", p.consumerGroup)
 	ctx, cancel := context.WithCancel(ctx)
 	defer cancel()
@@ -110,11 +110,12 @@ func (p *PartitionReader) run(ctx context.Context) error {
 	wait := consumer.Start(ctx, recordsChan)
 
 	wait()
+	p.committer.Stop()
 	return nil
 }
 
-func (p *PartitionReader) startFetchLoop(ctx context.Context) <-chan []record {
-	records := make(chan []record)
+func (p *Reader) startFetchLoop(ctx context.Context) <-chan []Record {
+	records := make(chan []Record)
 	go func() {
 		for {
 			select {
@@ -129,7 +130,7 @@ func (p *PartitionReader) startFetchLoop(ctx context.Context) <-chan []record {
 }
 
 // logFetchErrors logs any errors encountered during the fetch operation.
-func (p *PartitionReader) logFetchErrors(fetches kgo.Fetches) {
+func (p *Reader) logFetchErrors(fetches kgo.Fetches) {
 	mErr := multierror.New()
 	fetches.EachError(func(topic string, partition int32, err error) {
 		if errors.Is(err, context.Canceled) {
@@ -148,7 +149,7 @@ func (p *PartitionReader) logFetchErrors(fetches kgo.Fetches) {
 }
 
 // pollFetches retrieves the next batch of records from Kafka and measures the fetch duration.
-func (p *PartitionReader) poll(ctx context.Context) []record {
+func (p *Reader) poll(ctx context.Context) []Record {
 	defer func(start time.Time) {
 		p.metrics.fetchWaitDuration.Observe(time.Since(start).Seconds())
 	}(time.Now())
@@ -159,23 +160,27 @@ func (p *PartitionReader) poll(ctx context.Context) []record {
 	if fetches.NumRecords() == 0 {
 		return nil
 	}
-	records := make([]record, 0, fetches.NumRecords())
+	records := make([]Record, 0, fetches.NumRecords())
 	fetches.EachRecord(func(rec *kgo.Record) {
-		records = append(records, record{
+		if rec.Partition != p.partitionID {
+			level.Error(p.logger).Log("msg", "wrong partition record received", "partition", rec.Partition, "expected_partition", p.partitionID)
+			return
+		}
+		records = append(records, Record{
 			// This context carries the tracing data for this individual record;
 			// kotel populates this data when it fetches the messages.
-			ctx:      rec.Context,
-			tenantID: string(rec.Key),
-			content:  rec.Value,
-			offset:   rec.Offset,
+			Ctx:      rec.Context,
+			TenantID: string(rec.Key),
+			Content:  rec.Value,
+			Offset:   rec.Offset,
 		})
 	})
-	p.lastProcessedOffset = records[len(records)-1].offset
+	p.lastProcessedOffset = records[len(records)-1].Offset
 	return records
 }
 
 // recordFetchesMetrics updates various metrics related to the fetch operation.
-func (p *PartitionReader) recordFetchesMetrics(fetches kgo.Fetches) { +func (p *Reader) recordFetchesMetrics(fetches kgo.Fetches) { var ( now = time.Now() numRecords = 0 diff --git a/pkg/kafka/ingester/partition_reader_test.go b/pkg/kafka/partition/reader_test.go similarity index 83% rename from pkg/kafka/ingester/partition_reader_test.go rename to pkg/kafka/partition/reader_test.go index 2bc6db474765..addc5779bb6a 100644 --- a/pkg/kafka/ingester/partition_reader_test.go +++ b/pkg/kafka/partition/reader_test.go @@ -1,4 +1,4 @@ -package ingester +package partition import ( "context" @@ -21,17 +21,17 @@ import ( type mockConsumer struct { mock.Mock - recordsChan chan []record + recordsChan chan []Record wg sync.WaitGroup } func newMockConsumer() *mockConsumer { return &mockConsumer{ - recordsChan: make(chan []record, 100), + recordsChan: make(chan []Record, 100), } } -func (m *mockConsumer) Start(ctx context.Context, recordsChan <-chan []record) func() { +func (m *mockConsumer) Start(ctx context.Context, recordsChan <-chan []Record) func() { m.wg.Add(1) go func() { defer m.wg.Done() @@ -56,11 +56,11 @@ func TestPartitionReader_BasicFunctionality(t *testing.T) { _, kafkaCfg := testkafka.CreateCluster(t, 1, "test-topic") consumer := newMockConsumer() - consumerFactory := func(committer Committer) (Consumer, error) { + consumerFactory := func(_ Committer) (Consumer, error) { return consumer, nil } - partitionReader, err := NewPartitionReader(kafkaCfg, 0, "test-consumer-group", consumerFactory, log.NewNopLogger(), prometheus.NewRegistry()) + partitionReader, err := NewReader(kafkaCfg, 0, "test-consumer-group", consumerFactory, log.NewNopLogger(), prometheus.NewRegistry()) require.NoError(t, err) producer, err := kafka.NewWriterClient(kafkaCfg, 100, log.NewNopLogger(), prometheus.NewRegistry()) require.NoError(t, err) @@ -90,8 +90,8 @@ func TestPartitionReader_BasicFunctionality(t *testing.T) { select { case receivedRecords := <-consumer.recordsChan: require.Len(t, receivedRecords, 1) - assert.Equal(t, "test-tenant", receivedRecords[0].tenantID) - assert.Equal(t, records[0].Value, receivedRecords[0].content) + assert.Equal(t, "test-tenant", receivedRecords[0].TenantID) + assert.Equal(t, records[0].Value, receivedRecords[0].Content) case <-time.After(1 * time.Second): t.Fatal("Timeout waiting for records") } diff --git a/pkg/kafka/partitionring/parition_ring_test.go b/pkg/kafka/partitionring/parition_ring_test.go new file mode 100644 index 000000000000..ad24e0c4ff1d --- /dev/null +++ b/pkg/kafka/partitionring/parition_ring_test.go @@ -0,0 +1,50 @@ +package partitionring + +import "testing" + +func TestExtractIngesterPartitionID(t *testing.T) { + tests := []struct { + name string + ingesterID string + want int32 + wantErr bool + }{ + { + name: "Valid ingester ID", + ingesterID: "ingester-5", + want: 5, + wantErr: false, + }, + { + name: "Local ingester ID", + ingesterID: "ingester-local", + want: 0, + wantErr: false, + }, + { + name: "Invalid ingester ID format", + ingesterID: "invalid-format", + want: 0, + wantErr: true, + }, + { + name: "Invalid sequence number", + ingesterID: "ingester-abc", + want: 0, + wantErr: true, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got, err := ExtractIngesterPartitionID(tt.ingesterID) + if (err != nil) != tt.wantErr { + t.Errorf("extractIngesterPartitionID() error = %v, wantErr %v", err, tt.wantErr) + return + } + if got != tt.want { + t.Errorf("extractIngesterPartitionID() = %v, want %v", got, tt.want) + } + }) + } +} diff 
--git a/pkg/kafka/partitionring/partition_ring.go b/pkg/kafka/partitionring/partition_ring.go
index dedfb8ac33bb..15dad003dd93 100644
--- a/pkg/kafka/partitionring/partition_ring.go
+++ b/pkg/kafka/partitionring/partition_ring.go
@@ -2,12 +2,18 @@ package partitionring
 
 import (
 	"flag"
+	"fmt"
+	"regexp"
+	"strconv"
+	"strings"
 	"time"
 
 	"github.com/grafana/dskit/kv"
 	"github.com/grafana/dskit/ring"
 )
 
+var ingesterIDRegexp = regexp.MustCompile("-([0-9]+)$")
+
 type Config struct {
 	KVStore kv.Config `yaml:"kvstore" doc:"description=The key-value store used to share the hash ring across multiple instances. This option needs to be set on ingesters, distributors, queriers, and rulers when running in microservices mode."`
 
@@ -25,14 +31,14 @@ type Config struct {
 }
 
 // RegisterFlags adds the flags required to config this to the given FlagSet
-func (cfg *Config) RegisterFlags(f *flag.FlagSet) {
+func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) {
 	// Ring flags
 	cfg.KVStore.Store = "memberlist" // Override default value.
-	cfg.KVStore.RegisterFlagsWithPrefix("ingester.partition-ring.", "collectors/", f)
+	cfg.KVStore.RegisterFlagsWithPrefix(prefix+"partition-ring.", "collectors/", f)
 
-	f.IntVar(&cfg.MinOwnersCount, "ingester.partition-ring.min-partition-owners-count", 1, "Minimum number of owners to wait before a PENDING partition gets switched to ACTIVE.")
-	f.DurationVar(&cfg.MinOwnersDuration, "ingester.partition-ring.min-partition-owners-duration", 10*time.Second, "How long the minimum number of owners are enforced before a PENDING partition gets switched to ACTIVE.")
-	f.DurationVar(&cfg.DeleteInactivePartitionAfter, "ingester.partition-ring.delete-inactive-partition-after", 13*time.Hour, "How long to wait before an INACTIVE partition is eligible for deletion. The partition is deleted only if it has been in INACTIVE state for at least the configured duration and it has no owners registered. A value of 0 disables partitions deletion.")
+	f.IntVar(&cfg.MinOwnersCount, prefix+"partition-ring.min-partition-owners-count", 1, "Minimum number of owners to wait before a PENDING partition gets switched to ACTIVE.")
+	f.DurationVar(&cfg.MinOwnersDuration, prefix+"partition-ring.min-partition-owners-duration", 10*time.Second, "How long the minimum number of owners are enforced before a PENDING partition gets switched to ACTIVE.")
+	f.DurationVar(&cfg.DeleteInactivePartitionAfter, prefix+"partition-ring.delete-inactive-partition-after", 13*time.Hour, "How long to wait before an INACTIVE partition is eligible for deletion. The partition is deleted only if it has been in INACTIVE state for at least the configured duration and it has no owners registered. A value of 0 disables partitions deletion.")
 }
 
 func (cfg *Config) ToLifecyclerConfig(partitionID int32, instanceID string) ring.PartitionInstanceLifecyclerConfig {
@@ -45,3 +51,22 @@ func (cfg *Config) ToLifecyclerConfig(partitionID int32, instanceID string) ring
 		PollingInterval: cfg.lifecyclerPollingInterval,
 	}
 }
+
+// ExtractIngesterPartitionID returns the partition ID owned by the given ingester.
+func ExtractIngesterPartitionID(ingesterID string) (int32, error) {
+	if strings.Contains(ingesterID, "local") {
+		return 0, nil
+	}
+
+	match := ingesterIDRegexp.FindStringSubmatch(ingesterID)
+	if len(match) == 0 {
+		return 0, fmt.Errorf("ingester ID %s doesn't match regular expression %q", ingesterID, ingesterIDRegexp.String())
+	}
+	// Parse the ingester sequence number.
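+	// match[1] holds the digits captured from the trailing "-N" suffix, e.g. "5"
+	// for "ingester-5"; Atoi can only fail here if the digits overflow the int range.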
+ ingesterSeq, err := strconv.Atoi(match[1]) + if err != nil { + return 0, fmt.Errorf("no ingester sequence number in ingester ID %s", ingesterID) + } + + return int32(ingesterSeq), nil +} diff --git a/pkg/kafka/reader_client.go b/pkg/kafka/reader_client.go index 1b8c6b3bc1dc..9237686fee60 100644 --- a/pkg/kafka/reader_client.go +++ b/pkg/kafka/reader_client.go @@ -13,10 +13,10 @@ import ( ) // NewReaderClient returns the kgo.Client that should be used by the Reader. -func NewReaderClient(cfg Config, metrics *kprom.Metrics, logger log.Logger, opts ...kgo.Opt) (*kgo.Client, error) { +func NewReaderClient(kafkaCfg Config, metrics *kprom.Metrics, logger log.Logger, opts ...kgo.Opt) (*kgo.Client, error) { const fetchMaxBytes = 100_000_000 - opts = append(opts, commonKafkaClientOptions(cfg, metrics, logger)...) + opts = append(opts, commonKafkaClientOptions(kafkaCfg, metrics, logger)...) opts = append(opts, kgo.FetchMinBytes(1), kgo.FetchMaxBytes(fetchMaxBytes), @@ -32,7 +32,9 @@ func NewReaderClient(cfg Config, metrics *kprom.Metrics, logger log.Logger, opts if err != nil { return nil, errors.Wrap(err, "creating kafka client") } - + if kafkaCfg.AutoCreateTopicEnabled { + kafkaCfg.SetDefaultNumberOfPartitionsForAutocreatedTopics(logger) + } return client, nil } diff --git a/pkg/kafka/tee/tee.go b/pkg/kafka/tee/tee.go deleted file mode 100644 index 2228883efb32..000000000000 --- a/pkg/kafka/tee/tee.go +++ /dev/null @@ -1,174 +0,0 @@ -package tee - -import ( - "context" - "fmt" - "time" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - "github.com/grafana/dskit/ring" - "github.com/grafana/dskit/user" - "github.com/prometheus/client_golang/prometheus" - "github.com/prometheus/client_golang/prometheus/promauto" - "github.com/twmb/franz-go/pkg/kgo" - - "github.com/grafana/loki/v3/pkg/distributor" - "github.com/grafana/loki/v3/pkg/kafka" -) - -const writeTimeout = time.Minute - -// Tee represents a component that duplicates log streams to Kafka. -type Tee struct { - logger log.Logger - producer *kafka.Producer - partitionRing ring.PartitionRingReader - cfg kafka.Config - - ingesterAppends *prometheus.CounterVec - writeLatency prometheus.Histogram - writeBytesTotal prometheus.Counter - recordsPerRequest prometheus.Histogram -} - -// NewTee creates and initializes a new Tee instance. 
-// -// Parameters: -// - cfg: Kafka configuration -// - metricsNamespace: Namespace for Prometheus metrics -// - registerer: Prometheus registerer for metrics -// - logger: Logger instance -// - partitionRing: Ring for managing partitions -// -// Returns: -// - A new Tee instance and any error encountered during initialization -func NewTee( - cfg kafka.Config, - metricsNamespace string, - registerer prometheus.Registerer, - logger log.Logger, - partitionRing ring.PartitionRingReader, -) (*Tee, error) { - registerer = prometheus.WrapRegistererWithPrefix(metricsNamespace+"_", registerer) - - kafkaClient, err := kafka.NewWriterClient(cfg, 20, logger, registerer) - if err != nil { - return nil, fmt.Errorf("failed to start kafka client: %w", err) - } - producer := kafka.NewProducer(kafkaClient, cfg.ProducerMaxBufferedBytes, - prometheus.WrapRegistererWithPrefix("_kafka_ingester_", registerer)) - - t := &Tee{ - logger: log.With(logger, "component", "kafka-tee"), - ingesterAppends: promauto.With(registerer).NewCounterVec(prometheus.CounterOpts{ - Name: "kafka_ingester_appends_total", - Help: "The total number of appends sent to kafka ingest path.", - }, []string{"partition", "status"}), - producer: producer, - partitionRing: partitionRing, - cfg: cfg, - // Metrics. - writeLatency: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{ - Name: "kafka_ingester_tee_latency_seconds", - Help: "Latency to write an incoming request to the ingest storage.", - NativeHistogramBucketFactor: 1.1, - NativeHistogramMinResetDuration: 1 * time.Hour, - NativeHistogramMaxBucketNumber: 100, - Buckets: prometheus.DefBuckets, - }), - writeBytesTotal: promauto.With(registerer).NewCounter(prometheus.CounterOpts{ - Name: "kafka_ingester_tee_sent_bytes_total", - Help: "Total number of bytes sent to the ingest storage.", - }), - recordsPerRequest: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{ - Name: "kafka_ingester_tee_records_per_write_request", - Help: "The number of records a single per-partition write request has been split into.", - Buckets: prometheus.ExponentialBuckets(1, 2, 8), - }), - } - - return t, nil -} - -// Duplicate implements the distributor.Tee interface, which is used to duplicate -// distributor requests to pattern ingesters. It asynchronously sends each stream -// to Kafka. -// -// Parameters: -// - tenant: The tenant identifier -// - streams: A slice of KeyedStream to be duplicated -func (t *Tee) Duplicate(tenant string, streams []distributor.KeyedStream) { - for idx := range streams { - go func(stream distributor.KeyedStream) { - if err := t.sendStream(tenant, stream); err != nil { - level.Error(t.logger).Log("msg", "failed to send stream to kafka", "err", err) - } - }(streams[idx]) - } -} - -func (t *Tee) Close() { - t.producer.Close() -} - -// sendStream sends a single stream to Kafka. 
-// -// Parameters: -// - tenant: The tenant identifier -// - stream: The KeyedStream to be sent -// -// Returns: -// - An error if the stream couldn't be sent successfully -func (t *Tee) sendStream(tenant string, stream distributor.KeyedStream) error { - if len(stream.Stream.Entries) == 0 { - return nil - } - partitionID, err := t.partitionRing.PartitionRing().ActivePartitionForKey(stream.HashKey) - if err != nil { - t.ingesterAppends.WithLabelValues("partition_unknown", "fail").Inc() - return fmt.Errorf("failed to find active partition for stream: %w", err) - } - - startTime := time.Now() - - records, err := kafka.Encode(partitionID, tenant, stream.Stream, t.cfg.ProducerMaxRecordSizeBytes) - if err != nil { - t.ingesterAppends.WithLabelValues(fmt.Sprintf("partition_%d", partitionID), "fail").Inc() - return fmt.Errorf("failed to marshal write request to records: %w", err) - } - - t.recordsPerRequest.Observe(float64(len(records))) - - ctx, cancel := context.WithTimeout(user.InjectOrgID(context.Background(), tenant), writeTimeout) - defer cancel() - produceResults := t.producer.ProduceSync(ctx, records) - - if count, sizeBytes := successfulProduceRecordsStats(produceResults); count > 0 { - t.writeLatency.Observe(time.Since(startTime).Seconds()) - t.writeBytesTotal.Add(float64(sizeBytes)) - } - - var finalErr error - for _, result := range produceResults { - if result.Err != nil { - t.ingesterAppends.WithLabelValues(fmt.Sprintf("partition_%d", partitionID), "fail").Inc() - finalErr = err - } else { - t.ingesterAppends.WithLabelValues(fmt.Sprintf("partition_%d", partitionID), "success").Inc() - } - } - - return finalErr -} - -func successfulProduceRecordsStats(results kgo.ProduceResults) (count, sizeBytes int) { - for _, res := range results { - if res.Err == nil && res.Record != nil { - count++ - sizeBytes += len(res.Record.Value) - } - } - - return -} diff --git a/pkg/kafka/tee/tee_test.go b/pkg/kafka/tee/tee_test.go deleted file mode 100644 index 2431f42033fc..000000000000 --- a/pkg/kafka/tee/tee_test.go +++ /dev/null @@ -1,50 +0,0 @@ -package tee - -import ( - "os" - "testing" - "time" - - "github.com/go-kit/log" - "github.com/grafana/dskit/ring" - "github.com/prometheus/client_golang/prometheus" - "github.com/stretchr/testify/require" - - "github.com/grafana/loki/v3/pkg/distributor" - "github.com/grafana/loki/v3/pkg/kafka/testkafka" - - "github.com/grafana/loki/pkg/push" -) - -func TestPushKafkaRecords(t *testing.T) { - _, cfg := testkafka.CreateCluster(t, 1, "topic") - tee, err := NewTee(cfg, "test", prometheus.NewRegistry(), log.NewLogfmtLogger(os.Stdout), newTestPartitionRing()) - require.NoError(t, err) - - err = tee.sendStream("test", distributor.KeyedStream{ - HashKey: 1, - Stream: push.Stream{ - Labels: `{foo="bar"}`, - Entries: []push.Entry{ - {Timestamp: time.Now(), Line: "test"}, - }, - }, - }) - require.NoError(t, err) -} - -type testPartitionRing struct { - partitionRing *ring.PartitionRing -} - -func (t *testPartitionRing) PartitionRing() *ring.PartitionRing { - return t.partitionRing -} - -func newTestPartitionRing() ring.PartitionRingReader { - desc := ring.NewPartitionRingDesc() - desc.AddPartition(0, ring.PartitionActive, time.Now()) - return &testPartitionRing{ - partitionRing: ring.NewPartitionRing(*desc), - } -} diff --git a/pkg/kafka/writer_client.go b/pkg/kafka/writer_client.go index bb6fbb8082c5..ddd12a646d69 100644 --- a/pkg/kafka/writer_client.go +++ b/pkg/kafka/writer_client.go @@ -68,7 +68,7 @@ func NewWriterClient(kafkaCfg Config, maxInflightProduceRequests 
int, logger log // When a Produce request to Kafka fail, the client will retry up until the RecordDeliveryTimeout is reached. // Once the timeout is reached, the Produce request will fail and all other buffered requests in the client // (for the same partition) will fail too. See kgo.RecordDeliveryTimeout() documentation for more info. - kgo.RecordRetries(math.MaxInt64), + kgo.RecordRetries(math.MaxInt), kgo.RecordDeliveryTimeout(kafkaCfg.WriteTimeout), kgo.ProduceRequestTimeout(kafkaCfg.WriteTimeout), kgo.RequestTimeoutOverhead(writerRequestTimeoutOverhead), @@ -79,7 +79,9 @@ func NewWriterClient(kafkaCfg Config, maxInflightProduceRequests int, logger log kgo.MaxBufferedRecords(math.MaxInt), // Use a high value to set it as unlimited, because the client doesn't support "0 as unlimited". kgo.MaxBufferedBytes(0), ) - + if kafkaCfg.AutoCreateTopicEnabled { + kafkaCfg.SetDefaultNumberOfPartitionsForAutocreatedTopics(logger) + } return kgo.NewClient(opts...) } diff --git a/pkg/logql/accumulator.go b/pkg/logql/accumulator.go index 434af93cb3c2..3313e9c17d42 100644 --- a/pkg/logql/accumulator.go +++ b/pkg/logql/accumulator.go @@ -86,6 +86,7 @@ func (a *QuantileSketchAccumulator) Accumulate(_ context.Context, res logqlmodel var err error a.matrix, err = a.matrix.Merge(data) + a.stats.Merge(res.Statistics) return err } diff --git a/pkg/logql/engine.go b/pkg/logql/engine.go index 23c3073fb728..c44ee06c1694 100644 --- a/pkg/logql/engine.go +++ b/pkg/logql/engine.go @@ -265,7 +265,6 @@ func (q *query) Exec(ctx context.Context) (logqlmodel.Result, error) { sp.LogKV(statResult.KVList()...) status, _ := server.ClientHTTPStatusAndError(err) - if q.record { RecordRangeAndInstantQueryMetrics(ctx, q.logger, q.params, strconv.Itoa(status), statResult, data) } diff --git a/pkg/logql/log/labels.go b/pkg/logql/log/labels.go index c68fe1af0e5b..c8d0bcb31ebb 100644 --- a/pkg/logql/log/labels.go +++ b/pkg/logql/log/labels.go @@ -327,20 +327,52 @@ func (b *LabelsBuilder) Get(key string) (string, bool) { // Del deletes the label of the given name. func (b *LabelsBuilder) Del(ns ...string) *LabelsBuilder { for _, n := range ns { - for category, lbls := range b.add { - for i, a := range lbls { - if a.Name == n { - b.add[category] = append(lbls[:i], lbls[i+1:]...) - } - } + for category := range b.add { + b.deleteWithCategory(LabelCategory(category), n) } b.del = append(b.del, n) } return b } +// deleteWithCategory removes the label from the specified category +func (b *LabelsBuilder) deleteWithCategory(category LabelCategory, n string) { + for i, l := range b.add[category] { + if l.Name == n { + b.add[category] = append(b.add[category][:i], b.add[category][i+1:]...) + } + } +} + // Set the name/value pair as a label. +// The value `v` may not be set if a category with higher preference already contains `n`. +// Category preference goes as Parsed > Structured Metadata > Stream. func (b *LabelsBuilder) Set(category LabelCategory, n, v string) *LabelsBuilder { + // Parsed takes precedence over Structured Metadata and Stream labels. + // If category is Parsed, we delete `n` from the structured metadata and stream labels. + if category == ParsedLabel { + b.deleteWithCategory(StructuredMetadataLabel, n) + b.deleteWithCategory(StreamLabel, n) + } + + // Structured Metadata takes precedence over Stream labels. + // If category is `StructuredMetadataLabel`, we delete `n` from the stream labels. + // If `n` exists in the parsed labels, we won't overwrite its value and we just return what we have.
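+ // The net effect: a label name set through this builder lives in at most one category at a time.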
+ if category == StructuredMetadataLabel { + b.deleteWithCategory(StreamLabel, n) + if labelsContain(b.add[ParsedLabel], n) { + return b + } + } + + // Finally, if category is `StreamLabel` and `n` already exists in either the structured metadata or + // parsed labels, the `Set` operation is a no-op and we return the unmodified labels builder. + if category == StreamLabel { + if labelsContain(b.add[StructuredMetadataLabel], n) || labelsContain(b.add[ParsedLabel], n) { + return b + } + } + for i, a := range b.add[category] { if a.Name == n { b.add[category][i].Value = v @@ -430,6 +462,7 @@ func (b *LabelsBuilder) UnsortedLabels(buf labels.Labels, categories ...LabelCat } else { buf = buf[:0] } + if categoriesContain(categories, StreamLabel) { Outer: for _, l := range b.base { @@ -439,20 +472,38 @@ func (b *LabelsBuilder) UnsortedLabels(buf labels.Labels, categories ...LabelCat continue Outer } } - // Skip stream labels which value will be replaced - for _, lbls := range b.add { - for _, la := range lbls { - if l.Name == la.Name { - continue Outer - } - } + + // Skip stream labels whose value will be replaced by structured metadata + if labelsContain(b.add[StructuredMetadataLabel], l.Name) { + continue + } + + // Skip stream labels whose value will be replaced by parsed labels + if labelsContain(b.add[ParsedLabel], l.Name) { + continue + } + + // Take value from stream label if present + if labelsContain(b.add[StreamLabel], l.Name) { + buf = append(buf, labels.Label{Name: l.Name, Value: b.add[StreamLabel].Get(l.Name)}) + } else { + buf = append(buf, l) + } + } + } + + if categoriesContain(categories, StructuredMetadataLabel) { + for _, l := range b.add[StructuredMetadataLabel] { + if labelsContain(b.add[ParsedLabel], l.Name) { + continue } + buf = append(buf, l) } } - for _, category := range categories { - buf = append(buf, b.add[category]...) + if categoriesContain(categories, ParsedLabel) { + buf = append(buf, b.add[ParsedLabel]...)
} if (b.HasErr() || b.HasErrorDetails()) && categoriesContain(categories, ParsedLabel) { buf = b.appendErrors(buf) @@ -566,6 +617,15 @@ func flattenLabels(buf labels.Labels, many ...labels.Labels) labels.Labels { return buf } +func labelsContain(labels labels.Labels, name string) bool { + for _, l := range labels { + if l.Name == name { + return true + } + } + return false +} + func (b *BaseLabelsBuilder) toUncategorizedResult(buf labels.Labels) LabelsResult { hash := b.hasher.Hash(buf) if cached, ok := b.resultCache[hash]; ok { diff --git a/pkg/logql/log/labels_test.go b/pkg/logql/log/labels_test.go index 97c9a8899c22..7f543a48d7d8 100644 --- a/pkg/logql/log/labels_test.go +++ b/pkg/logql/log/labels_test.go @@ -1,6 +1,7 @@ package log import ( + "sort" "testing" "github.com/prometheus/prometheus/model/labels" @@ -198,6 +199,185 @@ func TestLabelsBuilder_LabelsResult(t *testing.T) { assert.Equal(t, expectedStreamLbls, actual.Stream()) assert.Equal(t, expectedStucturedMetadataLbls, actual.StructuredMetadata()) assert.Equal(t, expectedParsedLbls, actual.Parsed()) + + b.Reset() + b.Set(StreamLabel, "namespace", "tempo") + b.Set(StreamLabel, "bazz", "tazz") + b.Set(StructuredMetadataLabel, "bazz", "sazz") + b.Set(ParsedLabel, "ToReplace", "other") + + expectedStreamLbls = labels.FromStrings( + "namespace", "tempo", + "cluster", "us-central1", + "job", "us-central1/loki", + ) + expectedStucturedMetadataLbls = labels.FromStrings( + "bazz", "sazz", + ) + expectedParsedLbls = labels.FromStrings( + "ToReplace", "other", + ) + + expected = make(labels.Labels, 0, len(expectedStreamLbls)+len(expectedStucturedMetadataLbls)+len(expectedParsedLbls)) + expected = append(expected, expectedStreamLbls...) + expected = append(expected, expectedStucturedMetadataLbls...) + expected = append(expected, expectedParsedLbls...) + expected = labels.New(expected...) + assertLabelResult(t, expected, b.LabelsResult()) + // cached. + assertLabelResult(t, expected, b.LabelsResult()) + actual = b.LabelsResult() + assert.Equal(t, expectedStreamLbls, actual.Stream()) + assert.Equal(t, expectedStucturedMetadataLbls, actual.StructuredMetadata()) + assert.Equal(t, expectedParsedLbls, actual.Parsed()) +} + +func TestLabelsBuilder_Set(t *testing.T) { + strs := []string{ + "namespace", "loki", + "cluster", "us-central1", + "toreplace", "fuzz", + } + lbs := labels.FromStrings(strs...) + b := NewBaseLabelsBuilder().ForLabels(lbs, lbs.Hash()) + + // test duplicating stream label with parsed label + b.Set(StructuredMetadataLabel, "stzz", "stvzz") + b.Set(ParsedLabel, "toreplace", "buzz") + expectedStreamLbls := labels.FromStrings("namespace", "loki", "cluster", "us-central1") + expectedStucturedMetadataLbls := labels.FromStrings("stzz", "stvzz") + expectedParsedLbls := labels.FromStrings("toreplace", "buzz") + + expected := make(labels.Labels, 0, len(expectedStreamLbls)+len(expectedStucturedMetadataLbls)+len(expectedParsedLbls)) + expected = append(expected, expectedStreamLbls...) + expected = append(expected, expectedStucturedMetadataLbls...) + expected = append(expected, expectedParsedLbls...) + expected = labels.New(expected...) 
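+ // the parsed "toreplace" should shadow the stream label of the same name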
+ + actual := b.LabelsResult() + assertLabelResult(t, expected, actual) + assert.Equal(t, expectedStreamLbls, actual.Stream()) + assert.Equal(t, expectedStucturedMetadataLbls, actual.StructuredMetadata()) + assert.Equal(t, expectedParsedLbls, actual.Parsed()) + + b.Reset() + + // test duplicating structured metadata label with parsed label + b.Set(StructuredMetadataLabel, "stzz", "stvzz") + b.Set(StructuredMetadataLabel, "toreplace", "muzz") + b.Set(ParsedLabel, "toreplace", "buzz") + expectedStreamLbls = labels.FromStrings("namespace", "loki", "cluster", "us-central1") + expectedStucturedMetadataLbls = labels.FromStrings("stzz", "stvzz") + expectedParsedLbls = labels.FromStrings("toreplace", "buzz") + + expected = make(labels.Labels, 0, len(expectedStreamLbls)+len(expectedStucturedMetadataLbls)+len(expectedParsedLbls)) + expected = append(expected, expectedStreamLbls...) + expected = append(expected, expectedStucturedMetadataLbls...) + expected = append(expected, expectedParsedLbls...) + expected = labels.New(expected...) + + actual = b.LabelsResult() + assertLabelResult(t, expected, actual) + assert.Equal(t, expectedStreamLbls, actual.Stream()) + assert.Equal(t, expectedStucturedMetadataLbls, actual.StructuredMetadata()) + assert.Equal(t, expectedParsedLbls, actual.Parsed()) + + b.Reset() + + // test duplicating stream label with structured meta data label + b.Set(StructuredMetadataLabel, "toreplace", "muzz") + b.Set(ParsedLabel, "stzz", "stvzz") + expectedStreamLbls = labels.FromStrings("namespace", "loki", "cluster", "us-central1") + expectedStucturedMetadataLbls = labels.FromStrings("toreplace", "muzz") + expectedParsedLbls = labels.FromStrings("stzz", "stvzz") + + expected = make(labels.Labels, 0, len(expectedStreamLbls)+len(expectedStucturedMetadataLbls)+len(expectedParsedLbls)) + expected = append(expected, expectedStreamLbls...) + expected = append(expected, expectedStucturedMetadataLbls...) + expected = append(expected, expectedParsedLbls...) + expected = labels.New(expected...) + + actual = b.LabelsResult() + assertLabelResult(t, expected, actual) + assert.Equal(t, expectedStreamLbls, actual.Stream()) + assert.Equal(t, expectedStucturedMetadataLbls, actual.StructuredMetadata()) + assert.Equal(t, expectedParsedLbls, actual.Parsed()) + + b.Reset() + + // test duplicating parsed label with structured meta data label + b.Set(ParsedLabel, "toreplace", "puzz") + b.Set(StructuredMetadataLabel, "stzz", "stvzzz") + b.Set(StructuredMetadataLabel, "toreplace", "muzz") + expectedStreamLbls = labels.FromStrings("namespace", "loki", "cluster", "us-central1") + expectedStucturedMetadataLbls = labels.FromStrings("stzz", "stvzzz") + expectedParsedLbls = labels.FromStrings("toreplace", "puzz") + + expected = make(labels.Labels, 0, len(expectedStreamLbls)+len(expectedStucturedMetadataLbls)+len(expectedParsedLbls)) + expected = append(expected, expectedStreamLbls...) + expected = append(expected, expectedStucturedMetadataLbls...) + expected = append(expected, expectedParsedLbls...) + expected = labels.New(expected...) 
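+ // the earlier parsed write keeps precedence over the later structured-metadata write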
+ + actual = b.LabelsResult() + assertLabelResult(t, expected, actual) + assert.Equal(t, expectedStreamLbls, actual.Stream()) + assert.Equal(t, expectedStucturedMetadataLbls, actual.StructuredMetadata()) + assert.Equal(t, expectedParsedLbls, actual.Parsed()) + + b.Reset() + + // test duplicating structured meta data label with stream label + b.Set(ParsedLabel, "stzz", "stvzzz") + b.Set(StructuredMetadataLabel, "toreplace", "muzz") + expectedStreamLbls = labels.FromStrings("namespace", "loki", "cluster", "us-central1") + expectedStucturedMetadataLbls = labels.FromStrings("toreplace", "muzz") + expectedParsedLbls = labels.FromStrings("stzz", "stvzzz") + + expected = make(labels.Labels, 0, len(expectedStreamLbls)+len(expectedStucturedMetadataLbls)+len(expectedParsedLbls)) + expected = append(expected, expectedStreamLbls...) + expected = append(expected, expectedStucturedMetadataLbls...) + expected = append(expected, expectedParsedLbls...) + expected = labels.New(expected...) + + actual = b.LabelsResult() + assertLabelResult(t, expected, actual) + assert.Equal(t, expectedStreamLbls, actual.Stream()) + assert.Equal(t, expectedStucturedMetadataLbls, actual.StructuredMetadata()) + assert.Equal(t, expectedParsedLbls, actual.Parsed()) +} + +func TestLabelsBuilder_UnsortedLabels(t *testing.T) { + strs := []string{ + "namespace", "loki", + "cluster", "us-central1", + "toreplace", "fuzz", + } + lbs := labels.FromStrings(strs...) + b := NewBaseLabelsBuilder().ForLabels(lbs, lbs.Hash()) + b.add[StructuredMetadataLabel] = labels.FromStrings("toreplace", "buzz", "fzz", "bzz") + b.add[ParsedLabel] = labels.FromStrings("pzz", "pvzz") + expected := labels.FromStrings("cluster", "us-central1", "namespace", "loki", "fzz", "bzz", "toreplace", "buzz", "pzz", "pvzz") + actual := b.UnsortedLabels(nil) + require.ElementsMatch(t, expected, actual) + + b.Reset() + b.add[StructuredMetadataLabel] = labels.FromStrings("fzz", "bzz") + b.add[ParsedLabel] = labels.FromStrings("toreplace", "buzz", "pzz", "pvzz") + expected = labels.FromStrings("cluster", "us-central1", "namespace", "loki", "fzz", "bzz", "toreplace", "buzz", "pzz", "pvzz") + actual = b.UnsortedLabels(nil) + sort.Sort(expected) + sort.Sort(actual) + assert.Equal(t, expected, actual) + + b.Reset() + b.add[StructuredMetadataLabel] = labels.FromStrings("fzz", "bzz", "toreplacezz", "test") + b.add[ParsedLabel] = labels.FromStrings("toreplacezz", "buzz", "pzz", "pvzz") + expected = labels.FromStrings("cluster", "us-central1", "namespace", "loki", "fzz", "bzz", "toreplace", "fuzz", "pzz", "pvzz", "toreplacezz", "buzz") + actual = b.UnsortedLabels(nil) + sort.Sort(expected) + sort.Sort(actual) + assert.Equal(t, expected, actual) } func TestLabelsBuilder_GroupedLabelsResult(t *testing.T) { diff --git a/pkg/logql/log/pipeline.go b/pkg/logql/log/pipeline.go index efd3acadd41e..a205039dd771 100644 --- a/pkg/logql/log/pipeline.go +++ b/pkg/logql/log/pipeline.go @@ -2,7 +2,6 @@ package log import ( "context" - "reflect" "sync" "unsafe" @@ -383,11 +382,7 @@ func ReduceStages(stages []Stage) Stage { } func unsafeGetBytes(s string) []byte { - var buf []byte - p := unsafe.Pointer(&buf) - *(*string)(p) = s - (*reflect.SliceHeader)(p).Cap = len(s) - return buf + return unsafe.Slice(unsafe.StringData(s), len(s)) } func unsafeGetString(buf []byte) string { diff --git a/pkg/logql/log/pipeline_test.go b/pkg/logql/log/pipeline_test.go index 78a97778919f..8c11d0c198a1 100644 --- a/pkg/logql/log/pipeline_test.go +++ b/pkg/logql/log/pipeline_test.go @@ -546,6 +546,42 @@ func 
TestKeepLabelsPipeline(t *testing.T) { } +func TestUnsafeGetBytes(t *testing.T) { + tests := []struct { + name string + input string + want []byte + }{ + { + name: "empty string", + input: "", + want: nil, + }, + { + name: "simple string", + input: "hello", + want: []byte{'h', 'e', 'l', 'l', 'o'}, + }, + { + name: "string with spaces", + input: "hello world", + want: []byte{'h', 'e', 'l', 'l', 'o', ' ', 'w', 'o', 'r', 'l', 'd'}, + }, + { + name: "string with special characters", + input: "hello\nworld\t!", + want: []byte{'h', 'e', 'l', 'l', 'o', '\n', 'w', 'o', 'r', 'l', 'd', '\t', '!'}, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + got := unsafeGetBytes(tt.input) + require.Equal(t, tt.want, got) + }) + } +} + func Benchmark_Pipeline(b *testing.B) { b.ReportAllocs() diff --git a/pkg/logql/metrics.go b/pkg/logql/metrics.go index d06a8fbac7d2..cce6aa398cc8 100644 --- a/pkg/logql/metrics.go +++ b/pkg/logql/metrics.go @@ -8,7 +8,6 @@ import ( "time" "github.com/c2h5oh/datasize" - "github.com/dustin/go-humanize" "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" @@ -156,9 +155,9 @@ func RecordRangeAndInstantQueryMetrics( "status", status, "limit", p.Limit(), "returned_lines", returnedLines, - "throughput", humanizeBytes(uint64(stats.Summary.BytesProcessedPerSecond)), - "total_bytes", humanizeBytes(uint64(stats.Summary.TotalBytesProcessed)), - "total_bytes_structured_metadata", humanizeBytes(uint64(stats.Summary.TotalStructuredMetadataBytesProcessed)), + "throughput", util.HumanizeBytes(uint64(stats.Summary.BytesProcessedPerSecond)), + "total_bytes", util.HumanizeBytes(uint64(stats.Summary.TotalBytesProcessed)), + "total_bytes_structured_metadata", util.HumanizeBytes(uint64(stats.Summary.TotalStructuredMetadataBytesProcessed)), "lines_per_second", stats.Summary.LinesProcessedPerSecond, "total_lines", stats.Summary.TotalLinesProcessed, "post_filter_lines", stats.Summary.TotalPostFilterLines, @@ -197,11 +196,11 @@ func RecordRangeAndInstantQueryMetrics( // Total ingester reached for this query. "ingester_requests", stats.Ingester.GetTotalReached(), // Total bytes processed but was already in memory (found in the headchunk). Includes structured metadata bytes. - "ingester_chunk_head_bytes", humanizeBytes(uint64(stats.Ingester.Store.Chunk.GetHeadChunkBytes())), + "ingester_chunk_head_bytes", util.HumanizeBytes(uint64(stats.Ingester.Store.Chunk.GetHeadChunkBytes())), // Total bytes of compressed chunks (blocks) processed. - "ingester_chunk_compressed_bytes", humanizeBytes(uint64(stats.Ingester.Store.Chunk.GetCompressedBytes())), + "ingester_chunk_compressed_bytes", util.HumanizeBytes(uint64(stats.Ingester.Store.Chunk.GetCompressedBytes())), // Total bytes decompressed and processed from chunks. Includes structured metadata bytes. - "ingester_chunk_decompressed_bytes", humanizeBytes(uint64(stats.Ingester.Store.Chunk.GetDecompressedBytes())), + "ingester_chunk_decompressed_bytes", util.HumanizeBytes(uint64(stats.Ingester.Store.Chunk.GetDecompressedBytes())), // Total lines post filtering. "ingester_post_filter_lines", stats.Ingester.Store.Chunk.GetPostFilterLines(), // Time spent being blocked on congestion control. 
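The metrics.go hunks above swap the package-local humanizeBytes helper for util.HumanizeBytes; the deleted helper itself appears in the hunk below. A minimal sketch of what the shared util function is presumably doing, mirroring the removed one-liner (the package placement is an assumption taken from the import path):

	// Assumed re-creation of util.HumanizeBytes, moved verbatim from the
	// deleted metrics.go helper shown below.
	package util

	import (
		"strings"

		"github.com/dustin/go-humanize"
	)

	// HumanizeBytes renders a byte count compactly: humanize.Bytes produces
	// e.g. "1.5 MB", and dropping the first space yields "1.5MB" so the
	// value stays a single token in logfmt query logs.
	func HumanizeBytes(val uint64) string {
		return strings.Replace(humanize.Bytes(val), " ", "", 1)
	}

With that in place, HumanizeBytes(1_500_000) returns "1.5MB", matching the behaviour of the deleted helper.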
@@ -220,6 +219,13 @@ func RecordRangeAndInstantQueryMetrics( logValues = append(logValues, "disable_pipeline_wrappers", "false") } + // Query is eligible for bloom filtering + if hasMatchEqualLabelFilterBeforeParser(p) { + logValues = append(logValues, "has_labelfilter_before_parser", "true") + } else { + logValues = append(logValues, "has_labelfilter_before_parser", "false") + } + level.Info(logger).Log( logValues..., ) @@ -243,8 +249,17 @@ func RecordRangeAndInstantQueryMetrics( recordUsageStats(queryType, stats) } -func humanizeBytes(val uint64) string { - return strings.Replace(humanize.Bytes(val), " ", "", 1) +func hasMatchEqualLabelFilterBeforeParser(p Params) bool { + filters := syntax.ExtractLabelFiltersBeforeParser(p.GetExpression()) + if len(filters) == 0 { + return false + } + for _, f := range filters { + if !syntax.IsMatchEqualFilterer(f.LabelFilterer) { + return false + } + } + return true } func RecordLabelQueryMetrics( diff --git a/pkg/logql/metrics_test.go b/pkg/logql/metrics_test.go index 577627a20203..36bb0cb566a9 100644 --- a/pkg/logql/metrics_test.go +++ b/pkg/logql/metrics_test.go @@ -215,3 +215,40 @@ func TestQueryHashing(t *testing.T) { // check that it evaluate same queries as same hashes, even if evaluated at different timestamps. require.Equal(t, h1, h3) } + +func TestHasMatchEqualLabelFilterBeforeParser(t *testing.T) { + cases := []struct { + query string + result bool + }{ + { + query: `{env="prod"} |= "id"`, + result: false, + }, + { + query: `{env="prod"} |= "id" | level="debug"`, + result: true, + }, + { + query: `{env="prod"} |= "id" | logfmt | level="debug"`, + result: false, + }, + { + query: `{env="prod"} | level="debug" or level="info"`, + result: true, + }, + { + query: `{env="prod"} | level="debug" and level!="info"`, + result: false, + }, + } + + for _, c := range cases { + t.Run(fmt.Sprintf("%s => %v", c.query, c.result), func(t *testing.T) { + p := LiteralParams{ + queryExpr: syntax.MustParseExpr(c.query), + } + assert.Equal(t, c.result, hasMatchEqualLabelFilterBeforeParser(p)) + }) + } +} diff --git a/pkg/logql/quantile_over_time_sketch.go b/pkg/logql/quantile_over_time_sketch.go index 81cd3369e349..a14bf303ab94 100644 --- a/pkg/logql/quantile_over_time_sketch.go +++ b/pkg/logql/quantile_over_time_sketch.go @@ -20,8 +20,10 @@ const ( QuantileSketchMatrixType = "QuantileSketchMatrix" ) -type ProbabilisticQuantileVector []ProbabilisticQuantileSample -type ProbabilisticQuantileMatrix []ProbabilisticQuantileVector +type ( + ProbabilisticQuantileVector []ProbabilisticQuantileSample + ProbabilisticQuantileMatrix []ProbabilisticQuantileVector +) var streamHashPool = sync.Pool{ New: func() interface{} { return make(map[uint64]int) }, @@ -177,7 +179,8 @@ func (e *QuantileSketchStepEvaluator) Explain(parent Node) { func newQuantileSketchIterator( it iter.PeekingSampleIterator, - selRange, step, start, end, offset int64) RangeVectorIterator { + selRange, step, start, end, offset int64, +) RangeVectorIterator { inner := &batchRangeVectorIterator{ iter: it, step: step, @@ -343,70 +346,6 @@ func (*QuantileSketchMatrixStepEvaluator) Explain(parent Node) { parent.Child("QuantileSketchMatrix") } -// QuantileSketchMergeStepEvaluator merges multiple quantile sketches into one for each -// step. 
-type QuantileSketchMergeStepEvaluator struct { - evaluators []StepEvaluator - err error -} - -func NewQuantileSketchMergeStepEvaluator(evaluators []StepEvaluator) *QuantileSketchMergeStepEvaluator { - return &QuantileSketchMergeStepEvaluator{ - evaluators: evaluators, - err: nil, - } -} - -func (e *QuantileSketchMergeStepEvaluator) Next() (bool, int64, StepResult) { - ok, ts, r := e.evaluators[0].Next() - var cur ProbabilisticQuantileVector - if ok { - cur = r.QuantileSketchVec() - } - - if len(e.evaluators) == 1 { - return ok, ts, cur - } - - for _, eval := range e.evaluators[1:] { - ok, nextTs, vec := eval.Next() - if ok { - if cur == nil { - cur = vec.QuantileSketchVec() - } else { - if ts != nextTs { - e.err = fmt.Errorf("timestamps of sketches differ: %d!=%d", ts, nextTs) - return false, 0, nil - } - - _, e.err = cur.Merge(vec.QuantileSketchVec()) - if e.err != nil { - return false, 0, nil - } - } - } - } - - return ok, ts, cur -} - -func (*QuantileSketchMergeStepEvaluator) Close() error { return nil } - -func (e *QuantileSketchMergeStepEvaluator) Error() error { return e.err } - -func (e *QuantileSketchMergeStepEvaluator) Explain(parent Node) { - b := parent.Child("QuantileSketchMerge") - if len(e.evaluators) < MaxChildrenDisplay { - for _, child := range e.evaluators { - child.Explain(b) - } - } else { - e.evaluators[0].Explain(b) - b.Child("...") - e.evaluators[len(e.evaluators)-1].Explain(b) - } -} - // QuantileSketchVectorStepEvaluator evaluates a quantile sketch into a // promql.Vector. type QuantileSketchVectorStepEvaluator struct { diff --git a/pkg/logql/sketch/topk.go b/pkg/logql/sketch/topk.go index e5efad409727..e6f2c036f367 100644 --- a/pkg/logql/sketch/topk.go +++ b/pkg/logql/sketch/topk.go @@ -2,7 +2,6 @@ package sketch import ( "container/heap" - "reflect" "sort" "unsafe" @@ -210,14 +209,8 @@ func (t *Topk) updateBF(removed, added string) { } } -// todo: is there a way to save more bytes/allocs via a pool? func unsafeGetBytes(s string) []byte { - if s == "" { - return nil // or []byte{} - } - return (*[0x7fff0000]byte)(unsafe.Pointer( - (*reflect.StringHeader)(unsafe.Pointer(&s)).Data), - )[:len(s):len(s)] + return unsafe.Slice(unsafe.StringData(s), len(s)) } // Observe is our sketch event observation function, which is a bit more complex than the original count min sketch + heap TopK diff --git a/pkg/logql/syntax/ast.go b/pkg/logql/syntax/ast.go index 0ecab6313a40..51bee23b01a9 100644 --- a/pkg/logql/syntax/ast.go +++ b/pkg/logql/syntax/ast.go @@ -70,6 +70,55 @@ func ExtractLineFilters(e Expr) []LineFilterExpr { return filters } +func ExtractLabelFiltersBeforeParser(e Expr) []*LabelFilterExpr { + if e == nil { + return nil + } + var ( + filters []*LabelFilterExpr + foundParseStage bool + ) + + visitor := &DepthFirstTraversal{ + VisitLabelFilterFn: func(_ RootVisitor, e *LabelFilterExpr) { + if !foundParseStage { + filters = append(filters, e) + } + }, + + // TODO(rfratto): Find a way to generically represent or test for an + // expression that modifies extracted labels (parsers, keep, drop, etc.). + // + // As the AST is now, we can't prove at compile time that the list of + // visitors below is complete. For example, if a new parser stage + // expression is added without updating this list, blooms can silently + // misbehave. 
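+ // Every stage listed below can add, rewrite, or drop extracted labels, so each one ends the "before parser" prefix.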
+ + VisitLogfmtParserFn: func(_ RootVisitor, _ *LogfmtParserExpr) { foundParseStage = true }, + VisitLabelParserFn: func(_ RootVisitor, _ *LabelParserExpr) { foundParseStage = true }, + VisitJSONExpressionParserFn: func(_ RootVisitor, _ *JSONExpressionParser) { foundParseStage = true }, + VisitLogfmtExpressionParserFn: func(_ RootVisitor, _ *LogfmtExpressionParser) { foundParseStage = true }, + VisitLabelFmtFn: func(_ RootVisitor, _ *LabelFmtExpr) { foundParseStage = true }, + VisitKeepLabelFn: func(_ RootVisitor, _ *KeepLabelsExpr) { foundParseStage = true }, + VisitDropLabelsFn: func(_ RootVisitor, _ *DropLabelsExpr) { foundParseStage = true }, + } + e.Accept(visitor) + return filters +} + +func IsMatchEqualFilterer(filterer log.LabelFilterer) bool { + switch filter := filterer.(type) { + case *log.LineFilterLabelFilter: + return filter.Type == labels.MatchEqual + case *log.StringLabelFilter: + return filter.Type == labels.MatchEqual + case *log.BinaryLabelFilter: + return IsMatchEqualFilterer(filter.Left) && IsMatchEqualFilterer(filter.Right) + default: + return false + } +} + // implicit holds default implementations type implicit struct{} diff --git a/pkg/logql/syntax/visit.go b/pkg/logql/syntax/visit.go index d4478346859f..968c5b53b01b 100644 --- a/pkg/logql/syntax/visit.go +++ b/pkg/logql/syntax/visit.go @@ -95,7 +95,7 @@ func (v *DepthFirstTraversal) VisitDropLabels(e *DropLabelsExpr) { if e == nil { return } - if v.VisitDecolorizeFn != nil { + if v.VisitDropLabelsFn != nil { v.VisitDropLabelsFn(v, e) } } diff --git a/pkg/loki/config_wrapper.go b/pkg/loki/config_wrapper.go index 3885dffe6263..16d25c1ff5e8 100644 --- a/pkg/loki/config_wrapper.go +++ b/pkg/loki/config_wrapper.go @@ -246,21 +246,6 @@ func applyConfigToRings(r, defaults *ConfigWrapper, rc lokiring.RingConfig, merg r.Ingester.LifecyclerConfig.ObservePeriod = rc.ObservePeriod } - if mergeWithExisting { - r.IngesterRF1.LifecyclerConfig.RingConfig.KVStore = rc.KVStore - r.IngesterRF1.LifecyclerConfig.HeartbeatPeriod = rc.HeartbeatPeriod - r.IngesterRF1.LifecyclerConfig.RingConfig.HeartbeatTimeout = rc.HeartbeatTimeout - r.IngesterRF1.LifecyclerConfig.TokensFilePath = rc.TokensFilePath - r.IngesterRF1.LifecyclerConfig.RingConfig.ZoneAwarenessEnabled = rc.ZoneAwarenessEnabled - r.IngesterRF1.LifecyclerConfig.ID = rc.InstanceID - r.IngesterRF1.LifecyclerConfig.InfNames = rc.InstanceInterfaceNames - r.IngesterRF1.LifecyclerConfig.Port = rc.InstancePort - r.IngesterRF1.LifecyclerConfig.Addr = rc.InstanceAddr - r.IngesterRF1.LifecyclerConfig.Zone = rc.InstanceZone - r.IngesterRF1.LifecyclerConfig.ListenPort = rc.ListenPort - r.IngesterRF1.LifecyclerConfig.ObservePeriod = rc.ObservePeriod - } - if mergeWithExisting { r.Pattern.LifecyclerConfig.RingConfig.KVStore = rc.KVStore r.Pattern.LifecyclerConfig.HeartbeatPeriod = rc.HeartbeatPeriod @@ -276,21 +261,6 @@ func applyConfigToRings(r, defaults *ConfigWrapper, rc lokiring.RingConfig, merg r.Pattern.LifecyclerConfig.ObservePeriod = rc.ObservePeriod } - if mergeWithExisting { - r.KafkaIngester.LifecyclerConfig.RingConfig.KVStore = rc.KVStore - r.KafkaIngester.LifecyclerConfig.HeartbeatPeriod = rc.HeartbeatPeriod - r.KafkaIngester.LifecyclerConfig.RingConfig.HeartbeatTimeout = rc.HeartbeatTimeout - r.KafkaIngester.LifecyclerConfig.TokensFilePath = rc.TokensFilePath - r.KafkaIngester.LifecyclerConfig.RingConfig.ZoneAwarenessEnabled = rc.ZoneAwarenessEnabled - r.KafkaIngester.LifecyclerConfig.ID = rc.InstanceID - r.KafkaIngester.LifecyclerConfig.InfNames = rc.InstanceInterfaceNames - 
r.KafkaIngester.LifecyclerConfig.Port = rc.InstancePort - r.KafkaIngester.LifecyclerConfig.Addr = rc.InstanceAddr - r.KafkaIngester.LifecyclerConfig.Zone = rc.InstanceZone - r.KafkaIngester.LifecyclerConfig.ListenPort = rc.ListenPort - r.KafkaIngester.LifecyclerConfig.ObservePeriod = rc.ObservePeriod - } - // Distributor if mergeWithExisting || reflect.DeepEqual(r.Distributor.DistributorRing, defaults.Distributor.DistributorRing) { r.Distributor.DistributorRing.HeartbeatTimeout = rc.HeartbeatTimeout @@ -570,6 +540,26 @@ func applyStorageConfig(cfg, defaults *ConfigWrapper) error { } } + if !reflect.DeepEqual(cfg.Common.Storage.AlibabaCloud, defaults.StorageConfig.AlibabaStorageConfig) { + configsFound++ + + applyConfig = func(r *ConfigWrapper) { + r.Ruler.StoreConfig.Type = "alibaba" + r.Ruler.StoreConfig.AlibabaCloud = r.Common.Storage.AlibabaCloud + r.StorageConfig.AlibabaStorageConfig = r.Common.Storage.AlibabaCloud + } + } + + if !reflect.DeepEqual(cfg.Common.Storage.COS, defaults.StorageConfig.COSConfig) { + configsFound++ + + applyConfig = func(r *ConfigWrapper) { + r.Ruler.StoreConfig.Type = "cos" + r.Ruler.StoreConfig.COS = r.Common.Storage.COS + r.StorageConfig.COSConfig = r.Common.Storage.COS + } + } + if !reflect.DeepEqual(cfg.Common.Storage.CongestionControl, defaults.StorageConfig.CongestionControl) { applyConfig = func(r *ConfigWrapper) { r.StorageConfig.CongestionControl = r.Common.Storage.CongestionControl @@ -673,7 +663,6 @@ func applyIngesterFinalSleep(cfg *ConfigWrapper) { func applyIngesterReplicationFactor(cfg *ConfigWrapper) { cfg.Ingester.LifecyclerConfig.RingConfig.ReplicationFactor = cfg.Common.ReplicationFactor - cfg.IngesterRF1.LifecyclerConfig.RingConfig.ReplicationFactor = cfg.Common.ReplicationFactor } // applyChunkRetain is used to set chunk retain based on having an index query cache configured diff --git a/pkg/loki/config_wrapper_test.go b/pkg/loki/config_wrapper_test.go index e8894d6329b7..5e1ad00bec50 100644 --- a/pkg/loki/config_wrapper_test.go +++ b/pkg/loki/config_wrapper_test.go @@ -219,12 +219,16 @@ memberlist: assert.EqualValues(t, defaults.Ruler.StoreConfig.Swift, config.Ruler.StoreConfig.Swift) assert.EqualValues(t, defaults.Ruler.StoreConfig.Local, config.Ruler.StoreConfig.Local) assert.EqualValues(t, defaults.Ruler.StoreConfig.BOS, config.Ruler.StoreConfig.BOS) + assert.EqualValues(t, defaults.Ruler.StoreConfig.AlibabaCloud, config.Ruler.StoreConfig.AlibabaCloud) + assert.EqualValues(t, defaults.Ruler.StoreConfig.COS, config.Ruler.StoreConfig.COS) assert.EqualValues(t, defaults.StorageConfig.AWSStorageConfig, config.StorageConfig.AWSStorageConfig) assert.EqualValues(t, defaults.StorageConfig.AzureStorageConfig, config.StorageConfig.AzureStorageConfig) assert.EqualValues(t, defaults.StorageConfig.GCSConfig, config.StorageConfig.GCSConfig) assert.EqualValues(t, defaults.StorageConfig.Swift, config.StorageConfig.Swift) assert.EqualValues(t, defaults.StorageConfig.FSConfig, config.StorageConfig.FSConfig) assert.EqualValues(t, defaults.StorageConfig.BOSStorageConfig, config.StorageConfig.BOSStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.AlibabaStorageConfig, config.StorageConfig.AlibabaStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.COSConfig, config.StorageConfig.COSConfig) }) t.Run("when multiple configs are provided, an error is returned", func(t *testing.T) { @@ -296,12 +300,17 @@ memberlist: assert.EqualValues(t, defaults.Ruler.StoreConfig.Swift, config.Ruler.StoreConfig.Swift) assert.EqualValues(t, 
defaults.Ruler.StoreConfig.Local, config.Ruler.StoreConfig.Local) assert.EqualValues(t, defaults.Ruler.StoreConfig.BOS, config.Ruler.StoreConfig.BOS) + assert.EqualValues(t, defaults.Ruler.StoreConfig.AlibabaCloud, config.Ruler.StoreConfig.AlibabaCloud) + assert.EqualValues(t, defaults.Ruler.StoreConfig.COS, config.Ruler.StoreConfig.COS) + // should remain empty assert.EqualValues(t, defaults.StorageConfig.AzureStorageConfig, config.StorageConfig.AzureStorageConfig) assert.EqualValues(t, defaults.StorageConfig.GCSConfig, config.StorageConfig.GCSConfig) assert.EqualValues(t, defaults.StorageConfig.Swift, config.StorageConfig.Swift) assert.EqualValues(t, defaults.StorageConfig.FSConfig, config.StorageConfig.FSConfig) assert.EqualValues(t, defaults.StorageConfig.BOSStorageConfig, config.StorageConfig.BOSStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.AlibabaStorageConfig, config.StorageConfig.AlibabaStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.COSConfig, config.StorageConfig.COSConfig) }) t.Run("when common s3 storage config is provided (with session token), ruler and storage config are defaulted to use it", func(t *testing.T) { @@ -356,12 +365,17 @@ memberlist: assert.EqualValues(t, defaults.Ruler.StoreConfig.Swift, config.Ruler.StoreConfig.Swift) assert.EqualValues(t, defaults.Ruler.StoreConfig.Local, config.Ruler.StoreConfig.Local) assert.EqualValues(t, defaults.Ruler.StoreConfig.BOS, config.Ruler.StoreConfig.BOS) + assert.EqualValues(t, defaults.Ruler.StoreConfig.AlibabaCloud, config.Ruler.StoreConfig.AlibabaCloud) + assert.EqualValues(t, defaults.Ruler.StoreConfig.COS, config.Ruler.StoreConfig.COS) + // should remain empty assert.EqualValues(t, defaults.StorageConfig.AzureStorageConfig, config.StorageConfig.AzureStorageConfig) assert.EqualValues(t, defaults.StorageConfig.GCSConfig, config.StorageConfig.GCSConfig) assert.EqualValues(t, defaults.StorageConfig.Swift, config.StorageConfig.Swift) assert.EqualValues(t, defaults.StorageConfig.FSConfig, config.StorageConfig.FSConfig) assert.EqualValues(t, defaults.StorageConfig.BOSStorageConfig, config.StorageConfig.BOSStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.AlibabaStorageConfig, config.StorageConfig.AlibabaStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.COSConfig, config.StorageConfig.COSConfig) }) t.Run("when common gcs storage config is provided, ruler and storage config are defaulted to use it", func(t *testing.T) { @@ -392,12 +406,17 @@ memberlist: assert.EqualValues(t, defaults.Ruler.StoreConfig.Swift, config.Ruler.StoreConfig.Swift) assert.EqualValues(t, defaults.Ruler.StoreConfig.Local, config.Ruler.StoreConfig.Local) assert.EqualValues(t, defaults.Ruler.StoreConfig.BOS, config.Ruler.StoreConfig.BOS) + assert.EqualValues(t, defaults.Ruler.StoreConfig.AlibabaCloud, config.Ruler.StoreConfig.AlibabaCloud) + assert.EqualValues(t, defaults.Ruler.StoreConfig.COS, config.Ruler.StoreConfig.COS) + // should remain empty assert.EqualValues(t, defaults.StorageConfig.AzureStorageConfig, config.StorageConfig.AzureStorageConfig) assert.EqualValues(t, defaults.StorageConfig.AWSStorageConfig.S3Config, config.StorageConfig.AWSStorageConfig.S3Config) assert.EqualValues(t, defaults.StorageConfig.Swift, config.StorageConfig.Swift) assert.EqualValues(t, defaults.StorageConfig.FSConfig, config.StorageConfig.FSConfig) assert.EqualValues(t, defaults.StorageConfig.BOSStorageConfig, config.StorageConfig.BOSStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.AlibabaStorageConfig, 
config.StorageConfig.AlibabaStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.COSConfig, config.StorageConfig.COSConfig) }) t.Run("when common azure storage config is provided, ruler and storage config are defaulted to use it", func(t *testing.T) { @@ -444,6 +463,8 @@ memberlist: assert.EqualValues(t, defaults.Ruler.StoreConfig.Swift, config.Ruler.StoreConfig.Swift) assert.EqualValues(t, defaults.Ruler.StoreConfig.Local, config.Ruler.StoreConfig.Local) assert.EqualValues(t, defaults.Ruler.StoreConfig.BOS, config.Ruler.StoreConfig.BOS) + assert.EqualValues(t, defaults.Ruler.StoreConfig.AlibabaCloud, config.Ruler.StoreConfig.AlibabaCloud) + assert.EqualValues(t, defaults.Ruler.StoreConfig.COS, config.Ruler.StoreConfig.COS) // should remain empty assert.EqualValues(t, defaults.StorageConfig.GCSConfig, config.StorageConfig.GCSConfig) @@ -451,6 +472,8 @@ memberlist: assert.EqualValues(t, defaults.StorageConfig.Swift, config.StorageConfig.Swift) assert.EqualValues(t, defaults.StorageConfig.FSConfig, config.StorageConfig.FSConfig) assert.EqualValues(t, defaults.StorageConfig.BOSStorageConfig, config.StorageConfig.BOSStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.AlibabaStorageConfig, config.StorageConfig.AlibabaStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.COSConfig, config.StorageConfig.COSConfig) }) t.Run("when common bos storage config is provided, ruler and storage config are defaulted to use it", func(t *testing.T) { @@ -482,6 +505,8 @@ memberlist: assert.EqualValues(t, defaults.Ruler.StoreConfig.S3, config.Ruler.StoreConfig.S3) assert.EqualValues(t, defaults.Ruler.StoreConfig.Swift, config.Ruler.StoreConfig.Swift) assert.EqualValues(t, defaults.Ruler.StoreConfig.Local, config.Ruler.StoreConfig.Local) + assert.EqualValues(t, defaults.Ruler.StoreConfig.AlibabaCloud, config.Ruler.StoreConfig.AlibabaCloud) + assert.EqualValues(t, defaults.Ruler.StoreConfig.COS, config.Ruler.StoreConfig.COS) // should remain empty assert.EqualValues(t, defaults.StorageConfig.AzureStorageConfig, config.StorageConfig.AzureStorageConfig) @@ -489,6 +514,8 @@ memberlist: assert.EqualValues(t, defaults.StorageConfig.AWSStorageConfig.S3Config, config.StorageConfig.AWSStorageConfig.S3Config) assert.EqualValues(t, defaults.StorageConfig.Swift, config.StorageConfig.Swift) assert.EqualValues(t, defaults.StorageConfig.FSConfig, config.StorageConfig.FSConfig) + assert.EqualValues(t, defaults.StorageConfig.AlibabaStorageConfig, config.StorageConfig.AlibabaStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.COSConfig, config.StorageConfig.COSConfig) }) t.Run("when common swift storage config is provided, ruler and storage config are defaulted to use it", func(t *testing.T) { @@ -549,12 +576,103 @@ memberlist: assert.EqualValues(t, defaults.Ruler.StoreConfig.Azure, config.Ruler.StoreConfig.Azure) assert.EqualValues(t, defaults.Ruler.StoreConfig.Local, config.Ruler.StoreConfig.Local) assert.EqualValues(t, defaults.Ruler.StoreConfig.BOS, config.Ruler.StoreConfig.BOS) + assert.EqualValues(t, defaults.Ruler.StoreConfig.AlibabaCloud, config.Ruler.StoreConfig.AlibabaCloud) + assert.EqualValues(t, defaults.Ruler.StoreConfig.COS, config.Ruler.StoreConfig.COS) + + // should remain empty + assert.EqualValues(t, defaults.StorageConfig.GCSConfig, config.StorageConfig.GCSConfig) + assert.EqualValues(t, defaults.StorageConfig.AWSStorageConfig.S3Config, config.StorageConfig.AWSStorageConfig.S3Config) + assert.EqualValues(t, defaults.StorageConfig.AzureStorageConfig, 
config.StorageConfig.AzureStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.FSConfig, config.StorageConfig.FSConfig) + assert.EqualValues(t, defaults.StorageConfig.BOSStorageConfig, config.StorageConfig.BOSStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.AlibabaStorageConfig, config.StorageConfig.AlibabaStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.COSConfig, config.StorageConfig.COSConfig) + }) + + t.Run("when common alibaba storage config is provided, ruler and storage config are defaulted to use it", func(t *testing.T) { + configInput := `common: + storage: + alibabacloud: + bucket: testbucket + endpoint: https://example.com + access_key_id: abc123 + secret_access_key: def789` + + config, defaults := testContext(configInput, nil) + + assert.Equal(t, "alibaba", config.Ruler.StoreConfig.Type) + + for _, actual := range []alibaba.OssConfig{ + config.Ruler.StoreConfig.AlibabaCloud, + config.StorageConfig.AlibabaStorageConfig, + } { + assert.Equal(t, "testbucket", actual.Bucket) + assert.Equal(t, "https://example.com", actual.Endpoint) + assert.Equal(t, "abc123", actual.AccessKeyID) + assert.Equal(t, "def789", actual.SecretAccessKey) + } + + // should remain empty + assert.EqualValues(t, defaults.Ruler.StoreConfig.GCS, config.Ruler.StoreConfig.GCS) + assert.EqualValues(t, defaults.Ruler.StoreConfig.S3, config.Ruler.StoreConfig.S3) + assert.EqualValues(t, defaults.Ruler.StoreConfig.Azure, config.Ruler.StoreConfig.Azure) + assert.EqualValues(t, defaults.Ruler.StoreConfig.Swift, config.Ruler.StoreConfig.Swift) + assert.EqualValues(t, defaults.Ruler.StoreConfig.Local, config.Ruler.StoreConfig.Local) + assert.EqualValues(t, defaults.Ruler.StoreConfig.BOS, config.Ruler.StoreConfig.BOS) + assert.EqualValues(t, defaults.Ruler.StoreConfig.COS, config.Ruler.StoreConfig.COS) + + // should remain empty + assert.EqualValues(t, defaults.StorageConfig.GCSConfig, config.StorageConfig.GCSConfig) + assert.EqualValues(t, defaults.StorageConfig.AWSStorageConfig.S3Config, config.StorageConfig.AWSStorageConfig.S3Config) + assert.EqualValues(t, defaults.StorageConfig.AzureStorageConfig, config.StorageConfig.AzureStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.Swift, config.StorageConfig.Swift) + assert.EqualValues(t, defaults.StorageConfig.FSConfig, config.StorageConfig.FSConfig) + assert.EqualValues(t, defaults.StorageConfig.BOSStorageConfig, config.StorageConfig.BOSStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.COSConfig, config.StorageConfig.COSConfig) + }) + + t.Run("when common cos storage config is provided, ruler and storage config are defaulted to use it", func(t *testing.T) { + configInput := `common: + storage: + cos: + bucketnames: testbucket + endpoint: https://example.com + region: test-region + access_key_id: abc123 + secret_access_key: def789` + + config, defaults := testContext(configInput, nil) + + assert.Equal(t, "cos", config.Ruler.StoreConfig.Type) + + for _, actual := range []ibmcloud.COSConfig{ + config.Ruler.StoreConfig.COS, + config.StorageConfig.COSConfig, + } { + assert.Equal(t, "testbucket", actual.BucketNames) + assert.Equal(t, "https://example.com", actual.Endpoint) + assert.Equal(t, "test-region", actual.Region) + assert.Equal(t, "abc123", actual.AccessKeyID) + assert.Equal(t, flagext.SecretWithValue("def789"), actual.SecretAccessKey) + } + + // should remain empty + assert.EqualValues(t, defaults.Ruler.StoreConfig.GCS, config.Ruler.StoreConfig.GCS) + assert.EqualValues(t, defaults.Ruler.StoreConfig.S3, 
config.Ruler.StoreConfig.S3) + assert.EqualValues(t, defaults.Ruler.StoreConfig.Azure, config.Ruler.StoreConfig.Azure) + assert.EqualValues(t, defaults.Ruler.StoreConfig.Swift, config.Ruler.StoreConfig.Swift) + assert.EqualValues(t, defaults.Ruler.StoreConfig.Local, config.Ruler.StoreConfig.Local) + assert.EqualValues(t, defaults.Ruler.StoreConfig.BOS, config.Ruler.StoreConfig.BOS) + assert.EqualValues(t, defaults.Ruler.StoreConfig.AlibabaCloud, config.Ruler.StoreConfig.AlibabaCloud) + // should remain empty assert.EqualValues(t, defaults.StorageConfig.GCSConfig, config.StorageConfig.GCSConfig) assert.EqualValues(t, defaults.StorageConfig.AWSStorageConfig.S3Config, config.StorageConfig.AWSStorageConfig.S3Config) assert.EqualValues(t, defaults.StorageConfig.AzureStorageConfig, config.StorageConfig.AzureStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.Swift, config.StorageConfig.Swift) assert.EqualValues(t, defaults.StorageConfig.FSConfig, config.StorageConfig.FSConfig) assert.EqualValues(t, defaults.StorageConfig.BOSStorageConfig, config.StorageConfig.BOSStorageConfig) + assert.EqualValues(t, defaults.StorageConfig.AlibabaStorageConfig, config.StorageConfig.AlibabaStorageConfig) }) t.Run("when common filesystem/local config is provided, ruler and storage config are defaulted to use it", func(t *testing.T) { diff --git a/pkg/loki/loki.go b/pkg/loki/loki.go index 5107bf9ee765..84af0a73504f 100644 --- a/pkg/loki/loki.go +++ b/pkg/loki/loki.go @@ -42,7 +42,6 @@ import ( ingester_rf1 "github.com/grafana/loki/v3/pkg/ingester-rf1" "github.com/grafana/loki/v3/pkg/ingester-rf1/metastore" metastoreclient "github.com/grafana/loki/v3/pkg/ingester-rf1/metastore/client" - "github.com/grafana/loki/v3/pkg/ingester-rf1/metastore/health" ingester_client "github.com/grafana/loki/v3/pkg/ingester/client" "github.com/grafana/loki/v3/pkg/kafka" ingester_kafka "github.com/grafana/loki/v3/pkg/kafka/ingester" @@ -96,7 +95,6 @@ type Config struct { IngesterClient ingester_client.Config `yaml:"ingester_client,omitempty"` IngesterRF1Client ingester_client.Config `yaml:"ingester_rf1_client,omitempty"` Ingester ingester.Config `yaml:"ingester,omitempty"` - IngesterRF1 ingester_rf1.Config `yaml:"ingester_rf1,omitempty" category:"experimental"` Pattern pattern.Config `yaml:"pattern_ingester,omitempty"` IndexGateway indexgateway.Config `yaml:"index_gateway"` BloomBuild bloombuild.Config `yaml:"bloom_build,omitempty" category:"experimental"` @@ -114,7 +112,6 @@ type Config struct { Metastore metastore.Config `yaml:"metastore,omitempty"` MetastoreClient metastoreclient.Config `yaml:"metastore_client"` KafkaConfig kafka.Config `yaml:"kafka_config,omitempty" category:"experimental"` - KafkaIngester ingester_kafka.Config `yaml:"kafka_ingester,omitempty" category:"experimental"` RuntimeConfig runtimeconfig.Config `yaml:"runtime_config,omitempty"` OperationalConfig runtime.Config `yaml:"operational_config,omitempty"` @@ -172,9 +169,7 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) { c.CompactorHTTPClient.RegisterFlags(f) c.CompactorGRPCClient.RegisterFlags(f) c.IngesterClient.RegisterFlags(f) - // c.IngesterRF1Client.RegisterFlags(f) c.Ingester.RegisterFlags(f) - c.IngesterRF1.RegisterFlags(f) c.StorageConfig.RegisterFlags(f) c.IndexGateway.RegisterFlags(f) c.BloomGateway.RegisterFlags(f) @@ -198,7 +193,6 @@ func (c *Config) RegisterFlags(f *flag.FlagSet) { c.Metastore.RegisterFlags(f) c.MetastoreClient.RegisterFlags(f) c.KafkaConfig.RegisterFlags(f) - c.KafkaIngester.RegisterFlags(f) } func (c *Config) 
registerServerFlagsWithChangedDefaultValues(fs *flag.FlagSet) { @@ -271,9 +265,6 @@ func (c *Config) Validate() error { if err := c.Ingester.Validate(); err != nil { errs = append(errs, errors.Wrap(err, "CONFIG ERROR: invalid ingester config")) } - if err := c.IngesterRF1.Validate(); err != nil { - errs = append(errs, errors.Wrap(err, "CONFIG ERROR: invalid ingester config")) - } if err := c.LimitsConfig.Validate(); err != nil { errs = append(errs, errors.Wrap(err, "CONFIG ERROR: invalid limits_config config")) } @@ -304,13 +295,13 @@ func (c *Config) Validate() error { if err := c.Pattern.Validate(); err != nil { errs = append(errs, errors.Wrap(err, "CONFIG ERROR: invalid pattern_ingester config")) } - if c.KafkaIngester.Enabled { + if c.Ingester.KafkaIngestion.Enabled { if err := c.KafkaConfig.Validate(); err != nil { errs = append(errs, errors.Wrap(err, "CONFIG ERROR: invalid kafka_config config")) } - if err := c.KafkaIngester.Validate(); err != nil { - errs = append(errs, errors.Wrap(err, "CONFIG ERROR: invalid kafka_ingester config")) - } + } + if err := c.Distributor.Validate(); err != nil { + errs = append(errs, errors.Wrap(err, "CONFIG ERROR: invalid distributor config")) } errs = append(errs, validateSchemaValues(c)...) @@ -399,7 +390,6 @@ type Loki struct { Tee distributor.Tee PushParserWrapper push.RequestParserWrapper HTTPAuthMiddleware middleware.Interface - health *health.GRPCHealthService Codec Codec Metrics *server.Metrics @@ -523,11 +513,7 @@ func (t *Loki) Run(opts RunOpts) error { t.Server.HTTP.Path("/log_level").Methods("GET", "POST").Handler(util_log.LevelHandler(&t.Cfg.Server.LogLevel)) - if t.Cfg.isTarget(Metastore) { - grpc_health_v1.RegisterHealthServer(t.Server.GRPC, t.health) - } else { - grpc_health_v1.RegisterHealthServer(t.Server.GRPC, grpcutil.NewHealthCheck(sm)) - } + grpc_health_v1.RegisterHealthServer(t.Server.GRPC, grpcutil.NewHealthCheck(sm)) // Config endpoint adds a way to see the config and the changes compared to the defaults. 
t.bindConfigEndpoint(opts) @@ -694,9 +680,6 @@ func (t *Loki) setupModuleManager() error { mm.RegisterModule(Store, t.initStore, modules.UserInvisibleModule) mm.RegisterModule(Querier, t.initQuerier) mm.RegisterModule(Ingester, t.initIngester) - mm.RegisterModule(IngesterRF1, t.initIngesterRF1) - mm.RegisterModule(IngesterKafka, t.initKafkaIngester) - mm.RegisterModule(IngesterRF1RingClient, t.initIngesterRF1RingClient, modules.UserInvisibleModule) mm.RegisterModule(IngesterQuerier, t.initIngesterQuerier) mm.RegisterModule(IngesterGRPCInterceptors, t.initIngesterGRPCInterceptors, modules.UserInvisibleModule) mm.RegisterModule(QueryFrontendTripperware, t.initQueryFrontendMiddleware, modules.UserInvisibleModule) @@ -720,8 +703,6 @@ func (t *Loki) setupModuleManager() error { mm.RegisterModule(PatternRingClient, t.initPatternRingClient, modules.UserInvisibleModule) mm.RegisterModule(PatternIngesterTee, t.initPatternIngesterTee, modules.UserInvisibleModule) mm.RegisterModule(PatternIngester, t.initPatternIngester) - mm.RegisterModule(Metastore, t.initMetastore) - mm.RegisterModule(MetastoreClient, t.initMetastoreClient, modules.UserInvisibleModule) mm.RegisterModule(PartitionRing, t.initPartitionRing, modules.UserInvisibleModule) mm.RegisterModule(All, nil) @@ -736,12 +717,10 @@ func (t *Loki) setupModuleManager() error { Overrides: {RuntimeConfig}, OverridesExporter: {Overrides, Server}, TenantConfigs: {RuntimeConfig}, - Distributor: {Ring, Server, Overrides, TenantConfigs, PatternRingClient, PatternIngesterTee, IngesterRF1RingClient, Analytics, PartitionRing}, + Distributor: {Ring, Server, Overrides, TenantConfigs, PatternRingClient, PatternIngesterTee, Analytics, PartitionRing}, Store: {Overrides, IndexGatewayRing}, - IngesterRF1: {Store, Server, MemberlistKV, TenantConfigs, MetastoreClient, Analytics, PartitionRing}, - IngesterKafka: {Store, Server, MemberlistKV, TenantConfigs, MetastoreClient, Analytics, PartitionRing}, - Ingester: {Store, Server, MemberlistKV, TenantConfigs, Analytics}, - Querier: {Store, Ring, Server, IngesterQuerier, PatternRingClient, MetastoreClient, Overrides, Analytics, CacheGenerationLoader, QuerySchedulerRing}, + Ingester: {Store, Server, MemberlistKV, TenantConfigs, Analytics, PartitionRing}, + Querier: {Store, Ring, Server, IngesterQuerier, PatternRingClient, Overrides, Analytics, CacheGenerationLoader, QuerySchedulerRing}, QueryFrontendTripperware: {Server, Overrides, TenantConfigs}, QueryFrontend: {QueryFrontendTripperware, Analytics, CacheGenerationLoader, QuerySchedulerRing}, QueryScheduler: {Server, Overrides, MemberlistKV, Analytics, QuerySchedulerRing}, @@ -757,8 +736,6 @@ func (t *Loki) setupModuleManager() error { PatternRingClient: {Server, MemberlistKV, Analytics}, PatternIngesterTee: {Server, MemberlistKV, Analytics, PatternRingClient}, PatternIngester: {Server, MemberlistKV, Analytics, PatternRingClient, PatternIngesterTee}, - IngesterRF1RingClient: {Server, MemberlistKV, Analytics}, - Metastore: {Server, MetastoreClient}, IngesterQuerier: {Ring}, QuerySchedulerRing: {Overrides, MemberlistKV}, IndexGatewayRing: {Overrides, MemberlistKV}, @@ -766,10 +743,10 @@ func (t *Loki) setupModuleManager() error { MemberlistKV: {Server}, Read: {QueryFrontend, Querier}, - Write: {Ingester, IngesterRF1, Distributor, PatternIngester, IngesterKafka}, + Write: {Ingester, Distributor, PatternIngester}, Backend: {QueryScheduler, Ruler, Compactor, IndexGateway, BloomPlanner, BloomBuilder, BloomGateway}, - All: {QueryScheduler, QueryFrontend, Querier, Ingester, 
IngesterRF1, PatternIngester, Distributor, Ruler, Compactor, Metastore, IngesterKafka}, + All: {QueryScheduler, QueryFrontend, Querier, Ingester, PatternIngester, Distributor, Ruler, Compactor}, } if t.Cfg.Querier.PerRequestLimitsEnabled { diff --git a/pkg/loki/modules.go b/pkg/loki/modules.go index 20d744b0f620..1a72cb3c5b91 100644 --- a/pkg/loki/modules.go +++ b/pkg/loki/modules.go @@ -47,14 +47,7 @@ import ( "github.com/grafana/loki/v3/pkg/distributor" "github.com/grafana/loki/v3/pkg/indexgateway" "github.com/grafana/loki/v3/pkg/ingester" - ingester_rf1 "github.com/grafana/loki/v3/pkg/ingester-rf1" - "github.com/grafana/loki/v3/pkg/ingester-rf1/metastore" - metastoreclient "github.com/grafana/loki/v3/pkg/ingester-rf1/metastore/client" - "github.com/grafana/loki/v3/pkg/ingester-rf1/metastore/health" - "github.com/grafana/loki/v3/pkg/ingester-rf1/metastore/metastorepb" "github.com/grafana/loki/v3/pkg/ingester-rf1/objstore" - ingesterkafka "github.com/grafana/loki/v3/pkg/kafka/ingester" - kafka_tee "github.com/grafana/loki/v3/pkg/kafka/tee" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/logql" "github.com/grafana/loki/v3/pkg/logqlmodel/stats" @@ -110,9 +103,6 @@ const ( Querier string = "querier" CacheGenerationLoader string = "cache-generation-loader" Ingester string = "ingester" - IngesterRF1 string = "ingester-rf1" - IngesterKafka string = "ingester-kafka" - IngesterRF1RingClient string = "ingester-rf1-ring-client" PatternIngester string = "pattern-ingester" PatternIngesterTee string = "pattern-ingester-tee" PatternRingClient string = "pattern-ring-client" @@ -145,8 +135,6 @@ const ( Backend string = "backend" Analytics string = "analytics" InitCodec string = "init-codec" - Metastore string = "metastore" - MetastoreClient string = "metastore-client" PartitionRing string = "partition-ring" ) @@ -176,8 +164,6 @@ func (t *Loki) initServer() (services.Service, error) { t.Server = serv - t.health = health.NewGRPCHealthService() - servicesToWaitFor := func() []services.Service { svs := []services.Service(nil) for m, s := range t.serviceMap { @@ -334,20 +320,7 @@ func (t *Loki) initTenantConfigs() (_ services.Service, err error) { } func (t *Loki) initDistributor() (services.Service, error) { - if t.Cfg.IngesterRF1.Enabled { - rf1Tee, err := ingester_rf1.NewTee(t.Cfg.IngesterRF1, t.IngesterRF1RingClient, t.Cfg.MetricsNamespace, prometheus.DefaultRegisterer, util_log.Logger) - if err != nil { - return nil, err - } - t.Tee = distributor.WrapTee(t.Tee, rf1Tee) - } - if t.Cfg.KafkaIngester.Enabled { - kafkaTee, err := kafka_tee.NewTee(t.Cfg.KafkaConfig, t.Cfg.MetricsNamespace, prometheus.DefaultRegisterer, util_log.Logger, t.partitionRing) - if err != nil { - return nil, err - } - t.Tee = distributor.WrapTee(t.Tee, kafkaTee) - } + t.Cfg.Distributor.KafkaConfig = t.Cfg.KafkaConfig var err error logger := log.With(util_log.Logger, "component", "distributor") @@ -356,6 +329,7 @@ func (t *Loki) initDistributor() (services.Service, error) { t.Cfg.IngesterClient, t.tenantConfigs, t.ring, + t.partitionRing, t.Overrides, prometheus.DefaultRegisterer, t.Cfg.MetricsNamespace, @@ -613,6 +587,7 @@ func (t *Loki) initQuerier() (services.Service, error) { func (t *Loki) initIngester() (_ services.Service, err error) { logger := log.With(util_log.Logger, "component", "ingester") t.Cfg.Ingester.LifecyclerConfig.ListenPort = t.Cfg.Server.GRPCListenPort + t.Cfg.Ingester.KafkaIngestion.KafkaConfig = t.Cfg.KafkaConfig if t.Cfg.Ingester.ShutdownMarkerPath == "" && t.Cfg.Common.PathPrefix != 
"" { t.Cfg.Ingester.ShutdownMarkerPath = t.Cfg.Common.PathPrefix @@ -621,7 +596,7 @@ func (t *Loki) initIngester() (_ services.Service, err error) { level.Warn(util_log.Logger).Log("msg", "The config setting shutdown marker path is not set. The /ingester/prepare_shutdown endpoint won't work") } - t.Ingester, err = ingester.New(t.Cfg.Ingester, t.Cfg.IngesterClient, t.Store, t.Overrides, t.tenantConfigs, prometheus.DefaultRegisterer, t.Cfg.Distributor.WriteFailuresLogging, t.Cfg.MetricsNamespace, logger, t.UsageTracker, t.ring) + t.Ingester, err = ingester.New(t.Cfg.Ingester, t.Cfg.IngesterClient, t.Store, t.Overrides, t.tenantConfigs, prometheus.DefaultRegisterer, t.Cfg.Distributor.WriteFailuresLogging, t.Cfg.MetricsNamespace, logger, t.UsageTracker, t.ring, t.partitionRingWatcher) if err != nil { return } @@ -643,108 +618,15 @@ func (t *Loki) initIngester() (_ services.Service, err error) { t.Server.HTTP.Methods("POST", "GET", "DELETE").Path("/ingester/prepare_shutdown").Handler( httpMiddleware.Wrap(http.HandlerFunc(t.Ingester.PrepareShutdown)), ) + t.Server.HTTP.Methods("POST", "GET", "DELETE").Path("/ingester/prepare_partition_downscale").Handler( + httpMiddleware.Wrap(http.HandlerFunc(t.Ingester.PreparePartitionDownscaleHandler)), + ) t.Server.HTTP.Methods("POST", "GET").Path("/ingester/shutdown").Handler( httpMiddleware.Wrap(http.HandlerFunc(t.Ingester.ShutdownHandler)), ) return t.Ingester, nil } -func (t *Loki) initKafkaIngester() (_ services.Service, err error) { - if !t.Cfg.KafkaIngester.Enabled { - return nil, nil - } - t.Cfg.KafkaIngester.KafkaConfig = t.Cfg.KafkaConfig - logger := log.With(util_log.Logger, "component", "ingester-kafka") - t.Cfg.KafkaIngester.LifecyclerConfig.ListenPort = t.Cfg.Server.GRPCListenPort - - if t.Cfg.KafkaIngester.ShutdownMarkerPath == "" && t.Cfg.Common.PathPrefix != "" { - t.Cfg.KafkaIngester.ShutdownMarkerPath = t.Cfg.Common.PathPrefix - } - if t.Cfg.KafkaIngester.ShutdownMarkerPath == "" { - return nil, errors.New("the config setting shutdown marker path is not set. The /ingester/prepare-partition-downscale endpoint won't work") - } - storage, err := objstore.New(t.Cfg.SchemaConfig.Configs, t.Cfg.StorageConfig, t.ClientMetrics) - if err != nil { - return nil, err - } - - consumerFactory := ingesterkafka.NewConsumerFactory(t.MetastoreClient, storage, t.Cfg.KafkaIngester.FlushInterval, t.Cfg.KafkaIngester.FlushSize, logger, prometheus.DefaultRegisterer) - t.kafkaIngester, err = ingesterkafka.New(t.Cfg.KafkaIngester, consumerFactory, logger, t.Cfg.MetricsNamespace, prometheus.DefaultRegisterer) - if err != nil { - return nil, err - } - - httpMiddleware := middleware.Merge( - serverutil.RecoveryHTTPMiddleware, - ) - t.Server.HTTP.Methods("POST", "GET", "DELETE").Path("/ingester/prepare-partition-downscale").Handler( - httpMiddleware.Wrap(http.HandlerFunc(t.kafkaIngester.PreparePartitionDownscaleHandler)), - ) - - return t.kafkaIngester, nil -} - -func (t *Loki) initIngesterRF1() (_ services.Service, err error) { - if !t.Cfg.IngesterRF1.Enabled { - return nil, nil - } - - logger := log.With(util_log.Logger, "component", "ingester-rf1") - t.Cfg.IngesterRF1.LifecyclerConfig.ListenPort = t.Cfg.Server.GRPCListenPort - - if t.Cfg.IngesterRF1.ShutdownMarkerPath == "" && t.Cfg.Common.PathPrefix != "" { - t.Cfg.IngesterRF1.ShutdownMarkerPath = t.Cfg.Common.PathPrefix - } - if t.Cfg.IngesterRF1.ShutdownMarkerPath == "" { - level.Warn(util_log.Logger).Log("msg", "The config setting shutdown marker path is not set. 
The /ingester/prepare_shutdown endpoint won't work") - } - - t.IngesterRF1, err = ingester_rf1.New(t.Cfg.IngesterRF1, t.Cfg.IngesterRF1Client, t.Cfg.SchemaConfig.Configs, t.Cfg.StorageConfig, t.ClientMetrics, t.Overrides, t.tenantConfigs, t.MetastoreClient, prometheus.DefaultRegisterer, t.Cfg.Distributor.WriteFailuresLogging, t.Cfg.MetricsNamespace, logger, t.UsageTracker, t.ring) - if err != nil { - fmt.Println("Error initializing ingester rf1", err) - return - } - - if t.Cfg.IngesterRF1.Wrapper != nil { - t.IngesterRF1 = t.Cfg.IngesterRF1.Wrapper.Wrap(t.IngesterRF1) - } - - fmt.Println("registered GRPC") - logproto.RegisterPusherRF1Server(t.Server.GRPC, t.IngesterRF1) - - t.Server.HTTP.Path("/ingester-rf1/ring").Methods("GET", "POST").Handler(t.IngesterRF1) - - if t.Cfg.InternalServer.Enable { - t.InternalServer.HTTP.Path("/ingester-rf1/ring").Methods("GET", "POST").Handler(t.IngesterRF1) - } - - httpMiddleware := middleware.Merge( - serverutil.RecoveryHTTPMiddleware, - ) - t.Server.HTTP.Methods("GET", "POST").Path("/flush").Handler( - httpMiddleware.Wrap(http.HandlerFunc(t.IngesterRF1.FlushHandler)), - ) - t.Server.HTTP.Methods("POST", "GET", "DELETE").Path("/ingester-rf1/prepare_shutdown").Handler( - httpMiddleware.Wrap(http.HandlerFunc(t.IngesterRF1.PrepareShutdown)), - ) - t.Server.HTTP.Methods("POST", "GET").Path("/ingester-rf1/shutdown").Handler( - httpMiddleware.Wrap(http.HandlerFunc(t.IngesterRF1.ShutdownHandler)), - ) - return t.IngesterRF1, nil -} - -func (t *Loki) initIngesterRF1RingClient() (_ services.Service, err error) { - if !t.Cfg.IngesterRF1.Enabled { - return nil, nil - } - ringClient, err := ingester_rf1.NewRingClient(t.Cfg.IngesterRF1, t.Cfg.MetricsNamespace, prometheus.DefaultRegisterer, util_log.Logger) - if err != nil { - return nil, err - } - t.IngesterRF1RingClient = ringClient - return ringClient, nil -} - func (t *Loki) initPatternIngester() (_ services.Service, err error) { if !t.Cfg.Pattern.Enabled { return nil, nil @@ -995,7 +877,7 @@ func (t *Loki) updateConfigForShipperStore() { t.Cfg.StorageConfig.TSDBShipperConfig.Mode = indexshipper.ModeWriteOnly t.Cfg.StorageConfig.TSDBShipperConfig.IngesterDBRetainPeriod = shipperQuerierIndexUpdateDelay(t.Cfg.StorageConfig.IndexCacheValidity, t.Cfg.StorageConfig.TSDBShipperConfig.ResyncInterval) - case t.Cfg.isTarget(IngesterRF1), t.Cfg.isTarget(Querier), t.Cfg.isTarget(Ruler), t.Cfg.isTarget(Read), t.Cfg.isTarget(Backend), t.isModuleActive(IndexGateway), t.Cfg.isTarget(BloomPlanner), t.Cfg.isTarget(BloomBuilder): + case t.Cfg.isTarget(Querier), t.Cfg.isTarget(Ruler), t.Cfg.isTarget(Read), t.Cfg.isTarget(Backend), t.isModuleActive(IndexGateway), t.Cfg.isTarget(BloomPlanner), t.Cfg.isTarget(BloomBuilder): // We do not want query to do any updates to index t.Cfg.StorageConfig.BoltDBShipperConfig.Mode = indexshipper.ModeReadOnly t.Cfg.StorageConfig.TSDBShipperConfig.Mode = indexshipper.ModeReadOnly @@ -1501,9 +1383,7 @@ func (t *Loki) initMemberlistKV() (services.Service, error) { t.Cfg.QueryScheduler.SchedulerRing.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV t.Cfg.Ruler.Ring.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV t.Cfg.Pattern.LifecyclerConfig.RingConfig.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV - t.Cfg.IngesterRF1.LifecyclerConfig.RingConfig.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV - t.Cfg.KafkaIngester.PartitionRingConfig.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV - t.Cfg.KafkaIngester.LifecyclerConfig.RingConfig.KVStore.MemberlistKV = 
t.MemberlistKV.GetMemberlistKV + t.Cfg.Ingester.KafkaIngestion.PartitionRingConfig.KVStore.MemberlistKV = t.MemberlistKV.GetMemberlistKV t.Server.HTTP.Handle("/memberlist", t.MemberlistKV) if t.Cfg.InternalServer.Enable { @@ -1709,8 +1589,14 @@ func (t *Loki) initBloomPlanner() (services.Service, error) { logger := log.With(util_log.Logger, "component", "bloom-planner") + var ringManager *lokiring.RingManager if t.Cfg.isTarget(Backend) && t.indexGatewayRingManager != nil { + // Bloom planner and builder are part of the backend target in Simple Scalable Deployment mode. + // To avoid creating a new ring just for this special case, we can use the index gateway ring, which is already + // part of the backend target. The planner creates a watcher service that regularly checks which replica is + // the leader. Only the leader plans the tasks. Builders connect to the leader instance to pull tasks. level.Info(logger).Log("msg", "initializing bloom planner in ring mode as part of backend target") + ringManager = t.indexGatewayRingManager } p, err := planner.New( @@ -1722,11 +1608,7 @@ func (t *Loki) initBloomPlanner() (services.Service, error) { t.BloomStore, logger, prometheus.DefaultRegisterer, - // Bloom planner and builder are part of the backend target in Simple Scalable Deployment mode. - // To avoid creating a new ring just for this special case, we can use the index gateway ring, which is already - // part of the backend target. The planner creates a watcher service that regularly checks which replica is - // the leader. Only the leader plans the tasks. Builders connect to the leader instance to pull tasks. - t.indexGatewayRingManager, + ringManager, ) if err != nil { return nil, err @@ -1743,8 +1625,14 @@ func (t *Loki) initBloomBuilder() (services.Service, error) { logger := log.With(util_log.Logger, "component", "bloom-builder") + var ringManager *lokiring.RingManager if t.Cfg.isTarget(Backend) && t.indexGatewayRingManager != nil { + // Bloom planner and builder are part of the backend target in Simple Scalable Deployment mode. + // To avoid creating a new ring just for this special case, we can use the index gateway ring, which is already + // part of the backend target. The planner creates a watcher service that regularly checks which replica is + // the leader. Only the leader plans the tasks. Builders connect to the leader instance to pull tasks. level.Info(logger).Log("msg", "initializing bloom builder in ring mode as part of backend target") + ringManager = t.indexGatewayRingManager } return builder.New( @@ -1757,11 +1645,7 @@ func (t *Loki) initBloomBuilder() (services.Service, error) { t.BloomStore, logger, prometheus.DefaultRegisterer, - // Bloom planner and builder are part of the backend target in Simple Scalable Deployment mode. - // To avoid creating a new ring just for this special case, we can use the index gateway ring, which is already - // part of the backend target. The planner creates a watcher service that regularly checks which replica is - // the leader. Only the leader plans the tasks. Builders connect to the leader instance to pull tasks. 
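The planner and builder hunks above replace the inline ring argument with an explicit ringManager variable that stays nil outside the backend target. A trimmed sketch of that selection, with RingManager standing in for lokiring.RingManager and planner.New assumed to treat a nil ring manager as "run without a ring":

// RingManager is a stand-in for lokiring.RingManager.
type RingManager struct{}

// pickPlannerRing returns the index-gateway ring only when running as the
// backend target in Simple Scalable Deployment mode; otherwise the planner
// and builder run ringless.
func pickPlannerRing(isBackendTarget bool, indexGatewayRing *RingManager) *RingManager {
	if isBackendTarget && indexGatewayRing != nil {
		// Borrow the ring the backend target already runs for the index
		// gateway; only the elected leader plans tasks, and builders pull
		// work from that leader.
		return indexGatewayRing
	}
	return nil
}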
- t.indexGatewayRingManager, + ringManager, ) } @@ -1867,51 +1751,22 @@ func (t *Loki) initAnalytics() (services.Service, error) { return ur, nil } -func (t *Loki) initMetastore() (services.Service, error) { - if !t.Cfg.IngesterRF1.Enabled && !t.Cfg.KafkaIngester.Enabled { - return nil, nil - } - if t.Cfg.isTarget(All) { - t.Cfg.MetastoreClient.MetastoreAddress = fmt.Sprintf("localhost:%d", t.Cfg.Server.GRPCListenPort) - } - m, err := metastore.New(t.Cfg.Metastore, log.With(util_log.Logger, "component", "metastore"), prometheus.DefaultRegisterer, t.health) - if err != nil { - return nil, err - } - // Service methods have tenant auth disabled in the fakeauth.SetupAuthMiddleware call since this is a shared service - metastorepb.RegisterMetastoreServiceServer(t.Server.GRPC, m) - - return m, nil -} - -func (t *Loki) initMetastoreClient() (services.Service, error) { - if !t.Cfg.IngesterRF1.Enabled && !t.Cfg.QuerierRF1.Enabled && !t.Cfg.KafkaIngester.Enabled { - return nil, nil - } - mc, err := metastoreclient.New(t.Cfg.MetastoreClient, prometheus.DefaultRegisterer) - if err != nil { - return nil, err - } - t.MetastoreClient = mc - return mc.Service(), nil -} - // The Ingest Partition Ring is responsible for watching the available ingesters and assigning partitions to incoming requests. func (t *Loki) initPartitionRing() (services.Service, error) { - if !t.Cfg.KafkaIngester.Enabled { // TODO: New config flag + if !t.Cfg.Ingester.KafkaIngestion.Enabled { return nil, nil } - kvClient, err := kv.NewClient(t.Cfg.KafkaIngester.PartitionRingConfig.KVStore, ring.GetPartitionRingCodec(), kv.RegistererWithKVName(prometheus.DefaultRegisterer, ingesterkafka.PartitionRingName+"-watcher"), util_log.Logger) + kvClient, err := kv.NewClient(t.Cfg.Ingester.KafkaIngestion.PartitionRingConfig.KVStore, ring.GetPartitionRingCodec(), kv.RegistererWithKVName(prometheus.DefaultRegisterer, ingester.PartitionRingName+"-watcher"), util_log.Logger) if err != nil { return nil, fmt.Errorf("creating KV store for partitions ring watcher: %w", err) } - t.partitionRingWatcher = ring.NewPartitionRingWatcher(ingesterkafka.PartitionRingName, ingesterkafka.PartitionRingName+"-key", kvClient, util_log.Logger, prometheus.WrapRegistererWithPrefix("loki_", prometheus.DefaultRegisterer)) + t.partitionRingWatcher = ring.NewPartitionRingWatcher(ingester.PartitionRingName, ingester.PartitionRingKey, kvClient, util_log.Logger, prometheus.WrapRegistererWithPrefix("loki_", prometheus.DefaultRegisterer)) t.partitionRing = ring.NewPartitionInstanceRing(t.partitionRingWatcher, t.ring, t.Cfg.Ingester.LifecyclerConfig.RingConfig.HeartbeatTimeout) // Expose a web page to view the partitions ring state. 
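A condensed sketch of the initPartitionRing wiring above, with the config plumbing stripped. Every call follows the signatures already present in the hunk; cfg, instanceRing, reg, and logger are assumed to be supplied by the caller:

func newPartitionRing(cfg ingester.Config, instanceRing *ring.Ring, reg prometheus.Registerer, logger log.Logger) (*ring.PartitionRingWatcher, *ring.PartitionInstanceRing, error) {
	// KV client over the store that holds the partition ring entry.
	kvClient, err := kv.NewClient(cfg.KafkaIngestion.PartitionRingConfig.KVStore, ring.GetPartitionRingCodec(), kv.RegistererWithKVName(reg, ingester.PartitionRingName+"-watcher"), logger)
	if err != nil {
		return nil, nil, fmt.Errorf("creating KV store for partitions ring watcher: %w", err)
	}
	// The watcher maintains an in-memory view of the partition ring; the
	// instance ring wrapper joins partitions to live ingester instances.
	watcher := ring.NewPartitionRingWatcher(ingester.PartitionRingName, ingester.PartitionRingKey, kvClient, logger, prometheus.WrapRegistererWithPrefix("loki_", reg))
	partitionRing := ring.NewPartitionInstanceRing(watcher, instanceRing, cfg.LifecyclerConfig.RingConfig.HeartbeatTimeout)
	return watcher, partitionRing, nil
}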
- t.Server.HTTP.Path("/partition-ring").Methods("GET", "POST").Handler(ring.NewPartitionRingPageHandler(t.partitionRingWatcher, ring.NewPartitionRingEditor(ingesterkafka.PartitionRingName+"-key", kvClient))) + t.Server.HTTP.Path("/partition-ring").Methods("GET", "POST").Handler(ring.NewPartitionRingPageHandler(t.partitionRingWatcher, ring.NewPartitionRingEditor(ingester.PartitionRingKey, kvClient))) return t.partitionRingWatcher, nil } diff --git a/pkg/pattern/aggregation/push.go b/pkg/pattern/aggregation/push.go index 9aac2e3a5050..649d71f92029 100644 --- a/pkg/pattern/aggregation/push.go +++ b/pkg/pattern/aggregation/push.go @@ -11,7 +11,6 @@ import ( "sync" "time" - "github.com/dustin/go-humanize" "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/golang/snappy" @@ -22,6 +21,7 @@ import ( "github.com/grafana/loki/v3/pkg/loghttp/push" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/logql/syntax" + "github.com/grafana/loki/v3/pkg/util" "github.com/grafana/loki/v3/pkg/util/build" "github.com/grafana/dskit/backoff" @@ -312,9 +312,9 @@ func AggregatedMetricEntry( service string, lbls labels.Labels, ) string { - byteString := humanize.Bytes(totalBytes) + byteString := util.HumanizeBytes(totalBytes) base := fmt.Sprintf( - "ts=%d bytes=%s count=%d %s=%s", + "ts=%d bytes=%s count=%d %s=\"%s\"", ts.UnixNano(), byteString, totalCount, @@ -322,7 +322,7 @@ func AggregatedMetricEntry( ) for _, l := range lbls { - base += fmt.Sprintf(" %s=%s", l.Name, l.Value) + base += fmt.Sprintf(" %s=\"%s\"", l.Name, l.Value) } return base diff --git a/pkg/pattern/aggregation/push_test.go b/pkg/pattern/aggregation/push_test.go index 15f0336b5f7e..149b54a97715 100644 --- a/pkg/pattern/aggregation/push_test.go +++ b/pkg/pattern/aggregation/push_test.go @@ -229,6 +229,9 @@ func Test_Push(t *testing.T) { stream2.Entries[2].Line, ) + // sanity check that bytes are logged in humanized form without whitespaces + assert.Contains(t, stream1.Entries[0].Line, "bytes=1B") + case <-time.After(5 * time.Second): t.Fatal("timeout") } diff --git a/pkg/pattern/instance.go b/pkg/pattern/instance.go index 24f2814e467f..6e3a3de998be 100644 --- a/pkg/pattern/instance.go +++ b/pkg/pattern/instance.go @@ -9,7 +9,6 @@ import ( "sync" "github.com/go-kit/log" - "github.com/go-kit/log/level" "github.com/grafana/dskit/httpgrpc" "github.com/grafana/dskit/multierror" "github.com/grafana/dskit/ring" @@ -264,20 +263,11 @@ func (i *instance) Observe(stream string, entries []logproto.Entry) { streamMetrics, ok := i.aggMetricsByStreamAndLevel[stream] if !ok { - streamMetrics = make(map[string]*aggregatedMetrics, len(constants.LogLevels)) - for _, l := range constants.LogLevels { - streamMetrics[l] = &aggregatedMetrics{} - } + streamMetrics = map[string]*aggregatedMetrics{} } if _, ok := streamMetrics[lvl]; !ok { - level.Warn(i.logger).Log( - "msg", "unknown log level while observing stream", - "level", lvl, - "stream", stream, - ) - - lvl = constants.LogLevelUnknown + streamMetrics[lvl] = &aggregatedMetrics{} } streamMetrics[lvl].bytes += uint64(len(entry.Line)) diff --git a/pkg/pattern/tee_service.go b/pkg/pattern/tee_service.go index c38ef95dd90f..c279474cce42 100644 --- a/pkg/pattern/tee_service.go +++ b/pkg/pattern/tee_service.go @@ -21,7 +21,6 @@ import ( "github.com/grafana/loki/v3/pkg/loghttp/push" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/logql/syntax" - "github.com/grafana/loki/v3/pkg/util/constants" ring_client "github.com/grafana/dskit/ring/client" ) @@ -77,10 +76,9 
@@ func NewTeeService( sendDuration: instrument.NewHistogramCollector( promauto.With(registerer).NewHistogramVec( prometheus.HistogramOpts{ - Namespace: constants.Loki, - Name: "pattern_ingester_tee_send_duration_seconds", - Help: "Time spent sending batches from the tee to the pattern ingester", - Buckets: prometheus.DefBuckets, + Name: "pattern_ingester_tee_send_duration_seconds", + Help: "Time spent sending batches from the tee to the pattern ingester", + Buckets: prometheus.DefBuckets, }, instrument.HistogramCollectorBuckets, ), ), diff --git a/pkg/push/types.go b/pkg/push/types.go index d0fc6d6cb835..7ab5ab2aeb5b 100644 --- a/pkg/push/types.go +++ b/pkg/push/types.go @@ -571,8 +571,6 @@ func (m *Entry) Unmarshal(dAtA []byte) error { } // Unmarshal a LabelAdapter, implements proto.Unmarshaller. -// NB this is a copy of the autogenerated code to unmarshal a LabelPair, -// with the byte copying replaced with a yoloString. func (m *LabelAdapter) Unmarshal(dAtA []byte) error { l := len(dAtA) iNdEx := 0 @@ -632,7 +630,7 @@ func (m *LabelAdapter) Unmarshal(dAtA []byte) error { if postIndex > l { return io.ErrUnexpectedEOF } - m.Name = yoloString(dAtA[iNdEx:postIndex]) + m.Name = string(dAtA[iNdEx:postIndex]) iNdEx = postIndex case 2: if wireType != 2 { @@ -664,7 +662,7 @@ func (m *LabelAdapter) Unmarshal(dAtA []byte) error { if postIndex > l { return io.ErrUnexpectedEOF } - m.Value = yoloString(dAtA[iNdEx:postIndex]) + m.Value = string(dAtA[iNdEx:postIndex]) iNdEx = postIndex default: iNdEx = preIndex diff --git a/pkg/querier/http.go b/pkg/querier/http.go index 5f0e928b6a1c..862f9e2a2013 100644 --- a/pkg/querier/http.go +++ b/pkg/querier/http.go @@ -122,11 +122,7 @@ func (q *QuerierAPI) LabelHandler(ctx context.Context, req *logproto.LabelReques sp.LogKV(statResult.KVList()...) } - status := 200 - if err != nil { - status, _ = serverutil.ClientHTTPStatusAndError(err) - } - + status, _ := serverutil.ClientHTTPStatusAndError(err) logql.RecordLabelQueryMetrics(ctx, util_log.Logger, *req.Start, *req.End, req.Name, req.Query, strconv.Itoa(status), statResult) return resp, err @@ -277,11 +273,7 @@ func (q *QuerierAPI) SeriesHandler(ctx context.Context, req *logproto.SeriesRequ sp.LogKV(statResult.KVList()...) } - status := 200 - if err != nil { - status, _ = serverutil.ClientHTTPStatusAndError(err) - } - + status, _ := serverutil.ClientHTTPStatusAndError(err) logql.RecordSeriesQueryMetrics(ctx, util_log.Logger, req.Start, req.End, req.Groups, strconv.Itoa(status), req.GetShards(), statResult) return resp, statResult, err @@ -308,11 +300,7 @@ func (q *QuerierAPI) IndexStatsHandler(ctx context.Context, req *loghttp.RangeQu sp.LogKV(statResult.KVList()...) } - status := 200 - if err != nil { - status, _ = serverutil.ClientHTTPStatusAndError(err) - } - + status, _ := serverutil.ClientHTTPStatusAndError(err) logql.RecordStatsQueryMetrics(ctx, util_log.Logger, req.Start, req.End, req.Query, strconv.Itoa(status), statResult) return resp, err @@ -340,11 +328,7 @@ func (q *QuerierAPI) IndexShardsHandler(ctx context.Context, req *loghttp.RangeQ sp.LogKV(statResult.KVList()...) 
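The querier/http.go hunks here collapse the explicit `status := 200` default into a single call. That is only behavior-preserving if serverutil.ClientHTTPStatusAndError returns 200 for a nil error; a stand-in sketch of the contract being relied on:

// clientHTTPStatusAndError is a stand-in for serverutil.ClientHTTPStatusAndError.
// The refactor assumes a nil error maps to http.StatusOK, which makes the old
// pre-initialized 200 redundant.
func clientHTTPStatusAndError(err error) (int, error) {
	if err == nil {
		return http.StatusOK, nil
	}
	// ... map known client-side errors to 4xx, everything else to 5xx ...
	return http.StatusInternalServerError, err
}

Each handler then feeds strconv.Itoa(status) into its Record*QueryMetrics call on both the success and the error path.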
} - status := 200 - if err != nil { - status, _ = serverutil.ClientHTTPStatusAndError(err) - } - + status, _ := serverutil.ClientHTTPStatusAndError(err) logql.RecordShardsQueryMetrics( ctx, util_log.Logger, req.Start, req.End, req.Query, targetBytesPerShard, strconv.Itoa(status), resLength, statResult, ) @@ -377,11 +361,7 @@ func (q *QuerierAPI) VolumeHandler(ctx context.Context, req *logproto.VolumeRequ sp.LogKV(statResult.KVList()...) } - status := 200 - if err != nil { - status, _ = serverutil.ClientHTTPStatusAndError(err) - } - + status, _ := serverutil.ClientHTTPStatusAndError(err) logql.RecordVolumeQueryMetrics(ctx, util_log.Logger, req.From.Time(), req.Through.Time(), req.GetQuery(), uint32(req.GetLimit()), time.Duration(req.GetStep()), strconv.Itoa(status), statResult) return resp, nil diff --git a/pkg/querier/querier.go b/pkg/querier/querier.go index 2046763e6973..b6000b5479ec 100644 --- a/pkg/querier/querier.go +++ b/pkg/querier/querier.go @@ -1071,6 +1071,7 @@ func containsAllIDTypes(values []string) bool { return true } +// TODO(twhitney): Delete this method and the GRPC service signature. This is now handled in the query frontend. func (q *SingleTenantQuerier) DetectedFields(ctx context.Context, req *logproto.DetectedFieldsRequest) (*logproto.DetectedFieldsResponse, error) { expr, err := syntax.ParseLogSelector(req.Query, true) if err != nil { @@ -1113,13 +1114,16 @@ func (q *SingleTenantQuerier) DetectedFields(ctx context.Context, req *logproto. level.Warn(q.logger).Log("msg", "failed to marshal hyperloglog sketch", "err", err) continue } - + p := v.parsers + if len(p) == 0 { + p = nil + } fields[fieldCount] = &logproto.DetectedField{ Label: k, Type: v.fieldType, Cardinality: v.Estimate(), Sketch: sketch, - Parsers: v.parsers, + Parsers: p, } fieldCount++ @@ -1131,33 +1135,6 @@ func (q *SingleTenantQuerier) DetectedFields(ctx context.Context, req *logproto. 
}, nil } -func getParsersFromExpr(expr syntax.LogSelectorExpr) []string { - parsers := make([]string, 0) - expr.Walk(func(e syntax.Expr) { - switch concrete := e.(type) { - case *syntax.LogfmtParserExpr, *syntax.LogfmtExpressionParser: - if !slices.Contains(parsers, "logfmt") { - parsers = append(parsers, "logfmt") - } - case *syntax.JSONExpressionParser: - if !slices.Contains(parsers, "json") { - parsers = append(parsers, "json") - } - case *syntax.LabelParserExpr: - if concrete.Op == syntax.OpParserTypeJSON { - if !slices.Contains(parsers, "json") { - parsers = append(parsers, "json") - } - } - } - // bail if we found both parsers - if len(parsers) == 2 { - return - } - }) - return parsers -} - type parsedFields struct { sketch *hyperloglog.Sketch fieldType logproto.DetectedFieldType diff --git a/pkg/querier/querier_test.go b/pkg/querier/querier_test.go index 6bb83b453906..9b4928ee34c2 100644 --- a/pkg/querier/querier_test.go +++ b/pkg/querier/querier_test.go @@ -16,21 +16,17 @@ import ( ring_client "github.com/grafana/dskit/ring/client" "github.com/grafana/dskit/user" "github.com/prometheus/common/model" - "github.com/prometheus/prometheus/promql/parser" "github.com/stretchr/testify/assert" "github.com/stretchr/testify/mock" "github.com/stretchr/testify/require" util_log "github.com/grafana/loki/v3/pkg/util/log" - "github.com/grafana/loki/pkg/push" - "github.com/grafana/loki/v3/pkg/compactor/deletion" "github.com/grafana/loki/v3/pkg/ingester/client" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/logql" "github.com/grafana/loki/v3/pkg/logql/syntax" - "github.com/grafana/loki/v3/pkg/logqlmodel" "github.com/grafana/loki/v3/pkg/querier/plan" "github.com/grafana/loki/v3/pkg/storage" "github.com/grafana/loki/v3/pkg/util/constants" @@ -1736,935 +1732,3 @@ func BenchmarkQuerierDetectedLabels(b *testing.B) { assert.NoError(b, err) } } - -func TestQuerier_DetectedFields(t *testing.T) { - limits, err := validation.NewOverrides(defaultLimitsTestConfig(), nil) - require.NoError(t, err) - ctx := user.InjectOrgID(context.Background(), "test") - - conf := mockQuerierConfig() - conf.IngesterQueryStoreMaxLookback = 0 - - request := logproto.DetectedFieldsRequest{ - Start: time.Now().Add(-1 * time.Minute), - End: time.Now(), - Query: `{type="test"}`, - LineLimit: 1000, - FieldLimit: 1000, - } - - t.Run("returns detected fields from queried logs", func(t *testing.T) { - store := newStoreMock() - store.On("SelectLogs", mock.Anything, mock.Anything). - Return(mockLogfmtStreamIterator(1, 5), nil) - - queryClient := newQueryClientMock() - queryClient.On("Recv"). - Return(mockQueryResponse([]logproto.Stream{mockLogfmtStream(1, 5)}), nil) - - ingesterClient := newQuerierClientMock() - ingesterClient.On("Query", mock.Anything, mock.Anything, mock.Anything). 
- Return(queryClient, nil) - - querier, err := newQuerier( - conf, - mockIngesterClientConfig(), - newIngesterClientMockFactory(ingesterClient), - mockReadRingWithOneActiveIngester(), - &mockDeleteGettter{}, - store, limits) - require.NoError(t, err) - - resp, err := querier.DetectedFields(ctx, &request) - require.NoError(t, err) - - detectedFields := resp.Fields - // log lines come from querier_mock_test.go - // message="line %d" count=%d fake=true bytes=%dMB duration=%dms percent=%f even=%t - assert.Len(t, detectedFields, 8) - expectedCardinality := map[string]uint64{ - "message": 5, - "count": 5, - "fake": 1, - "bytes": 5, - "duration": 5, - "percent": 5, - "even": 2, - "name_extracted": 1, - } - for _, d := range detectedFields { - card := expectedCardinality[d.Label] - assert.Equal(t, card, d.Cardinality, "Expected cardinality mismatch for: %s", d.Label) - } - }) - - t.Run("returns detected fields with structured metadata from queried logs", func(t *testing.T) { - store := newStoreMock() - store.On("SelectLogs", mock.Anything, mock.Anything). - Return(mockLogfmtStreamIterator(1, 5), nil) - - queryClient := newQueryClientMock() - queryClient.On("Recv"). - Return(mockQueryResponse([]logproto.Stream{mockLogfmtStreamWithStructuredMetadata(1, 5)}), nil) - - ingesterClient := newQuerierClientMock() - ingesterClient.On("Query", mock.Anything, mock.Anything, mock.Anything). - Return(queryClient, nil) - - querier, err := newQuerier( - conf, - mockIngesterClientConfig(), - newIngesterClientMockFactory(ingesterClient), - mockReadRingWithOneActiveIngester(), - &mockDeleteGettter{}, - store, limits) - require.NoError(t, err) - - resp, err := querier.DetectedFields(ctx, &request) - require.NoError(t, err) - - detectedFields := resp.Fields - // log lines come from querier_mock_test.go - // message="line %d" count=%d fake=true bytes=%dMB duration=%dms percent=%f even=%t - assert.Len(t, detectedFields, 10) - expectedCardinality := map[string]uint64{ - "variable": 5, - "constant": 1, - "message": 5, - "count": 5, - "fake": 1, - "bytes": 5, - "duration": 5, - "percent": 5, - "even": 2, - "name_extracted": 1, - } - for _, d := range detectedFields { - card := expectedCardinality[d.Label] - assert.Equal(t, card, d.Cardinality, "Expected cardinality mismatch for: %s", d.Label) - } - }) - - t.Run("correctly identifies different field types", func(t *testing.T) { - store := newStoreMock() - store.On("SelectLogs", mock.Anything, mock.Anything). - Return(mockLogfmtStreamIterator(1, 2), nil) - - queryClient := newQueryClientMock() - queryClient.On("Recv"). - Return(mockQueryResponse([]logproto.Stream{mockLogfmtStream(1, 2)}), nil) - - ingesterClient := newQuerierClientMock() - ingesterClient.On("Query", mock.Anything, mock.Anything, mock.Anything). 
- Return(queryClient, nil) - - querier, err := newQuerier( - conf, - mockIngesterClientConfig(), - newIngesterClientMockFactory(ingesterClient), - mockReadRingWithOneActiveIngester(), - &mockDeleteGettter{}, - store, limits) - require.NoError(t, err) - - resp, err := querier.DetectedFields(ctx, &request) - require.NoError(t, err) - - detectedFields := resp.Fields - // log lines come from querier_mock_test.go - // message="line %d" count=%d fake=true bytes=%dMB duration=%dms percent=%f even=%t - assert.Len(t, detectedFields, 8) - - var messageField, countField, bytesField, durationField, floatField, evenField *logproto.DetectedField - for _, field := range detectedFields { - switch field.Label { - case "message": - messageField = field - case "count": - countField = field - case "bytes": - bytesField = field - case "duration": - durationField = field - case "percent": - floatField = field - case "even": - evenField = field - } - } - - assert.Equal(t, logproto.DetectedFieldString, messageField.Type) - assert.Equal(t, logproto.DetectedFieldInt, countField.Type) - assert.Equal(t, logproto.DetectedFieldBytes, bytesField.Type) - assert.Equal(t, logproto.DetectedFieldDuration, durationField.Type) - assert.Equal(t, logproto.DetectedFieldFloat, floatField.Type) - assert.Equal(t, logproto.DetectedFieldBoolean, evenField.Type) - }) - - t.Run("correctly identifies parser to use with logfmt and structured metadata", func(t *testing.T) { - store := newStoreMock() - store.On("SelectLogs", mock.Anything, mock.Anything). - Return(mockLogfmtStreamIterator(1, 2), nil) - - queryClient := newQueryClientMock() - queryClient.On("Recv"). - Return(mockQueryResponse([]logproto.Stream{mockLogfmtStreamWithStructuredMetadata(1, 2)}), nil) - - ingesterClient := newQuerierClientMock() - ingesterClient.On("Query", mock.Anything, mock.Anything, mock.Anything). 
- Return(queryClient, nil) - - querier, err := newQuerier( - conf, - mockIngesterClientConfig(), - newIngesterClientMockFactory(ingesterClient), - mockReadRingWithOneActiveIngester(), - &mockDeleteGettter{}, - store, limits) - require.NoError(t, err) - - resp, err := querier.DetectedFields(ctx, &request) - require.NoError(t, err) - - detectedFields := resp.Fields - // log lines come from querier_mock_test.go - // message="line %d" count=%d fake=true bytes=%dMB duration=%dms percent=%f even=%t - assert.Len(t, detectedFields, 10) - - var messageField, countField, bytesField, durationField, floatField, evenField, constantField, variableField *logproto.DetectedField - for _, field := range detectedFields { - switch field.Label { - case "message": - messageField = field - case "count": - countField = field - case "bytes": - bytesField = field - case "duration": - durationField = field - case "percent": - floatField = field - case "even": - evenField = field - case "constant": - constantField = field - case "variable": - variableField = field - } - } - - assert.Equal(t, []string{"logfmt"}, messageField.Parsers) - assert.Equal(t, []string{"logfmt"}, countField.Parsers) - assert.Equal(t, []string{"logfmt"}, bytesField.Parsers) - assert.Equal(t, []string{"logfmt"}, durationField.Parsers) - assert.Equal(t, []string{"logfmt"}, floatField.Parsers) - assert.Equal(t, []string{"logfmt"}, evenField.Parsers) - assert.Equal(t, []string{}, constantField.Parsers) - assert.Equal(t, []string{}, variableField.Parsers) - }, - ) - - t.Run( - "adds _extracted suffix to detected fields that conflict with indexed labels", - func(t *testing.T) { - store := newStoreMock() - store.On("SelectLogs", mock.Anything, mock.Anything). - Return(mockLogfmtStreamIterator(1, 2), nil) - - queryClient := newQueryClientMock() - queryClient.On("Recv"). - Return(mockQueryResponse([]logproto.Stream{mockLogfmtStreamWithStructuredMetadata(1, 2)}), nil) - - ingesterClient := newQuerierClientMock() - ingesterClient.On("Query", mock.Anything, mock.Anything, mock.Anything). - Return(queryClient, nil) - - querier, err := newQuerier( - conf, - mockIngesterClientConfig(), - newIngesterClientMockFactory(ingesterClient), - mockReadRingWithOneActiveIngester(), - &mockDeleteGettter{}, - store, limits) - require.NoError(t, err) - - resp, err := querier.DetectedFields(ctx, &request) - require.NoError(t, err) - - detectedFields := resp.Fields - // log lines come from querier_mock_test.go - // message="line %d" count=%d fake=true bytes=%dMB duration=%dms percent=%f even=%t - assert.Len(t, detectedFields, 10) - - var nameField *logproto.DetectedField - for _, field := range detectedFields { - switch field.Label { - case "name_extracted": - nameField = field - } - } - - assert.NotNil(t, nameField) - assert.Equal(t, "name_extracted", nameField.Label) - assert.Equal(t, logproto.DetectedFieldString, nameField.Type) - assert.Equal(t, []string{"logfmt"}, nameField.Parsers) - assert.Equal(t, uint64(1), nameField.Cardinality) - }, - ) -} - -func BenchmarkQuerierDetectedFields(b *testing.B) { - limits, _ := validation.NewOverrides(defaultLimitsTestConfig(), nil) - ctx := user.InjectOrgID(context.Background(), "test") - - conf := mockQuerierConfig() - conf.IngesterQueryStoreMaxLookback = 0 - - request := logproto.DetectedFieldsRequest{ - Start: time.Now().Add(-1 * time.Minute), - End: time.Now(), - Query: `{type="test"}`, - LineLimit: 1000, - FieldLimit: 1000, - } - - store := newStoreMock() - store.On("SelectLogs", mock.Anything, mock.Anything). 
- Return(mockLogfmtStreamIterator(1, 2), nil) - - queryClient := newQueryClientMock() - queryClient.On("Recv"). - Return(mockQueryResponse([]logproto.Stream{mockLogfmtStream(1, 2)}), nil) - - ingesterClient := newQuerierClientMock() - ingesterClient.On("Query", mock.Anything, mock.Anything, mock.Anything). - Return(queryClient, nil) - - querier, _ := newQuerier( - conf, - mockIngesterClientConfig(), - newIngesterClientMockFactory(ingesterClient), - mockReadRingWithOneActiveIngester(), - &mockDeleteGettter{}, - store, limits) - - b.ReportAllocs() - b.ResetTimer() - - for i := 0; i < b.N; i++ { - _, err := querier.DetectedFields(ctx, &request) - assert.NoError(b, err) - } -} - -func Test_getParsersFromExpr(t *testing.T) { - t.Run("detects logfmt parser", func(t *testing.T) { - exprStr := `{foo="bar"} | logfmt` - expr, err := syntax.ParseLogSelector(exprStr, true) - require.NoError(t, err) - assert.Equal(t, []string{"logfmt"}, getParsersFromExpr(expr)) - }) - - t.Run("detects json parser", func(t *testing.T) { - exprStr := `{foo="bar"} | json` - expr, err := syntax.ParseLogSelector(exprStr, true) - require.NoError(t, err) - assert.Equal(t, []string{"json"}, getParsersFromExpr(expr)) - }) - - t.Run("detects multiple parsers", func(t *testing.T) { - exprStr := `{foo="bar"} | logfmt | json` - expr, err := syntax.ParseLogSelector(exprStr, true) - require.NoError(t, err) - assert.Equal(t, []string{"logfmt", "json"}, getParsersFromExpr(expr)) - }) - - t.Run("detects logfmt expression parser", func(t *testing.T) { - exprStr := `{foo="bar"} | logfmt msg="message"` - expr, err := syntax.ParseLogSelector(exprStr, true) - require.NoError(t, err) - assert.Equal(t, []string{"logfmt"}, getParsersFromExpr(expr)) - }) - - t.Run("detects json expression parser", func(t *testing.T) { - exprStr := `{foo="bar"} | json first_server="servers[0]"` - expr, err := syntax.ParseLogSelector(exprStr, true) - require.NoError(t, err) - assert.Equal(t, []string{"json"}, getParsersFromExpr(expr)) - }) - - t.Run("detects multiple expression parsers", func(t *testing.T) { - exprStr := `{foo="bar"} | logfmt msg="message" | json first_server="servers[0]"` - expr, err := syntax.ParseLogSelector(exprStr, true) - require.NoError(t, err) - assert.Equal(t, []string{"logfmt", "json"}, getParsersFromExpr(expr)) - }) -} - -func Test_parseDetectedFeilds(t *testing.T) { - now := time.Now() - - t.Run("when no parsers are supplied", func(t *testing.T) { - infoDetectdFiledMetadata := []push.LabelAdapter{ - { - Name: "detected_level", - Value: "info", - }, - } - - rulerLines := []push.Entry{ - {Timestamp: now, Line: "ts=2024-09-05T15:36:38.757788067Z caller=grpc_logging.go:66 tenant=2419 level=info method=/cortex.Ingester/Push duration=19.098s msg=gRPC", StructuredMetadata: infoDetectdFiledMetadata}, - {Timestamp: now, Line: "ts=2024-09-05T15:36:38.698375619Z caller=grpc_logging.go:66 tenant=29 level=info method=/cortex.Ingester/Push duration=5.471s msg=gRPC", StructuredMetadata: infoDetectdFiledMetadata}, - {Timestamp: now, Line: "ts=2024-09-05T15:36:38.629424175Z caller=grpc_logging.go:66 tenant=2919 level=info method=/cortex.Ingester/Push duration=29.234s msg=gRPC", StructuredMetadata: infoDetectdFiledMetadata}, - } - - rulerLbls := `{cluster="us-east-1", namespace="mimir-dev", pod="mimir-ruler-nfb37", service_name="mimir-ruler"}` - rulerMetric, err := parser.ParseMetric(rulerLbls) - require.NoError(t, err) - - rulerStream := push.Stream{ - Labels: rulerLbls, - Entries: rulerLines, - Hash: rulerMetric.Hash(), - } - - 
debugDetectedFieldMetadata := []push.LabelAdapter{ - { - Name: "detected_level", - Value: "debug", - }, - } - - nginxJSONLines := []push.Entry{ - {Timestamp: now, Line: `{"host":"100.117.38.203", "user-identifier":"nader3722", "datetime":"05/Sep/2024:16:13:56 +0000", "method": "PATCH", "request": "/api/loki/v1/push", "protocol":"HTTP/2.0", "status":200, "bytes":9664, "referer": "https://www.seniorbleeding-edge.net/exploit/robust/whiteboard"}`, StructuredMetadata: debugDetectedFieldMetadata}, - {Timestamp: now, Line: `{"host":"66.134.9.30", "user-identifier":"-", "datetime":"05/Sep/2024:16:13:55 +0000", "method": "DELETE", "request": "/api/mimir/v1/push", "protocol":"HTTP/1.1", "status":200, "bytes":18688, "referer": "https://www.districtiterate.biz/synergistic/next-generation/extend"}`, StructuredMetadata: debugDetectedFieldMetadata}, - {Timestamp: now, Line: `{"host":"66.134.9.30", "user-identifier":"-", "datetime":"05/Sep/2024:16:13:55 +0000", "method": "GET", "request": "/api/loki/v1/label/names", "protocol":"HTTP/1.1", "status":200, "bytes":9314, "referer": "https://www.dynamicimplement.info/enterprise/distributed/incentivize/strategic"}`, StructuredMetadata: debugDetectedFieldMetadata}, - } - - nginxLbls := `{ cluster="eu-west-1", level="debug", namespace="gateway", pod="nginx-json-oghco", service_name="nginx-json" }` - nginxMetric, err := parser.ParseMetric(nginxLbls) - require.NoError(t, err) - - nginxStream := push.Stream{ - Labels: nginxLbls, - Entries: nginxJSONLines, - Hash: nginxMetric.Hash(), - } - - t.Run("detect logfmt fields when with no supplied parsers", func(t *testing.T) { - df := parseDetectedFields(uint32(15), logqlmodel.Streams([]push.Stream{rulerStream})) - for _, expected := range []string{"ts", "caller", "tenant", "level", "method", "duration", "msg"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 1) - require.Equal(t, "logfmt", parsers[0]) - } - - // no parsers for structed metadata - for _, expected := range []string{"detected_level"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 0) - } - }) - - t.Run("detect json fields when with no supplied parsers", func(t *testing.T) { - df := parseDetectedFields(uint32(15), logqlmodel.Streams([]push.Stream{nginxStream})) - for _, expected := range []string{"host", "user_identifier", "datetime", "method", "request", "protocol", "status", "bytes", "referer"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 1) - require.Equal(t, "json", parsers[0]) - } - - // no parsers for structed metadata - for _, expected := range []string{"detected_level"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 0) - } - }) - - t.Run("detect mixed fields when with no supplied parsers", func(t *testing.T) { - df := parseDetectedFields(uint32(20), logqlmodel.Streams([]push.Stream{rulerStream, nginxStream})) - - for _, expected := range []string{"ts", "caller", "tenant", "level", "duration", "msg"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 1, "expected only logfmt parser for %s", expected) - require.Equal(t, "logfmt", parsers[0], "expected only logfmt parser for %s", expected) - } - - for _, expected := range []string{"host", "user_identifier", "datetime", "request", "protocol", "status", "bytes", "referer"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - 
require.Len(t, parsers, 1, "expected only json parser for %s", expected) - require.Equal(t, "json", parsers[0], "expected only json parser for %s", expected) - } - - // multiple parsers for fields that exist in both streams - for _, expected := range []string{"method"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 2, "expected logfmt and json parser for %s", expected) - require.Contains(t, parsers, "logfmt", "expected logfmt parser for %s", expected) - require.Contains(t, parsers, "json", "expected json parser for %s", expected) - } - - // no parsers for structed metadata - for _, expected := range []string{"detected_level"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 0) - } - }) - - t.Run("correctly applies _extracted for a single stream", func(t *testing.T) { - rulerLbls := `{cluster="us-east-1", namespace="mimir-dev", pod="mimir-ruler-nfb37", service_name="mimir-ruler", tenant="42", caller="inside-the-house"}` - rulerMetric, err := parser.ParseMetric(rulerLbls) - require.NoError(t, err) - - rulerStream := push.Stream{ - Labels: rulerLbls, - Entries: rulerLines, - Hash: rulerMetric.Hash(), - } - - df := parseDetectedFields(uint32(15), logqlmodel.Streams([]push.Stream{rulerStream})) - for _, expected := range []string{"ts", "caller_extracted", "tenant_extracted", "level", "method", "duration", "msg"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 1) - require.Equal(t, "logfmt", parsers[0]) - } - - // no parsers for structed metadata - for _, expected := range []string{"detected_level"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 0) - } - }) - - t.Run("correctly applies _extracted for multiple streams", func(t *testing.T) { - rulerLbls := `{cluster="us-east-1", namespace="mimir-dev", pod="mimir-ruler-nfb37", service_name="mimir-ruler", tenant="42", caller="inside-the-house"}` - rulerMetric, err := parser.ParseMetric(rulerLbls) - require.NoError(t, err) - - rulerStream := push.Stream{ - Labels: rulerLbls, - Entries: rulerLines, - Hash: rulerMetric.Hash(), - } - - nginxLbls := `{ cluster="eu-west-1", level="debug", namespace="gateway", pod="nginx-json-oghco", service_name="nginx-json", host="localhost"}` - nginxMetric, err := parser.ParseMetric(nginxLbls) - require.NoError(t, err) - - nginxStream := push.Stream{ - Labels: nginxLbls, - Entries: nginxJSONLines, - Hash: nginxMetric.Hash(), - } - - df := parseDetectedFields(uint32(20), logqlmodel.Streams([]push.Stream{rulerStream, nginxStream})) - for _, expected := range []string{"ts", "caller_extracted", "tenant_extracted", "level", "duration", "msg"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 1) - require.Equal(t, "logfmt", parsers[0]) - } - - for _, expected := range []string{"host_extracted", "user_identifier", "datetime", "request", "protocol", "status", "bytes", "referer"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 1, "expected only json parser for %s", expected) - require.Equal(t, "json", parsers[0], "expected only json parser for %s", expected) - } - - // multiple parsers for fields that exist in both streams - for _, expected := range []string{"method"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 2, "expected logfmt and json parser for %s", expected) - 
require.Contains(t, parsers, "logfmt", "expected logfmt parser for %s", expected) - require.Contains(t, parsers, "json", "expected json parser for %s", expected) - } - - // no parsers for structed metadata - for _, expected := range []string{"detected_level"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 0) - } - }) - }) - - t.Run("when parsers are supplied", func(t *testing.T) { - infoDetectdFiledMetadata := []push.LabelAdapter{ - { - Name: "detected_level", - Value: "info", - }, - } - - parsedRulerFields := func(ts, tenant, duration string) []push.LabelAdapter { - return []push.LabelAdapter{ - { - Name: "ts", - Value: ts, - }, - { - Name: "caller", - Value: "grpc_logging.go:66", - }, - { - Name: "tenant", - Value: tenant, - }, - { - Name: "level", - Value: "info", - }, - { - Name: "method", - Value: "/cortex.Ingester/Push", - }, - { - Name: "duration", - Value: duration, - }, - { - Name: "msg", - Value: "gRPC", - }, - } - } - - rulerLines := []push.Entry{ - { - Timestamp: now, - Line: "ts=2024-09-05T15:36:38.757788067Z caller=grpc_logging.go:66 tenant=2419 level=info method=/cortex.Ingester/Push duration=19.098s msg=gRPC", - StructuredMetadata: infoDetectdFiledMetadata, - Parsed: parsedRulerFields("2024-09-05T15:36:38.757788067Z", "2419", "19.098s"), - }, - { - Timestamp: now, - Line: "ts=2024-09-05T15:36:38.698375619Z caller=grpc_logging.go:66 tenant=29 level=info method=/cortex.Ingester/Push duration=5.471s msg=gRPC", - StructuredMetadata: infoDetectdFiledMetadata, - Parsed: parsedRulerFields("2024-09-05T15:36:38.698375619Z", "29", "5.471s"), - }, - { - Timestamp: now, - Line: "ts=2024-09-05T15:36:38.629424175Z caller=grpc_logging.go:66 tenant=2919 level=info method=/cortex.Ingester/Push duration=29.234s msg=gRPC", - StructuredMetadata: infoDetectdFiledMetadata, - Parsed: parsedRulerFields("2024-09-05T15:36:38.629424175Z", "2919", "29.234s"), - }, - } - - rulerLbls := `{cluster="us-east-1", namespace="mimir-dev", pod="mimir-ruler-nfb37", service_name="mimir-ruler"}` - rulerMetric, err := parser.ParseMetric(rulerLbls) - require.NoError(t, err) - - rulerStream := push.Stream{ - Labels: rulerLbls, - Entries: rulerLines, - Hash: rulerMetric.Hash(), - } - - debugDetectedFieldMetadata := []push.LabelAdapter{ - { - Name: "detected_level", - Value: "debug", - }, - } - - parsedNginxFields := func(host, userIdentifier, datetime, method, request, protocol, status, bytes, referer string) []push.LabelAdapter { - return []push.LabelAdapter{ - { - Name: "host", - Value: host, - }, - { - Name: "user_identifier", - Value: userIdentifier, - }, - { - Name: "datetime", - Value: datetime, - }, - { - Name: "method", - Value: method, - }, - { - Name: "request", - Value: request, - }, - { - Name: "protocol", - Value: protocol, - }, - { - Name: "status", - Value: status, - }, - { - Name: "bytes", - Value: bytes, - }, - { - Name: "referer", - Value: referer, - }, - } - } - - nginxJSONLines := []push.Entry{ - { - Timestamp: now, - Line: `{"host":"100.117.38.203", "user-identifier":"nader3722", "datetime":"05/Sep/2024:16:13:56 +0000", "method": "PATCH", "request": "/api/loki/v1/push", "protocol":"HTTP/2.0", "status":200, "bytes":9664, "referer": "https://www.seniorbleeding-edge.net/exploit/robust/whiteboard"}`, - StructuredMetadata: debugDetectedFieldMetadata, - Parsed: parsedNginxFields("100.117.38.203", "nadre3722", "05/Sep/2024:16:13:56 +0000", "PATCH", "/api/loki/v1/push", "HTTP/2.0", "200", "9664", 
"https://www.seniorbleeding-edge.net/exploit/robust/whiteboard"), - }, - { - Timestamp: now, - Line: `{"host":"66.134.9.30", "user-identifier":"-", "datetime":"05/Sep/2024:16:13:55 +0000", "method": "DELETE", "request": "/api/mimir/v1/push", "protocol":"HTTP/1.1", "status":200, "bytes":18688, "referer": "https://www.districtiterate.biz/synergistic/next-generation/extend"}`, - StructuredMetadata: debugDetectedFieldMetadata, - Parsed: parsedNginxFields("66.134.9.30", "-", "05/Sep/2024:16:13:55 +0000", "DELETE", "/api/mimir/v1/push", "HTTP/1.1", "200", "18688", "https://www.districtiterate.biz/synergistic/next-generation/extend"), - }, - { - Timestamp: now, - Line: `{"host":"66.134.9.30", "user-identifier":"-", "datetime":"05/Sep/2024:16:13:55 +0000", "method": "GET", "request": "/api/loki/v1/label/names", "protocol":"HTTP/1.1", "status":200, "bytes":9314, "referer": "https://www.dynamicimplement.info/enterprise/distributed/incentivize/strategic"}`, - StructuredMetadata: debugDetectedFieldMetadata, - Parsed: parsedNginxFields("66.134.9.30", "-", "05/Sep/2024:16:13:55 +0000", "GET", "/api/loki/v1/label/names", "HTTP/1.1", "200", "9314", "https://www.dynamicimplement.info/enterprise/distributed/incentivize/strategic"), - }, - } - - nginxLbls := `{ cluster="eu-west-1", level="debug", namespace="gateway", pod="nginx-json-oghco", service_name="nginx-json" }` - nginxMetric, err := parser.ParseMetric(nginxLbls) - require.NoError(t, err) - - nginxStream := push.Stream{ - Labels: nginxLbls, - Entries: nginxJSONLines, - Hash: nginxMetric.Hash(), - } - - t.Run("detect logfmt fields", func(t *testing.T) { - df := parseDetectedFields(uint32(15), logqlmodel.Streams([]push.Stream{rulerStream})) - for _, expected := range []string{"ts", "caller", "tenant", "level", "method", "duration", "msg"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 1) - require.Equal(t, "logfmt", parsers[0]) - } - - // no parsers for structed metadata - for _, expected := range []string{"detected_level"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 0) - } - }) - - t.Run("detect json fields", func(t *testing.T) { - df := parseDetectedFields(uint32(15), logqlmodel.Streams([]push.Stream{nginxStream})) - for _, expected := range []string{"host", "user_identifier", "datetime", "method", "request", "protocol", "status", "bytes", "referer"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 1) - require.Equal(t, "json", parsers[0]) - } - - // no parsers for structed metadata - for _, expected := range []string{"detected_level"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 0) - } - }) - - t.Run("detect mixed fields", func(t *testing.T) { - df := parseDetectedFields(uint32(20), logqlmodel.Streams([]push.Stream{rulerStream, nginxStream})) - - for _, expected := range []string{"ts", "caller", "tenant", "level", "duration", "msg"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 1, "expected only logfmt parser for %s", expected) - require.Equal(t, "logfmt", parsers[0], "expected only logfmt parser for %s", expected) - } - - for _, expected := range []string{"host", "user_identifier", "datetime", "request", "protocol", "status", "bytes", "referer"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 1, "expected only json parser for %s", 
expected) - require.Equal(t, "json", parsers[0], "expected only json parser for %s", expected) - } - - // multiple parsers for fields that exist in both streams - for _, expected := range []string{"method"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 2, "expected logfmt and json parser for %s", expected) - require.Contains(t, parsers, "logfmt", "expected logfmt parser for %s", expected) - require.Contains(t, parsers, "json", "expected json parser for %s", expected) - } - - // no parsers for structed metadata - for _, expected := range []string{"detected_level"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 0) - } - }) - - t.Run("correctly applies _extracted for a single stream", func(t *testing.T) { - rulerLbls := `{cluster="us-east-1", namespace="mimir-dev", pod="mimir-ruler-nfb37", service_name="mimir-ruler", tenant="42", caller="inside-the-house"}` - rulerMetric, err := parser.ParseMetric(rulerLbls) - require.NoError(t, err) - - rulerStream := push.Stream{ - Labels: rulerLbls, - Entries: rulerLines, - Hash: rulerMetric.Hash(), - } - - df := parseDetectedFields(uint32(15), logqlmodel.Streams([]push.Stream{rulerStream})) - for _, expected := range []string{"ts", "caller_extracted", "tenant_extracted", "level", "method", "duration", "msg"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 1) - require.Equal(t, "logfmt", parsers[0]) - } - - // no parsers for structed metadata - for _, expected := range []string{"detected_level"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 0) - } - }) - - t.Run("correctly applies _extracted for multiple streams", func(t *testing.T) { - rulerLbls := `{cluster="us-east-1", namespace="mimir-dev", pod="mimir-ruler-nfb37", service_name="mimir-ruler", tenant="42", caller="inside-the-house"}` - rulerMetric, err := parser.ParseMetric(rulerLbls) - require.NoError(t, err) - - rulerStream := push.Stream{ - Labels: rulerLbls, - Entries: rulerLines, - Hash: rulerMetric.Hash(), - } - - nginxLbls := `{ cluster="eu-west-1", level="debug", namespace="gateway", pod="nginx-json-oghco", service_name="nginx-json", host="localhost"}` - nginxMetric, err := parser.ParseMetric(nginxLbls) - require.NoError(t, err) - - nginxStream := push.Stream{ - Labels: nginxLbls, - Entries: nginxJSONLines, - Hash: nginxMetric.Hash(), - } - - df := parseDetectedFields(uint32(20), logqlmodel.Streams([]push.Stream{rulerStream, nginxStream})) - for _, expected := range []string{"ts", "caller_extracted", "tenant_extracted", "level", "duration", "msg"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 1) - require.Equal(t, "logfmt", parsers[0]) - } - - for _, expected := range []string{"host_extracted", "user_identifier", "datetime", "request", "protocol", "status", "bytes", "referer"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 1, "expected only json parser for %s", expected) - require.Equal(t, "json", parsers[0], "expected only json parser for %s", expected) - } - - // multiple parsers for fields that exist in both streams - for _, expected := range []string{"method"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 2, "expected logfmt and json parser for %s", expected) - require.Contains(t, parsers, "logfmt", "expected logfmt parser 
for %s", expected) - require.Contains(t, parsers, "json", "expected json parser for %s", expected) - } - - // no parsers for structed metadata - for _, expected := range []string{"detected_level"} { - require.Contains(t, df, expected) - parsers := df[expected].parsers - - require.Len(t, parsers, 0) - } - }) - }) - - t.Run("handles level in all the places", func(t *testing.T) { - rulerLbls := `{cluster="us-east-1", namespace="mimir-dev", pod="mimir-ruler-nfb37", service_name="mimir-ruler", tenant="42", caller="inside-the-house", level="debug"}` - rulerMetric, err := parser.ParseMetric(rulerLbls) - require.NoError(t, err) - - rulerStream := push.Stream{ - Labels: rulerLbls, - Entries: []push.Entry{ - { - Timestamp: now, - Line: "ts=2024-09-05T15:36:38.757788067Z caller=grpc_logging.go:66 tenant=2419 level=info method=/cortex.Ingester/Push duration=19.098s msg=gRPC", - StructuredMetadata: []push.LabelAdapter{ - { - Name: "detected_level", - Value: "debug", - }, - }, - Parsed: []push.LabelAdapter{ - { - Name: "level", - Value: "info", - }, - }, - }, - }, - Hash: rulerMetric.Hash(), - } - - df := parseDetectedFields(uint32(20), logqlmodel.Streams([]push.Stream{rulerStream, rulerStream})) - - detectedLevelField := df["detected_level"] - require.Len(t, detectedLevelField.parsers, 0) - require.Equal(t, uint64(1), detectedLevelField.sketch.Estimate()) - - levelField := df["level_extracted"] - require.Len(t, levelField.parsers, 1) - require.Contains(t, levelField.parsers, "logfmt") - require.Equal(t, uint64(1), levelField.sketch.Estimate()) - }) -} diff --git a/pkg/querier/queryrange/codec.go b/pkg/querier/queryrange/codec.go index 2c4ff98c92c8..5212d9bdebb5 100644 --- a/pkg/querier/queryrange/codec.go +++ b/pkg/querier/queryrange/codec.go @@ -312,8 +312,8 @@ func (r *DetectedLabelsRequest) WithQuery(query string) queryrangebase.Request { func (r *DetectedLabelsRequest) LogToSpan(sp opentracing.Span) { sp.LogFields( - otlog.String("start", timestamp.Time(r.GetStart().UnixNano()).String()), - otlog.String("end", timestamp.Time(r.GetEnd().UnixNano()).String()), + otlog.String("start", timestamp.Time(r.GetStart().UnixMilli()).String()), + otlog.String("end", timestamp.Time(r.GetEnd().UnixMilli()).String()), ) } @@ -2359,8 +2359,8 @@ func (r *DetectedFieldsRequest) WithQuery(query string) queryrangebase.Request { func (r *DetectedFieldsRequest) LogToSpan(sp opentracing.Span) { sp.LogFields( - otlog.String("start", timestamp.Time(r.GetStart().UnixNano()).String()), - otlog.String("end", timestamp.Time(r.GetEnd().UnixNano()).String()), + otlog.String("start", timestamp.Time(r.GetStart().UnixMilli()).String()), + otlog.String("end", timestamp.Time(r.GetEnd().UnixMilli()).String()), otlog.String("query", r.GetQuery()), otlog.Int64("step (ms)", r.GetStep()), otlog.Int64("line_limit", int64(r.GetLineLimit())), diff --git a/pkg/querier/queryrange/detected_fields.go b/pkg/querier/queryrange/detected_fields.go new file mode 100644 index 000000000000..115ba9601573 --- /dev/null +++ b/pkg/querier/queryrange/detected_fields.go @@ -0,0 +1,387 @@ +package queryrange + +import ( + "context" + "net/http" + "slices" + "strconv" + "time" + + "github.com/axiomhq/hyperloglog" + "github.com/dustin/go-humanize" + "github.com/grafana/dskit/httpgrpc" + "github.com/prometheus/prometheus/model/labels" + + "github.com/grafana/loki/v3/pkg/logproto" + logql_log "github.com/grafana/loki/v3/pkg/logql/log" + "github.com/grafana/loki/v3/pkg/logql/syntax" + "github.com/grafana/loki/v3/pkg/logqlmodel" + 
"github.com/grafana/loki/v3/pkg/querier/plan" + base "github.com/grafana/loki/v3/pkg/querier/queryrange/queryrangebase" + "github.com/grafana/loki/v3/pkg/util/httpreq" + + "github.com/grafana/loki/pkg/push" +) + +func NewDetectedFieldsHandler( + limitedHandler base.Handler, + logHandler base.Handler, + limits Limits, +) base.Handler { + return base.HandlerFunc( + func(ctx context.Context, req base.Request) (base.Response, error) { + r, ok := req.(*DetectedFieldsRequest) + if !ok { + return nil, httpgrpc.Errorf( + http.StatusBadRequest, + "invalid request type, expected *DetectedFieldsRequest", + ) + } + + resp, err := makeDownstreamRequest(ctx, limits, limitedHandler, logHandler, r) + if err != nil { + return nil, err + } + + re, ok := resp.(*LokiResponse) + if !ok || re.Status != "success" { + return resp, nil + } + + detectedFields := parseDetectedFields(r.FieldLimit, re.Data.Result) + fields := make([]*logproto.DetectedField, len(detectedFields)) + fieldCount := 0 + for k, v := range detectedFields { + p := v.parsers + if len(p) == 0 { + p = nil + } + fields[fieldCount] = &logproto.DetectedField{ + Label: k, + Type: v.fieldType, + Cardinality: v.Estimate(), + Parsers: p, + } + + fieldCount++ + } + + dfResp := DetectedFieldsResponse{ + Response: &logproto.DetectedFieldsResponse{ + Fields: fields, + }, + Headers: re.Headers, + } + + // Otherwise all they get is the field limit, which is a bit confusing + if len(fields) > 0 { + dfResp.Response.FieldLimit = r.GetFieldLimit() + } + + return &dfResp, nil + }) +} + +func makeDownstreamRequest( + ctx context.Context, + limits Limits, + limitedHandler, logHandler base.Handler, + req *DetectedFieldsRequest, +) (base.Response, error) { + expr, err := syntax.ParseLogSelector(req.Query, true) + if err != nil { + return nil, httpgrpc.Errorf(http.StatusBadRequest, "%s", err.Error()) + } + + if err := validateMaxEntriesLimits(ctx, req.LineLimit, limits); err != nil { + return nil, httpgrpc.Errorf(http.StatusBadRequest, "%s", err.Error()) + } + + if err := validateMatchers(ctx, limits, expr.Matchers()); err != nil { + return nil, httpgrpc.Errorf(http.StatusBadRequest, "%s", err.Error()) + } + + lokiReq := &LokiRequest{ + Query: req.GetQuery(), + Step: req.GetStep(), + StartTs: req.GetStartTs(), + EndTs: req.GetEndTs(), + Direction: logproto.BACKWARD, + Limit: req.GetLineLimit(), + Path: "/loki/api/v1/query_range", + } + + lokiReq.Plan = &plan.QueryPlan{ + AST: expr, + } + + // Note(twhitney): The logic for parsing detected fields relies on the Entry.Parsed field being populated. + // The behavior of populating Entry.Parsed is different in ingesters and stores. + // We need to set this header to make sure Entry.Parsed is populated when getting logs from the store. + // Entries from the head block in the ingester always have the Parsed field populated. 
+ ctx = httpreq.InjectHeader( + ctx, + httpreq.LokiEncodingFlagsHeader, + (string)(httpreq.FlagCategorizeLabels), + ) + if expr.HasFilter() { + return logHandler.Do(ctx, lokiReq) + } + return limitedHandler.Do(ctx, lokiReq) +} + +type parsedFields struct { + // sketch is a HyperLogLog sketch approximating the number of distinct values seen for this field + sketch *hyperloglog.Sketch + fieldType logproto.DetectedFieldType + parsers []string +} + +func newParsedFields(parsers []string) *parsedFields { + return &parsedFields{ + sketch: hyperloglog.New(), + fieldType: logproto.DetectedFieldString, + parsers: parsers, + } +} + +func newParsedLabels() *parsedFields { + return &parsedFields{ + sketch: hyperloglog.New(), + fieldType: logproto.DetectedFieldString, + } +} + +func (p *parsedFields) Insert(value string) { + p.sketch.Insert([]byte(value)) +} + +func (p *parsedFields) Estimate() uint64 { + return p.sketch.Estimate() +} + +func (p *parsedFields) Marshal() ([]byte, error) { + return p.sketch.MarshalBinary() +} + +func (p *parsedFields) DetermineType(value string) { + p.fieldType = determineType(value) +} + +// determineType tries each parser in a fixed order, so order matters: for example, "10" is detected as an int before it can be read as a float or a byte size, "19.098s" as a duration, and "10MB" as bytes. +func determineType(value string) logproto.DetectedFieldType { + if _, err := strconv.ParseInt(value, 10, 64); err == nil { + return logproto.DetectedFieldInt + } + + if _, err := strconv.ParseFloat(value, 64); err == nil { + return logproto.DetectedFieldFloat + } + + if _, err := strconv.ParseBool(value); err == nil { + return logproto.DetectedFieldBoolean + } + + if _, err := time.ParseDuration(value); err == nil { + return logproto.DetectedFieldDuration + } + + if _, err := humanize.ParseBytes(value); err == nil { + return logproto.DetectedFieldBytes + } + + return logproto.DetectedFieldString +} + +func parseDetectedFields(limit uint32, streams logqlmodel.Streams) map[string]*parsedFields { + detectedFields := make(map[string]*parsedFields, limit) + fieldCount := uint32(0) + emptyParsers := []string{} + + for _, stream := range streams { + streamLbls, err := syntax.ParseLabels(stream.Labels) + if err != nil { + streamLbls = labels.EmptyLabels() + } + + for _, entry := range stream.Entries { + structuredMetadata := getStructuredMetadata(entry) + for k, vals := range structuredMetadata { + df, ok := detectedFields[k] + if !ok && fieldCount < limit { + df = newParsedFields(emptyParsers) + detectedFields[k] = df + fieldCount++ + } + + if df == nil { + continue + } + + detectType := true + for _, v := range vals { + parsedFields := detectedFields[k] + if detectType { + // determine the type from the first value only; the remaining values for this field are assumed to share it, and it is re-detected on the next entry + parsedFields.DetermineType(v) + detectType = false + } + + parsedFields.Insert(v) + } + } + + entryLbls := logql_log.NewBaseLabelsBuilder().ForLabels(streamLbls, streamLbls.Hash()) + parsedLabels, parsers := parseEntry(entry, entryLbls) + for k, vals := range parsedLabels { + df, ok := detectedFields[k] + if !ok && fieldCount < limit { + df = newParsedFields(parsers) + detectedFields[k] = df + fieldCount++ + } + + if df == nil { + continue + } + + for _, parser := range parsers { + if !slices.Contains(df.parsers, parser) { + df.parsers = append(df.parsers, parser) + } + } + + detectType := true + for _, v := range vals { + parsedFields := detectedFields[k] + if detectType { + // determine the type from the first value only; the remaining values for this field are assumed to share it, and it is re-detected on the next entry + parsedFields.DetermineType(v) + detectType = false + } + + parsedFields.Insert(v) + } + } + } + } + + return detectedFields +} +
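+// getStructuredMetadata flattens an entry's structured metadata into a map from label name to its set of unique values. +func 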
getStructuredMetadata(entry push.Entry) map[string][]string { + labels := map[string]map[string]struct{}{} + for _, lbl := range entry.StructuredMetadata { + if values, ok := labels[lbl.Name]; ok { + values[lbl.Value] = struct{}{} + } else { + labels[lbl.Name] = map[string]struct{}{lbl.Value: {}} + } + } + + result := make(map[string][]string, len(labels)) + for lbl, values := range labels { + vals := make([]string, 0, len(values)) + for v := range values { + vals = append(vals, v) + } + result[lbl] = vals + } + + return result +} + +func parseEntry(entry push.Entry, lbls *logql_log.LabelsBuilder) (map[string][]string, []string) { + origParsed := getParsedLabels(entry) + + // if the original query has any parser expressions, then we need to differentiate the + // original stream labels from any parsed labels + for name := range origParsed { + lbls.Del(name) + } + streamLbls := lbls.LabelsResult().Stream() + lblBuilder := lbls.ForLabels(streamLbls, streamLbls.Hash()) + + parsed := make(map[string][]string, len(origParsed)) + for lbl, values := range origParsed { + if lbl == logqlmodel.ErrorLabel || lbl == logqlmodel.ErrorDetailsLabel || + lbl == logqlmodel.PreserveErrorLabel { + continue + } + + parsed[lbl] = values + } + + line := entry.Line + parser := "json" + jsonParser := logql_log.NewJSONParser() + _, jsonSuccess := jsonParser.Process(0, []byte(line), lblBuilder) + if !jsonSuccess || lblBuilder.HasErr() { + lblBuilder.Reset() + + logFmtParser := logql_log.NewLogfmtParser(false, false) + parser = "logfmt" + _, logfmtSuccess := logFmtParser.Process(0, []byte(line), lblBuilder) + if !logfmtSuccess || lblBuilder.HasErr() { + return parsed, nil + } + } + + parsedLabels := map[string]map[string]struct{}{} + for lbl, values := range parsed { + if vals, ok := parsedLabels[lbl]; ok { + for _, value := range values { + vals[value] = struct{}{} + } + } else { + parsedLabels[lbl] = map[string]struct{}{} + for _, value := range values { + parsedLabels[lbl][value] = struct{}{} + } + } + } + + lblsResult := lblBuilder.LabelsResult().Parsed() + for _, lbl := range lblsResult { + if values, ok := parsedLabels[lbl.Name]; ok { + values[lbl.Value] = struct{}{} + } else { + parsedLabels[lbl.Name] = map[string]struct{}{lbl.Value: {}} + } + } + + result := make(map[string][]string, len(parsedLabels)) + for lbl, values := range parsedLabels { + if lbl == logqlmodel.ErrorLabel || lbl == logqlmodel.ErrorDetailsLabel || + lbl == logqlmodel.PreserveErrorLabel { + continue + } + vals := make([]string, 0, len(values)) + for v := range values { + vals = append(vals, v) + } + result[lbl] = vals + } + + return result, []string{parser} +} + +func getParsedLabels(entry push.Entry) map[string][]string { + labels := map[string]map[string]struct{}{} + for _, lbl := range entry.Parsed { + if values, ok := labels[lbl.Name]; ok { + values[lbl.Value] = struct{}{} + } else { + labels[lbl.Name] = map[string]struct{}{lbl.Value: {}} + } + } + + result := make(map[string][]string, len(labels)) + for lbl, values := range labels { + vals := make([]string, 0, len(values)) + for v := range values { + vals = append(vals, v) + } + result[lbl] = vals + } + + return result +} diff --git a/pkg/querier/queryrange/detected_fields_test.go b/pkg/querier/queryrange/detected_fields_test.go new file mode 100644 index 000000000000..654a42ac8d00 --- /dev/null +++ b/pkg/querier/queryrange/detected_fields_test.go @@ -0,0 +1,1245 @@ +package queryrange + +import ( + "context" + "fmt" + "math" + "testing" + "time" + + "github.com/grafana/dskit/user" + 
"github.com/prometheus/prometheus/model/labels" + "github.com/prometheus/prometheus/promql/parser" + "github.com/stretchr/testify/assert" + "github.com/stretchr/testify/require" + + "github.com/grafana/loki/v3/pkg/loghttp" + "github.com/grafana/loki/v3/pkg/logproto" + "github.com/grafana/loki/v3/pkg/logql/log" + logql_log "github.com/grafana/loki/v3/pkg/logql/log" + "github.com/grafana/loki/v3/pkg/logql/syntax" + "github.com/grafana/loki/v3/pkg/logqlmodel" + base "github.com/grafana/loki/v3/pkg/querier/queryrange/queryrangebase" + + "github.com/grafana/loki/pkg/push" +) + +func Test_parseDetectedFeilds(t *testing.T) { + now := time.Now() + + t.Run("when no parsers are supplied", func(t *testing.T) { + infoDetectdFiledMetadata := []push.LabelAdapter{ + { + Name: "detected_level", + Value: "info", + }, + } + + rulerLines := []push.Entry{ + { + Timestamp: now, + Line: "ts=2024-09-05T15:36:38.757788067Z caller=grpc_logging.go:66 tenant=2419 level=info method=/cortex.Ingester/Push duration=19.098s msg=gRPC", + StructuredMetadata: infoDetectdFiledMetadata, + }, + { + Timestamp: now, + Line: "ts=2024-09-05T15:36:38.698375619Z caller=grpc_logging.go:66 tenant=29 level=info method=/cortex.Ingester/Push duration=5.471s msg=gRPC", + StructuredMetadata: infoDetectdFiledMetadata, + }, + { + Timestamp: now, + Line: "ts=2024-09-05T15:36:38.629424175Z caller=grpc_logging.go:66 tenant=2919 level=info method=/cortex.Ingester/Push duration=29.234s msg=gRPC", + StructuredMetadata: infoDetectdFiledMetadata, + }, + } + + rulerLbls := `{cluster="us-east-1", namespace="mimir-dev", pod="mimir-ruler-nfb37", service_name="mimir-ruler"}` + rulerMetric, err := parser.ParseMetric(rulerLbls) + require.NoError(t, err) + + rulerStream := push.Stream{ + Labels: rulerLbls, + Entries: rulerLines, + Hash: rulerMetric.Hash(), + } + + debugDetectedFieldMetadata := []push.LabelAdapter{ + { + Name: "detected_level", + Value: "debug", + }, + } + + nginxJSONLines := []push.Entry{ + { + Timestamp: now, + Line: `{"host":"100.117.38.203", "user-identifier":"nader3722", "datetime":"05/Sep/2024:16:13:56 +0000", "method": "PATCH", "request": "/api/loki/v1/push", "protocol":"HTTP/2.0", "status":200, "bytes":9664, "referer": "https://www.seniorbleeding-edge.net/exploit/robust/whiteboard"}`, + StructuredMetadata: debugDetectedFieldMetadata, + }, + { + Timestamp: now, + Line: `{"host":"66.134.9.30", "user-identifier":"-", "datetime":"05/Sep/2024:16:13:55 +0000", "method": "DELETE", "request": "/api/mimir/v1/push", "protocol":"HTTP/1.1", "status":200, "bytes":18688, "referer": "https://www.districtiterate.biz/synergistic/next-generation/extend"}`, + StructuredMetadata: debugDetectedFieldMetadata, + }, + { + Timestamp: now, + Line: `{"host":"66.134.9.30", "user-identifier":"-", "datetime":"05/Sep/2024:16:13:55 +0000", "method": "GET", "request": "/api/loki/v1/label/names", "protocol":"HTTP/1.1", "status":200, "bytes":9314, "referer": "https://www.dynamicimplement.info/enterprise/distributed/incentivize/strategic"}`, + StructuredMetadata: debugDetectedFieldMetadata, + }, + } + + nginxLbls := `{ cluster="eu-west-1", level="debug", namespace="gateway", pod="nginx-json-oghco", service_name="nginx-json" }` + nginxMetric, err := parser.ParseMetric(nginxLbls) + require.NoError(t, err) + + nginxStream := push.Stream{ + Labels: nginxLbls, + Entries: nginxJSONLines, + Hash: nginxMetric.Hash(), + } + + t.Run("detects logfmt fields", func(t *testing.T) { + df := parseDetectedFields(uint32(15), logqlmodel.Streams([]push.Stream{rulerStream})) + for _, 
expected := range []string{"ts", "caller", "tenant", "level", "method", "duration", "msg"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 1) + require.Equal(t, "logfmt", parsers[0]) + } + + // no parsers for structed metadata + for _, expected := range []string{"detected_level"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 0) + } + }) + + t.Run("detects json fields", func(t *testing.T) { + df := parseDetectedFields(uint32(15), logqlmodel.Streams([]push.Stream{nginxStream})) + for _, expected := range []string{"host", "user_identifier", "datetime", "method", "request", "protocol", "status", "bytes", "referer"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 1) + require.Equal(t, "json", parsers[0]) + } + + // no parsers for structed metadata + for _, expected := range []string{"detected_level"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 0) + } + }) + + t.Run("detects mixed fields", func(t *testing.T) { + df := parseDetectedFields( + uint32(20), + logqlmodel.Streams([]push.Stream{rulerStream, nginxStream}), + ) + + for _, expected := range []string{"ts", "caller", "tenant", "level", "duration", "msg"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 1, "expected only logfmt parser for %s", expected) + require.Equal( + t, + "logfmt", + parsers[0], + "expected only logfmt parser for %s", + expected, + ) + } + + for _, expected := range []string{"host", "user_identifier", "datetime", "request", "protocol", "status", "bytes", "referer"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 1, "expected only json parser for %s", expected) + require.Equal(t, "json", parsers[0], "expected only json parser for %s", expected) + } + + // multiple parsers for fields that exist in both streams + for _, expected := range []string{"method"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 2, "expected logfmt and json parser for %s", expected) + require.Contains(t, parsers, "logfmt", "expected logfmt parser for %s", expected) + require.Contains(t, parsers, "json", "expected json parser for %s", expected) + } + + // no parsers for structed metadata + for _, expected := range []string{"detected_level"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 0) + } + }) + + t.Run("correctly applies _extracted for a single stream", func(t *testing.T) { + rulerLbls := `{cluster="us-east-1", namespace="mimir-dev", pod="mimir-ruler-nfb37", service_name="mimir-ruler", tenant="42", caller="inside-the-house"}` + rulerMetric, err := parser.ParseMetric(rulerLbls) + require.NoError(t, err) + + rulerStream := push.Stream{ + Labels: rulerLbls, + Entries: rulerLines, + Hash: rulerMetric.Hash(), + } + + df := parseDetectedFields(uint32(15), logqlmodel.Streams([]push.Stream{rulerStream})) + for _, expected := range []string{"ts", "caller_extracted", "tenant_extracted", "level", "method", "duration", "msg"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 1) + require.Equal(t, "logfmt", parsers[0]) + } + + // no parsers for structed metadata + for _, expected := range []string{"detected_level"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + 
require.Len(t, parsers, 0) + } + }) + + t.Run("correctly applies _extracted for multiple streams", func(t *testing.T) { + rulerLbls := `{cluster="us-east-1", namespace="mimir-dev", pod="mimir-ruler-nfb37", service_name="mimir-ruler", tenant="42", caller="inside-the-house"}` + rulerMetric, err := parser.ParseMetric(rulerLbls) + require.NoError(t, err) + + rulerStream := push.Stream{ + Labels: rulerLbls, + Entries: rulerLines, + Hash: rulerMetric.Hash(), + } + + nginxLbls := `{ cluster="eu-west-1", level="debug", namespace="gateway", pod="nginx-json-oghco", service_name="nginx-json", host="localhost"}` + nginxMetric, err := parser.ParseMetric(nginxLbls) + require.NoError(t, err) + + nginxStream := push.Stream{ + Labels: nginxLbls, + Entries: nginxJSONLines, + Hash: nginxMetric.Hash(), + } + + df := parseDetectedFields( + uint32(20), + logqlmodel.Streams([]push.Stream{rulerStream, nginxStream}), + ) + for _, expected := range []string{"ts", "caller_extracted", "tenant_extracted", "level", "duration", "msg"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 1) + require.Equal(t, "logfmt", parsers[0]) + } + + for _, expected := range []string{"host_extracted", "user_identifier", "datetime", "request", "protocol", "status", "bytes", "referer"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 1, "expected only json parser for %s", expected) + require.Equal(t, "json", parsers[0], "expected only json parser for %s", expected) + } + + // multiple parsers for fields that exist in both streams + for _, expected := range []string{"method"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 2, "expected logfmt and json parser for %s", expected) + require.Contains(t, parsers, "logfmt", "expected logfmt parser for %s", expected) + require.Contains(t, parsers, "json", "expected json parser for %s", expected) + } + + // no parsers for structed metadata + for _, expected := range []string{"detected_level"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 0) + } + }) + }) + + t.Run("when parsers are supplied", func(t *testing.T) { + infoDetectdFiledMetadata := []push.LabelAdapter{ + { + Name: "detected_level", + Value: "info", + }, + } + + parsedRulerFields := func(ts, tenant, duration string) []push.LabelAdapter { + return []push.LabelAdapter{ + { + Name: "ts", + Value: ts, + }, + { + Name: "caller", + Value: "grpc_logging.go:66", + }, + { + Name: "tenant", + Value: tenant, + }, + { + Name: "level", + Value: "info", + }, + { + Name: "method", + Value: "/cortex.Ingester/Push", + }, + { + Name: "duration", + Value: duration, + }, + { + Name: "msg", + Value: "gRPC", + }, + } + } + + rulerLbls := labels.FromStrings( + "cluster", "us-east-1", + "namespace", "mimir-dev", + "pod", "mimir-ruler-nfb37", + "service_name", "mimir-ruler", + ) + + rulerStreams := []push.Stream{} + streamLbls := logql_log.NewBaseLabelsBuilder().ForLabels(rulerLbls, rulerLbls.Hash()) + + for _, rulerFields := range [][]push.LabelAdapter{ + parsedRulerFields( + "2024-09-05T15:36:38.757788067Z", + "2419", + "19.098s", + ), + parsedRulerFields( + "2024-09-05T15:36:38.698375619Z", + "29", + "5.471s", + ), + parsedRulerFields( + "2024-09-05T15:36:38.629424175Z", + "2919", + "29.234s", + ), + } { + streamLbls.Reset() + + var ts, tenant, duration push.LabelAdapter + for _, field := range rulerFields { + switch field.Name { + case "ts": + ts = field 
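+ // the captured ts, tenant and duration are interpolated into the matching logfmt line below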
+ case "tenant": + tenant = field + case "duration": + duration = field + } + + streamLbls.Add(log.ParsedLabel, labels.Label{Name: field.Name, Value: field.Value}) + } + + rulerStreams = append(rulerStreams, push.Stream{ + Labels: streamLbls.LabelsResult().String(), + Entries: []push.Entry{ + { + Timestamp: now, + Line: fmt.Sprintf( + "ts=%s caller=grpc_logging.go:66 tenant=%s level=info method=/cortex.Ingester/Push duration=%s msg=gRPC", + ts.Value, + tenant.Value, + duration.Value, + ), + StructuredMetadata: infoDetectdFiledMetadata, + Parsed: rulerFields, + }, + }, + }) + } + + debugDetectedFieldMetadata := []push.LabelAdapter{ + { + Name: "detected_level", + Value: "debug", + }, + } + + parsedNginxFields := func(host, userIdentifier, datetime, method, request, protocol, status, bytes, referer string) []push.LabelAdapter { + return []push.LabelAdapter{ + { + Name: "host", + Value: host, + }, + { + Name: "user_identifier", + Value: userIdentifier, + }, + { + Name: "datetime", + Value: datetime, + }, + { + Name: "method", + Value: method, + }, + { + Name: "request", + Value: request, + }, + { + Name: "protocol", + Value: protocol, + }, + { + Name: "status", + Value: status, + }, + { + Name: "bytes", + Value: bytes, + }, + { + Name: "referer", + Value: referer, + }, + } + } + + nginxLbls := labels.FromStrings( + "cluster", "eu-west-1", + "level", "debug", + "namespace", "gateway", + "pod", "nginx-json-oghco", + "service_name", "nginx-json", + ) + + nginxStreams := []push.Stream{} + nginxStreamLbls := logql_log.NewBaseLabelsBuilder().ForLabels(nginxLbls, nginxLbls.Hash()) + + for _, nginxFields := range [][]push.LabelAdapter{ + parsedNginxFields( + "100.117.38.203", + "nadre3722", + "05/Sep/2024:16:13:56 +0000", + "PATCH", + "/api/loki/v1/push", + "HTTP/2.0", + "200", + "9664", + "https://www.seniorbleeding-edge.net/exploit/robust/whiteboard", + ), + parsedNginxFields( + "66.134.9.30", + "-", + "05/Sep/2024:16:13:55 +0000", + "DELETE", + "/api/mimir/v1/push", + "HTTP/1.1", + "200", + "18688", + "https://www.districtiterate.biz/synergistic/next-generation/extend", + ), + parsedNginxFields( + "66.134.9.30", + "-", + "05/Sep/2024:16:13:55 +0000", + "GET", + "/api/loki/v1/label/names", + "HTTP/1.1", + "200", + "9314", + "https://www.dynamicimplement.info/enterprise/distributed/incentivize/strategic", + ), + } { + nginxStreamLbls.Reset() + + var host, userIdentifier, datetime, method, request, protocol, status, bytes, referer push.LabelAdapter + for _, field := range nginxFields { + switch field.Name { + case "host": + host = field + case "user_identifier": + userIdentifier = field + case "datetime": + datetime = field + case "method": + method = field + case "request": + request = field + case "protocol": + protocol = field + case "status": + status = field + case "bytes": + bytes = field + case "referer": + referer = field + } + + nginxStreamLbls.Add( + log.ParsedLabel, + labels.Label{Name: field.Name, Value: field.Value}, + ) + } + + nginxStreams = append(nginxStreams, push.Stream{ + Labels: nginxStreamLbls.LabelsResult().String(), + Entries: []push.Entry{ + { + Timestamp: now, + Line: fmt.Sprintf( + `{"host":"%s", "user-identifier":"%s", "datetime":"%s", "method": "%s", "request": "%s", "protocol":"%s", "status":%s, "bytes":%s, "referer": ":%s"}`, + host.Value, + userIdentifier.Value, + datetime.Value, + method.Value, + request.Value, + protocol.Value, + status.Value, + bytes.Value, + referer.Value, + ), + StructuredMetadata: debugDetectedFieldMetadata, + Parsed: nginxFields, + }, + }, + }) 
+ } + + t.Run("detect logfmt fields", func(t *testing.T) { + df := parseDetectedFields(uint32(15), logqlmodel.Streams(rulerStreams)) + for _, expected := range []string{"ts", "caller", "tenant", "level", "method", "duration", "msg"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 1) + require.Equal(t, "logfmt", parsers[0]) + } + + // no parsers for structed metadata + for _, expected := range []string{"detected_level"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 0) + } + }) + + t.Run("detect json fields", func(t *testing.T) { + df := parseDetectedFields(uint32(15), logqlmodel.Streams(nginxStreams)) + for _, expected := range []string{"host", "user_identifier", "datetime", "method", "request", "protocol", "status", "bytes", "referer"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 1) + require.Equal(t, "json", parsers[0]) + } + + // no parsers for structed metadata + for _, expected := range []string{"detected_level"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 0) + } + }) + + t.Run("detect mixed fields", func(t *testing.T) { + streams := logqlmodel.Streams(rulerStreams) + streams = append(streams, nginxStreams...) + df := parseDetectedFields( + uint32(20), + streams, + ) + + for _, expected := range []string{"ts", "caller", "tenant", "level", "duration", "msg"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 1, "expected only logfmt parser for %s", expected) + require.Equal( + t, + "logfmt", + parsers[0], + "expected only logfmt parser for %s", + expected, + ) + } + + for _, expected := range []string{"host", "user_identifier", "datetime", "request", "protocol", "status", "bytes", "referer"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 1, "expected only json parser for %s", expected) + require.Equal(t, "json", parsers[0], "expected only json parser for %s", expected) + } + + // multiple parsers for fields that exist in both streams + for _, expected := range []string{"method"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 2, "expected logfmt and json parser for %s", expected) + require.Contains(t, parsers, "logfmt", "expected logfmt parser for %s", expected) + require.Contains(t, parsers, "json", "expected json parser for %s", expected) + } + + // no parsers for structed metadata + for _, expected := range []string{"detected_level"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 0) + } + }) + + t.Run("correctly applies _extracted for a single stream", func(t *testing.T) { + rulerLbls := `{cluster="us-east-1", namespace="mimir-dev", pod="mimir-ruler-nfb37", service_name="mimir-ruler", tenant="42", caller="inside-the-house"}` + rulerMetric, err := parser.ParseMetric(rulerLbls) + require.NoError(t, err) + + rulerStream := push.Stream{ + Labels: rulerLbls, + Entries: []push.Entry{ + { + Timestamp: now, + Line: "ts=2024-09-05T15:36:38.757788067Z caller=grpc_logging.go:66 tenant=2419 level=info method=/cortex.Ingester/Push duration=19.098s msg=gRPC", + StructuredMetadata: infoDetectdFiledMetadata, + Parsed: []push.LabelAdapter{ + { + Name: "ts", + Value: "2024-09-05T15:36:38.757788067Z", + }, + { + Name: "caller_extracted", + Value: "grpc_logging.go:66", + }, + { + Name: 
"tenant_extracted", + Value: "2419", + }, + { + Name: "level", + Value: "info", + }, + { + Name: "method", + Value: "/cortex.Ingester/Push", + }, + { + Name: "duration", + Value: "19.098s", + }, + { + Name: "msg", + Value: "gRPC", + }, + }, + }, + }, + Hash: rulerMetric.Hash(), + } + + df := parseDetectedFields(uint32(15), logqlmodel.Streams([]push.Stream{rulerStream})) + for _, expected := range []string{"ts", "caller_extracted", "tenant_extracted", "level", "method", "duration", "msg"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 1) + require.Equal(t, "logfmt", parsers[0]) + } + + // no parsers for structed metadata + for _, expected := range []string{"detected_level"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 0) + } + }) + + t.Run("correctly applies _extracted for multiple streams", func(t *testing.T) { + rulerLbls := `{cluster="us-east-1", namespace="mimir-dev", pod="mimir-ruler-nfb37", service_name="mimir-ruler", tenant="42", caller="inside-the-house"}` + rulerMetric, err := parser.ParseMetric(rulerLbls) + require.NoError(t, err) + + rulerStream := push.Stream{ + Labels: rulerLbls, + Entries: []push.Entry{ + { + Timestamp: now, + Line: "ts=2024-09-05T15:36:38.757788067Z caller=grpc_logging.go:66 tenant=2419 level=info method=/cortex.Ingester/Push duration=19.098s msg=gRPC", + StructuredMetadata: infoDetectdFiledMetadata, + Parsed: []push.LabelAdapter{ + { + Name: "ts", + Value: "2024-09-05T15:36:38.757788067Z", + }, + { + Name: "caller_extracted", + Value: "grpc_logging.go:66", + }, + { + Name: "tenant_extracted", + Value: "2419", + }, + { + Name: "level", + Value: "info", + }, + { + Name: "method", + Value: "/cortex.Ingester/Push", + }, + { + Name: "duration", + Value: "19.098s", + }, + { + Name: "msg", + Value: "gRPC", + }, + }, + }, + }, + Hash: rulerMetric.Hash(), + } + + nginxLbls := `{ cluster="eu-west-1", level="debug", namespace="gateway", pod="nginx-json-oghco", service_name="nginx-json", host="localhost"}` + nginxMetric, err := parser.ParseMetric(nginxLbls) + require.NoError(t, err) + + nginxStream := push.Stream{ + Labels: nginxLbls, + Entries: []push.Entry{ + { + Timestamp: now, + Line: `{"host":"100.117.38.203", "user-identifier":"nader3722", "datetime":"05/Sep/2024:16:13:56 +0000", "method": "PATCH", "request": "/api/loki/v1/push", "protocol":"HTTP/2.0", "status":200, "bytes":9664, "referer": "https://www.seniorbleeding-edge.net/exploit/robust/whiteboard"}`, + StructuredMetadata: debugDetectedFieldMetadata, + Parsed: []push.LabelAdapter{ + { + Name: "host_extracted", + Value: "100.117.38.203", + }, + { + Name: "user_identifier", + Value: "nader3722", + }, + { + Name: "datetime", + Value: "05/Sep/2024:16:13:56 +0000", + }, + { + Name: "method", + Value: "PATCH", + }, + { + Name: "request", + Value: "/api/loki/v1/push", + }, + { + Name: "protocol", + Value: "HTTP/2.0", + }, + { + Name: "status", + Value: "200", + }, + { + Name: "bytes", + Value: "9664", + }, + { + Name: "referer", + Value: "https://www.seniorbleeding-edge.net/exploit/robust/whiteboard", + }, + }, + }, + }, + Hash: nginxMetric.Hash(), + } + + df := parseDetectedFields( + uint32(20), + logqlmodel.Streams([]push.Stream{rulerStream, nginxStream}), + ) + for _, expected := range []string{"ts", "caller_extracted", "tenant_extracted", "level", "duration", "msg"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 1) + require.Equal(t, "logfmt", 
parsers[0]) + } + + for _, expected := range []string{"host_extracted", "user_identifier", "datetime", "request", "protocol", "status", "bytes", "referer"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 1, "expected only json parser for %s", expected) + require.Equal(t, "json", parsers[0], "expected only json parser for %s", expected) + } + + // multiple parsers for fields that exist in both streams + for _, expected := range []string{"method"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 2, "expected logfmt and json parser for %s", expected) + require.Contains(t, parsers, "logfmt", "expected logfmt parser for %s", expected) + require.Contains(t, parsers, "json", "expected json parser for %s", expected) + } + + // no parsers for structed metadata + for _, expected := range []string{"detected_level"} { + require.Contains(t, df, expected) + parsers := df[expected].parsers + + require.Len(t, parsers, 0) + } + }) + }) + + t.Run("handles level in all the places", func(t *testing.T) { + rulerLbls := `{cluster="us-east-1", namespace="mimir-dev", pod="mimir-ruler-nfb37", service_name="mimir-ruler", tenant="42", caller="inside-the-house", level="debug"}` + rulerMetric, err := parser.ParseMetric(rulerLbls) + require.NoError(t, err) + + rulerStream := push.Stream{ + Labels: rulerLbls, + Entries: []push.Entry{ + { + Timestamp: now, + Line: "ts=2024-09-05T15:36:38.757788067Z caller=grpc_logging.go:66 tenant=2419 level=info method=/cortex.Ingester/Push duration=19.098s msg=gRPC", + StructuredMetadata: []push.LabelAdapter{ + { + Name: "detected_level", + Value: "debug", + }, + }, + Parsed: []push.LabelAdapter{ + { + Name: "level_extracted", + Value: "info", + }, + }, + }, + }, + Hash: rulerMetric.Hash(), + } + + df := parseDetectedFields( + uint32(20), + logqlmodel.Streams([]push.Stream{rulerStream, rulerStream}), + ) + + detectedLevelField := df["detected_level"] + require.Len(t, detectedLevelField.parsers, 0) + require.Equal(t, uint64(1), detectedLevelField.sketch.Estimate()) + + levelField := df["level_extracted"] + require.Len(t, levelField.parsers, 1) + require.Contains(t, levelField.parsers, "logfmt") + require.Equal(t, uint64(1), levelField.sketch.Estimate()) + }) +} + +func mockLogfmtStreamWithLabels(_ int, quantity int, lbls string) logproto.Stream { + entries := make([]logproto.Entry, 0, quantity) + streamLabels, err := syntax.ParseLabels(lbls) + if err != nil { + streamLabels = labels.EmptyLabels() + } + + lblBuilder := logql_log.NewBaseLabelsBuilder().ForLabels(streamLabels, streamLabels.Hash()) + logFmtParser := logql_log.NewLogfmtParser(false, false) + + // used for detected fields queries which are always BACKWARD + for i := quantity; i > 0; i-- { + line := fmt.Sprintf( + `message="line %d" count=%d fake=true bytes=%dMB duration=%dms percent=%f even=%t name=bar`, + i, + i, + (i * 10), + (i * 256), + float32(i*10.0), + (i%2 == 0), + ) + + entry := logproto.Entry{ + Timestamp: time.Unix(int64(i), 0), + Line: line, + } + _, logfmtSuccess := logFmtParser.Process(0, []byte(line), lblBuilder) + if logfmtSuccess { + entry.Parsed = logproto.FromLabelsToLabelAdapters(lblBuilder.LabelsResult().Parsed()) + } + entries = append(entries, entry) + } + + return logproto.Stream{ + Entries: entries, + Labels: lbls, + } +} + +func mockLogfmtStreamWithLabelsAndStructuredMetadata( + from int, + quantity int, + lbls string, +) logproto.Stream { + var entries []logproto.Entry + metadata := 
push.LabelsAdapter{ + { + Name: "constant", + Value: "constant", + }, + } + + for i := from; i < from+quantity; i++ { + metadata = append(metadata, push.LabelAdapter{ + Name: "variable", + Value: fmt.Sprintf("value%d", i), + }) + } + + streamLabels, err := syntax.ParseLabels(lbls) + if err != nil { + streamLabels = labels.EmptyLabels() + } + + lblBuilder := logql_log.NewBaseLabelsBuilder().ForLabels(streamLabels, streamLabels.Hash()) + logFmtParser := logql_log.NewLogfmtParser(false, false) + + // used for detected fields queries which are always BACKWARD + for i := quantity; i > 0; i-- { + line := fmt.Sprintf( + `message="line %d" count=%d fake=true bytes=%dMB duration=%dms percent=%f even=%t name=bar`, + i, + i, + (i * 10), + (i * 256), + float32(i*10.0), + (i%2 == 0), + ) + + entry := logproto.Entry{ + Timestamp: time.Unix(int64(i), 0), + Line: line, + StructuredMetadata: metadata, + } + _, logfmtSuccess := logFmtParser.Process(0, []byte(line), lblBuilder) + if logfmtSuccess { + entry.Parsed = logproto.FromLabelsToLabelAdapters(lblBuilder.LabelsResult().Parsed()) + } + entries = append(entries, entry) + } + + return logproto.Stream{ + Labels: lbls, + Entries: entries, + } +} + +func TestQuerier_DetectedFields(t *testing.T) { + limits := fakeLimits{ + maxSeries: math.MaxInt32, + maxQueryParallelism: 1, + tsdbMaxQueryParallelism: 1, + maxQueryBytesRead: 1000, + maxQuerierBytesRead: 100, + } + + limitedHandler := func(stream logproto.Stream) base.Handler { + return base.HandlerFunc( + func(_ context.Context, _ base.Request) (base.Response, error) { + return &LokiResponse{ + Status: "success", + Data: LokiData{ + ResultType: loghttp.ResultTypeStream, + Result: []logproto.Stream{ + stream, + }, + }, + Direction: logproto.BACKWARD, + }, nil + }) + } + + logHandler := func(stream logproto.Stream) base.Handler { + return base.HandlerFunc( + func(_ context.Context, _ base.Request) (base.Response, error) { + return &LokiResponse{ + Status: "success", + Data: LokiData{ + ResultType: loghttp.ResultTypeStream, + Result: []logproto.Stream{ + stream, + }, + }, + Direction: logproto.BACKWARD, + }, nil + }) + } + + request := DetectedFieldsRequest{ + logproto.DetectedFieldsRequest{ + Start: time.Now().Add(-1 * time.Minute), + End: time.Now(), + Query: `{type="test"} | logfmt | json`, + LineLimit: 1000, + FieldLimit: 1000, + }, + "/loki/api/v1/detected_fields", + } + + handleRequest := func(handler base.Handler, request DetectedFieldsRequest) []*logproto.DetectedField { + ctx := context.Background() + ctx = user.InjectOrgID(ctx, "test-tenant") + + resp, err := handler.Do(ctx, &request) + require.NoError(t, err) + + r, ok := resp.(*DetectedFieldsResponse) + require.True(t, ok) + + return r.Response.Fields + } + + t.Run("returns detected fields from queried logs", func(t *testing.T) { + handler := NewDetectedFieldsHandler( + limitedHandler(mockLogfmtStreamWithLabels(1, 5, `{type="test", name="foo"}`)), + logHandler(mockLogfmtStreamWithLabels(1, 5, `{type="test", name="foo"}`)), + limits, + ) + + detectedFields := handleRequest(handler, request) + // log lines come from querier_mock_test.go + // message="line %d" count=%d fake=true bytes=%dMB duration=%dms percent=%f even=%t + assert.Len(t, detectedFields, 8) + expectedCardinality := map[string]uint64{ + "message": 5, + "count": 5, + "fake": 1, + "bytes": 5, + "duration": 5, + "percent": 5, + "even": 2, + "name_extracted": 1, + } + for _, d := range detectedFields { + card := expectedCardinality[d.Label] + assert.Equal(t, card, d.Cardinality, "Expected 
cardinality mismatch for: %s", d.Label) + } + }) + + t.Run("returns detected fields with structured metadata from queried logs", func(t *testing.T) { + handler := NewDetectedFieldsHandler( + limitedHandler(mockLogfmtStreamWithLabelsAndStructuredMetadata(1, 5, `{type="test", name="bob"}`)), + logHandler(mockLogfmtStreamWithLabelsAndStructuredMetadata(1, 5, `{type="test", name="bob"}`)), + limits, + ) + + detectedFields := handleRequest(handler, request) + // log lines come from querier_mock_test.go + // message="line %d" count=%d fake=true bytes=%dMB duration=%dms percent=%f even=%t + assert.Len(t, detectedFields, 10) + expectedCardinality := map[string]uint64{ + "variable": 5, + "constant": 1, + "message": 5, + "count": 5, + "fake": 1, + "bytes": 5, + "duration": 5, + "percent": 5, + "even": 2, + "name_extracted": 1, + } + for _, d := range detectedFields { + card := expectedCardinality[d.Label] + assert.Equal(t, card, d.Cardinality, "Expected cardinality mismatch for: %s", d.Label) + } + }) + + t.Run("correctly identifies different field types", func(t *testing.T) { + handler := NewDetectedFieldsHandler( + limitedHandler(mockLogfmtStreamWithLabels(1, 2, `{type="test", name="foo"}`)), + logHandler(mockLogfmtStreamWithLabels(1, 2, `{type="test", name="foo"}`)), + limits, + ) + + detectedFields := handleRequest(handler, request) + // log lines come from querier_mock_test.go + // message="line %d" count=%d fake=true bytes=%dMB duration=%dms percent=%f even=%t + assert.Len(t, detectedFields, 8) + + var messageField, countField, bytesField, durationField, floatField, evenField *logproto.DetectedField + for _, field := range detectedFields { + print(field.Label) + switch field.Label { + case "message": + messageField = field + case "count": + countField = field + case "bytes": + bytesField = field + case "duration": + durationField = field + case "percent": + floatField = field + case "even": + evenField = field + } + } + + assert.Equal(t, logproto.DetectedFieldString, messageField.Type) + assert.Equal(t, logproto.DetectedFieldInt, countField.Type) + assert.Equal(t, logproto.DetectedFieldBytes, bytesField.Type) + assert.Equal(t, logproto.DetectedFieldDuration, durationField.Type) + assert.Equal(t, logproto.DetectedFieldFloat, floatField.Type) + assert.Equal(t, logproto.DetectedFieldBoolean, evenField.Type) + }) + + t.Run( + "correctly identifies parser to use with logfmt and structured metadata", + func(t *testing.T) { + handler := NewDetectedFieldsHandler( + limitedHandler( + mockLogfmtStreamWithLabelsAndStructuredMetadata(1, 2, `{type="test"}`), + ), + logHandler(mockLogfmtStreamWithLabelsAndStructuredMetadata(1, 2, `{type="test"}`)), + limits, + ) + + detectedFields := handleRequest(handler, request) + // log lines come from querier_mock_test.go + // message="line %d" count=%d fake=true bytes=%dMB duration=%dms percent=%f even=%t + assert.Len(t, detectedFields, 10) + + var messageField, countField, bytesField, durationField, floatField, evenField, constantField, variableField *logproto.DetectedField + for _, field := range detectedFields { + switch field.Label { + case "message": + messageField = field + case "count": + countField = field + case "bytes": + bytesField = field + case "duration": + durationField = field + case "percent": + floatField = field + case "even": + evenField = field + case "constant": + constantField = field + case "variable": + variableField = field + } + } + + assert.Equal(t, []string{"logfmt"}, messageField.Parsers) + assert.Equal(t, []string{"logfmt"}, 
countField.Parsers) + assert.Equal(t, []string{"logfmt"}, bytesField.Parsers) + assert.Equal(t, []string{"logfmt"}, durationField.Parsers) + assert.Equal(t, []string{"logfmt"}, floatField.Parsers) + assert.Equal(t, []string{"logfmt"}, evenField.Parsers) + assert.Equal(t, []string(nil), constantField.Parsers) + assert.Equal(t, []string(nil), variableField.Parsers) + }, + ) + + t.Run( + "adds _extracted suffix to detected fields that conflict with indexed labels", + func(t *testing.T) { + handler := NewDetectedFieldsHandler( + limitedHandler( + mockLogfmtStreamWithLabelsAndStructuredMetadata(1, 2, `{type="test", name="bob"}`), + ), + logHandler(mockLogfmtStreamWithLabelsAndStructuredMetadata(1, 2, `{type="test", name="bob"}`)), + limits, + ) + + detectedFields := handleRequest(handler, request) + // log lines come from querier_mock_test.go + // message="line %d" count=%d fake=true bytes=%dMB duration=%dms percent=%f even=%t + assert.Len(t, detectedFields, 10) + + var nameField *logproto.DetectedField + for _, field := range detectedFields { + switch field.Label { + case "name_extracted": + nameField = field + } + } + + assert.NotNil(t, nameField) + assert.Equal(t, "name_extracted", nameField.Label) + assert.Equal(t, logproto.DetectedFieldString, nameField.Type) + assert.Equal(t, []string{"logfmt"}, nameField.Parsers) + assert.Equal(t, uint64(1), nameField.Cardinality) + }, + ) +} + +// func BenchmarkQuerierDetectedFields(b *testing.B) { +// limits, _ := validation.NewOverrides(defaultLimitsTestConfig(), nil) +// ctx := user.InjectOrgID(context.Background(), "test") + +// conf := mockQuerierConfig() +// conf.IngesterQueryStoreMaxLookback = 0 + +// request := logproto.DetectedFieldsRequest{ +// Start: time.Now().Add(-1 * time.Minute), +// End: time.Now(), +// Query: `{type="test"}`, +// LineLimit: 1000, +// FieldLimit: 1000, +// } + +// store := newStoreMock() +// store.On("SelectLogs", mock.Anything, mock.Anything). +// Return(mockLogfmtStreamIterator(1, 2), nil) + +// queryClient := newQueryClientMock() +// queryClient.On("Recv"). +// Return(mockQueryResponse([]logproto.Stream{mockLogfmtStream(1, 2)}), nil) + +// ingesterClient := newQuerierClientMock() +// ingesterClient.On("Query", mock.Anything, mock.Anything, mock.Anything). 
+// Return(queryClient, nil) + +// querier, _ := newQuerier( +// conf, +// mockIngesterClientConfig(), +// newIngesterClientMockFactory(ingesterClient), +// mockReadRingWithOneActiveIngester(), +// &mockDeleteGettter{}, +// store, limits) + +// b.ReportAllocs() +// b.ResetTimer() + +// for i := 0; i < b.N; i++ { +// _, err := querier.DetectedFields(ctx, &request) +// assert.NoError(b, err) +// } +// } diff --git a/pkg/querier/queryrange/downstreamer.go b/pkg/querier/queryrange/downstreamer.go index f4b4b18cac75..c6fba0fbf49a 100644 --- a/pkg/querier/queryrange/downstreamer.go +++ b/pkg/querier/queryrange/downstreamer.go @@ -142,7 +142,7 @@ func (in instance) Downstream(ctx context.Context, queries []logql.DownstreamQue } sp, ctx := opentracing.StartSpanFromContext(ctx, "DownstreamHandler.instance") defer sp.Finish() - sp.LogKV("shards", fmt.Sprintf("%+v", qry.Params.Shards()), "query", req.GetQuery(), "step", req.GetStep(), "handler", reflect.TypeOf(in.handler), "engine", "downstream") + sp.LogKV("shards", fmt.Sprintf("%+v", qry.Params.Shards()), "query", req.GetQuery(), "start", req.GetStart(), "end", req.GetEnd(), "step", req.GetStep(), "handler", reflect.TypeOf(in.handler), "engine", "downstream") res, err := in.handler.Do(ctx, req) if err != nil { diff --git a/pkg/querier/queryrange/marshal.go b/pkg/querier/queryrange/marshal.go index ab7a483890e0..7aa9f807bb3b 100644 --- a/pkg/querier/queryrange/marshal.go +++ b/pkg/querier/queryrange/marshal.go @@ -124,15 +124,17 @@ func ResultToResponse(result logqlmodel.Result, params logql.Params) (queryrange case sketch.TopKMatrix: sk, err := data.ToProto() return &TopKSketchesResponse{ - Response: sk, - Warnings: result.Warnings, + Response: sk, + Warnings: result.Warnings, + Statistics: result.Statistics, }, err case logql.ProbabilisticQuantileMatrix: r := data.ToProto() data.Release() return &QuantileSketchResponse{ - Response: r, - Warnings: result.Warnings, + Response: r, + Warnings: result.Warnings, + Statistics: result.Statistics, }, nil } @@ -184,9 +186,10 @@ func ResponseToResult(resp queryrangebase.Response) (logqlmodel.Result, error) { } return logqlmodel.Result{ - Data: matrix, - Headers: resp.GetHeaders(), - Warnings: r.Warnings, + Data: matrix, + Headers: resp.GetHeaders(), + Warnings: r.Warnings, + Statistics: r.Statistics, }, nil case *QuantileSketchResponse: matrix, err := logql.ProbabilisticQuantileMatrixFromProto(r.Response) @@ -194,9 +197,10 @@ func ResponseToResult(resp queryrangebase.Response) (logqlmodel.Result, error) { return logqlmodel.Result{}, fmt.Errorf("cannot decode quantile sketch: %w", err) } return logqlmodel.Result{ - Data: matrix, - Headers: resp.GetHeaders(), - Warnings: r.Warnings, + Data: matrix, + Headers: resp.GetHeaders(), + Warnings: r.Warnings, + Statistics: r.Statistics, }, nil default: return logqlmodel.Result{}, fmt.Errorf("cannot decode (%T)", resp) diff --git a/pkg/querier/queryrange/metrics.go b/pkg/querier/queryrange/metrics.go index bd9ce6fa79ba..d3c949b9bb3a 100644 --- a/pkg/querier/queryrange/metrics.go +++ b/pkg/querier/queryrange/metrics.go @@ -9,6 +9,7 @@ import ( "github.com/grafana/loki/v3/pkg/logql" "github.com/grafana/loki/v3/pkg/logql/syntax" "github.com/grafana/loki/v3/pkg/querier/queryrange/queryrangebase" + v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1" ) type Metrics struct { @@ -46,15 +47,15 @@ func NewMetrics(registerer prometheus.Registerer, metricsNamespace string) *Metr } type QueryMetrics struct { - receivedFilters prometheus.Histogram + receivedLabelFilters 
prometheus.Histogram } func NewMiddlewareQueryMetrics(registerer prometheus.Registerer, metricsNamespace string) *QueryMetrics { return &QueryMetrics{ - receivedFilters: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{ + receivedLabelFilters: promauto.With(registerer).NewHistogram(prometheus.HistogramOpts{ Namespace: metricsNamespace, - Name: "query_frontend_query_filters", - Help: "Number of filters per query.", + Name: "query_frontend_query_label_filters", + Help: "Number of label matcher expressions per query.", Buckets: prometheus.ExponentialBuckets(1, 2, 9), // 1 -> 256 }), } @@ -87,8 +88,8 @@ func QueryMetricsMiddleware(metrics *QueryMetrics) queryrangebase.Middleware { } } - filters := syntax.ExtractLineFilters(expr) - metrics.receivedFilters.Observe(float64(len(filters))) + filters := v1.ExtractTestableLabelMatchers(expr) + metrics.receivedLabelFilters.Observe(float64(len(filters))) return next.Do(ctx, req) }) diff --git a/pkg/querier/queryrange/queryrange.pb.go b/pkg/querier/queryrange/queryrange.pb.go index ae2dddee539a..82e929e68303 100644 --- a/pkg/querier/queryrange/queryrange.pb.go +++ b/pkg/querier/queryrange/queryrange.pb.go @@ -803,9 +803,10 @@ func (m *VolumeResponse) XXX_DiscardUnknown() { var xxx_messageInfo_VolumeResponse proto.InternalMessageInfo type TopKSketchesResponse struct { - Response *github_com_grafana_loki_v3_pkg_logproto.TopKMatrix `protobuf:"bytes,1,opt,name=response,proto3,customtype=github.com/grafana/loki/v3/pkg/logproto.TopKMatrix" json:"response,omitempty"` - Headers []github_com_grafana_loki_v3_pkg_querier_queryrange_queryrangebase_definitions.PrometheusResponseHeader `protobuf:"bytes,2,rep,name=Headers,proto3,customtype=github.com/grafana/loki/v3/pkg/querier/queryrange/queryrangebase/definitions.PrometheusResponseHeader" json:"-"` - Warnings []string `protobuf:"bytes,3,rep,name=warnings,proto3" json:"warnings,omitempty"` + Response *github_com_grafana_loki_v3_pkg_logproto.TopKMatrix `protobuf:"bytes,1,opt,name=response,proto3,customtype=github.com/grafana/loki/v3/pkg/logproto.TopKMatrix" json:"response,omitempty"` + Headers []github_com_grafana_loki_v3_pkg_querier_queryrange_queryrangebase_definitions.PrometheusResponseHeader `protobuf:"bytes,2,rep,name=Headers,proto3,customtype=github.com/grafana/loki/v3/pkg/querier/queryrange/queryrangebase/definitions.PrometheusResponseHeader" json:"-"` + Warnings []string `protobuf:"bytes,3,rep,name=warnings,proto3" json:"warnings,omitempty"` + Statistics stats.Result `protobuf:"bytes,4,opt,name=statistics,proto3" json:"statistics"` } func (m *TopKSketchesResponse) Reset() { *m = TopKSketchesResponse{} } @@ -847,10 +848,18 @@ func (m *TopKSketchesResponse) GetWarnings() []string { return nil } +func (m *TopKSketchesResponse) GetStatistics() stats.Result { + if m != nil { + return m.Statistics + } + return stats.Result{} +} + type QuantileSketchResponse struct { - Response *github_com_grafana_loki_v3_pkg_logproto.QuantileSketchMatrix `protobuf:"bytes,1,opt,name=response,proto3,customtype=github.com/grafana/loki/v3/pkg/logproto.QuantileSketchMatrix" json:"response,omitempty"` - Headers []github_com_grafana_loki_v3_pkg_querier_queryrange_queryrangebase_definitions.PrometheusResponseHeader `protobuf:"bytes,2,rep,name=Headers,proto3,customtype=github.com/grafana/loki/v3/pkg/querier/queryrange/queryrangebase/definitions.PrometheusResponseHeader" json:"-"` - Warnings []string `protobuf:"bytes,3,rep,name=warnings,proto3" json:"warnings,omitempty"` + Response 
*github_com_grafana_loki_v3_pkg_logproto.QuantileSketchMatrix `protobuf:"bytes,1,opt,name=response,proto3,customtype=github.com/grafana/loki/v3/pkg/logproto.QuantileSketchMatrix" json:"response,omitempty"` + Headers []github_com_grafana_loki_v3_pkg_querier_queryrange_queryrangebase_definitions.PrometheusResponseHeader `protobuf:"bytes,2,rep,name=Headers,proto3,customtype=github.com/grafana/loki/v3/pkg/querier/queryrange/queryrangebase/definitions.PrometheusResponseHeader" json:"-"` + Warnings []string `protobuf:"bytes,3,rep,name=warnings,proto3" json:"warnings,omitempty"` + Statistics stats.Result `protobuf:"bytes,4,opt,name=statistics,proto3" json:"statistics"` } func (m *QuantileSketchResponse) Reset() { *m = QuantileSketchResponse{} } @@ -892,6 +901,13 @@ func (m *QuantileSketchResponse) GetWarnings() []string { return nil } +func (m *QuantileSketchResponse) GetStatistics() stats.Result { + if m != nil { + return m.Statistics + } + return stats.Result{} +} + type ShardsResponse struct { Response *github_com_grafana_loki_v3_pkg_logproto.ShardsResponse `protobuf:"bytes,1,opt,name=response,proto3,customtype=github.com/grafana/loki/v3/pkg/logproto.ShardsResponse" json:"response,omitempty"` Headers []github_com_grafana_loki_v3_pkg_querier_queryrange_queryrangebase_definitions.PrometheusResponseHeader `protobuf:"bytes,2,rep,name=Headers,proto3,customtype=github.com/grafana/loki/v3/pkg/querier/queryrange/queryrangebase/definitions.PrometheusResponseHeader" json:"-"` @@ -1488,128 +1504,129 @@ func init() { } var fileDescriptor_51b9d53b40d11902 = []byte{ - // 1935 bytes of a gzipped FileDescriptorProto - 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xec, 0x59, 0xcb, 0x6f, 0x23, 0x49, - 0x19, 0x77, 0xfb, 0x19, 0x7f, 0x79, 0x4c, 0xa8, 0x09, 0xd9, 0x26, 0x3b, 0xeb, 0x36, 0x96, 0xd8, - 0x0d, 0x08, 0xec, 0x1d, 0x67, 0x77, 0xd8, 0x0d, 0xc3, 0x68, 0xa7, 0x27, 0x33, 0x64, 0x86, 0x59, - 0x76, 0xb6, 0x13, 0x71, 0xe0, 0x82, 0x2a, 0x76, 0xc5, 0x6e, 0x62, 0x77, 0xf7, 0x74, 0x97, 0x33, - 0x13, 0x09, 0xa1, 0xfd, 0x07, 0x56, 0xec, 0x5f, 0x81, 0xb8, 0x71, 0xe1, 0xc4, 0x89, 0xe3, 0xee, - 0x01, 0x69, 0x8e, 0x2b, 0x4b, 0x34, 0x8c, 0x07, 0x21, 0x94, 0xd3, 0x4a, 0x5c, 0x39, 0xa0, 0x7a, - 0x74, 0xbb, 0xca, 0x76, 0x18, 0x27, 0x20, 0xa4, 0xc0, 0x5e, 0x92, 0x7a, 0x7c, 0xbf, 0xea, 0xaa, - 0xdf, 0xf7, 0xfb, 0xbe, 0x7a, 0x18, 0xde, 0x08, 0x8e, 0x3a, 0x8d, 0xc7, 0x03, 0x12, 0xba, 0x24, - 0xe4, 0xff, 0x4f, 0x42, 0xec, 0x75, 0x88, 0x52, 0xac, 0x07, 0xa1, 0x4f, 0x7d, 0x04, 0xe3, 0x96, - 0x8d, 0x66, 0xc7, 0xa5, 0xdd, 0xc1, 0x41, 0xbd, 0xe5, 0xf7, 0x1b, 0x1d, 0xbf, 0xe3, 0x37, 0x3a, - 0xbe, 0xdf, 0xe9, 0x11, 0x1c, 0xb8, 0x91, 0x2c, 0x36, 0xc2, 0xa0, 0xd5, 0x88, 0x28, 0xa6, 0x83, - 0x48, 0xe0, 0x37, 0xd6, 0x98, 0x21, 0x2f, 0x72, 0x88, 0x6c, 0xb5, 0xa4, 0x39, 0xaf, 0x1d, 0x0c, - 0x0e, 0x1b, 0xd4, 0xed, 0x93, 0x88, 0xe2, 0x7e, 0x90, 0x18, 0xb0, 0xf9, 0xf5, 0xfc, 0x8e, 0x40, - 0xba, 0x5e, 0x9b, 0x3c, 0xed, 0x60, 0x4a, 0x9e, 0xe0, 0x13, 0x69, 0xf0, 0xaa, 0x66, 0x90, 0x14, - 0x64, 0xe7, 0x86, 0xd6, 0x19, 0x60, 0x4a, 0x49, 0xe8, 0xc9, 0xbe, 0xaf, 0x69, 0x7d, 0xd1, 0x11, - 0xa1, 0xad, 0xae, 0xec, 0xaa, 0xca, 0xae, 0xc7, 0xbd, 0xbe, 0xdf, 0x26, 0x3d, 0xbe, 0x90, 0x48, - 0xfc, 0x95, 0x16, 0x57, 0x99, 0x45, 0x30, 0x88, 0xba, 0xfc, 0x8f, 0x6c, 0xbc, 0xf3, 0x52, 0x2e, - 0x0f, 0x70, 0x44, 0x1a, 0x6d, 0x72, 0xe8, 0x7a, 0x2e, 0x75, 0x7d, 0x2f, 0x52, 0xcb, 0x72, 0x90, - 0x1b, 0xf3, 0x0d, 0x32, 0xe9, 0x9f, 0x8d, 0x37, 0x19, 0x2e, 0xa2, 0x7e, 0x88, 0x3b, 0xa4, 0xd1, - 0xea, 0x0e, 0xbc, 0xa3, 0x46, 0x0b, 0xb7, 0xba, 0xa4, 0x11, 0x92, 
0x68, 0xd0, 0xa3, 0x91, 0xa8, - 0xd0, 0x93, 0x80, 0xc8, 0x2f, 0xd5, 0x3e, 0xcb, 0xc3, 0xe2, 0x43, 0xff, 0xc8, 0x75, 0xc8, 0xe3, - 0x01, 0x89, 0x28, 0x5a, 0x83, 0x02, 0x1f, 0xd5, 0x34, 0xaa, 0xc6, 0x66, 0xd9, 0x11, 0x15, 0xd6, - 0xda, 0x73, 0xfb, 0x2e, 0x35, 0xb3, 0x55, 0x63, 0x73, 0xd9, 0x11, 0x15, 0x84, 0x20, 0x1f, 0x51, - 0x12, 0x98, 0xb9, 0xaa, 0xb1, 0x99, 0x73, 0x78, 0x19, 0x6d, 0xc0, 0x82, 0xeb, 0x51, 0x12, 0x1e, - 0xe3, 0x9e, 0x59, 0xe6, 0xed, 0x69, 0x1d, 0xdd, 0x82, 0x52, 0x44, 0x71, 0x48, 0xf7, 0x23, 0x33, - 0x5f, 0x35, 0x36, 0x17, 0x9b, 0x1b, 0x75, 0xe1, 0xf9, 0x7a, 0xe2, 0xf9, 0xfa, 0x7e, 0xe2, 0x79, - 0x7b, 0xe1, 0xd3, 0xd8, 0xca, 0x7c, 0xf2, 0x27, 0xcb, 0x70, 0x12, 0x10, 0xda, 0x86, 0x02, 0xf1, - 0xda, 0xfb, 0x91, 0x59, 0x38, 0x07, 0x5a, 0x40, 0xd0, 0x75, 0x28, 0xb7, 0xdd, 0x90, 0xb4, 0x18, - 0xcb, 0x66, 0xb1, 0x6a, 0x6c, 0xae, 0x34, 0xaf, 0xd6, 0x53, 0xa1, 0xec, 0x24, 0x5d, 0xce, 0xd8, - 0x8a, 0x2d, 0x2f, 0xc0, 0xb4, 0x6b, 0x96, 0x38, 0x13, 0xbc, 0x8c, 0x6a, 0x50, 0x8c, 0xba, 0x38, - 0x6c, 0x47, 0xe6, 0x42, 0x35, 0xb7, 0x59, 0xb6, 0xe1, 0x34, 0xb6, 0x64, 0x8b, 0x23, 0xff, 0xa3, - 0x9f, 0x42, 0x3e, 0xe8, 0x61, 0xcf, 0x04, 0x3e, 0xcb, 0xd5, 0xba, 0xe2, 0xa5, 0x47, 0x3d, 0xec, - 0xd9, 0xef, 0x0e, 0x63, 0xeb, 0x6d, 0x35, 0x78, 0x42, 0x7c, 0x88, 0x3d, 0xdc, 0xe8, 0xf9, 0x47, - 0x6e, 0xe3, 0x78, 0xab, 0xa1, 0xfa, 0x9e, 0x0d, 0x54, 0xff, 0x90, 0x0d, 0xc0, 0xa0, 0x0e, 0x1f, - 0x18, 0x3d, 0x80, 0x45, 0xe6, 0x63, 0x72, 0x87, 0x39, 0x38, 0x32, 0x17, 0xf9, 0x77, 0x5e, 0x19, - 0xaf, 0x86, 0xb7, 0x3b, 0xe4, 0xf0, 0x07, 0xa1, 0x3f, 0x08, 0xec, 0x2b, 0xa7, 0xb1, 0xa5, 0xda, - 0x3b, 0x6a, 0x05, 0x3d, 0x80, 0x15, 0x26, 0x0a, 0xd7, 0xeb, 0x7c, 0x10, 0x70, 0x05, 0x9a, 0x4b, - 0x7c, 0xb8, 0x6b, 0x75, 0x55, 0x32, 0xf5, 0x3b, 0x9a, 0x8d, 0x9d, 0x67, 0xf4, 0x3a, 0x13, 0xc8, - 0xda, 0x28, 0x07, 0x88, 0x69, 0xe9, 0xbe, 0x17, 0x51, 0xec, 0xd1, 0x8b, 0x48, 0xea, 0x26, 0x14, - 0x59, 0xf0, 0xef, 0x47, 0x5c, 0x54, 0xf3, 0xfa, 0x58, 0x62, 0x74, 0x27, 0xe7, 0xcf, 0xe5, 0xe4, - 0xc2, 0x4c, 0x27, 0x17, 0x5f, 0xea, 0xe4, 0xd2, 0x7f, 0xc9, 0xc9, 0x0b, 0xff, 0x59, 0x27, 0x97, - 0x2f, 0xec, 0x64, 0x13, 0xf2, 0x6c, 0x96, 0x68, 0x15, 0x72, 0x21, 0x7e, 0xc2, 0x7d, 0xba, 0xe4, - 0xb0, 0x62, 0x6d, 0x94, 0x87, 0x25, 0x91, 0x4a, 0xa2, 0xc0, 0xf7, 0x22, 0xc2, 0x78, 0xdc, 0xe3, - 0xd9, 0x5f, 0x78, 0x5e, 0xf2, 0xc8, 0x5b, 0x1c, 0xd9, 0x83, 0xde, 0x83, 0xfc, 0x0e, 0xa6, 0x98, - 0xab, 0x60, 0xb1, 0xb9, 0xa6, 0xf2, 0xc8, 0xc6, 0x62, 0x7d, 0xf6, 0x3a, 0x9b, 0xc8, 0x69, 0x6c, - 0xad, 0xb4, 0x31, 0xc5, 0xdf, 0xf6, 0xfb, 0x2e, 0x25, 0xfd, 0x80, 0x9e, 0x38, 0x1c, 0x89, 0xde, - 0x86, 0xf2, 0xdd, 0x30, 0xf4, 0xc3, 0xfd, 0x93, 0x80, 0x70, 0xd5, 0x94, 0xed, 0x57, 0x4e, 0x63, - 0xeb, 0x2a, 0x49, 0x1a, 0x15, 0xc4, 0xd8, 0x12, 0x7d, 0x13, 0x0a, 0xbc, 0xc2, 0x75, 0x52, 0xb6, - 0xaf, 0x9e, 0xc6, 0xd6, 0x15, 0x0e, 0x51, 0xcc, 0x85, 0x85, 0x2e, 0xab, 0xc2, 0x5c, 0xb2, 0x4a, - 0xd5, 0x5d, 0x54, 0xd5, 0x6d, 0x42, 0xe9, 0x98, 0x84, 0x11, 0x1b, 0xa6, 0xc4, 0xdb, 0x93, 0x2a, - 0xba, 0x0d, 0xc0, 0x88, 0x71, 0x23, 0xea, 0xb6, 0x12, 0x67, 0x2f, 0xd7, 0xc5, 0x66, 0xe3, 0x70, - 0x1f, 0xd9, 0x48, 0xb2, 0xa0, 0x18, 0x3a, 0x4a, 0x19, 0xfd, 0xc6, 0x80, 0xd2, 0x2e, 0xc1, 0x6d, - 0x12, 0x32, 0xf7, 0xe6, 0x36, 0x17, 0x9b, 0xdf, 0xa8, 0xab, 0x3b, 0xcb, 0xa3, 0xd0, 0xef, 0x13, - 0xda, 0x25, 0x83, 0x28, 0x71, 0x90, 0xb0, 0xb6, 0xbd, 0x61, 0x6c, 0x91, 0x39, 0xa5, 0x3a, 0xd7, - 0x86, 0x76, 0xe6, 0xa7, 0x4e, 0x63, 0xcb, 0xf8, 0x8e, 0x93, 0xcc, 0x12, 0x35, 0x61, 0xe1, 0x09, - 0x0e, 0x3d, 0xd7, 0xeb, 0x44, 0x26, 0xf0, 0x48, 0x5b, 0x3f, 0x8d, 0x2d, 0x94, 0xb4, 0x29, 
0x8e, - 0x48, 0xed, 0x6a, 0x7f, 0x34, 0xe0, 0x2b, 0x4c, 0x18, 0x7b, 0x6c, 0x3e, 0x91, 0x92, 0x62, 0xfa, - 0x98, 0xb6, 0xba, 0xa6, 0xc1, 0x86, 0x71, 0x44, 0x45, 0xdd, 0x6f, 0xb2, 0xff, 0xd6, 0x7e, 0x93, - 0x3b, 0xff, 0x7e, 0x93, 0xe4, 0x95, 0xfc, 0xcc, 0xbc, 0x52, 0x38, 0x2b, 0xaf, 0xd4, 0x7e, 0x29, - 0x73, 0x68, 0xb2, 0xbe, 0x73, 0x84, 0xd2, 0xbd, 0x34, 0x94, 0x72, 0x7c, 0xb6, 0xa9, 0x42, 0xc5, - 0x58, 0xf7, 0xdb, 0xc4, 0xa3, 0xee, 0xa1, 0x4b, 0xc2, 0x97, 0x04, 0x94, 0xa2, 0xd2, 0x9c, 0xae, - 0x52, 0x55, 0x62, 0xf9, 0x4b, 0x21, 0x31, 0x3d, 0xae, 0x0a, 0x17, 0x88, 0xab, 0xda, 0xdf, 0xb3, - 0xb0, 0xce, 0x3c, 0xf2, 0x10, 0x1f, 0x90, 0xde, 0x8f, 0x70, 0xff, 0x9c, 0x5e, 0x79, 0x5d, 0xf1, - 0x4a, 0xd9, 0x46, 0x5f, 0xb2, 0x3e, 0x1f, 0xeb, 0xbf, 0x32, 0x60, 0x21, 0xd9, 0x00, 0x50, 0x1d, - 0x40, 0xc0, 0x78, 0x8e, 0x17, 0x5c, 0xaf, 0x30, 0x70, 0x98, 0xb6, 0x3a, 0x8a, 0x05, 0xfa, 0x19, - 0x14, 0x45, 0x4d, 0xc6, 0x82, 0xb2, 0x6d, 0xee, 0xd1, 0x90, 0xe0, 0xfe, 0xed, 0x36, 0x0e, 0x28, - 0x09, 0xed, 0x77, 0xd9, 0x2c, 0x86, 0xb1, 0xf5, 0xc6, 0x59, 0x2c, 0x25, 0x27, 0x7c, 0x89, 0x63, - 0xfe, 0x15, 0xdf, 0x74, 0xe4, 0x17, 0x6a, 0x1f, 0x1b, 0xb0, 0xca, 0x26, 0xca, 0xa8, 0x49, 0x85, - 0xb1, 0x03, 0x0b, 0xa1, 0x2c, 0xf3, 0xe9, 0x2e, 0x36, 0x6b, 0x75, 0x9d, 0xd6, 0x19, 0x54, 0xf2, - 0x0d, 0xd7, 0x70, 0x52, 0x24, 0xda, 0xd2, 0x68, 0xcc, 0xce, 0xa2, 0x51, 0xec, 0xd1, 0x2a, 0x71, - 0xbf, 0xcf, 0x02, 0xba, 0xcf, 0x6e, 0x48, 0x4c, 0x7f, 0x63, 0xa9, 0x3e, 0x9d, 0x9a, 0xd1, 0xb5, - 0x31, 0x29, 0xd3, 0xf6, 0xf6, 0xad, 0x61, 0x6c, 0x6d, 0xbf, 0x44, 0x3b, 0xff, 0x02, 0xaf, 0xac, - 0x42, 0x95, 0x6f, 0xf6, 0x32, 0xc8, 0xb7, 0xf6, 0xdb, 0x2c, 0xac, 0xfc, 0xd8, 0xef, 0x0d, 0xfa, - 0x24, 0xa5, 0x2f, 0x98, 0xa2, 0xcf, 0x1c, 0xd3, 0xa7, 0xdb, 0xda, 0xdb, 0xc3, 0xd8, 0xba, 0x31, - 0x2f, 0x75, 0x3a, 0xf6, 0x52, 0xd3, 0xf6, 0xd7, 0x2c, 0xac, 0xed, 0xfb, 0xc1, 0x0f, 0xf7, 0xf8, - 0x2d, 0x5a, 0x49, 0x93, 0xdd, 0x29, 0xf2, 0xd6, 0xc6, 0xe4, 0x31, 0xc4, 0xfb, 0x98, 0x86, 0xee, - 0x53, 0xfb, 0xc6, 0x30, 0xb6, 0x9a, 0xf3, 0x12, 0x37, 0xc6, 0x5d, 0x66, 0xd2, 0xb4, 0x33, 0x50, - 0x6e, 0xce, 0x33, 0xd0, 0x3f, 0xb2, 0xb0, 0xfe, 0xe1, 0x00, 0x7b, 0xd4, 0xed, 0x11, 0x41, 0x76, - 0x4a, 0xf5, 0xcf, 0xa7, 0xa8, 0xae, 0x8c, 0xa9, 0xd6, 0x31, 0x92, 0xf4, 0xf7, 0x86, 0xb1, 0x75, - 0x73, 0x5e, 0xd2, 0x67, 0x8d, 0xf0, 0x7f, 0x47, 0xff, 0xef, 0xb2, 0xb0, 0xb2, 0x27, 0x4e, 0x6d, - 0xc9, 0xc2, 0x8f, 0x67, 0xd0, 0xae, 0x3e, 0x53, 0x05, 0x07, 0x75, 0x1d, 0x71, 0xbe, 0x24, 0xa1, - 0x63, 0x2f, 0x75, 0x92, 0xf8, 0x43, 0x16, 0xd6, 0x77, 0x08, 0x25, 0x2d, 0x4a, 0xda, 0xf7, 0x5c, - 0xd2, 0x53, 0x48, 0xfc, 0xc8, 0x98, 0x62, 0xb1, 0xaa, 0x5c, 0xb3, 0x66, 0x82, 0x6c, 0x7b, 0x18, - 0x5b, 0xb7, 0xe6, 0xe5, 0x71, 0xf6, 0x18, 0x97, 0x9a, 0xcf, 0xcf, 0xb2, 0xf0, 0x55, 0xf1, 0x74, - 0x20, 0xde, 0x35, 0xc7, 0x74, 0xfe, 0x62, 0x8a, 0x4d, 0x4b, 0x4d, 0x05, 0x33, 0x20, 0xf6, 0xed, - 0x61, 0x6c, 0x7d, 0x7f, 0xfe, 0x5c, 0x30, 0x63, 0x88, 0xff, 0x19, 0x6d, 0xf2, 0xd3, 0xfe, 0x79, - 0xb5, 0xa9, 0x83, 0x2e, 0xa6, 0x4d, 0x7d, 0x8c, 0x4b, 0xcd, 0xe7, 0x5f, 0x8a, 0xb0, 0xcc, 0x55, - 0x92, 0xd2, 0xf8, 0x2d, 0x90, 0xd7, 0x23, 0xc9, 0x21, 0x4a, 0xae, 0xd4, 0x61, 0xd0, 0xaa, 0xef, - 0xc9, 0x8b, 0x93, 0xb0, 0x40, 0xef, 0x40, 0x31, 0xe2, 0x17, 0x57, 0x79, 0xf2, 0xad, 0x4c, 0xbe, - 0x0d, 0xe9, 0x57, 0xe4, 0xdd, 0x8c, 0x23, 0xed, 0xd1, 0x4d, 0x28, 0xf6, 0x38, 0x8b, 0xf2, 0xe2, - 0x5e, 0x9b, 0x44, 0x4e, 0x5f, 0xe5, 0x18, 0x5a, 0x60, 0xd0, 0x0d, 0x28, 0xf0, 0x23, 0xb6, 0x7c, - 0xa3, 0xd6, 0x3e, 0x3b, 0x7d, 0xd0, 0xdd, 0xcd, 0x38, 0xc2, 0x1c, 0x35, 0x21, 0x1f, 0x84, 0x7e, - 0x5f, 0x5e, 0x77, 
0xae, 0x4d, 0x7e, 0x53, 0xbd, 0x1f, 0xec, 0x66, 0x1c, 0x6e, 0x8b, 0xde, 0x82, - 0x52, 0xc4, 0x2f, 0x16, 0x11, 0x7f, 0x28, 0x62, 0xa7, 0xca, 0x09, 0x98, 0x02, 0x49, 0x4c, 0xd1, - 0x5b, 0x50, 0x3c, 0xe6, 0xc7, 0x46, 0xf9, 0xfa, 0xb8, 0xa1, 0x82, 0xf4, 0x03, 0x25, 0x5b, 0x97, - 0xb0, 0x45, 0xf7, 0x60, 0x89, 0xfa, 0xc1, 0x51, 0x72, 0x3a, 0x93, 0x8f, 0x4c, 0x55, 0x15, 0x3b, - 0xeb, 0xf4, 0xb6, 0x9b, 0x71, 0x34, 0x1c, 0x7a, 0x04, 0xab, 0x8f, 0xb5, 0x63, 0x00, 0x49, 0x9e, - 0x13, 0x35, 0x9e, 0x67, 0x1f, 0x50, 0x76, 0x33, 0xce, 0x14, 0x1a, 0xed, 0xc0, 0x4a, 0xa4, 0xed, - 0x70, 0xf2, 0xe9, 0x5c, 0x5b, 0x97, 0xbe, 0x07, 0xee, 0x66, 0x9c, 0x09, 0x0c, 0x7a, 0x08, 0x2b, - 0x6d, 0x2d, 0xbf, 0xcb, 0x87, 0x71, 0x6d, 0x56, 0xb3, 0x77, 0x00, 0x36, 0x9a, 0x8e, 0x45, 0x1f, - 0xc0, 0x6a, 0x30, 0x91, 0xdb, 0xe4, 0xcb, 0xf8, 0xd7, 0xf5, 0x55, 0xce, 0x48, 0x82, 0x6c, 0x91, - 0x93, 0x60, 0x75, 0x7a, 0x22, 0xc4, 0xcd, 0xe5, 0xb3, 0xa7, 0xa7, 0x27, 0x01, 0x75, 0x7a, 0xa2, - 0xc7, 0x86, 0x71, 0x3a, 0xaa, 0x7d, 0x5c, 0x84, 0x25, 0x19, 0x66, 0xe2, 0x35, 0xec, 0xbb, 0x69, - 0xe4, 0x88, 0x28, 0x7b, 0xed, 0xac, 0xc8, 0xe1, 0xe6, 0x4a, 0xe0, 0xbc, 0x99, 0x06, 0x8e, 0x08, - 0xb9, 0xf5, 0x71, 0x8a, 0xe3, 0xdf, 0x55, 0x10, 0x32, 0x58, 0xb6, 0x92, 0x60, 0x11, 0x91, 0xf6, - 0xea, 0xec, 0x3b, 0x65, 0x82, 0x92, 0x91, 0xb2, 0x0d, 0x25, 0x57, 0xfc, 0x44, 0x30, 0x2b, 0xc6, - 0xa6, 0x7f, 0x41, 0x60, 0xda, 0x97, 0x00, 0xb4, 0x35, 0x8e, 0x98, 0x82, 0x7c, 0x12, 0x9f, 0x8a, - 0x98, 0x14, 0x94, 0x04, 0xcc, 0xf5, 0x34, 0x60, 0x8a, 0x93, 0xcf, 0xe8, 0x49, 0xb8, 0xa4, 0x0b, - 0x93, 0xd1, 0x72, 0x17, 0x96, 0x13, 0x7d, 0xf1, 0x2e, 0x19, 0x2e, 0xaf, 0x9d, 0x75, 0xac, 0x4b, - 0xf0, 0x3a, 0x0a, 0xdd, 0x9f, 0x12, 0x65, 0x79, 0x72, 0x2b, 0x9e, 0x94, 0x64, 0x32, 0xd2, 0xa4, - 0x22, 0x1f, 0xc0, 0x95, 0xb1, 0xa8, 0xc4, 0x9c, 0x60, 0xfa, 0x84, 0xaf, 0xc9, 0x31, 0x19, 0x6a, - 0x12, 0xa8, 0x4e, 0x4b, 0x8a, 0x71, 0xf1, 0xac, 0x69, 0x25, 0x52, 0x9c, 0x9a, 0x96, 0xe8, 0x40, - 0xbb, 0xb0, 0xd0, 0x27, 0x14, 0xb7, 0x31, 0xc5, 0x66, 0x89, 0x6f, 0x4b, 0xaf, 0x4f, 0x05, 0x88, - 0x44, 0xd7, 0xdf, 0x97, 0x86, 0x77, 0x3d, 0x1a, 0x9e, 0xc8, 0xb7, 0x8b, 0x14, 0xbd, 0xf1, 0x3d, - 0x58, 0xd6, 0x0c, 0xd0, 0x2a, 0xe4, 0x8e, 0x48, 0xf2, 0xb3, 0x11, 0x2b, 0xa2, 0x35, 0x28, 0x1c, - 0xe3, 0xde, 0x80, 0x70, 0x7d, 0x96, 0x1d, 0x51, 0xd9, 0xce, 0xbe, 0x63, 0xd8, 0x65, 0x28, 0x85, - 0xe2, 0x2b, 0x76, 0xe7, 0xd9, 0xf3, 0x4a, 0xe6, 0xf3, 0xe7, 0x95, 0xcc, 0x17, 0xcf, 0x2b, 0xc6, - 0x47, 0xa3, 0x8a, 0xf1, 0xeb, 0x51, 0xc5, 0xf8, 0x74, 0x54, 0x31, 0x9e, 0x8d, 0x2a, 0xc6, 0x9f, - 0x47, 0x15, 0xe3, 0x6f, 0xa3, 0x4a, 0xe6, 0x8b, 0x51, 0xc5, 0xf8, 0xe4, 0x45, 0x25, 0xf3, 0xec, - 0x45, 0x25, 0xf3, 0xf9, 0x8b, 0x4a, 0xe6, 0x27, 0xd7, 0xcf, 0xbd, 0x43, 0x1e, 0x14, 0x39, 0x53, - 0x5b, 0xff, 0x0c, 0x00, 0x00, 0xff, 0xff, 0xd4, 0x35, 0x32, 0x52, 0x3f, 0x1f, 0x00, 0x00, + // 1944 bytes of a gzipped FileDescriptorProto + 0x1f, 0x8b, 0x08, 0x00, 0x00, 0x00, 0x00, 0x00, 0x02, 0xff, 0xec, 0x59, 0xcb, 0x6f, 0x1b, 0xc7, + 0x19, 0xe7, 0xf2, 0x29, 0x7e, 0x7a, 0x58, 0x1d, 0xab, 0xca, 0x56, 0x71, 0xb8, 0x2c, 0x81, 0x26, + 0x6a, 0xd1, 0x92, 0x31, 0x95, 0xb8, 0x89, 0xea, 0x1a, 0xf1, 0x5a, 0x76, 0x65, 0xd7, 0x69, 0x9c, + 0x95, 0xd0, 0x43, 0x2f, 0xc5, 0x88, 0x1c, 0x91, 0x5b, 0x91, 0xbb, 0xeb, 0xdd, 0xa1, 0x6c, 0x01, + 0x45, 0x91, 0x7f, 0x20, 0x68, 0xee, 0xbd, 0x17, 0xbd, 0x15, 0x05, 0x7a, 0xea, 0xa9, 0xc7, 0xe4, + 0x50, 0xc0, 0xc7, 0x80, 0x40, 0xb7, 0x35, 0xdd, 0x43, 0xa1, 0x53, 0x80, 0xfe, 0x03, 0xc5, 0x3c, + 0x76, 0x39, 0x43, 0x52, 0x35, 0xa5, 0x16, 0x05, 0x14, 0xe4, 0x22, 0xcd, 0xe3, 0xfb, 0xcd, 0xce, + 
0xfc, 0xbe, 0xdf, 0xf7, 0xcd, 0x83, 0xf0, 0x46, 0x70, 0xd4, 0x69, 0x3c, 0x1e, 0x90, 0xd0, 0x25, + 0x21, 0xff, 0x7f, 0x12, 0x62, 0xaf, 0x43, 0x94, 0x62, 0x3d, 0x08, 0x7d, 0xea, 0x23, 0x18, 0xb7, + 0x6c, 0x34, 0x3b, 0x2e, 0xed, 0x0e, 0x0e, 0xea, 0x2d, 0xbf, 0xdf, 0xe8, 0xf8, 0x1d, 0xbf, 0xd1, + 0xf1, 0xfd, 0x4e, 0x8f, 0xe0, 0xc0, 0x8d, 0x64, 0xb1, 0x11, 0x06, 0xad, 0x46, 0x44, 0x31, 0x1d, + 0x44, 0x02, 0xbf, 0xb1, 0xc6, 0x0c, 0x79, 0x91, 0x43, 0x64, 0xab, 0x25, 0xcd, 0x79, 0xed, 0x60, + 0x70, 0xd8, 0xa0, 0x6e, 0x9f, 0x44, 0x14, 0xf7, 0x83, 0xc4, 0x80, 0xcd, 0xaf, 0xe7, 0x77, 0x04, + 0xd2, 0xf5, 0xda, 0xe4, 0x69, 0x07, 0x53, 0xf2, 0x04, 0x9f, 0x48, 0x83, 0x57, 0x35, 0x83, 0xa4, + 0x20, 0x3b, 0x37, 0xb4, 0xce, 0x00, 0x53, 0x4a, 0x42, 0x4f, 0xf6, 0x7d, 0x43, 0xeb, 0x8b, 0x8e, + 0x08, 0x6d, 0x75, 0x65, 0x57, 0x55, 0x76, 0x3d, 0xee, 0xf5, 0xfd, 0x36, 0xe9, 0xf1, 0x85, 0x44, + 0xe2, 0xaf, 0xb4, 0xb8, 0xca, 0x2c, 0x82, 0x41, 0xd4, 0xe5, 0x7f, 0x64, 0xe3, 0x9d, 0x97, 0x72, + 0x79, 0x80, 0x23, 0xd2, 0x68, 0x93, 0x43, 0xd7, 0x73, 0xa9, 0xeb, 0x7b, 0x91, 0x5a, 0x96, 0x83, + 0xdc, 0x98, 0x6f, 0x90, 0x49, 0xff, 0x6c, 0xbc, 0xc9, 0x70, 0x11, 0xf5, 0x43, 0xdc, 0x21, 0x8d, + 0x56, 0x77, 0xe0, 0x1d, 0x35, 0x5a, 0xb8, 0xd5, 0x25, 0x8d, 0x90, 0x44, 0x83, 0x1e, 0x8d, 0x44, + 0x85, 0x9e, 0x04, 0x44, 0x7e, 0xa9, 0xf6, 0x59, 0x1e, 0x16, 0x1f, 0xfa, 0x47, 0xae, 0x43, 0x1e, + 0x0f, 0x48, 0x44, 0xd1, 0x1a, 0x14, 0xf8, 0xa8, 0xa6, 0x51, 0x35, 0x36, 0xcb, 0x8e, 0xa8, 0xb0, + 0xd6, 0x9e, 0xdb, 0x77, 0xa9, 0x99, 0xad, 0x1a, 0x9b, 0xcb, 0x8e, 0xa8, 0x20, 0x04, 0xf9, 0x88, + 0x92, 0xc0, 0xcc, 0x55, 0x8d, 0xcd, 0x9c, 0xc3, 0xcb, 0x68, 0x03, 0x16, 0x5c, 0x8f, 0x92, 0xf0, + 0x18, 0xf7, 0xcc, 0x32, 0x6f, 0x4f, 0xeb, 0xe8, 0x16, 0x94, 0x22, 0x8a, 0x43, 0xba, 0x1f, 0x99, + 0xf9, 0xaa, 0xb1, 0xb9, 0xd8, 0xdc, 0xa8, 0x0b, 0xcf, 0xd7, 0x13, 0xcf, 0xd7, 0xf7, 0x13, 0xcf, + 0xdb, 0x0b, 0x9f, 0xc6, 0x56, 0xe6, 0x93, 0xbf, 0x59, 0x86, 0x93, 0x80, 0xd0, 0x36, 0x14, 0x88, + 0xd7, 0xde, 0x8f, 0xcc, 0xc2, 0x39, 0xd0, 0x02, 0x82, 0xae, 0x43, 0xb9, 0xed, 0x86, 0xa4, 0xc5, + 0x58, 0x36, 0x8b, 0x55, 0x63, 0x73, 0xa5, 0x79, 0xb5, 0x9e, 0x0a, 0x65, 0x27, 0xe9, 0x72, 0xc6, + 0x56, 0x6c, 0x79, 0x01, 0xa6, 0x5d, 0xb3, 0xc4, 0x99, 0xe0, 0x65, 0x54, 0x83, 0x62, 0xd4, 0xc5, + 0x61, 0x3b, 0x32, 0x17, 0xaa, 0xb9, 0xcd, 0xb2, 0x0d, 0xa7, 0xb1, 0x25, 0x5b, 0x1c, 0xf9, 0x1f, + 0xfd, 0x1c, 0xf2, 0x41, 0x0f, 0x7b, 0x26, 0xf0, 0x59, 0xae, 0xd6, 0x15, 0x2f, 0x3d, 0xea, 0x61, + 0xcf, 0x7e, 0x77, 0x18, 0x5b, 0x6f, 0xab, 0xc1, 0x13, 0xe2, 0x43, 0xec, 0xe1, 0x46, 0xcf, 0x3f, + 0x72, 0x1b, 0xc7, 0x5b, 0x0d, 0xd5, 0xf7, 0x6c, 0xa0, 0xfa, 0x87, 0x6c, 0x00, 0x06, 0x75, 0xf8, + 0xc0, 0xe8, 0x01, 0x2c, 0x32, 0x1f, 0x93, 0x3b, 0xcc, 0xc1, 0x91, 0xb9, 0xc8, 0xbf, 0xf3, 0xca, + 0x78, 0x35, 0xbc, 0xdd, 0x21, 0x87, 0x3f, 0x0a, 0xfd, 0x41, 0x60, 0x5f, 0x39, 0x8d, 0x2d, 0xd5, + 0xde, 0x51, 0x2b, 0xe8, 0x01, 0xac, 0x30, 0x51, 0xb8, 0x5e, 0xe7, 0x83, 0x80, 0x2b, 0xd0, 0x5c, + 0xe2, 0xc3, 0x5d, 0xab, 0xab, 0x92, 0xa9, 0xdf, 0xd1, 0x6c, 0xec, 0x3c, 0xa3, 0xd7, 0x99, 0x40, + 0xd6, 0x46, 0x39, 0x40, 0x4c, 0x4b, 0xf7, 0xbd, 0x88, 0x62, 0x8f, 0x5e, 0x44, 0x52, 0x37, 0xa1, + 0xc8, 0x82, 0x7f, 0x3f, 0xe2, 0xa2, 0x9a, 0xd7, 0xc7, 0x12, 0xa3, 0x3b, 0x39, 0x7f, 0x2e, 0x27, + 0x17, 0x66, 0x3a, 0xb9, 0xf8, 0x52, 0x27, 0x97, 0xfe, 0x4f, 0x4e, 0x5e, 0xf8, 0xdf, 0x3a, 0xb9, + 0x7c, 0x61, 0x27, 0x9b, 0x90, 0x67, 0xb3, 0x44, 0xab, 0x90, 0x0b, 0xf1, 0x13, 0xee, 0xd3, 0x25, + 0x87, 0x15, 0x6b, 0xa3, 0x3c, 0x2c, 0x89, 0x54, 0x12, 0x05, 0xbe, 0x17, 0x11, 0xc6, 0xe3, 0x1e, + 0xcf, 0xfe, 0xc2, 0xf3, 
0x92, 0x47, 0xde, 0xe2, 0xc8, 0x1e, 0xf4, 0x1e, 0xe4, 0x77, 0x30, 0xc5, + 0x5c, 0x05, 0x8b, 0xcd, 0x35, 0x95, 0x47, 0x36, 0x16, 0xeb, 0xb3, 0xd7, 0xd9, 0x44, 0x4e, 0x63, + 0x6b, 0xa5, 0x8d, 0x29, 0xfe, 0xae, 0xdf, 0x77, 0x29, 0xe9, 0x07, 0xf4, 0xc4, 0xe1, 0x48, 0xf4, + 0x36, 0x94, 0xef, 0x86, 0xa1, 0x1f, 0xee, 0x9f, 0x04, 0x84, 0xab, 0xa6, 0x6c, 0xbf, 0x72, 0x1a, + 0x5b, 0x57, 0x49, 0xd2, 0xa8, 0x20, 0xc6, 0x96, 0xe8, 0xdb, 0x50, 0xe0, 0x15, 0xae, 0x93, 0xb2, + 0x7d, 0xf5, 0x34, 0xb6, 0xae, 0x70, 0x88, 0x62, 0x2e, 0x2c, 0x74, 0x59, 0x15, 0xe6, 0x92, 0x55, + 0xaa, 0xee, 0xa2, 0xaa, 0x6e, 0x13, 0x4a, 0xc7, 0x24, 0x8c, 0xd8, 0x30, 0x25, 0xde, 0x9e, 0x54, + 0xd1, 0x6d, 0x00, 0x46, 0x8c, 0x1b, 0x51, 0xb7, 0x95, 0x38, 0x7b, 0xb9, 0x2e, 0x36, 0x1b, 0x87, + 0xfb, 0xc8, 0x46, 0x92, 0x05, 0xc5, 0xd0, 0x51, 0xca, 0xe8, 0xf7, 0x06, 0x94, 0x76, 0x09, 0x6e, + 0x93, 0x90, 0xb9, 0x37, 0xb7, 0xb9, 0xd8, 0xfc, 0x56, 0x5d, 0xdd, 0x59, 0x1e, 0x85, 0x7e, 0x9f, + 0xd0, 0x2e, 0x19, 0x44, 0x89, 0x83, 0x84, 0xb5, 0xed, 0x0d, 0x63, 0x8b, 0xcc, 0x29, 0xd5, 0xb9, + 0x36, 0xb4, 0x33, 0x3f, 0x75, 0x1a, 0x5b, 0xc6, 0xf7, 0x9c, 0x64, 0x96, 0xa8, 0x09, 0x0b, 0x4f, + 0x70, 0xe8, 0xb9, 0x5e, 0x27, 0x32, 0x81, 0x47, 0xda, 0xfa, 0x69, 0x6c, 0xa1, 0xa4, 0x4d, 0x71, + 0x44, 0x6a, 0x57, 0xfb, 0xab, 0x01, 0x5f, 0x63, 0xc2, 0xd8, 0x63, 0xf3, 0x89, 0x94, 0x14, 0xd3, + 0xc7, 0xb4, 0xd5, 0x35, 0x0d, 0x36, 0x8c, 0x23, 0x2a, 0xea, 0x7e, 0x93, 0xfd, 0xaf, 0xf6, 0x9b, + 0xdc, 0xf9, 0xf7, 0x9b, 0x24, 0xaf, 0xe4, 0x67, 0xe6, 0x95, 0xc2, 0x59, 0x79, 0xa5, 0xf6, 0x6b, + 0x99, 0x43, 0x93, 0xf5, 0x9d, 0x23, 0x94, 0xee, 0xa5, 0xa1, 0x94, 0xe3, 0xb3, 0x4d, 0x15, 0x2a, + 0xc6, 0xba, 0xdf, 0x26, 0x1e, 0x75, 0x0f, 0x5d, 0x12, 0xbe, 0x24, 0xa0, 0x14, 0x95, 0xe6, 0x74, + 0x95, 0xaa, 0x12, 0xcb, 0x5f, 0x0a, 0x89, 0xe9, 0x71, 0x55, 0xb8, 0x40, 0x5c, 0xd5, 0xfe, 0x95, + 0x85, 0x75, 0xe6, 0x91, 0x87, 0xf8, 0x80, 0xf4, 0x7e, 0x82, 0xfb, 0xe7, 0xf4, 0xca, 0xeb, 0x8a, + 0x57, 0xca, 0x36, 0xfa, 0x8a, 0xf5, 0xf9, 0x58, 0xff, 0xad, 0x01, 0x0b, 0xc9, 0x06, 0x80, 0xea, + 0x00, 0x02, 0xc6, 0x73, 0xbc, 0xe0, 0x7a, 0x85, 0x81, 0xc3, 0xb4, 0xd5, 0x51, 0x2c, 0xd0, 0x2f, + 0xa0, 0x28, 0x6a, 0x32, 0x16, 0x94, 0x6d, 0x73, 0x8f, 0x86, 0x04, 0xf7, 0x6f, 0xb7, 0x71, 0x40, + 0x49, 0x68, 0xbf, 0xcb, 0x66, 0x31, 0x8c, 0xad, 0x37, 0xce, 0x62, 0x29, 0x39, 0xe1, 0x4b, 0x1c, + 0xf3, 0xaf, 0xf8, 0xa6, 0x23, 0xbf, 0x50, 0xfb, 0xd8, 0x80, 0x55, 0x36, 0x51, 0x46, 0x4d, 0x2a, + 0x8c, 0x1d, 0x58, 0x08, 0x65, 0x99, 0x4f, 0x77, 0xb1, 0x59, 0xab, 0xeb, 0xb4, 0xce, 0xa0, 0x92, + 0x6f, 0xb8, 0x86, 0x93, 0x22, 0xd1, 0x96, 0x46, 0x63, 0x76, 0x16, 0x8d, 0x62, 0x8f, 0x56, 0x89, + 0xfb, 0x73, 0x16, 0xd0, 0x7d, 0x76, 0x43, 0x62, 0xfa, 0x1b, 0x4b, 0xf5, 0xe9, 0xd4, 0x8c, 0xae, + 0x8d, 0x49, 0x99, 0xb6, 0xb7, 0x6f, 0x0d, 0x63, 0x6b, 0xfb, 0x25, 0xda, 0xf9, 0x0f, 0x78, 0x65, + 0x15, 0xaa, 0x7c, 0xb3, 0x97, 0x41, 0xbe, 0xb5, 0x3f, 0x66, 0x61, 0xe5, 0xa7, 0x7e, 0x6f, 0xd0, + 0x27, 0x29, 0x7d, 0xc1, 0x14, 0x7d, 0xe6, 0x98, 0x3e, 0xdd, 0xd6, 0xde, 0x1e, 0xc6, 0xd6, 0x8d, + 0x79, 0xa9, 0xd3, 0xb1, 0x97, 0x9a, 0xb6, 0xdf, 0xe4, 0x60, 0x6d, 0xdf, 0x0f, 0x7e, 0xbc, 0xc7, + 0x6f, 0xd1, 0x4a, 0x9a, 0xec, 0x4e, 0x91, 0xb7, 0x36, 0x26, 0x8f, 0x21, 0xde, 0xc7, 0x34, 0x74, + 0x9f, 0xda, 0x37, 0x86, 0xb1, 0xd5, 0x9c, 0x97, 0xb8, 0x31, 0xee, 0x32, 0x93, 0xa6, 0x9d, 0x81, + 0x72, 0xf3, 0x9d, 0x81, 0x26, 0xf2, 0x42, 0x7e, 0xbe, 0xbc, 0xf0, 0x87, 0x1c, 0xac, 0x7f, 0x38, + 0xc0, 0x1e, 0x75, 0x7b, 0x44, 0x78, 0x28, 0xf5, 0xcf, 0x2f, 0xa7, 0xfc, 0x53, 0x19, 0xfb, 0x47, + 0xc7, 0x48, 0x4f, 0xbd, 0x37, 0x8c, 0xad, 0x9b, 
0xf3, 0x7a, 0x6a, 0xd6, 0x08, 0x5f, 0xf9, 0x6c, + 0x1e, 0x9f, 0xfd, 0x29, 0x0b, 0x2b, 0x7b, 0xe2, 0x7c, 0x98, 0xb0, 0x75, 0x3c, 0xc3, 0x57, 0xea, + 0x83, 0x58, 0x70, 0x50, 0xd7, 0x11, 0xe7, 0x4b, 0x47, 0x3a, 0xf6, 0x52, 0xa7, 0xa3, 0xbf, 0x64, + 0x61, 0x7d, 0x87, 0x50, 0xd2, 0xa2, 0xa4, 0x7d, 0xcf, 0x25, 0x3d, 0x85, 0xc4, 0x8f, 0x8c, 0x29, + 0x16, 0xab, 0xca, 0x85, 0x6e, 0x26, 0xc8, 0xb6, 0x87, 0xb1, 0x75, 0x6b, 0x5e, 0x1e, 0x67, 0x8f, + 0x71, 0xa9, 0xf9, 0xfc, 0x2c, 0x0b, 0x5f, 0x17, 0x8f, 0x14, 0xe2, 0x05, 0x75, 0x4c, 0xe7, 0xaf, + 0xa6, 0xd8, 0xb4, 0xd4, 0xfc, 0x31, 0x03, 0x62, 0xdf, 0x1e, 0xc6, 0xd6, 0x0f, 0xe7, 0x4f, 0x20, + 0x33, 0x86, 0xf8, 0xd2, 0x68, 0x93, 0xdf, 0x2b, 0xce, 0xab, 0x4d, 0x1d, 0x74, 0x31, 0x6d, 0xea, + 0x63, 0x5c, 0x6a, 0x3e, 0xff, 0x51, 0x84, 0x65, 0xae, 0x92, 0x94, 0xc6, 0xef, 0x80, 0xbc, 0x88, + 0x49, 0x0e, 0x51, 0x72, 0x79, 0x0f, 0x83, 0x56, 0x7d, 0x4f, 0x5e, 0xd1, 0x84, 0x05, 0x7a, 0x07, + 0x8a, 0x11, 0xbf, 0x22, 0xcb, 0x33, 0x76, 0x65, 0xf2, 0x15, 0x4a, 0xbf, 0x8c, 0xef, 0x66, 0x1c, + 0x69, 0x8f, 0x6e, 0x42, 0xb1, 0xc7, 0x59, 0x94, 0x4f, 0x04, 0xb5, 0x49, 0xe4, 0xf4, 0xa5, 0x91, + 0xa1, 0x05, 0x06, 0xdd, 0x80, 0x02, 0xdf, 0x00, 0xe4, 0x76, 0xa0, 0x7d, 0x76, 0xfa, 0x48, 0xbd, + 0x9b, 0x71, 0x84, 0x39, 0x6a, 0x42, 0x3e, 0x08, 0xfd, 0xbe, 0xbc, 0x58, 0x5d, 0x9b, 0xfc, 0xa6, + 0x7a, 0x13, 0xd9, 0xcd, 0x38, 0xdc, 0x16, 0xbd, 0x05, 0xa5, 0x88, 0x5f, 0x61, 0x22, 0xfe, 0x24, + 0xc5, 0xce, 0xaf, 0x13, 0x30, 0x05, 0x92, 0x98, 0xa2, 0xb7, 0xa0, 0x78, 0xcc, 0x0f, 0xa8, 0xf2, + 0x9d, 0x73, 0x43, 0x05, 0xe9, 0x47, 0x57, 0xb6, 0x2e, 0x61, 0x8b, 0xee, 0xc1, 0x12, 0xf5, 0x83, + 0xa3, 0xe4, 0x1c, 0x28, 0x9f, 0xb3, 0xaa, 0x2a, 0x76, 0xd6, 0x39, 0x71, 0x37, 0xe3, 0x68, 0x38, + 0xf4, 0x08, 0x56, 0x1f, 0x6b, 0x67, 0x07, 0x92, 0x3c, 0x5c, 0x6a, 0x3c, 0xcf, 0x3e, 0xd5, 0xec, + 0x66, 0x9c, 0x29, 0x34, 0xda, 0x81, 0x95, 0x48, 0xdb, 0xe1, 0xe4, 0x23, 0xbd, 0xb6, 0x2e, 0x7d, + 0x0f, 0xdc, 0xcd, 0x38, 0x13, 0x18, 0xf4, 0x10, 0x56, 0xda, 0x5a, 0x7e, 0x97, 0x4f, 0xf0, 0xda, + 0xac, 0x66, 0xef, 0x00, 0x6c, 0x34, 0x1d, 0x8b, 0x3e, 0x80, 0xd5, 0x60, 0x22, 0xb7, 0xc9, 0x37, + 0xf8, 0x6f, 0xea, 0xab, 0x9c, 0x91, 0x04, 0xd9, 0x22, 0x27, 0xc1, 0xea, 0xf4, 0x44, 0x88, 0x9b, + 0xcb, 0x67, 0x4f, 0x4f, 0x4f, 0x02, 0xea, 0xf4, 0x44, 0x8f, 0x0d, 0xe3, 0x74, 0x54, 0xfb, 0xb8, + 0x08, 0x4b, 0x32, 0xcc, 0xc4, 0xbb, 0xdb, 0xf7, 0xd3, 0xc8, 0x11, 0x51, 0xf6, 0xda, 0x59, 0x91, + 0xc3, 0xcd, 0x95, 0xc0, 0x79, 0x33, 0x0d, 0x1c, 0x11, 0x72, 0xeb, 0xe3, 0x14, 0xc7, 0xbf, 0xab, + 0x20, 0x64, 0xb0, 0x6c, 0x25, 0xc1, 0x22, 0x22, 0xed, 0xd5, 0xd9, 0xb7, 0xd7, 0x04, 0x25, 0x23, + 0x65, 0x1b, 0x4a, 0xae, 0xf8, 0x31, 0x62, 0x56, 0x8c, 0x4d, 0xff, 0x56, 0xc1, 0xb4, 0x2f, 0x01, + 0x68, 0x6b, 0x1c, 0x31, 0x05, 0xf9, 0xf8, 0x3e, 0x15, 0x31, 0x29, 0x28, 0x09, 0x98, 0xeb, 0x69, + 0xc0, 0x14, 0x27, 0x1f, 0xec, 0x93, 0x70, 0x49, 0x17, 0x26, 0xa3, 0xe5, 0x2e, 0x2c, 0x27, 0xfa, + 0xe2, 0x5d, 0x32, 0x5c, 0x5e, 0x3b, 0xeb, 0x58, 0x97, 0xe0, 0x75, 0x14, 0xba, 0x3f, 0x25, 0xca, + 0xf2, 0xe4, 0x56, 0x3c, 0x29, 0xc9, 0x64, 0xa4, 0x49, 0x45, 0x3e, 0x80, 0x2b, 0x63, 0x51, 0x89, + 0x39, 0xc1, 0xf4, 0xb5, 0x40, 0x93, 0x63, 0x32, 0xd4, 0x24, 0x50, 0x9d, 0x96, 0x14, 0xe3, 0xe2, + 0x59, 0xd3, 0x4a, 0xa4, 0x38, 0x35, 0x2d, 0xd1, 0x81, 0x76, 0x61, 0xa1, 0x4f, 0x28, 0x6e, 0x63, + 0x8a, 0xcd, 0x12, 0xdf, 0x96, 0x5e, 0x9f, 0x0a, 0x10, 0x89, 0xae, 0xbf, 0x2f, 0x0d, 0xef, 0x7a, + 0x34, 0x3c, 0x91, 0x27, 0xeb, 0x14, 0xbd, 0xf1, 0x03, 0x58, 0xd6, 0x0c, 0xd0, 0x2a, 0xe4, 0x8e, + 0x48, 0xf2, 0x03, 0x15, 0x2b, 0xa2, 0x35, 0x28, 0x1c, 0xe3, 0xde, 0x80, 
0x70, 0x7d, 0x96, 0x1d, + 0x51, 0xd9, 0xce, 0xbe, 0x63, 0xd8, 0x65, 0x28, 0x85, 0xe2, 0x2b, 0x76, 0xe7, 0xd9, 0xf3, 0x4a, + 0xe6, 0xf3, 0xe7, 0x95, 0xcc, 0x17, 0xcf, 0x2b, 0xc6, 0x47, 0xa3, 0x8a, 0xf1, 0xbb, 0x51, 0xc5, + 0xf8, 0x74, 0x54, 0x31, 0x9e, 0x8d, 0x2a, 0xc6, 0xdf, 0x47, 0x15, 0xe3, 0x9f, 0xa3, 0x4a, 0xe6, + 0x8b, 0x51, 0xc5, 0xf8, 0xe4, 0x45, 0x25, 0xf3, 0xec, 0x45, 0x25, 0xf3, 0xf9, 0x8b, 0x4a, 0xe6, + 0x67, 0xd7, 0xcf, 0xbd, 0x43, 0x1e, 0x14, 0x39, 0x53, 0x5b, 0xff, 0x0e, 0x00, 0x00, 0xff, 0xff, + 0x6b, 0xd4, 0xf8, 0x77, 0xa9, 0x1f, 0x00, 0x00, } func (this *LokiRequest) Equal(that interface{}) bool { @@ -2131,6 +2148,9 @@ func (this *TopKSketchesResponse) Equal(that interface{}) bool { return false } } + if !this.Statistics.Equal(&that1.Statistics) { + return false + } return true } func (this *QuantileSketchResponse) Equal(that interface{}) bool { @@ -2175,6 +2195,9 @@ func (this *QuantileSketchResponse) Equal(that interface{}) bool { return false } } + if !this.Statistics.Equal(&that1.Statistics) { + return false + } return true } func (this *ShardsResponse) Equal(that interface{}) bool { @@ -3090,11 +3113,12 @@ func (this *TopKSketchesResponse) GoString() string { if this == nil { return "nil" } - s := make([]string, 0, 7) + s := make([]string, 0, 8) s = append(s, "&queryrange.TopKSketchesResponse{") s = append(s, "Response: "+fmt.Sprintf("%#v", this.Response)+",\n") s = append(s, "Headers: "+fmt.Sprintf("%#v", this.Headers)+",\n") s = append(s, "Warnings: "+fmt.Sprintf("%#v", this.Warnings)+",\n") + s = append(s, "Statistics: "+strings.Replace(this.Statistics.GoString(), `&`, ``, 1)+",\n") s = append(s, "}") return strings.Join(s, "") } @@ -3102,11 +3126,12 @@ func (this *QuantileSketchResponse) GoString() string { if this == nil { return "nil" } - s := make([]string, 0, 7) + s := make([]string, 0, 8) s = append(s, "&queryrange.QuantileSketchResponse{") s = append(s, "Response: "+fmt.Sprintf("%#v", this.Response)+",\n") s = append(s, "Headers: "+fmt.Sprintf("%#v", this.Headers)+",\n") s = append(s, "Warnings: "+fmt.Sprintf("%#v", this.Warnings)+",\n") + s = append(s, "Statistics: "+strings.Replace(this.Statistics.GoString(), `&`, ``, 1)+",\n") s = append(s, "}") return strings.Join(s, "") } @@ -4136,6 +4161,16 @@ func (m *TopKSketchesResponse) MarshalToSizedBuffer(dAtA []byte) (int, error) { _ = i var l int _ = l + { + size, err := m.Statistics.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintQueryrange(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x22 if len(m.Warnings) > 0 { for iNdEx := len(m.Warnings) - 1; iNdEx >= 0; iNdEx-- { i -= len(m.Warnings[iNdEx]) @@ -4194,6 +4229,16 @@ func (m *QuantileSketchResponse) MarshalToSizedBuffer(dAtA []byte) (int, error) _ = i var l int _ = l + { + size, err := m.Statistics.MarshalToSizedBuffer(dAtA[:i]) + if err != nil { + return 0, err + } + i -= size + i = encodeVarintQueryrange(dAtA, i, uint64(size)) + } + i-- + dAtA[i] = 0x22 if len(m.Warnings) > 0 { for iNdEx := len(m.Warnings) - 1; iNdEx >= 0; iNdEx-- { i -= len(m.Warnings[iNdEx]) @@ -5306,6 +5351,8 @@ func (m *TopKSketchesResponse) Size() (n int) { n += 1 + l + sovQueryrange(uint64(l)) } } + l = m.Statistics.Size() + n += 1 + l + sovQueryrange(uint64(l)) return n } @@ -5331,6 +5378,8 @@ func (m *QuantileSketchResponse) Size() (n int) { n += 1 + l + sovQueryrange(uint64(l)) } } + l = m.Statistics.Size() + n += 1 + l + sovQueryrange(uint64(l)) return n } @@ -5884,6 +5933,7 @@ func (this *TopKSketchesResponse) String() string { 
`Response:` + fmt.Sprintf("%v", this.Response) + `,`, `Headers:` + fmt.Sprintf("%v", this.Headers) + `,`, `Warnings:` + fmt.Sprintf("%v", this.Warnings) + `,`, + `Statistics:` + strings.Replace(strings.Replace(fmt.Sprintf("%v", this.Statistics), "Result", "stats.Result", 1), `&`, ``, 1) + `,`, `}`, }, "") return s @@ -5896,6 +5946,7 @@ func (this *QuantileSketchResponse) String() string { `Response:` + fmt.Sprintf("%v", this.Response) + `,`, `Headers:` + fmt.Sprintf("%v", this.Headers) + `,`, `Warnings:` + fmt.Sprintf("%v", this.Warnings) + `,`, + `Statistics:` + strings.Replace(strings.Replace(fmt.Sprintf("%v", this.Statistics), "Result", "stats.Result", 1), `&`, ``, 1) + `,`, `}`, }, "") return s @@ -8591,6 +8642,39 @@ func (m *TopKSketchesResponse) Unmarshal(dAtA []byte) error { } m.Warnings = append(m.Warnings, string(dAtA[iNdEx:postIndex])) iNdEx = postIndex + case 4: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Statistics", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowQueryrange + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthQueryrange + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthQueryrange + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if err := m.Statistics.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex default: iNdEx = preIndex skippy, err := skipQueryrange(dAtA[iNdEx:]) @@ -8746,6 +8830,39 @@ func (m *QuantileSketchResponse) Unmarshal(dAtA []byte) error { } m.Warnings = append(m.Warnings, string(dAtA[iNdEx:postIndex])) iNdEx = postIndex + case 4: + if wireType != 2 { + return fmt.Errorf("proto: wrong wireType = %d for field Statistics", wireType) + } + var msglen int + for shift := uint(0); ; shift += 7 { + if shift >= 64 { + return ErrIntOverflowQueryrange + } + if iNdEx >= l { + return io.ErrUnexpectedEOF + } + b := dAtA[iNdEx] + iNdEx++ + msglen |= int(b&0x7F) << shift + if b < 0x80 { + break + } + } + if msglen < 0 { + return ErrInvalidLengthQueryrange + } + postIndex := iNdEx + msglen + if postIndex < 0 { + return ErrInvalidLengthQueryrange + } + if postIndex > l { + return io.ErrUnexpectedEOF + } + if err := m.Statistics.Unmarshal(dAtA[iNdEx:postIndex]); err != nil { + return err + } + iNdEx = postIndex default: iNdEx = preIndex skippy, err := skipQueryrange(dAtA[iNdEx:]) diff --git a/pkg/querier/queryrange/queryrange.proto b/pkg/querier/queryrange/queryrange.proto index 2169b2da3154..47bec327369f 100644 --- a/pkg/querier/queryrange/queryrange.proto +++ b/pkg/querier/queryrange/queryrange.proto @@ -169,6 +169,7 @@ message TopKSketchesResponse { (gogoproto.customtype) = "github.com/grafana/loki/v3/pkg/querier/queryrange/queryrangebase/definitions.PrometheusResponseHeader" ]; repeated string warnings = 3 [(gogoproto.jsontag) = "warnings,omitempty"]; + stats.Result statistics = 4 [(gogoproto.nullable) = false]; } message QuantileSketchResponse { @@ -178,6 +179,7 @@ message QuantileSketchResponse { (gogoproto.customtype) = "github.com/grafana/loki/v3/pkg/querier/queryrange/queryrangebase/definitions.PrometheusResponseHeader" ]; repeated string warnings = 3 [(gogoproto.jsontag) = "warnings,omitempty"]; + stats.Result statistics = 4 [(gogoproto.nullable) = false]; } message ShardsResponse { diff --git a/pkg/querier/queryrange/roundtrip.go 
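// Illustrative sketch, not part of the generated code above: the new
// `statistics` field is wire field 4 with wire type 2 (length-delimited),
// which is why the generated marshaller writes the tag byte 0x22, i.e.
// (4<<3)|2. Because the field is declared (gogoproto.nullable) = false, every
// split sketch response carries a stats.Result value that callers can fold
// together. Assumes the Merge method that Loki's pkg/logqlmodel/stats.Result
// provides.
func mergeSketchStatistics(resps []*QuantileSketchResponse) stats.Result {
	var merged stats.Result
	for _, r := range resps {
		merged.Merge(r.GetStatistics()) // accessor introduced in this change
	}
	return merged
}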
b/pkg/querier/queryrange/roundtrip.go index 3b8031cb5e1e..4b9f3dbca9da 100644 --- a/pkg/querier/queryrange/roundtrip.go +++ b/pkg/querier/queryrange/roundtrip.go @@ -11,7 +11,6 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" "github.com/grafana/dskit/httpgrpc" - "github.com/grafana/dskit/tenant" "github.com/grafana/dskit/user" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" @@ -30,7 +29,6 @@ import ( "github.com/grafana/loki/v3/pkg/util/constants" "github.com/grafana/loki/v3/pkg/util/httpreq" logutil "github.com/grafana/loki/v3/pkg/util/log" - "github.com/grafana/loki/v3/pkg/util/validation" ) const ( @@ -240,14 +238,11 @@ func NewMiddleware( } detectedFieldsTripperware, err := NewDetectedFieldsTripperware( - cfg, - log, limits, schema, - codec, - iqo, - metrics, - metricsNamespace) + limitedTripperware, + logFilterTripperware, + ) if err != nil { return nil, nil, err } @@ -1218,88 +1213,15 @@ func sharedIndexTripperware( // NewDetectedFieldsTripperware creates a new frontend tripperware responsible for handling detected field requests, which are basically log filter requests with a bit more processing. func NewDetectedFieldsTripperware( - cfg Config, - log log.Logger, limits Limits, - schema config.SchemaConfig, - merger base.Merger, - iqo util.IngesterQueryOptions, - metrics *Metrics, - metricsNamespace string, + _ config.SchemaConfig, + limitedTripperware base.Middleware, + logTripperware base.Middleware, ) (base.Middleware, error) { return base.MiddlewareFunc(func(next base.Handler) base.Handler { - splitter := newDefaultSplitter(limits, iqo) - - queryRangeMiddleware := []base.Middleware{ - StatsCollectorMiddleware(), - NewLimitsMiddleware(limits), - base.InstrumentMiddleware("split_by_interval", metrics.InstrumentMiddlewareMetrics), - SplitByIntervalMiddleware(schema.Configs, limits, merger, splitter, metrics.SplitByMetrics), - } - - if cfg.MaxRetries > 0 { - queryRangeMiddleware = append( - queryRangeMiddleware, base.InstrumentMiddleware("retry", metrics.InstrumentMiddlewareMetrics), - base.NewRetryMiddleware(log, cfg.MaxRetries, metrics.RetryMiddlewareMetrics, metricsNamespace), - ) - } + limitedHandler := limitedTripperware.Wrap(next) + logHandler := logTripperware.Wrap(next) - limitedRT := NewLimitedRoundTripper(next, limits, schema.Configs, queryRangeMiddleware...) - return NewSketchRemovingHandler(limitedRT, limits, splitter) + return NewDetectedFieldsHandler(limitedHandler, logHandler, limits) }), nil } - -// NewSketchRemovingHandler returns a handler that removes sketches from detected fields responses before -// returning them to the user. We only need sketches internally for calculating cardinality for split queries. -// We're already doing this sanitization in the merge code, so this handler catches non-split queries -// to make sure their sketches are also removed. 
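// Equivalent shape of the rewritten NewDetectedFieldsTripperware above, as an
// illustrative standalone sketch: rather than assembling its own split/retry
// middleware chain, it reuses the already-built limited and log-filter
// tripperwares and hands both wrapped handlers to NewDetectedFieldsHandler,
// which is defined elsewhere in this change.
func exampleDetectedFieldsWiring(next base.Handler, limited, logFilter base.Middleware, limits Limits) base.Handler {
	limitedHandler := limited.Wrap(next) // limited-query leg
	logHandler := logFilter.Wrap(next)   // log-filter leg
	return NewDetectedFieldsHandler(limitedHandler, logHandler, limits)
}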
-func NewSketchRemovingHandler(next queryrangebase.Handler, limits Limits, splitter splitter) queryrangebase.Handler { - return queryrangebase.HandlerFunc( - func(ctx context.Context, req queryrangebase.Request) (queryrangebase.Response, error) { - res, err := next.Do(ctx, req) - if err != nil { - return nil, err - } - - resp, ok := res.(*DetectedFieldsResponse) - if !ok { - return res, nil - } - - tenantIDs, err := tenant.TenantIDs(ctx) - if err != nil { - return resp, nil - } - - interval := validation.SmallestPositiveNonZeroDurationPerTenant( - tenantIDs, - limits.QuerySplitDuration, - ) - - // sketeches get cleaned up in the merge code, so we only need catch the cases - // where no splitting happened - if interval == 0 { - return removeSketches(resp), nil - } - - intervals, err := splitter.split(time.Now().UTC(), tenantIDs, req, interval) - if err != nil || len(intervals) < 2 { - return removeSketches(resp), nil - } - - // must have been splits, so sketches are already removed - return resp, nil - }, - ) -} - -// removeSketches removes sketches and field limit from a detected fields response. -// this is only needed for queries that were not split. -func removeSketches(resp *DetectedFieldsResponse) *DetectedFieldsResponse { - for i := range resp.Response.Fields { - resp.Response.Fields[i].Sketch = nil - } - - resp.Response.FieldLimit = 0 - return resp -} diff --git a/pkg/querier/queryrange/shard_resolver.go b/pkg/querier/queryrange/shard_resolver.go index 31366d0a0dd7..4fe444c3bc59 100644 --- a/pkg/querier/queryrange/shard_resolver.go +++ b/pkg/querier/queryrange/shard_resolver.go @@ -225,7 +225,10 @@ func (r *dynamicShardResolver) ShardingRanges(expr syntax.Expr, targetBytesPerSh ) { log := spanlogger.FromContext(r.ctx) - adjustedFrom := r.from + var ( + adjustedFrom = r.from + adjustedThrough model.Time + ) // NB(owen-d): there should only ever be 1 matcher group passed // to this call as we call it separately for different legs @@ -236,18 +239,30 @@ func (r *dynamicShardResolver) ShardingRanges(expr syntax.Expr, targetBytesPerSh } for _, grp := range grps { - diff := grp.Interval + grp.Offset + diff := grp.Interval // For instant queries, when start == end, // we have a default lookback which we add here - if grp.Interval == 0 { - diff = diff + r.defaultLookback + if diff == 0 { + diff = r.defaultLookback } + diff += grp.Offset + // use the oldest adjustedFrom if r.from.Add(-diff).Before(adjustedFrom) { adjustedFrom = r.from.Add(-diff) } + + // use the latest adjustedThrough + if r.through.Add(-grp.Offset).After(adjustedThrough) { + adjustedThrough = r.through.Add(-grp.Offset) + } + } + + // handle the case where there are no matchers + if adjustedThrough == 0 { + adjustedThrough = r.through } exprStr := expr.String() @@ -256,7 +271,7 @@ func (r *dynamicShardResolver) ShardingRanges(expr syntax.Expr, targetBytesPerSh // use the retry handler here to retry transient errors resp, err := r.retryNextHandler.Do(r.ctx, &logproto.ShardsRequest{ From: adjustedFrom, - Through: r.through, + Through: adjustedThrough, Query: expr.String(), TargetBytesPerShard: targetBytesPerShard, }) diff --git a/pkg/storage/bloom/spec.go b/pkg/storage/bloom/spec.go deleted file mode 100644 index 19f5940ddfbc..000000000000 --- a/pkg/storage/bloom/spec.go +++ /dev/null @@ -1,29 +0,0 @@ -package bloom - -import "github.com/prometheus/common/model" - -type Metadata interface { - Version() uint32 - NumSeries() uint64 - NumChunks() uint64 - Size() uint64 // bytes - - // timestamps - From() int64 - Through() int64 - 
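// Worked sketch of the shard_resolver.go adjustment above, as a hypothetical
// self-contained helper (model is github.com/prometheus/common/model): an
// instant query's matcher group has interval 0 and receives the default
// lookback before the offset is added, so the start edge widens by
// lookback+offset while the end edge only shifts back by the offset. When a
// query has no matcher groups at all, adjustedThrough falls back to the
// request's through time, matching the fallback in the diff.
func adjustShardRange(from, through model.Time, interval, offset, defaultLookback time.Duration) (model.Time, model.Time) {
	diff := interval
	if diff == 0 {
		diff = defaultLookback // instant query: start == end
	}
	diff += offset
	return from.Add(-diff), through.Add(-offset)
}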
- // series - FromFingerprint() model.Fingerprint - ThroughFingerprint() model.Fingerprint -} - -type Iterator[K any, V any] interface { - Next() bool - Err() error - At() V - Seek(K) Iterator[K, V] -} - -type Block interface { - SeriesIterator() Iterator[model.Fingerprint, []byte] -} diff --git a/pkg/storage/bloom/v1/TODO.md b/pkg/storage/bloom/v1/TODO.md deleted file mode 100644 index 1ff1c6a8ad06..000000000000 --- a/pkg/storage/bloom/v1/TODO.md +++ /dev/null @@ -1,18 +0,0 @@ -* Less copying! I've taken some shortcuts we'll need to refactor to avoid copying []byte around in a few places -* more sophisticated querying methods -* queue access to blooms -* multiplex reads across blooms -* Queueing system for bloom access -* bloom hierarchies (bloom per block, etc). Test a tree of blooms down the to individual series/chunk -* memoize hashing & bucket lookups during queries -* versioning - * so we can change implementations - * encode bloom parameters in block: sbf params, hashing strategy, tokenizer -* caching -* ability to download indices without chunks - - -# merge querier for different blocks -* how to merge two block queriers with the same fp -* merge block querier should use iterator interface - * As long as MergeBlockQuerier can Peek, we can make another querier impl to dedupe with some function (i.e. prefer series with more chunks indexed) \ No newline at end of file diff --git a/pkg/storage/bloom/v1/archive.go b/pkg/storage/bloom/v1/archive.go index fcc3294eba97..fce83d69e41d 100644 --- a/pkg/storage/bloom/v1/archive.go +++ b/pkg/storage/bloom/v1/archive.go @@ -8,7 +8,11 @@ import ( "github.com/pkg/errors" - "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" +) + +const ( + ExtTar = ".tar" ) type TarEntry struct { @@ -17,16 +21,24 @@ type TarEntry struct { Body io.ReadSeeker } -func TarGz(dst io.Writer, reader BlockReader) error { +func TarCompress(enc compression.Encoding, dst io.Writer, reader BlockReader) error { + comprPool := compression.GetWriterPool(enc) + comprWriter := comprPool.GetWriter(dst) + defer func() { + comprWriter.Close() + comprPool.PutWriter(comprWriter) + }() + + return Tar(comprWriter, reader) +} + +func Tar(dst io.Writer, reader BlockReader) error { itr, err := reader.TarEntries() if err != nil { return errors.Wrap(err, "error getting tar entries") } - gzipper := chunkenc.GetWriterPool(chunkenc.EncGZIP).GetWriter(dst) - defer gzipper.Close() - - tarballer := tar.NewWriter(gzipper) + tarballer := tar.NewWriter(dst) defer tarballer.Close() for itr.Next() { @@ -49,13 +61,19 @@ func TarGz(dst io.Writer, reader BlockReader) error { return itr.Err() } -func UnTarGz(dst string, r io.Reader) error { - gzipper, err := chunkenc.GetReaderPool(chunkenc.EncGZIP).GetReader(r) +func UnTarCompress(enc compression.Encoding, dst string, r io.Reader) error { + comprPool := compression.GetReaderPool(enc) + comprReader, err := comprPool.GetReader(r) if err != nil { - return errors.Wrap(err, "error getting gzip reader") + return errors.Wrapf(err, "error getting %s reader", enc.String()) } + defer comprPool.PutReader(comprReader) + + return UnTar(dst, comprReader) +} - tarballer := tar.NewReader(gzipper) +func UnTar(dst string, r io.Reader) error { + tarballer := tar.NewReader(r) for { header, err := tarballer.Next() diff --git a/pkg/storage/bloom/v1/archive_test.go b/pkg/storage/bloom/v1/archive_test.go index 401cc56a218c..b7857a4b5ed1 100644 --- a/pkg/storage/bloom/v1/archive_test.go +++ b/pkg/storage/bloom/v1/archive_test.go @@ -7,7 +7,7 @@ 
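// Illustrative round trip with the new compression-aware archive helpers;
// the encoding choice and the in-memory bytes.Buffer are assumptions for the
// sketch, not part of the diff:
func exampleArchiveRoundTrip(reader BlockReader, dstDir string) error {
	var buf bytes.Buffer
	// Tar the block through a pooled snappy writer...
	if err := TarCompress(compression.EncSnappy, &buf, reader); err != nil {
		return err
	}
	// ...then unpack it again with the matching pooled reader.
	return UnTarCompress(compression.EncSnappy, dstDir, &buf)
}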
import ( "github.com/stretchr/testify/require" - "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" v2 "github.com/grafana/loki/v3/pkg/iter/v2" ) @@ -24,7 +24,7 @@ func TestArchive(t *testing.T) { BlockOptions{ Schema: Schema{ version: CurrentSchemaVersion, - encoding: chunkenc.EncSnappy, + encoding: compression.EncNone, }, SeriesPageSize: 100, BloomPageSize: 10 << 10, @@ -40,9 +40,9 @@ func TestArchive(t *testing.T) { reader := NewDirectoryBlockReader(dir1) w := bytes.NewBuffer(nil) - require.Nil(t, TarGz(w, reader)) + require.Nil(t, Tar(w, reader)) - require.Nil(t, UnTarGz(dir2, w)) + require.Nil(t, UnTar(dir2, w)) reader2 := NewDirectoryBlockReader(dir2) @@ -78,3 +78,88 @@ func TestArchive(t *testing.T) { require.Nil(t, err) require.Equal(t, srcBloomsBytes, dstBloomsBytes) } + +func TestArchiveCompression(t *testing.T) { + t.Parallel() + for _, tc := range []struct { + enc compression.Encoding + }{ + {compression.EncNone}, + {compression.EncGZIP}, + {compression.EncSnappy}, + {compression.EncLZ4_64k}, + {compression.EncLZ4_256k}, + {compression.EncLZ4_1M}, + {compression.EncLZ4_4M}, + {compression.EncFlate}, + {compression.EncZstd}, + } { + t.Run(tc.enc.String(), func(t *testing.T) { + // for writing files to two dirs for comparison and ensuring they're equal + dir1 := t.TempDir() + dir2 := t.TempDir() + + numSeries := 100 + data, _ := MkBasicSeriesWithBlooms(numSeries, 0x0000, 0xffff, 0, 10000) + + builder, err := NewBlockBuilder( + BlockOptions{ + Schema: Schema{ + version: CurrentSchemaVersion, + encoding: compression.EncNone, + }, + SeriesPageSize: 100, + BloomPageSize: 10 << 10, + }, + NewDirectoryBlockWriter(dir1), + ) + + require.Nil(t, err) + itr := v2.NewSliceIter[SeriesWithBlooms](data) + _, err = builder.BuildFrom(itr) + require.Nil(t, err) + + reader := NewDirectoryBlockReader(dir1) + + w := bytes.NewBuffer(nil) + require.Nil(t, TarCompress(tc.enc, w, reader)) + + require.Nil(t, UnTarCompress(tc.enc, dir2, w)) + + reader2 := NewDirectoryBlockReader(dir2) + + // Check Index is byte for byte equivalent + srcIndex, err := reader.Index() + require.Nil(t, err) + _, err = srcIndex.Seek(0, io.SeekStart) + require.Nil(t, err) + dstIndex, err := reader2.Index() + require.Nil(t, err) + _, err = dstIndex.Seek(0, io.SeekStart) + require.Nil(t, err) + + srcIndexBytes, err := io.ReadAll(srcIndex) + require.Nil(t, err) + dstIndexBytes, err := io.ReadAll(dstIndex) + require.Nil(t, err) + require.Equal(t, srcIndexBytes, dstIndexBytes) + + // Check Blooms is byte for byte equivalent + srcBlooms, err := reader.Blooms() + require.Nil(t, err) + _, err = srcBlooms.Seek(0, io.SeekStart) + require.Nil(t, err) + dstBlooms, err := reader2.Blooms() + require.Nil(t, err) + _, err = dstBlooms.Seek(0, io.SeekStart) + require.Nil(t, err) + + srcBloomsBytes, err := io.ReadAll(srcBlooms) + require.Nil(t, err) + dstBloomsBytes, err := io.ReadAll(dstBlooms) + require.Nil(t, err) + require.Equal(t, srcBloomsBytes, dstBloomsBytes) + + }) + } +} diff --git a/pkg/storage/bloom/v1/ast_extractor.go b/pkg/storage/bloom/v1/ast_extractor.go new file mode 100644 index 000000000000..4c59c93e937f --- /dev/null +++ b/pkg/storage/bloom/v1/ast_extractor.go @@ -0,0 +1,99 @@ +package v1 + +import ( + "github.com/prometheus/prometheus/model/labels" + + "github.com/grafana/loki/v3/pkg/logql/log" + "github.com/grafana/loki/v3/pkg/logql/syntax" +) + +// LabelMatcher represents bloom tests for key-value pairs, mapped from +// LabelFilterExprs from the AST. 
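// Illustrative mapping for the extractor defined below, taken from its tests:
// the query
//
//	{app="foo"} | key1="value1" or key2="value2"
//
// extracts to a single OrLabelMatcher:
//
//	OrLabelMatcher{
//		Left:  PlainLabelMatcher{Key: "key1", Value: "value1"},
//		Right: PlainLabelMatcher{Key: "key2", Value: "value2"},
//	}
//
// whereas an unsupported form such as key1=~"value1" maps to
// UnsupportedLabelMatcher{}, whose bloom test always passes.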
+type LabelMatcher interface{ isLabelMatcher() } + +// UnsupportedLabelMatcher represents a label matcher which could not be +// mapped. Bloom tests for UnsupportedLabelMatchers must always pass. +type UnsupportedLabelMatcher struct{} + +// PlainLabelMatcher represents a direct key-value matcher. Bloom tests +// must only pass if the key-value pair exists in the bloom. +type PlainLabelMatcher struct{ Key, Value string } + +// OrLabelMatcher represents a logical OR test. Bloom tests must only pass if +// one of the Left or Right label matcher bloom tests pass. +type OrLabelMatcher struct{ Left, Right LabelMatcher } + +// AndLabelMatcher represents a logical AND test. Bloom tests must only pass +// if both of the Left and Right label matcher bloom tests pass. +type AndLabelMatcher struct{ Left, Right LabelMatcher } + +// ExtractTestableLabelMatchers extracts label matchers from the label filters +// in an expression. The resulting label matchers can then be used for testing +// against bloom filters. Only label matchers before the first parse stage are +// included. +// +// Unsupported LabelFilterExprs map to an UnsupportedLabelMatcher, for which +// bloom tests should always pass. +func ExtractTestableLabelMatchers(expr syntax.Expr) []LabelMatcher { + if expr == nil { + return nil + } + filters := syntax.ExtractLabelFiltersBeforeParser(expr) + return buildLabelMatchers(filters) +} + +func buildLabelMatchers(exprs []*syntax.LabelFilterExpr) []LabelMatcher { + matchers := make([]LabelMatcher, 0, len(exprs)) + for _, expr := range exprs { + matchers = append(matchers, buildLabelMatcher(expr.LabelFilterer)) + } + return matchers +} + +func buildLabelMatcher(filter log.LabelFilterer) LabelMatcher { + switch filter := filter.(type) { + + case *log.LineFilterLabelFilter: + if filter.Type != labels.MatchEqual { + return UnsupportedLabelMatcher{} + } + + return PlainLabelMatcher{ + Key: filter.Name, + Value: filter.Value, + } + + case *log.StringLabelFilter: + if filter.Type != labels.MatchEqual { + return UnsupportedLabelMatcher{} + } + + return PlainLabelMatcher{ + Key: filter.Name, + Value: filter.Value, + } + + case *log.BinaryLabelFilter: + var ( + left = buildLabelMatcher(filter.Left) + right = buildLabelMatcher(filter.Right) + ) + + if filter.And { + return AndLabelMatcher{Left: left, Right: right} + } + return OrLabelMatcher{Left: left, Right: right} + + default: + return UnsupportedLabelMatcher{} + } +} + +// +// Implement marker types: +// + +func (UnsupportedLabelMatcher) isLabelMatcher() {} +func (PlainLabelMatcher) isLabelMatcher() {} +func (OrLabelMatcher) isLabelMatcher() {} +func (AndLabelMatcher) isLabelMatcher() {} diff --git a/pkg/storage/bloom/v1/ast_extractor_test.go b/pkg/storage/bloom/v1/ast_extractor_test.go new file mode 100644 index 000000000000..856f0412c8a9 --- /dev/null +++ b/pkg/storage/bloom/v1/ast_extractor_test.go @@ -0,0 +1,105 @@ +package v1_test + +import ( + "fmt" + "testing" + + "github.com/stretchr/testify/require" + + "github.com/grafana/loki/v3/pkg/logql/syntax" + v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1" +) + +func TestExtractLabelMatchers(t *testing.T) { + tt := []struct { + name string + input string + expect []v1.LabelMatcher + }{ + { + name: "basic label matcher", + input: `{app="foo"} | key="value"`, + expect: []v1.LabelMatcher{ + v1.PlainLabelMatcher{Key: "key", Value: "value"}, + }, + }, + + { + name: "or label matcher", + input: `{app="foo"} | key1="value1" or key2="value2"`, + expect: []v1.LabelMatcher{ + v1.OrLabelMatcher{ + Left: 
v1.PlainLabelMatcher{Key: "key1", Value: "value1"}, + Right: v1.PlainLabelMatcher{Key: "key2", Value: "value2"}, + }, + }, + }, + + { + name: "and label matcher", + input: `{app="foo"} | key1="value1" and key2="value2"`, + expect: []v1.LabelMatcher{ + v1.AndLabelMatcher{ + Left: v1.PlainLabelMatcher{Key: "key1", Value: "value1"}, + Right: v1.PlainLabelMatcher{Key: "key2", Value: "value2"}, + }, + }, + }, + + { + name: "multiple label matchers", + input: `{app="foo"} | key1="value1" | key2="value2"`, + expect: []v1.LabelMatcher{ + v1.PlainLabelMatcher{Key: "key1", Value: "value1"}, + v1.PlainLabelMatcher{Key: "key2", Value: "value2"}, + }, + }, + + { + name: "unsupported label matchers", + input: `{app="foo"} | key1=~"value1"`, + expect: []v1.LabelMatcher{ + v1.UnsupportedLabelMatcher{}, + }, + }, + } + + for _, tc := range tt { + t.Run(tc.name, func(t *testing.T) { + expr, err := syntax.ParseExpr(tc.input) + require.NoError(t, err) + require.Equal(t, tc.expect, v1.ExtractTestableLabelMatchers(expr)) + }) + } +} + +func TestExtractLabelMatchers_IgnoreAfterParse(t *testing.T) { + tt := []struct { + name string + expr string + }{ + {"after json parser", `json`}, + {"after logfmt parser", `logfmt`}, + {"after pattern parser", `pattern ""`}, + {"after regexp parser", `regexp "(?P.*)"`}, + {"after unpack parser", `unpack`}, + {"after label_format", `label_format foo="bar"`}, + {"after drop labels stage", `drop foo`}, + {"after keep labels stage", `keep foo`}, + } + + for _, tc := range tt { + t.Run(tc.name, func(t *testing.T) { + fullInput := fmt.Sprintf(`{app="foo"} | key1="value1" | %s | key2="value2"`, tc.expr) + expect := []v1.LabelMatcher{ + v1.PlainLabelMatcher{Key: "key1", Value: "value1"}, + // key2="value2" should be ignored following tc.expr + } + + expr, err := syntax.ParseExpr(fullInput) + require.NoError(t, err) + + require.Equal(t, expect, v1.ExtractTestableLabelMatchers(expr), "key2=value2 should be ignored with query %s", fullInput) + }) + } +} diff --git a/pkg/storage/bloom/v1/block.go b/pkg/storage/bloom/v1/block.go index 863bdf12a983..c309cb7fec29 100644 --- a/pkg/storage/bloom/v1/block.go +++ b/pkg/storage/bloom/v1/block.go @@ -170,7 +170,7 @@ func (b *BlockQuerierIter) Next() bool { func (b *BlockQuerierIter) At() *SeriesWithBlooms { s := b.LazySeriesIter.At() res := &SeriesWithBlooms{ - Series: &s.Series, + Series: s, Blooms: newOffsetsIter(b.blooms, s.Offsets), } return res diff --git a/pkg/storage/bloom/v1/bloom.go b/pkg/storage/bloom/v1/bloom.go index 878f254abc17..b77af18d1ace 100644 --- a/pkg/storage/bloom/v1/bloom.go +++ b/pkg/storage/bloom/v1/bloom.go @@ -7,7 +7,7 @@ import ( "github.com/pkg/errors" - "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/storage/bloom/v1/filter" "github.com/grafana/loki/v3/pkg/util/encoding" "github.com/grafana/loki/v3/pkg/util/mempool" @@ -71,7 +71,7 @@ func (b *Bloom) Decode(dec *encoding.Decbuf) error { return nil } -func LazyDecodeBloomPage(r io.Reader, alloc mempool.Allocator, pool chunkenc.ReaderPool, page BloomPageHeader) (*BloomPageDecoder, error) { +func LazyDecodeBloomPage(r io.Reader, alloc mempool.Allocator, pool compression.ReaderPool, page BloomPageHeader) (*BloomPageDecoder, error) { data, err := alloc.Get(page.Len) if err != nil { return nil, errors.Wrap(err, "allocating buffer") @@ -316,7 +316,7 @@ func (b *BloomBlock) BloomPageDecoder(r io.ReadSeeker, alloc mempool.Allocator, return nil, false, errors.Wrap(err, "seeking to bloom page") } - if 
b.schema.encoding == chunkenc.EncNone { + if b.schema.encoding == compression.EncNone { res, err = LazyDecodeBloomPageNoCompression(r, alloc, page) } else { res, err = LazyDecodeBloomPage(r, alloc, b.schema.DecompressorPool(), page) diff --git a/pkg/storage/bloom/v1/bloom_tester.go b/pkg/storage/bloom/v1/bloom_tester.go index b60fbadbffb9..6fe00a4cd173 100644 --- a/pkg/storage/bloom/v1/bloom_tester.go +++ b/pkg/storage/bloom/v1/bloom_tester.go @@ -1,14 +1,9 @@ package v1 import ( - "unicode/utf8" + "fmt" + "unsafe" - "github.com/grafana/regexp" - - iter "github.com/grafana/loki/v3/pkg/iter/v2" - "github.com/grafana/loki/v3/pkg/logql/log" - "github.com/grafana/loki/v3/pkg/logql/log/pattern" - "github.com/grafana/loki/v3/pkg/logql/syntax" "github.com/grafana/loki/v3/pkg/storage/bloom/v1/filter" ) @@ -37,228 +32,20 @@ func (b BloomTests) MatchesWithPrefixBuf(bloom filter.Checker, buf []byte, prefi return true } -// ExtractTestableLineFilters extracts all line filters from an expression -// that can be tested against a bloom filter. This will skip any line filters -// after a line format expression. A line format expression might add content -// that the query later matches against, which can't be tested with a bloom filter. -// E.g. For {app="fake"} |= "foo" | line_format "thisNewTextShouldMatch" |= "thisNewTextShouldMatch" -// this function will return only the line filter for "foo" since the line filter for "thisNewTextShouldMatch" -// wouldn't match against the bloom filter but should match against the query. -func ExtractTestableLineFilters(expr syntax.Expr) []syntax.LineFilterExpr { - if expr == nil { - return nil - } - - var filters []syntax.LineFilterExpr - var lineFmtFound bool - visitor := &syntax.DepthFirstTraversal{ - VisitLineFilterFn: func(_ syntax.RootVisitor, e *syntax.LineFilterExpr) { - if e != nil && !lineFmtFound { - filters = append(filters, *e) - } - }, - VisitLineFmtFn: func(_ syntax.RootVisitor, e *syntax.LineFmtExpr) { - if e != nil { - lineFmtFound = true - } - }, - } - expr.Accept(visitor) - return filters -} - -// FiltersToBloomTest converts a list of line filters to a BloomTest. -// Note that all the line filters should be testable against a bloom filter. -// Use ExtractTestableLineFilters to extract testable line filters from an expression. -// TODO(owen-d): limits the number of bloom lookups run. -// An arbitrarily high number can overconsume cpu and is a DoS vector. -// TODO(owen-d): use for loop not recursion to protect callstack -func FiltersToBloomTest(b NGramBuilder, filters ...syntax.LineFilterExpr) BloomTest { - tests := make(BloomTests, 0, len(filters)) - for _, f := range filters { - if f.Left != nil { - tests = append(tests, FiltersToBloomTest(b, *f.Left)) - } - if f.Or != nil { - left := FiltersToBloomTest(b, *f.Or) - right := simpleFilterToBloomTest(b, f.LineFilter) - tests = append(tests, newOrTest(left, right)) - continue - } - - tests = append(tests, simpleFilterToBloomTest(b, f.LineFilter)) - } - return tests -} - -func simpleFilterToBloomTest(b NGramBuilder, filter syntax.LineFilter) BloomTest { - switch filter.Ty { - case log.LineMatchNotEqual, log.LineMatchNotRegexp, log.LineMatchNotPattern: - // We cannot test _negated_ filters with a bloom filter since blooms are probabilistic - // filters that can only tell us if a string _might_ exist. - // For example, for `!= "foo"`, the bloom filter might tell us that the string "foo" might exist - // but because we are not sure, we cannot discard that chunk because it might actually not be there. 
- // Therefore, we return a test that always returns true. - return MatchAll - case log.LineMatchEqual: - return newStringTest(b, filter.Match) - case log.LineMatchRegexp: - return MatchAll - case log.LineMatchPattern: - return newPatternTest(b, filter.Match) - default: - return MatchAll - } -} - -type bloomCheckerWrapper struct { - bloom filter.Checker -} - -// Test implements the log.Checker interface -func (b bloomCheckerWrapper) Test(line []byte, _ bool, _ bool) bool { - return b.bloom.Test(line) -} - -// TestRegex implements the log.Checker interface -func (b bloomCheckerWrapper) TestRegex(_ *regexp.Regexp) bool { - // We won't support regexes in bloom filters so we just return true - return true -} - -type logCheckerWrapper struct { - checker log.Checker -} - -// Test implements the filter.Checker interface -func (l logCheckerWrapper) Test(data []byte) bool { - return l.checker.Test(data, true, false) -} - -type matcherFilterWrapper struct { - filter log.Matcher -} - -func (m matcherFilterWrapper) Matches(bloom filter.Checker) bool { - return m.filter.Matches(bloomCheckerWrapper{bloom}) -} - -func (m matcherFilterWrapper) MatchesWithPrefixBuf(bloom filter.Checker, buf []byte, prefixLen int) bool { - return m.filter.Matches(bloomCheckerWrapper{prefixedChecker{ - checker: bloom, - buf: buf, - prefixLen: prefixLen, - }}) -} - -type prefixedChecker struct { - checker filter.Checker - buf []byte - prefixLen int -} - -func (p prefixedChecker) Test(data []byte) bool { - return p.checker.Test(append(p.buf[:p.prefixLen], data...)) -} - type matchAllTest struct{} var MatchAll = matchAllTest{} +// Matches implements BloomTest func (n matchAllTest) Matches(_ filter.Checker) bool { return true } +// MatchesWithPrefixBuf implements BloomTest func (n matchAllTest) MatchesWithPrefixBuf(_ filter.Checker, _ []byte, _ int) bool { return true } -// NGramBuilder is an interface for tokenizing strings into ngrams -// Extracting this interface allows us to test the bloom filter without having to use the actual tokenizer -// TODO: This should be moved to tokenizer.go -type NGramBuilder interface { - Tokens(line string) iter.Iterator[[]byte] - N() int - SkipFactor() int -} - -type stringTest struct { - ngrams [][]byte -} - -func newStringTest(b NGramBuilder, search string) (res BloomTest) { - // search string must be longer than the combined ngram length and skip factor - // in order for all possible skip offsets to have at least 1 ngram - skip := b.SkipFactor() - if ct := utf8.RuneCountInString(search); ct < b.N()+skip { - return MatchAll - } - - tests := make([]stringTest, 0, skip) - - for i := 0; i < skip+1; i++ { - searchWithOffset := search - for j := 0; j < i; j++ { - _, size := utf8.DecodeRuneInString(searchWithOffset) - // NB(owen-d): small bounds check for invalid utf8 - searchWithOffset = searchWithOffset[min(size, len(searchWithOffset)):] - } - - var test stringTest - it := b.Tokens(searchWithOffset) - for it.Next() { - ngram := make([]byte, len(it.At())) - copy(ngram, it.At()) - test.ngrams = append(test.ngrams, ngram) - } - tests = append(tests, test) - } - - res = tests[0] - for _, t := range tests[1:] { - res = newOrTest(res, t) - } - return res -} - -// Matches implements the BloomTest interface -func (b stringTest) Matches(bloom filter.Checker) bool { - for _, ngram := range b.ngrams { - if !bloom.Test(ngram) { - return false - } - } - return true -} - -// MatchesWithPrefixBuf implements the BloomTest interface -func (b stringTest) MatchesWithPrefixBuf(bloom filter.Checker, buf []byte, 
prefixLen int) bool { - for _, ngram := range b.ngrams { - buf = append(buf[:prefixLen], ngram...) - if !bloom.Test(buf) { - return false - } - } - return true -} - -type stringMatcherFilter struct { - test BloomTest -} - -// Matches implements the log.Filterer interface -func (b stringMatcherFilter) Matches(test log.Checker) bool { - return b.test.Matches(logCheckerWrapper{test}) -} - -func newStringFilterFunc(b NGramBuilder) log.NewMatcherFiltererFunc { - return func(match []byte, _ bool) log.MatcherFilterer { - return log.WrapMatcher(stringMatcherFilter{ - test: newStringTest(b, string(match)), - }) - } -} - type orTest struct { left, right BloomTest } @@ -284,24 +71,133 @@ func newOrTest(left, right BloomTest) orTest { } } +// Matches implements BloomTest func (o orTest) Matches(bloom filter.Checker) bool { return o.left.Matches(bloom) || o.right.Matches(bloom) } +// MatchesWithPrefixBuf implements BloomTest func (o orTest) MatchesWithPrefixBuf(bloom filter.Checker, buf []byte, prefixLen int) bool { return o.left.MatchesWithPrefixBuf(bloom, buf, prefixLen) || o.right.MatchesWithPrefixBuf(bloom, buf, prefixLen) } -func newPatternTest(b NGramBuilder, match string) BloomTest { - lit, err := pattern.ParseLiterals(match) - if err != nil { - return MatchAll +type andTest struct { + left, right BloomTest +} + +func newAndTest(left, right BloomTest) andTest { + return andTest{ + left: left, + right: right, + } +} + +// Matches implements BloomTest +func (a andTest) Matches(bloom filter.Checker) bool { + return a.left.Matches(bloom) && a.right.Matches(bloom) +} + +// MatchesWithPrefixBuf implements BloomTest +func (a andTest) MatchesWithPrefixBuf(bloom filter.Checker, buf []byte, prefixLen int) bool { + return a.left.MatchesWithPrefixBuf(bloom, buf, prefixLen) && a.right.MatchesWithPrefixBuf(bloom, buf, prefixLen) +} + +func LabelMatchersToBloomTest(matchers ...LabelMatcher) BloomTest { + tests := make(BloomTests, 0, len(matchers)) + for _, matcher := range matchers { + tests = append(tests, matcherToBloomTest(matcher)) } + return tests +} + +func matcherToBloomTest(matcher LabelMatcher) BloomTest { + switch matcher := matcher.(type) { + case UnsupportedLabelMatcher: + return matchAllTest{} + + case PlainLabelMatcher: + return newStringMatcherTest(matcher) + + case OrLabelMatcher: + return newOrTest( + matcherToBloomTest(matcher.Left), + matcherToBloomTest(matcher.Right), + ) - var res BloomTests + case AndLabelMatcher: + return newAndTest( + matcherToBloomTest(matcher.Left), + matcherToBloomTest(matcher.Right), + ) - for _, l := range lit { - res = append(res, newStringTest(b, string(l))) + default: + // Unhandled cases pass bloom tests by default. + return matchAllTest{} } - return res +} + +type stringMatcherTest struct { + matcher PlainLabelMatcher +} + +func newStringMatcherTest(matcher PlainLabelMatcher) stringMatcherTest { + return stringMatcherTest{matcher: matcher} +} + +func (sm stringMatcherTest) Matches(bloom filter.Checker) bool { + // TODO(rfratto): reintroduce the use of a shared tokenizer here to avoid + // desyncing between how tokens are passed during building vs passed during + // querying. + // + // For a shared tokenizer to be ergonomic: + // + // 1. A prefix shouldn't be required until MatchesWithPrefixBuf is called + // 2. 
It should be possible to test for just the key + + var ( + combined = fmt.Sprintf("%s=%s", sm.matcher.Key, sm.matcher.Value) + + rawKey = unsafe.Slice(unsafe.StringData(sm.matcher.Key), len(sm.matcher.Key)) + rawCombined = unsafe.Slice(unsafe.StringData(combined), len(combined)) + ) + + if !bloom.Test(rawKey) { + // The structured metadata key wasn't indexed. However, sm.matcher might be + // checking against a label which *does* exist, so we can't safely filter + // out this chunk. + // + // TODO(rfratto): The negative test here is a bit confusing, and the key + // presence test should likely be done higher up within FuseQuerier. + return true + } + + return bloom.Test(rawCombined) +} + +func (sm stringMatcherTest) MatchesWithPrefixBuf(bloom filter.Checker, buf []byte, prefixLen int) bool { + var ( + combined = fmt.Sprintf("%s=%s", sm.matcher.Key, sm.matcher.Value) + + prefixedKey = appendToBuf(buf, prefixLen, sm.matcher.Key) + prefixedCombined = appendToBuf(buf, prefixLen, combined) + ) + + if !bloom.Test(prefixedKey) { + // The structured metadata key wasn't indexed for a prefix. However, + // sm.matcher might be checking against a label which *does* exist, so we + // can't safely filter out this chunk. + // + // TODO(rfratto): The negative test here is a bit confusing, and the key + // presence test should likely be done higher up within FuseQuerier. + return true + } + + return bloom.Test(prefixedCombined) +} + +// appendToBuf is the equivalent of append(buf[:prefixLen], str). len(buf) must +// be greater than or equal to prefixLen+len(str) to avoid allocations. +func appendToBuf(buf []byte, prefixLen int, str string) []byte { + rawString := unsafe.Slice(unsafe.StringData(str), len(str)) + return append(buf[:prefixLen], rawString...) } diff --git a/pkg/storage/bloom/v1/bloom_tester_test.go b/pkg/storage/bloom/v1/bloom_tester_test.go index 81adbd8a86b5..7a314872cc86 100644 --- a/pkg/storage/bloom/v1/bloom_tester_test.go +++ b/pkg/storage/bloom/v1/bloom_tester_test.go @@ -6,114 +6,125 @@ import ( "github.com/stretchr/testify/require" "github.com/grafana/loki/v3/pkg/logql/syntax" -) - -type fakeBloom []string -// fakeBloom is a fake bloom filter that matches tokens exactly. 
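// A minimal sketch (not part of this change) of how the pieces above compose at
// query time, mirroring the tests in this file. `expr`, `chk`, and `bloom` are
// assumed inputs, and the function name is illustrative only:
func exampleBloomTestUsage(expr syntax.Expr, chk ChunkRef, bloom filter.Checker) (seriesPass, chunkPass bool) {
	// e.g. expr parsed from `{app="fake"} | trace_id="exists_1"`
	matchers := ExtractTestableLabelMatchers(expr)
	test := LabelMatchersToBloomTest(matchers...) // combined test; every matcher must pass, unsupported ones match all

	// Series-level tokens carry no prefix; chunk-level tokens are prefixed with
	// the encoded chunk ref, as FusedQuerier does via prefixForChunkRef.
	seriesPass = test.Matches(bloom)
	buf := prefixForChunkRef(chk)
	chunkPass = test.MatchesWithPrefixBuf(bloom, buf, len(buf))
	return seriesPass, chunkPass
}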
-// It uses a tokenizer to build the tokens for a line -func newFakeBloom(tokenizer *NGramTokenizer, line string) (res fakeBloom) { - toks := tokenizer.Tokens(line) - for toks.Next() { - res = append(res, string(toks.At())) - } - return -} - -func (f fakeBloom) Test(data []byte) bool { - str := string(data) - for _, match := range f { - if str == match { - return true - } - } - return false -} + "github.com/grafana/loki/pkg/push" +) -func TestBloomQueryingLogic(t *testing.T) { - // All tested on 4skip1 - n := 4 - skip := 1 - tokenizer := NewNGramTokenizer(n, skip) +func TestLabelMatchersToBloomTest(t *testing.T) { + // All test cases below have access to a fake bloom filter with + // trace_id=exists_1 and trace_id=exists_2 + var ( + prefix = "fakeprefix" + tokenizer = NewStructuredMetadataTokenizer(prefix) + bloom = newFakeMetadataBloom( + tokenizer, + push.LabelAdapter{Name: "trace_id", Value: "exists_1"}, + push.LabelAdapter{Name: "trace_id", Value: "exists_2"}, + ) + ) - for _, tc := range []struct { - desc string - line string - query string - match bool - enabled bool + tt := []struct { + name string + query string + match bool }{ { - desc: "filter too short always match", - line: "foobar", - query: `{app="fake"} |= "z"`, + name: "no matchers", + query: `{app="fake"}`, match: true, }, { - desc: "simple matcher", - line: "foobar", - query: `{app="fake"} |= "oobar"`, + name: "basic matcher pass", + query: `{app="fake"} | trace_id="exists_1"`, match: true, }, { - desc: "longer sequence", - line: "abcdefghijklmnopqrstuvwxyz", - query: `{app="fake"} |= "nopqrstuvwxyz"`, + name: "basic matcher fail", + query: `{app="fake"} | trace_id="noexist"`, + match: false, + }, + { + name: "multiple matcher pass", + query: `{app="fake"} | trace_id="exists_1" | trace_id="exists_2"`, match: true, }, { - desc: "longer sequence nomatch", - line: "abcdefghijklmnopqrstuvwxyz", - query: `{app="fake"} |= "nopqrstuvwxyzzz"`, + name: "multiple matcher fail", + query: `{app="fake"} | trace_id="exists_1" | trace_id="noexist"`, match: false, }, { - desc: "pattern simple", - line: "abcdefghijklmnopqrstuvwxyz", - query: `{app="fake"} |> "<_>lmnopq<_>"`, + name: "ignore non-indexed key", + query: `{app="fake"} | noexist="noexist"`, + match: true, + }, + { + name: "ignore non-indexed key with empty value", + query: `{app="fake"} | noexist=""`, match: true, }, { - desc: "pattern too short matches", - line: "abcdefghijklmnopqrstuvwxyz", - query: `{app="fake"} |> "<_>zzz<_>"`, + name: "ignore unsupported operator", + query: `{app="fake"} | trace_id=~".*noexist.*"`, match: true, }, { - desc: "pattern mix long success and short", - line: "abcdefghijklmnopqrstuvwxyz", - query: `{app="fake"} |> "<_>lmnop<_>zzz<_>"`, + name: "or test pass", + query: `{app="fake"} | trace_id="noexist" or trace_id="exists_1"`, match: true, }, { - desc: "pattern mix long fail and short", - line: "abcdefghijklmnopqrstuvwxyz", - query: `{app="fake"} |> "<_>zzzzz<_>zzz<_>"`, + name: "or test fail", + query: `{app="fake"} | trace_id="noexist" or trace_id="noexist"`, match: false, }, { - desc: "regexp disabled", - line: "foobarbaz", - query: `{app="fake"} |~ "(aaaaa|bbbbb)bazz"`, + name: "and test pass", + query: `{app="fake"} | trace_id="exists_1" and trace_id="exists_2"`, match: true, }, - } { + { + name: "and test fail", + query: `{app="fake"} | trace_id="exists_1" and trace_id="noexist"`, + match: false, + }, + } - - // shortcut to enable specific tests - tc.enabled = true - if !tc.enabled { - continue - } - t.Run(tc.desc, func(t *testing.T) { -
bloom := newFakeBloom(tokenizer, tc.line) + for _, tc := range tt { + t.Run(tc.name, func(t *testing.T) { expr, err := syntax.ParseExpr(tc.query) require.NoError(t, err) - filters := ExtractTestableLineFilters(expr) - bloomTests := FiltersToBloomTest(tokenizer, filters...) - matched := bloomTests.Matches(bloom) - require.Equal(t, tc.match, matched) + matchers := ExtractTestableLabelMatchers(expr) + bloomTest := LabelMatchersToBloomTest(matchers...) + // .Matches and .MatchesWithPrefixBuf should both have the same result. + require.Equal(t, tc.match, bloomTest.Matches(bloom)) + require.Equal(t, tc.match, bloomTest.MatchesWithPrefixBuf(bloom, []byte(prefix), len(prefix))) }) } } + +type fakeMetadataBloom []string + +// fakeBloom is a fake bloom filter that matches tokens exactly. +// It uses a tokenizer to build the tokens for a line +func newFakeMetadataBloom(tokenizer *StructuredMetadataTokenizer, kvs ...push.LabelAdapter) (res fakeMetadataBloom) { + for _, kv := range kvs { + it := tokenizer.Tokens(kv) + for it.Next() { + res = append(res, it.At()) + } + } + return res +} + +func (f fakeMetadataBloom) Test(data []byte) bool { + str := string(data) + for _, match := range f { + if str == match { + return true + } + } + return false +} diff --git a/pkg/storage/bloom/v1/bloom_tokenizer.go b/pkg/storage/bloom/v1/bloom_tokenizer.go index 333e2f22a37c..939c91c21439 100644 --- a/pkg/storage/bloom/v1/bloom_tokenizer.go +++ b/pkg/storage/bloom/v1/bloom_tokenizer.go @@ -23,9 +23,8 @@ type BloomTokenizer struct { metrics *Metrics logger log.Logger - maxBloomSize int // size in bytes - lineTokenizer *NGramTokenizer - cache map[string]interface{} + maxBloomSize int // size in bytes + cache map[string]interface{} } const cacheSize = 150000 @@ -37,47 +36,15 @@ const eightBits = 8 // 1) The token slices generated must not be mutated externally // 2) The token slice must not be used after the next call to `Tokens()` as it will repopulate the slice. // 2) This is not thread safe. -func NewBloomTokenizer(nGramLen, nGramSkip int, maxBloomSize int, metrics *Metrics, logger log.Logger) *BloomTokenizer { - level.Info(logger).Log("msg", "create new bloom tokenizer", "ngram length", nGramLen, "ngram skip", nGramSkip) +func NewBloomTokenizer(maxBloomSize int, metrics *Metrics, logger log.Logger) *BloomTokenizer { return &BloomTokenizer{ - metrics: metrics, - logger: logger, - cache: make(map[string]interface{}, cacheSize), - lineTokenizer: NewNGramTokenizer(nGramLen, nGramSkip), - maxBloomSize: maxBloomSize, + metrics: metrics, + logger: logger, + cache: make(map[string]interface{}, cacheSize), + maxBloomSize: maxBloomSize, } } -func (bt *BloomTokenizer) N() uint64 { - return uint64(bt.lineTokenizer.N()) -} - -func (bt *BloomTokenizer) SkipFactor() uint64 { - return uint64(bt.lineTokenizer.SkipFactor()) -} - -// prefixedToken returns a byte slice with sufficient capacity for a chunk-ref prefixed token -// of specific ngram length, along with the length of the prefix. -// It ensures enough capacity for the prefix and the token so additional tokens can be created -// without allocations by appending them to the prefix length -// If the buffer is nil or too small, a new one is created. The buffer is returned for reuse. 
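// The prefix written by prefixedToken below is a fixed 20-byte, big-endian
// encoding of the chunk ref: 8 bytes From, 8 bytes Through, 4 bytes Checksum
// (the removed TestPrefixedKeyCreation asserts this length). A minimal
// standard-library sketch of the same layout using "encoding/binary";
// prefixForChunkRef, which replaces it, is defined outside this diff and
// presumably keeps the same layout. The function name is illustrative only:
func exampleChunkPrefix(chk ChunkRef) []byte {
	prefix := make([]byte, 0, 20)
	prefix = binary.BigEndian.AppendUint64(prefix, uint64(chk.From))
	prefix = binary.BigEndian.AppendUint64(prefix, uint64(chk.Through))
	prefix = binary.BigEndian.AppendUint32(prefix, chk.Checksum)
	return prefix // always 20 bytes
}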
-func prefixedToken(ngram int, chk ChunkRef, buf []byte) ([]byte, int) { - enc := encoding.EncWith(buf) - enc.Reset() - enc.PutBE64(uint64(chk.From)) - enc.PutBE64(uint64(chk.Through)) - enc.PutBE32(chk.Checksum) - prefixLn := enc.Len() // record the length of the prefix - - // If the buffer is too small, ensure enough capacity for the ngram - if cap(enc.Get()) < prefixLn+ngram*MaxRuneLen { - enc.PutBytes(make([]byte, ngram*MaxRuneLen)) - } - - // return the underlying byte slice and the length of the prefix - return enc.Get(), prefixLn -} - // ChunkRefWithIter is a wrapper around a ChunkRef and an EntryIterator. type ChunkRefWithIter struct { Ref ChunkRef @@ -112,7 +79,7 @@ func (bt *BloomTokenizer) Populate(blooms v2iter.SizedIterator[*Bloom], chks v2i // We noticed some blooms are empty on the resulting blocks. // We have the feeling that the empty blooms may be reused from old blocks. // Here we log an error if we find an empty bloom. - if bloom.Count() == 0 { + if bloom.IsEmpty() { level.Warn(bt.logger).Log("msg", "found existing empty bloom") } } else { @@ -149,7 +116,7 @@ func (bt *BloomTokenizer) Populate(blooms v2iter.SizedIterator[*Bloom], chks v2i } // TODO(salvacorts): Delete this once we solve the correctness bug - if bloom.Count() == 0 { + if bloom.IsEmpty() { level.Warn(bt.logger).Log("msg", "resulting bloom is empty") } diff --git a/pkg/storage/bloom/v1/bloom_tokenizer_test.go b/pkg/storage/bloom/v1/bloom_tokenizer_test.go index 7e8f5c4c9993..79eb74033dd7 100644 --- a/pkg/storage/bloom/v1/bloom_tokenizer_test.go +++ b/pkg/storage/bloom/v1/bloom_tokenizer_test.go @@ -12,6 +12,7 @@ import ( "github.com/grafana/dskit/multierror" "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/iter" v2 "github.com/grafana/loki/v3/pkg/iter/v2" "github.com/grafana/loki/v3/pkg/logproto" @@ -19,7 +20,6 @@ import ( "github.com/grafana/loki/pkg/push" - "github.com/prometheus/common/model" "github.com/stretchr/testify/require" "github.com/grafana/loki/v3/pkg/storage/bloom/v1/filter" @@ -27,84 +27,18 @@ import ( "github.com/prometheus/client_golang/prometheus" ) -const ( - DefaultNGramLength = 4 - DefaultNGramSkip = 0 -) - -var ( - four = NewNGramTokenizer(4, 0) - metrics = NewMetrics(prometheus.DefaultRegisterer) -) - -func TestPrefixedKeyCreation(t *testing.T) { - t.Parallel() - var ones uint64 = 0xffffffffffffffff - - ref := ChunkRef{ - From: 0, - Through: model.Time(int64(ones)), - Checksum: 0xffffffff, - } - for _, tc := range []struct { - desc string - ngram, expLen int - }{ - { - desc: "0-gram", - ngram: 0, - expLen: 20, - }, - { - desc: "4-gram", - ngram: 4, - expLen: 20 + 4*MaxRuneLen, - }, - } { - t.Run(tc.desc, func(t *testing.T) { - token, prefixLn := prefixedToken(tc.ngram, ref, nil) - require.Equal(t, 20, prefixLn) - require.Equal(t, tc.expLen, len(token)) - // first 8 bytes should be zeros from `from` - for i := 0; i < 8; i++ { - require.Equal(t, byte(0), token[i]) - } - // next 8 bytes should be ones from `through` - for i := 8; i < 16; i++ { - require.Equal(t, byte(255), token[i]) - } - // next 4 bytes should be ones from `checksum` - for i := 16; i < 20; i++ { - require.Equal(t, byte(255), token[i]) - } - }) - } -} - -func TestSetLineTokenizer(t *testing.T) { - t.Parallel() - bt := NewBloomTokenizer(DefaultNGramLength, DefaultNGramSkip, 0, metrics, logger.NewNopLogger()) - - // Validate defaults - require.Equal(t, bt.lineTokenizer.N(), DefaultNGramLength) - require.Equal(t, bt.lineTokenizer.SkipFactor(), 
DefaultNGramSkip) - - // Set new tokenizer, and validate against that - bt.lineTokenizer = NewNGramTokenizer(6, 7) - require.Equal(t, bt.lineTokenizer.N(), 6) - require.Equal(t, bt.lineTokenizer.SkipFactor(), 7) -} +var metrics = NewMetrics(prometheus.DefaultRegisterer) func TestTokenizerPopulate(t *testing.T) { t.Parallel() var testLine = "this is a log line" - bt := NewBloomTokenizer(DefaultNGramLength, DefaultNGramSkip, 0, metrics, logger.NewNopLogger()) + bt := NewBloomTokenizer(0, metrics, logger.NewNopLogger()) metadata := push.LabelsAdapter{ {Name: "pod", Value: "loki-1"}, {Name: "trace_id", Value: "3bef3c91643bde73"}, } - memChunk := chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, chunkenc.EncSnappy, chunkenc.ChunkHeadFormatFor(chunkenc.ChunkFormatV4), 256000, 1500000) + memChunk := chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, compression.EncSnappy, chunkenc.ChunkHeadFormatFor(chunkenc.ChunkFormatV4), 256000, 1500000) _, _ = memChunk.Append(&push.Entry{ Timestamp: time.Unix(0, 1), Line: testLine, @@ -143,13 +77,13 @@ func TestTokenizerPopulate(t *testing.T) { func TestBloomTokenizerPopulateWithoutPreexistingBloom(t *testing.T) { var testLine = "this is a log line" - bt := NewBloomTokenizer(DefaultNGramLength, DefaultNGramSkip, 0, metrics, logger.NewNopLogger()) + bt := NewBloomTokenizer(0, metrics, logger.NewNopLogger()) metadata := push.LabelsAdapter{ {Name: "pod", Value: "loki-1"}, {Name: "trace_id", Value: "3bef3c91643bde73"}, } - memChunk := chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, chunkenc.EncSnappy, chunkenc.ChunkHeadFormatFor(chunkenc.ChunkFormatV4), 256000, 1500000) + memChunk := chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, compression.EncSnappy, chunkenc.ChunkHeadFormatFor(chunkenc.ChunkFormatV4), 256000, 1500000) _, _ = memChunk.Append(&push.Entry{ Timestamp: time.Unix(0, 1), Line: testLine, @@ -186,7 +120,7 @@ func TestBloomTokenizerPopulateWithoutPreexistingBloom(t *testing.T) { } func chunkRefItrFromMetadata(metadata ...push.LabelsAdapter) (iter.EntryIterator, error) { - memChunk := chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, chunkenc.EncSnappy, chunkenc.ChunkHeadFormatFor(chunkenc.ChunkFormatV4), 256000, 1500000) + memChunk := chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, compression.EncSnappy, chunkenc.ChunkHeadFormatFor(chunkenc.ChunkFormatV4), 256000, 1500000) for i, md := range metadata { if _, err := memChunk.Append(&push.Entry{ Timestamp: time.Unix(0, int64(i)), @@ -220,7 +154,7 @@ func randomStr(ln int) string { func TestTokenizerPopulateWontExceedMaxSize(t *testing.T) { maxSize := 4 << 10 - bt := NewBloomTokenizer(DefaultNGramLength, DefaultNGramSkip, maxSize, NewMetrics(nil), logger.NewNopLogger()) + bt := NewBloomTokenizer(maxSize, NewMetrics(nil), logger.NewNopLogger()) ch := make(chan *BloomCreation) metadata := make([]push.LabelsAdapter, 0, 4<<10) @@ -267,15 +201,18 @@ func populateAndConsumeBloom( func BenchmarkPopulateSeriesWithBloom(b *testing.B) { for i := 0; i < b.N; i++ { - var testLine = lorem + lorem + lorem - bt := NewBloomTokenizer(DefaultNGramLength, DefaultNGramSkip, 0, metrics, logger.NewNopLogger()) + bt := NewBloomTokenizer(0, metrics, logger.NewNopLogger()) sbf := filter.NewScalableBloomFilter(1024, 0.01, 0.8) - memChunk := chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, chunkenc.EncSnappy, chunkenc.ChunkHeadFormatFor(chunkenc.ChunkFormatV4), 256000, 1500000) + memChunk := chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, compression.EncSnappy, chunkenc.ChunkHeadFormatFor(chunkenc.ChunkFormatV4), 256000, 1500000) _, _ = 
memChunk.Append(&push.Entry{ Timestamp: time.Unix(0, 1), - Line: testLine, + Line: "", + StructuredMetadata: push.LabelsAdapter{ + push.LabelAdapter{Name: "trace_id", Value: fmt.Sprintf("%04x", i)}, + push.LabelAdapter{Name: "org_id", Value: fmt.Sprintf("%d", i%1000)}, + }, }) itr, err := memChunk.Iterator( context.Background(), @@ -301,7 +238,7 @@ func BenchmarkPopulateSeriesWithBloom(b *testing.B) { } func TestTokenizerClearsCacheBetweenPopulateCalls(t *testing.T) { - bt := NewBloomTokenizer(DefaultNGramLength, DefaultNGramSkip, 0, NewMetrics(nil), logger.NewNopLogger()) + bt := NewBloomTokenizer(0, NewMetrics(nil), logger.NewNopLogger()) md := push.LabelsAdapter{ {Name: "trace_id", Value: "3bef3c91643bde73"}, } @@ -339,7 +276,7 @@ func TestTokenizerClearsCacheBetweenPopulateCalls(t *testing.T) { } func BenchmarkMapClear(b *testing.B) { - bt := NewBloomTokenizer(DefaultNGramLength, DefaultNGramSkip, 0, metrics, logger.NewNopLogger()) + bt := NewBloomTokenizer(0, metrics, logger.NewNopLogger()) for i := 0; i < b.N; i++ { for k := 0; k < cacheSize; k++ { bt.cache[fmt.Sprint(k)] = k @@ -350,7 +287,7 @@ func BenchmarkMapClear(b *testing.B) { } func BenchmarkNewMap(b *testing.B) { - bt := NewBloomTokenizer(DefaultNGramLength, DefaultNGramSkip, 0, metrics, logger.NewNopLogger()) + bt := NewBloomTokenizer(0, metrics, logger.NewNopLogger()) for i := 0; i < b.N; i++ { for k := 0; k < cacheSize; k++ { bt.cache[fmt.Sprint(k)] = k diff --git a/pkg/storage/bloom/v1/builder.go b/pkg/storage/bloom/v1/builder.go index 1b43ccadee2a..664eb60cd596 100644 --- a/pkg/storage/bloom/v1/builder.go +++ b/pkg/storage/bloom/v1/builder.go @@ -7,7 +7,7 @@ import ( "github.com/pkg/errors" - "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" iter "github.com/grafana/loki/v3/pkg/iter/v2" "github.com/grafana/loki/v3/pkg/util/encoding" ) @@ -66,12 +66,10 @@ func (b BlockOptions) Encode(enc *encoding.Encbuf) { enc.PutBE64(b.BlockSize) } -func NewBlockOptions(enc chunkenc.Encoding, nGramLength, nGramSkip, maxBlockSizeBytes, maxBloomSizeBytes uint64) BlockOptions { +func NewBlockOptions(enc compression.Encoding, maxBlockSizeBytes, maxBloomSizeBytes uint64) BlockOptions { opts := NewBlockOptionsFromSchema(Schema{ - version: CurrentSchemaVersion, - encoding: enc, - nGramLength: nGramLength, - nGramSkip: nGramSkip, + version: CurrentSchemaVersion, + encoding: enc, }) opts.BlockSize = maxBlockSizeBytes opts.UnencodedBlockOptions.MaxBloomSizeBytes = maxBloomSizeBytes @@ -122,7 +120,7 @@ func (w *PageWriter) Add(item []byte) (offset int) { return offset } -func (w *PageWriter) writePage(writer io.Writer, pool chunkenc.WriterPool, crc32Hash hash.Hash32) (int, int, error) { +func (w *PageWriter) writePage(writer io.Writer, pool compression.WriterPool, crc32Hash hash.Hash32) (int, int, error) { // write the number of blooms in this page, must not be varint // so we can calculate it's position+len during decoding w.enc.PutBE64(uint64(w.n)) @@ -183,12 +181,12 @@ type MergeBuilder struct { blocks iter.Iterator[*SeriesWithBlooms] // store store iter.Iterator[*Series] - // Add chunks to a bloom - populate func(s *Series, srcBlooms iter.SizedIterator[*Bloom], toAdd ChunkRefs, ch chan *BloomCreation) + // Add chunks of a single series to a bloom + populate BloomPopulatorFunc metrics *Metrics } -type BloomPopulatorFunc = func(s *Series, srcBlooms iter.SizedIterator[*Bloom], toAdd ChunkRefs, ch chan *BloomCreation) +type BloomPopulatorFunc func(series *Series, preExistingBlooms iter.SizedIterator[*Bloom], 
chunksToAdd ChunkRefs, ch chan *BloomCreation) // NewMergeBuilder is a specific builder which does the following: // 1. merges multiple blocks into a single ordered querier, @@ -277,7 +275,8 @@ func (mb *MergeBuilder) processNextSeries( } var ( - offsets []BloomOffset + offsets []BloomOffset + chunksToAdd = nextInStore.Chunks preExistingBlooms iter.SizedIterator[*Bloom] = iter.NewEmptyIter[*Bloom]() info = newIndexingInfo() @@ -288,6 +287,8 @@ func (mb *MergeBuilder) processNextSeries( chunksToAdd = nextInStore.Chunks.Unless(nextInBlocks.Series.Chunks) chunksCopied += len(nextInStore.Chunks) - len(chunksToAdd) preExistingBlooms = nextInBlocks.Blooms + // we also need to carry over existing indexed fields from the series metadata + info.indexedFields.Union(nextInBlocks.Series.Meta.Fields) } chunksIndexed += len(chunksToAdd) diff --git a/pkg/storage/bloom/v1/builder_test.go b/pkg/storage/bloom/v1/builder_test.go index 5e56c3507e88..a2682921930f 100644 --- a/pkg/storage/bloom/v1/builder_test.go +++ b/pkg/storage/bloom/v1/builder_test.go @@ -9,28 +9,26 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/require" - "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" iter "github.com/grafana/loki/v3/pkg/iter/v2" "github.com/grafana/loki/v3/pkg/util/encoding" "github.com/grafana/loki/v3/pkg/util/mempool" ) -var blockEncodings = []chunkenc.Encoding{ - chunkenc.EncNone, - chunkenc.EncGZIP, - chunkenc.EncSnappy, - chunkenc.EncLZ4_256k, - chunkenc.EncZstd, +var blockEncodings = []compression.Encoding{ + compression.EncNone, + compression.EncGZIP, + compression.EncSnappy, + compression.EncLZ4_256k, + compression.EncZstd, } func TestBlockOptions_RoundTrip(t *testing.T) { t.Parallel() opts := BlockOptions{ Schema: Schema{ - version: CurrentSchemaVersion, - encoding: chunkenc.EncSnappy, - nGramLength: 10, - nGramSkip: 2, + version: CurrentSchemaVersion, + encoding: compression.EncSnappy, }, SeriesPageSize: 100, BloomPageSize: 10 << 10, @@ -49,7 +47,6 @@ func TestBlockOptions_RoundTrip(t *testing.T) { func TestBlockBuilder_RoundTrip(t *testing.T) { numSeries := 100 - data, keys := MkBasicSeriesWithLiteralBlooms(numSeries, 0, 0xffff, 0, 10000) for _, enc := range blockEncodings { // references for linking in memory reader+writer @@ -88,25 +85,19 @@ func TestBlockBuilder_RoundTrip(t *testing.T) { t.Run(desc, func(t *testing.T) { blockOpts := BlockOptions{ Schema: Schema{ - version: CurrentSchemaVersion, - encoding: enc, - nGramLength: 10, - nGramSkip: 2, + version: CurrentSchemaVersion, + encoding: enc, }, SeriesPageSize: 100, BloomPageSize: 10 << 10, BlockSize: tc.maxBlockSize, } + data, keys := MkBasicSeriesWithBlooms(numSeries, 0, 0xffff, 0, 10000) builder, err := NewBlockBuilder(blockOpts, tc.writer) require.Nil(t, err) - itr := iter.NewPeekIter[SeriesWithBlooms]( - iter.NewMapIter( - iter.NewSliceIter[SeriesWithLiteralBlooms](data), - func(x SeriesWithLiteralBlooms) SeriesWithBlooms { return x.SeriesWithBlooms() }, - ), - ) + itr := iter.NewPeekIter(iter.NewSliceIter(data)) _, err = builder.BuildFrom(itr) require.Nil(t, err) @@ -134,7 +125,7 @@ func TestBlockBuilder_RoundTrip(t *testing.T) { got := querier.At() blooms, err := iter.Collect(got.Blooms) require.Nil(t, err) - require.Equal(t, processedData[i].Series, got.Series) + require.Equal(t, processedData[i].Series.Series, got.Series.Series) for _, key := range keys[i] { found := false for _, b := range blooms { @@ -161,7 +152,7 @@ func TestBlockBuilder_RoundTrip(t *testing.T) { got := 
querier.At() blooms, err := iter.Collect(got.Blooms) require.Nil(t, err) - require.Equal(t, halfData[j].Series, got.Series) + require.Equal(t, halfData[j].Series.Series, got.Series.Series) for _, key := range halfKeys[j] { found := false for _, b := range blooms { @@ -210,7 +201,7 @@ func TestMergeBuilder(t *testing.T) { blockOpts := BlockOptions{ Schema: Schema{ version: CurrentSchemaVersion, - encoding: chunkenc.EncSnappy, + encoding: compression.EncSnappy, }, SeriesPageSize: 100, BloomPageSize: 10 << 10, @@ -244,16 +235,12 @@ func TestMergeBuilder(t *testing.T) { } // We're not testing the ability to extend a bloom in this test - pop := func(_ *Series, srcBlooms iter.SizedIterator[*Bloom], _ ChunkRefs, ch chan *BloomCreation) { - for srcBlooms.Next() { - bloom := srcBlooms.At() - stats := indexingInfo{ - sourceBytes: int(bloom.Capacity()) / 8, - indexedFields: NewSetFromLiteral[Field]("__all__"), - } + populate := func(_ *Series, preExistingBlooms iter.SizedIterator[*Bloom], _ ChunkRefs, ch chan *BloomCreation) { + for preExistingBlooms.Next() { + bloom := preExistingBlooms.At() ch <- &BloomCreation{ Bloom: bloom, - Info: stats, + Info: newIndexingInfo(), } } close(ch) @@ -264,21 +251,18 @@ func TestMergeBuilder(t *testing.T) { storeItr := iter.NewMapIter[SeriesWithBlooms, *Series]( iter.NewSliceIter[SeriesWithBlooms](data), func(swb SeriesWithBlooms) *Series { - return swb.Series + return &swb.Series.Series }, ) // Ensure that the merge builder combines all the blocks correctly - mergeBuilder := NewMergeBuilder(dedupedBlocks(blocks), storeItr, pop, NewMetrics(nil)) + mergeBuilder := NewMergeBuilder(dedupedBlocks(blocks), storeItr, populate, NewMetrics(nil)) indexBuf := bytes.NewBuffer(nil) bloomsBuf := bytes.NewBuffer(nil) writer := NewMemoryBlockWriter(indexBuf, bloomsBuf) reader := NewByteReader(indexBuf, bloomsBuf) - builder, err := NewBlockBuilder( - blockOpts, - writer, - ) + builder, err := NewBlockBuilder(blockOpts, writer) require.Nil(t, err) _, _, err = mergeBuilder.Build(builder) @@ -290,7 +274,11 @@ func TestMergeBuilder(t *testing.T) { EqualIterators[*SeriesWithBlooms]( t, func(a, b *SeriesWithBlooms) { - require.Equal(t, a.Series, b.Series, "expected %+v, got %+v", a, b) + require.Equal(t, a.Series.Series, b.Series.Series, "expected series %+v, got %+v", a.Series.Series, b.Series.Series) + require.Equal(t, a.Series.Meta.Fields, b.Series.Meta.Fields, "expected fields %+v, got %+v", a.Series.Meta.Fields, b.Series.Meta.Fields) + // TODO(chaudum): Investigate why offsets not match + // This has not been tested before, so I'm not too worried about something being broken. 
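// For reference, a BloomPopulatorFunc must send one BloomCreation per bloom
// and close the channel when done, exactly as `populate` above does. A minimal
// pass-through sketch with the same contract; the function name is
// illustrative only:
func examplePopulate(_ *Series, preExistingBlooms iter.SizedIterator[*Bloom], _ ChunkRefs, ch chan *BloomCreation) {
	for preExistingBlooms.Next() {
		ch <- &BloomCreation{Bloom: preExistingBlooms.At(), Info: newIndexingInfo()}
	}
	close(ch)
}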
+ // require.Equal(t, a.Series.Meta.Offsets, b.Series.Meta.Offsets, "expected offsets %+v, got %+v", a.Series.Meta.Offsets, b.Series.Meta.Offsets) }, iter.NewSliceIter[*SeriesWithBlooms](PointerSlice(data)), querier.Iter(), @@ -310,7 +298,7 @@ func TestMergeBuilderFingerprintCollision(t *testing.T) { blockOpts := BlockOptions{ Schema: Schema{ version: CurrentSchemaVersion, - encoding: chunkenc.EncSnappy, + encoding: compression.EncSnappy, }, SeriesPageSize: 100, BloomPageSize: 10 << 10, @@ -356,7 +344,7 @@ func TestMergeBuilderFingerprintCollision(t *testing.T) { } // We're not testing the ability to extend a bloom in this test - pop := func(s *Series, _ iter.SizedIterator[*Bloom], _ ChunkRefs, ch chan *BloomCreation) { + pop := func(_ *Series, _ iter.SizedIterator[*Bloom], _ ChunkRefs, ch chan *BloomCreation) { bloom := NewBloom() stats := indexingInfo{ sourceBytes: int(bloom.Capacity()) / 8, @@ -406,10 +394,8 @@ func TestBlockReset(t *testing.T) { reader := NewByteReader(indexBuf, bloomsBuf) schema := Schema{ - version: CurrentSchemaVersion, - encoding: chunkenc.EncSnappy, - nGramLength: 10, - nGramSkip: 2, + version: CurrentSchemaVersion, + encoding: compression.EncSnappy, } builder, err := NewBlockBuilder( @@ -464,10 +450,8 @@ func TestMergeBuilder_Roundtrip(t *testing.T) { blockOpts := BlockOptions{ Schema: Schema{ - version: CurrentSchemaVersion, - encoding: chunkenc.EncSnappy, // test with different encodings? - nGramLength: 4, // needs to match values from MkBasicSeriesWithBlooms - nGramSkip: 0, // needs to match values from MkBasicSeriesWithBlooms + version: CurrentSchemaVersion, + encoding: compression.EncSnappy, // test with different encodings? }, SeriesPageSize: 100, BloomPageSize: 10 << 10, @@ -519,11 +503,11 @@ func TestMergeBuilder_Roundtrip(t *testing.T) { return a.Series.Fingerprint == b.Fingerprint }, func(swb *SeriesWithBlooms) *Series { - return swb.Series + return &swb.Series.Series }, func(a *SeriesWithBlooms, b *Series) *Series { if len(a.Series.Chunks) > len(b.Chunks) { - return a.Series + return &a.Series.Series } return b }, diff --git a/pkg/storage/bloom/v1/filter/scalable.go b/pkg/storage/bloom/v1/filter/scalable.go index ca979632db1d..3e59a99b011f 100644 --- a/pkg/storage/bloom/v1/filter/scalable.go +++ b/pkg/storage/bloom/v1/filter/scalable.go @@ -116,6 +116,10 @@ func (s *ScalableBloomFilter) Count() (ct int) { return } +func (s *ScalableBloomFilter) IsEmpty() bool { + return s.Count() == 0 +} + // FillRatio returns the average ratio of set bits across every filter. func (s *ScalableBloomFilter) FillRatio() float64 { var sum, count float64 diff --git a/pkg/storage/bloom/v1/fuse.go b/pkg/storage/bloom/v1/fuse.go index 1ed9016bca04..f579ce652784 100644 --- a/pkg/storage/bloom/v1/fuse.go +++ b/pkg/storage/bloom/v1/fuse.go @@ -253,7 +253,7 @@ func (fq *FusedQuerier) Run() error { return nil } -func (fq *FusedQuerier) runSeries(schema Schema, series *SeriesWithMeta, reqs []Request) { +func (fq *FusedQuerier) runSeries(_ Schema, series *SeriesWithMeta, reqs []Request) { // For a given chunk|series to be removed, it must fail to match all blooms. // Because iterating/loading blooms can be expensive, we iterate blooms one at a time, collecting // the removals (failures) for each (bloom, chunk) pair. @@ -305,14 +305,18 @@ func (fq *FusedQuerier) runSeries(schema Schema, series *SeriesWithMeta, reqs [] // Test each bloom individually bloom := fq.bq.blooms.At() - // TODO(owen-d): this is a stopgap to avoid filtering broken blooms until we find their cause. 
+ // This is a stopgap to avoid filtering on empty blooms. // In the case we don't have any data in the bloom, don't filter any chunks. - if bloom.ScalableBloomFilter.Count() == 0 { - level.Warn(fq.logger).Log( - "msg", "Found bloom with no data", - "offset_page", offset.Page, - "offset_bytes", offset.ByteOffset, - ) + // Empty blooms are generated from chunks that do not have entries with structured metadata. + if bloom.IsEmpty() { + // To debug empty blooms, uncomment the following block. Note that this may produce *a lot* of logs. + // swb := fq.bq.At() + // level.Debug(fq.logger).Log( + // "msg", "empty bloom", + // "series", swb.Fingerprint, + // "offset_page", offset.Page, + // "offset_bytes", offset.ByteOffset, + // ) for j := range reqs { for k := range inputs[j].InBlooms { @@ -331,23 +335,16 @@ func (fq *FusedQuerier) runSeries(schema Schema, series *SeriesWithMeta, reqs [] continue } - // TODO(owen-d): copying this over, but they're going to be the same - // across any block schema because prefix len is determined by n-gram and - // all chunks have the same encoding length. tl;dr: it's weird/unnecessary to have - // these defined this way and recreated across each bloom - var ( - tokenBuf []byte - prefixLen int - ) for k, chk := range inputs[j].InBlooms { // if we've already found this chunk in a previous bloom, skip testing it if inputs[j].found[k] { continue } - // Get buf to concatenate the chunk and search token - tokenBuf, prefixLen = prefixedToken(schema.NGramLen(), chk, tokenBuf) - if matched := req.Search.MatchesWithPrefixBuf(bloom, tokenBuf, prefixLen); matched { + // TODO(rfratto): reuse buffer between multiple calls to + // prefixForChunkRef and MatchesWithPrefixBuf to avoid allocations. + tokenBuf := prefixForChunkRef(chk) + if matched := req.Search.MatchesWithPrefixBuf(bloom, tokenBuf, len(tokenBuf)); matched { inputs[j].found[k] = true } } diff --git a/pkg/storage/bloom/v1/fuse_test.go b/pkg/storage/bloom/v1/fuse_test.go index 3adcc6833e17..ec4f575fc22a 100644 --- a/pkg/storage/bloom/v1/fuse_test.go +++ b/pkg/storage/bloom/v1/fuse_test.go @@ -12,7 +12,7 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/require" - "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" v2 "github.com/grafana/loki/v3/pkg/iter/v2" "github.com/grafana/loki/v3/pkg/storage/bloom/v1/filter" "github.com/grafana/loki/v3/pkg/util/mempool" @@ -24,25 +24,26 @@ var BloomPagePool = mempool.New("test", []mempool.Bucket{ {Size: 16, Capacity: 512 << 10}, }, nil) -// TODO(owen-d): this is unhinged from the data it represents. I'm leaving this solely so I don't -// have to refactor tests here in order to fix this elsewhere, but it can/should be fixed -- -// the skip & n len are hardcoded based on data that's passed to it elsewhere. -type fakeNgramBuilder struct{} +type singleKeyTest []byte -func (f fakeNgramBuilder) N() int { return 4 } -func (f fakeNgramBuilder) SkipFactor() int { return 0 } +// Matches implements BloomTest. +func (s singleKeyTest) Matches(bloom filter.Checker) bool { + return bloom.Test(s) +} -func (f fakeNgramBuilder) Tokens(line string) v2.Iterator[[]byte] { - return v2.NewSliceIter[[]byte]([][]byte{[]byte(line)}) +// MatchesWithPrefixBuf implements BloomTest. 
+func (s singleKeyTest) MatchesWithPrefixBuf(bloom filter.Checker, buf []byte, prefixLen int) bool { + return bloom.Test(append(buf[:prefixLen], s...)) } +// compiler check +var _ BloomTest = singleKeyTest("") + func keysToBloomTest(keys [][]byte) BloomTest { - var tokenizer fakeNgramBuilder tests := make(BloomTests, 0, len(keys)) for _, key := range keys { - tests = append(tests, newStringTest(tokenizer, string(key))) + tests = append(tests, singleKeyTest(key)) } - return tests } @@ -53,13 +54,13 @@ func TestFusedQuerier(t *testing.T) { writer := NewMemoryBlockWriter(indexBuf, bloomsBuf) reader := NewByteReader(indexBuf, bloomsBuf) numSeries := 1000 - data, keys := MkBasicSeriesWithBlooms(numSeries, 0x0000, 0xffff, 0, 10000) + data, _ := MkBasicSeriesWithBlooms(numSeries, 0x0000, 0xffff, 0, 10000) builder, err := NewBlockBuilder( BlockOptions{ Schema: Schema{ version: CurrentSchemaVersion, - encoding: chunkenc.EncSnappy, + encoding: compression.EncSnappy, }, SeriesPageSize: 100, BloomPageSize: 10 << 10, @@ -89,7 +90,7 @@ func TestFusedQuerier(t *testing.T) { Fp: data[idx].Series.Fingerprint, Chks: data[idx].Series.Chunks, Response: ch, - Search: keysToBloomTest(keys[idx]), + Search: singleKeyTest("trace_id"), }) } inputs = append(inputs, reqs) @@ -130,20 +131,13 @@ func TestFusedQuerier(t *testing.T) { for i, input := range inputs { for j, req := range input { resp := resps[i][j] - require.Equal( - t, - Output{ - Fp: req.Fp, - Removals: nil, - }, - resp, - ) + require.Equal(t, Output{Fp: req.Fp, Removals: nil}, resp) } } } // Successfully query series across multiple pages as well as series that only occupy 1 bloom -func TestFuseMultiPage(t *testing.T) { +func TestFusedQuerier_MultiPage(t *testing.T) { indexBuf := bytes.NewBuffer(nil) bloomsBuf := bytes.NewBuffer(nil) writer := NewMemoryBlockWriter(indexBuf, bloomsBuf) @@ -152,10 +146,8 @@ func TestFuseMultiPage(t *testing.T) { builder, err := NewBlockBuilder( BlockOptions{ Schema: Schema{ - version: CurrentSchemaVersion, - encoding: chunkenc.EncSnappy, - nGramLength: 3, // we test trigrams - nGramSkip: 0, + version: CurrentSchemaVersion, + encoding: compression.EncSnappy, }, SeriesPageSize: 100, BloomPageSize: 10, // So we force one bloom per page @@ -170,32 +162,32 @@ func TestFuseMultiPage(t *testing.T) { Through: 10, Checksum: 0, } - series := &Series{ + series := Series{ Fingerprint: fp, Chunks: []ChunkRef{chk}, } - buf, prefixLn := prefixedToken(3, chk, nil) + buf := prefixForChunkRef(chk) b1 := &Bloom{ *filter.NewScalableBloomFilter(1024, 0.01, 0.8), } key1, key2 := []byte("foo"), []byte("bar") b1.Add(key1) - b1.Add(append(buf[:prefixLn], key1...)) + b1.Add(append(buf, key1...)) b2 := &Bloom{ *filter.NewScalableBloomFilter(1024, 0.01, 0.8), } b2.Add(key2) - b2.Add(append(buf[:prefixLn], key2...)) + b2.Add(append(buf, key2...)) _, err = builder.BuildFrom(v2.NewSliceIter([]SeriesWithBlooms{ { - series, - v2.NewSliceIter([]*Bloom{ - b1, b2, - }), + Series: &SeriesWithMeta{ + Series: series, + }, + Blooms: v2.NewSliceIter([]*Bloom{b1, b2}), }, })) require.NoError(t, err) @@ -215,13 +207,11 @@ func TestFuseMultiPage(t *testing.T) { chans[i] = make(chan Output, 1) // buffered once to not block in test } - req := func(ngram []byte, ch chan Output) Request { + req := func(key []byte, ch chan Output) Request { return Request{ - Fp: fp, - Chks: []ChunkRef{chk}, - Search: stringTest{ - ngrams: [][]byte{ngram}, - }, + Fp: fp, + Chks: []ChunkRef{chk}, + Search: singleKeyTest(key), Response: ch, Recorder: NewBloomRecorder(context.Background(), 
"unknown"), } @@ -267,7 +257,7 @@ func TestLazyBloomIter_Seek_ResetError(t *testing.T) { numSeries := 4 data := make([]SeriesWithBlooms, 0, numSeries) - tokenizer := NewNGramTokenizer(4, 0) + for i := 0; i < numSeries; i++ { var series Series series.Fingerprint = model.Fingerprint(i) @@ -279,8 +269,7 @@ func TestLazyBloomIter_Seek_ResetError(t *testing.T) { }, } - var bloom Bloom - bloom.ScalableBloomFilter = *filter.NewScalableBloomFilter(1024, 0.01, 0.8) + bloom := NewBloom() nLines := 10 // all even series will have a larger bloom (more than 1 filter) @@ -291,17 +280,15 @@ func TestLazyBloomIter_Seek_ResetError(t *testing.T) { } for j := 0; j < nLines; j++ { - line := fmt.Sprintf("%04x:%04x", i, j) - it := tokenizer.Tokens(line) - for it.Next() { - key := it.At() - bloom.Add(key) - } + key := fmt.Sprintf("%04x:%04x", i, j) + bloom.Add([]byte(key)) } data = append(data, SeriesWithBlooms{ - Series: &series, - Blooms: v2.NewSliceIter([]*Bloom{&bloom}), + Series: &SeriesWithMeta{ + Series: series, + }, + Blooms: v2.NewSliceIter([]*Bloom{bloom}), }) } @@ -309,7 +296,7 @@ func TestLazyBloomIter_Seek_ResetError(t *testing.T) { BlockOptions{ Schema: Schema{ version: CurrentSchemaVersion, - encoding: chunkenc.EncSnappy, + encoding: compression.EncSnappy, }, SeriesPageSize: 100, BloomPageSize: 10, // So we force one series per page @@ -356,7 +343,7 @@ func TestLazyBloomIter_Seek_ResetError(t *testing.T) { } } -func TestFusedQuerierSkipsEmptyBlooms(t *testing.T) { +func TestFusedQuerier_SkipsEmptyBlooms(t *testing.T) { // references for linking in memory reader+writer indexBuf := bytes.NewBuffer(nil) bloomsBuf := bytes.NewBuffer(nil) @@ -367,7 +354,7 @@ func TestFusedQuerierSkipsEmptyBlooms(t *testing.T) { BlockOptions{ Schema: Schema{ version: CurrentSchemaVersion, - encoding: chunkenc.EncNone, + encoding: compression.EncNone, }, SeriesPageSize: 100, BloomPageSize: 10 << 10, @@ -377,22 +364,19 @@ func TestFusedQuerierSkipsEmptyBlooms(t *testing.T) { require.Nil(t, err) data := SeriesWithBlooms{ - Series: &Series{ - Fingerprint: 0, - Chunks: []ChunkRef{ - { - From: 0, - Through: 10, - Checksum: 0x1234, + Series: &SeriesWithMeta{ + Series: Series{ + Fingerprint: 0, + Chunks: []ChunkRef{ + { + From: 0, + Through: 10, + Checksum: 0x1234, + }, }, }, }, - Blooms: v2.NewSliceIter([]*Bloom{ - // simulate empty bloom - { - *filter.NewScalableBloomFilter(1024, 0.01, 0.8), - }, - }), + Blooms: v2.NewSliceIter([]*Bloom{NewBloom()}), } itr := v2.NewSliceIter[SeriesWithBlooms]([]SeriesWithBlooms{data}) @@ -431,7 +415,7 @@ func setupBlockForBenchmark(b *testing.B) (*BlockQuerier, [][]Request, []chan Ou BlockOptions{ Schema: Schema{ version: CurrentSchemaVersion, - encoding: chunkenc.EncSnappy, + encoding: compression.EncSnappy, }, SeriesPageSize: 256 << 10, // 256k BloomPageSize: 1 << 20, // 1MB diff --git a/pkg/storage/bloom/v1/schema.go b/pkg/storage/bloom/v1/schema.go index 6fd862165423..954c96f757d6 100644 --- a/pkg/storage/bloom/v1/schema.go +++ b/pkg/storage/bloom/v1/schema.go @@ -6,7 +6,7 @@ import ( "github.com/pkg/errors" - "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/util/encoding" ) @@ -38,22 +38,19 @@ var ( ) type Schema struct { - version Version - encoding chunkenc.Encoding - nGramLength, nGramSkip uint64 + version Version + encoding compression.Encoding } func NewSchema() Schema { return Schema{ - version: CurrentSchemaVersion, - encoding: chunkenc.EncNone, - nGramLength: 0, - nGramSkip: 0, + version: 
CurrentSchemaVersion, + encoding: compression.EncNone, } } func (s Schema) String() string { - return fmt.Sprintf("%s,encoding=%s,ngram=%d,skip=%d", s.version, s.encoding, s.nGramLength, s.nGramSkip) + return fmt.Sprintf("%s,encoding=%s", s.version, s.encoding) } func (s Schema) Compatible(other Schema) bool { @@ -64,26 +61,18 @@ func (s Schema) Version() Version { return s.version } -func (s Schema) NGramLen() int { - return int(s.nGramLength) -} - -func (s Schema) NGramSkip() int { - return int(s.nGramSkip) -} - // byte length func (s Schema) Len() int { - // magic number + version + encoding + ngram length + ngram skip - return 4 + 1 + 1 + 8 + 8 + // magic number + version + encoding + return 4 + 1 + 1 } -func (s *Schema) DecompressorPool() chunkenc.ReaderPool { - return chunkenc.GetReaderPool(s.encoding) +func (s *Schema) DecompressorPool() compression.ReaderPool { + return compression.GetReaderPool(s.encoding) } -func (s *Schema) CompressorPool() chunkenc.WriterPool { - return chunkenc.GetWriterPool(s.encoding) +func (s *Schema) CompressorPool() compression.WriterPool { + return compression.GetWriterPool(s.encoding) } func (s *Schema) Encode(enc *encoding.Encbuf) { @@ -91,8 +80,6 @@ func (s *Schema) Encode(enc *encoding.Encbuf) { enc.PutBE32(magicNumber) enc.PutByte(byte(s.version)) enc.PutByte(byte(s.encoding)) - enc.PutBE64(s.nGramLength) - enc.PutBE64(s.nGramSkip) } @@ -118,13 +105,10 @@ func (s *Schema) Decode(dec *encoding.Decbuf) error { return errors.Errorf("invalid version. expected %d, got %d", 3, s.version) } - s.encoding = chunkenc.Encoding(dec.Byte()) - if _, err := chunkenc.ParseEncoding(s.encoding.String()); err != nil { + s.encoding = compression.Encoding(dec.Byte()) + if _, err := compression.ParseEncoding(s.encoding.String()); err != nil { return errors.Wrap(err, "parsing encoding") } - s.nGramLength = dec.Be64() - s.nGramSkip = dec.Be64() - return dec.Err() } diff --git a/pkg/storage/bloom/v1/test_util.go b/pkg/storage/bloom/v1/test_util.go index c08209ce0380..e8997a8cc241 100644 --- a/pkg/storage/bloom/v1/test_util.go +++ b/pkg/storage/bloom/v1/test_util.go @@ -9,9 +9,10 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/require" - "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" iter "github.com/grafana/loki/v3/pkg/iter/v2" - "github.com/grafana/loki/v3/pkg/storage/bloom/v1/filter" + + "github.com/grafana/loki/pkg/push" ) // TODO(owen-d): this should probably be in it's own testing-util package @@ -28,10 +29,8 @@ func MakeBlock(t testing.TB, nth int, fromFp, throughFp model.Fingerprint, fromT builder, err := NewBlockBuilder( BlockOptions{ Schema: Schema{ - version: CurrentSchemaVersion, - encoding: chunkenc.EncSnappy, - nGramLength: 4, // see DefaultNGramLength in bloom_tokenizer_test.go - nGramSkip: 0, // see DefaultNGramSkip in bloom_tokenizer_test.go + version: CurrentSchemaVersion, + encoding: compression.EncSnappy, }, SeriesPageSize: 100, BloomPageSize: 10 << 10, @@ -46,83 +45,73 @@ func MakeBlock(t testing.TB, nth int, fromFp, throughFp model.Fingerprint, fromT return block, data, keys } -// This is a helper type used in tests that buffers blooms and can be turned into -// the commonly used iterator form *SeriesWithBlooms. 
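// A condensed sketch of how MkBasicSeriesWithBlooms below fills a bloom from
// one structured metadata entry: the encoded chunk ref becomes the tokenizer
// prefix and every emitted token is added to the filter. The function name is
// illustrative only:
func exampleAddMetadataToBloom(bloom *Bloom, chk ChunkRef, kv push.LabelAdapter) {
	tokenizer := NewStructuredMetadataTokenizer(string(prefixForChunkRef(chk)))
	it := tokenizer.Tokens(kv)
	for it.Next() {
		bloom.Add([]byte(it.At()))
	}
}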
-type SeriesWithLiteralBlooms struct { - Series *Series - Blooms []*Bloom -} - -func (s *SeriesWithLiteralBlooms) SeriesWithBlooms() SeriesWithBlooms { +func newSeriesWithBlooms(series Series, blooms []*Bloom) SeriesWithBlooms { + offsets := make([]BloomOffset, 0, len(blooms)) + for i := range blooms { + offsets = append(offsets, BloomOffset{Page: i, ByteOffset: 0}) + } return SeriesWithBlooms{ - Series: s.Series, - Blooms: iter.NewSliceIter[*Bloom](s.Blooms), + Series: &SeriesWithMeta{ + Series: series, + Meta: Meta{ + Fields: NewSetFromLiteral[Field]("trace_id"), + Offsets: offsets, + }, + }, + Blooms: iter.NewSliceIter(blooms), } } -func MkBasicSeriesWithBlooms(nSeries int, fromFp, throughFp model.Fingerprint, fromTs, throughTs model.Time) (seriesList []SeriesWithBlooms, keysList [][][]byte) { - series, keys := MkBasicSeriesWithLiteralBlooms(nSeries, fromFp, throughFp, fromTs, throughTs) - mapped := make([]SeriesWithBlooms, 0, len(series)) - for _, s := range series { - mapped = append(mapped, s.SeriesWithBlooms()) - } - - return mapped, keys -} +func MkBasicSeriesWithBlooms(nSeries int, fromFp, throughFp model.Fingerprint, fromTs, throughTs model.Time) ([]SeriesWithBlooms, [][][]byte) { + // return values + seriesList := make([]SeriesWithBlooms, 0, nSeries) + keysList := make([][][]byte, 0, nSeries) -func MkBasicSeriesWithLiteralBlooms(nSeries int, fromFp, throughFp model.Fingerprint, fromTs, throughTs model.Time) (seriesList []SeriesWithLiteralBlooms, keysList [][][]byte) { - const nGramLen = 4 - seriesList = make([]SeriesWithLiteralBlooms, 0, nSeries) - keysList = make([][][]byte, 0, nSeries) + numChunksPerSeries := 10 + numBloomsPerSeries := 2 step := (throughFp - fromFp) / model.Fingerprint(nSeries) - timeDelta := time.Duration(throughTs.Sub(fromTs).Nanoseconds() / int64(nSeries)) + timeDelta := time.Duration(throughTs.Sub(fromTs).Nanoseconds() / int64(numChunksPerSeries)) - tokenizer := NewNGramTokenizer(nGramLen, 0) for i := 0; i < nSeries; i++ { var series Series + var blooms []*Bloom + series.Fingerprint = fromFp + model.Fingerprint(i)*step - from := fromTs.Add(timeDelta * time.Duration(i)) - series.Chunks = []ChunkRef{ - { - From: from, - Through: from.Add(timeDelta), - Checksum: uint32(i), - }, + for from := fromTs; from < throughTs; from = from.Add(timeDelta) { + series.Chunks = append(series.Chunks, + ChunkRef{ + From: from, + Through: from.Add(timeDelta), + }, + ) } - var bloom Bloom - bloom.ScalableBloomFilter = *filter.NewScalableBloomFilter(1024, 0.01, 0.8) - keys := make([][]byte, 0, int(step)) - for _, chk := range series.Chunks { - tokenBuf, prefixLen := prefixedToken(nGramLen, chk, nil) - for j := 0; j < int(step); j++ { - line := fmt.Sprintf("%04x:%04x", int(series.Fingerprint), j) - it := tokenizer.Tokens(line) + chunkBatchSize := (series.Chunks.Len() + numBloomsPerSeries - 1) / numBloomsPerSeries + for j := 0; j < numBloomsPerSeries; j++ { + bloom := NewBloom() + + batchStart, batchEnd := j*chunkBatchSize, min(series.Chunks.Len(), (j+1)*chunkBatchSize) + for x, chk := range series.Chunks[batchStart:batchEnd] { + tokenizer := NewStructuredMetadataTokenizer(string(prefixForChunkRef(chk))) + kv := push.LabelAdapter{Name: "trace_id", Value: fmt.Sprintf("%s:%04x", series.Fingerprint, j*chunkBatchSize+x)} + it := tokenizer.Tokens(kv) for it.Next() { - key := it.At() - // series-level key + key := []byte(it.At()) bloom.Add(key) - - // chunk-level key - tokenBuf = append(tokenBuf[:prefixLen], key...) 
- bloom.Add(tokenBuf) - - keyCopy := key - keys = append(keys, keyCopy) + keys = append(keys, key) } } + blooms = append(blooms, bloom) } - seriesList = append(seriesList, SeriesWithLiteralBlooms{ - Series: &series, - Blooms: []*Bloom{&bloom}, - }) + seriesList = append(seriesList, newSeriesWithBlooms(series, blooms)) keysList = append(keysList, keys) } - return + + return seriesList, keysList } func EqualIterators[T any](t *testing.T, test func(a, b T), expected, actual iter.Iterator[T]) { diff --git a/pkg/storage/bloom/v1/tokenizer.go b/pkg/storage/bloom/v1/tokenizer.go index bf7e12983b6b..59654747832d 100644 --- a/pkg/storage/bloom/v1/tokenizer.go +++ b/pkg/storage/bloom/v1/tokenizer.go @@ -2,14 +2,10 @@ package v1 import ( "fmt" - "unicode/utf8" - "github.com/grafana/loki/pkg/push" iter "github.com/grafana/loki/v3/pkg/iter/v2" -) -const ( - MaxRuneLen = 4 + "github.com/grafana/loki/pkg/push" ) type StructuredMetadataTokenizer struct { @@ -25,7 +21,6 @@ func NewStructuredMetadataTokenizer(prefix string) *StructuredMetadataTokenizer } } -// Tokens implements the NGramBuilder interface func (t *StructuredMetadataTokenizer) Tokens(kv push.LabelAdapter) iter.Iterator[string] { combined := fmt.Sprintf("%s=%s", kv.Name, kv.Value) t.tokens = append(t.tokens[:0], @@ -35,118 +30,3 @@ func (t *StructuredMetadataTokenizer) Tokens(kv push.LabelAdapter) iter.Iterator ) return iter.NewSliceIter(t.tokens) } - -func reassemble(buf []rune, ln, pos int, result []byte) []byte { - result = result[:0] // Reset the result slice - for i := 0; i < ln; i++ { - cur := pos % len(buf) - pos++ - result = utf8.AppendRune(result, buf[cur]) - } - return result -} - -// Iterable variants (more performant, less space) -type NGramTokenizer struct { - n, skip int - buffer []rune // circular buffer used for ngram generation - res []byte // buffer used for token generation -} - -func (t *NGramTokenizer) N() int { - return t.n -} - -func (t *NGramTokenizer) SkipFactor() int { - return t.skip -} - -/* -N-Grams (https://en.wikipedia.org/wiki/N-gram) are a series of 'n' adjacent characters in a string. -These will be utilized for the bloom filters to allow for fuzzy searching. -*/ -func NewNGramTokenizer(n, skip int) *NGramTokenizer { - t := &NGramTokenizer{ - n: n, - skip: skip, - buffer: make([]rune, n+skip), - res: make([]byte, 0, n*MaxRuneLen), // maximum 4 bytes per rune - } - - return t -} - -// Token implements the NGramBuilder interface -// The Token iterator uses shared buffers for performance. The []byte returned by At() -// is not safe for use after subsequent calls to Next() -func (t *NGramTokenizer) Tokens(line string) iter.Iterator[[]byte] { - return &NGramTokenIter{ - n: t.N(), - skip: t.SkipFactor(), - - line: line, - - buffer: t.buffer, - res: t.res, - } -} - -type NGramTokenIter struct { - n, skip int - - runeIndex, offset int - line string // source - - buffer []rune // circular buffers used for ngram generation - res []byte -} - -func (t *NGramTokenIter) Next() bool { - for i, r := range t.line[t.offset:] { - t.buffer[t.runeIndex%len(t.buffer)] = r - t.runeIndex++ - - if t.runeIndex < t.n { - continue - } - - // if the start of the ngram is at the interval of our skip factor, emit it. 
- // we increment the skip due to modulo logic: - // because `n % 0 is a divide by zero and n % 1 is always 0` - if (t.runeIndex-t.n)%(t.skip+1) == 0 { - // update the offset, but don't go past the end of the line; - // for instance invalid utf-8 - t.offset = min(len(t.line), t.offset+i+utf8.RuneLen(r)) - return true - } - - } - return false -} - -func (t *NGramTokenIter) At() []byte { - return reassemble(t.buffer, t.n, (t.runeIndex-t.n)%len(t.buffer), t.res[:0]) -} - -func (t *NGramTokenIter) Err() error { - return nil -} - -type PrefixedTokenIter struct { - buf []byte - prefixLen int - - iter.Iterator[[]byte] -} - -func (t *PrefixedTokenIter) At() []byte { - return append(t.buf[:t.prefixLen], t.Iterator.At()...) -} - -func NewPrefixedTokenIter(buf []byte, prefixLn int, itr iter.Iterator[[]byte]) *PrefixedTokenIter { - return &PrefixedTokenIter{ - buf: buf, - prefixLen: prefixLn, - Iterator: itr, - } -} diff --git a/pkg/storage/bloom/v1/tokenizer_test.go b/pkg/storage/bloom/v1/tokenizer_test.go index e95e4649bd3e..0aeb0ba1f551 100644 --- a/pkg/storage/bloom/v1/tokenizer_test.go +++ b/pkg/storage/bloom/v1/tokenizer_test.go @@ -2,237 +2,13 @@ package v1 import ( "testing" - "unicode/utf8" "github.com/stretchr/testify/require" - "github.com/grafana/loki/pkg/push" v2 "github.com/grafana/loki/v3/pkg/iter/v2" -) - -const BigFile = "../../../logql/sketch/testdata/war_peace.txt" - -func TestNGramIterator(t *testing.T) { - t.Parallel() - var ( - three = NewNGramTokenizer(3, 0) - threeSkip1 = NewNGramTokenizer(3, 1) - threeSkip3 = NewNGramTokenizer(3, 3) - ) - - for _, tc := range []struct { - desc string - t *NGramTokenizer - input string - exp []string - }{ - { - t: three, - input: "", - exp: []string{}, - }, - { - t: three, - input: "ab", - exp: []string{}, - }, - { - t: three, - input: "abcdefg", - exp: []string{"abc", "bcd", "cde", "def", "efg"}, - }, - { - t: threeSkip1, - input: "abcdefg", - exp: []string{"abc", "cde", "efg"}, - }, - { - t: threeSkip3, - input: "abcdefgh", - exp: []string{"abc", "efg"}, - }, - { - t: three, - input: "日本語", - exp: []string{"日本語"}, - }, - { - t: four, - input: "日本語日本語", - exp: []string{ - "日本語日", - "本語日本", - "語日本語"}, - }, - } { - t.Run(tc.desc, func(t *testing.T) { - itr := tc.t.Tokens(tc.input) - for _, exp := range tc.exp { - require.True(t, itr.Next()) - require.Equal(t, exp, string(itr.At())) - } - require.False(t, itr.Next()) - }) - } -} - -// Mainly this ensures we don't panic when a string ends in invalid utf8 -func TestInvalidUTF8(t *testing.T) { - x := NewNGramTokenizer(3, 0) - - input := "abc\x80" - require.False(t, utf8.ValidString(input)) - itr := x.Tokens(input) - require.True(t, itr.Next()) - require.Equal(t, []byte("abc"), itr.At()) - require.True(t, itr.Next()) - // we don't really care about the final rune returned and it's probably not worth the perf cost - // to check for it - require.Equal(t, []byte{0x62, 0x63, 0xef, 0xbf, 0xbd}, itr.At()) - require.False(t, itr.Next()) -} -func TestPrefixedIterator(t *testing.T) { - t.Parallel() - var ( - three = NewNGramTokenizer(3, 0) - ) - - for _, tc := range []struct { - desc string - input string - exp []string - }{ - { - input: "", - exp: []string{}, - }, - { - input: "ab", - exp: []string{}, - }, - { - input: "abcdefg", - exp: []string{"0123abc", "0123bcd", "0123cde", "0123def", "0123efg"}, - }, - - { - input: "日本語", - exp: []string{"0123日本語"}, - }, - } { - prefix := []byte("0123") - t.Run(tc.desc, func(t *testing.T) { - itr := NewPrefixedTokenIter(prefix, len(prefix), three.Tokens(tc.input)) - for 
_, exp := range tc.exp { - require.True(t, itr.Next()) - require.Equal(t, exp, string(itr.At())) - } - require.False(t, itr.Next()) - }) - } -} - -const lorem = ` -lorum ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore et dolore magna -aliqua ut enim ad minim veniam quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat -duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur excepteur -sint occaecat cupidatat non proident sunt in culpa qui officia deserunt mollit anim id est -laborum ipsum dolor sit amet consectetur adipiscing elit sed do eiusmod tempor incididunt ut labore et dolore magna -aliqua ut enim ad minim veniam quis nostrud exercitation ullamco laboris nisi ut aliquip ex ea commodo consequat -duis aute irure dolor in reprehenderit in voluptate velit esse cillum dolore eu fugiat nulla pariatur excepteur -sint occaecat cupidatat non proident sunt in culpa qui officia deserunt mollit anim id est -` - -func BenchmarkTokens(b *testing.B) { - var ( - v2Three = NewNGramTokenizer(3, 0) - v2ThreeSkip1 = NewNGramTokenizer(3, 1) - ) - - type impl struct { - desc string - f func() - } - type tc struct { - desc string - impls []impl - } - for _, tc := range []tc{ - { - desc: "three", - impls: []impl{ - { - desc: "v2", - f: func() { - itr := v2Three.Tokens(lorem) - for itr.Next() { - _ = itr.At() - } - }, - }, - }, - }, - { - desc: "threeSkip1", - impls: []impl{ - { - desc: "v2", - f: func() { - itr := v2ThreeSkip1.Tokens(lorem) - for itr.Next() { - _ = itr.At() - } - }, - }, - }, - }, - { - desc: "threeChunk", - impls: []impl{ - { - desc: "v2", - f: func() func() { - buf, prefixLn := prefixedToken(v2Three.N(), ChunkRef{}, nil) - return func() { - itr := NewPrefixedTokenIter(buf, prefixLn, v2Three.Tokens(lorem)) - for itr.Next() { - _ = itr.At() - } - } - }(), - }, - }, - }, - { - desc: "threeSkip1Chunk", - impls: []impl{ - { - desc: "v2", - f: func() func() { - buf, prefixLn := prefixedToken(v2Three.N(), ChunkRef{}, nil) - return func() { - itr := NewPrefixedTokenIter(buf, prefixLn, v2ThreeSkip1.Tokens(lorem)) - for itr.Next() { - _ = itr.At() - } - } - }(), - }, - }, - }, - } { - b.Run(tc.desc, func(b *testing.B) { - for _, impl := range tc.impls { - b.Run(impl.desc, func(b *testing.B) { - for i := 0; i < b.N; i++ { - impl.f() - } - }) - } - }) - } -} + "github.com/grafana/loki/pkg/push" +) func TestStructuredMetadataTokenizer(t *testing.T) { tokenizer := NewStructuredMetadataTokenizer("chunk") diff --git a/pkg/storage/bloom/v1/versioned_builder.go b/pkg/storage/bloom/v1/versioned_builder.go index 9162fe5b5d95..4f1881c441e7 100644 --- a/pkg/storage/bloom/v1/versioned_builder.go +++ b/pkg/storage/bloom/v1/versioned_builder.go @@ -37,7 +37,7 @@ type V3Builder struct { } type SeriesWithBlooms struct { - Series *Series + Series *SeriesWithMeta Blooms iter.SizedIterator[*Bloom] } @@ -81,11 +81,7 @@ func (b *V3Builder) BuildFrom(itr iter.Iterator[SeriesWithBlooms]) (uint32, erro return 0, errors.Wrap(err, "iterating blooms") } - // TODO(chaudum): Use the indexed fields from bloom creation. 
- fields := NewSet[Field](1) - fields.Add("__line__") - - blockFull, err := b.AddSeries(*at.Series, offsets, fields) + blockFull, err := b.AddSeries(at.Series.Series, offsets, at.Series.Meta.Fields) if err != nil { return 0, errors.Wrapf(err, "writing series") } diff --git a/pkg/storage/bloom/v1/versioned_builder_test.go b/pkg/storage/bloom/v1/versioned_builder_test.go index 01a5aa354f75..07240fe60358 100644 --- a/pkg/storage/bloom/v1/versioned_builder_test.go +++ b/pkg/storage/bloom/v1/versioned_builder_test.go @@ -6,7 +6,7 @@ import ( "github.com/stretchr/testify/require" - "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" v2 "github.com/grafana/loki/v3/pkg/iter/v2" "github.com/grafana/loki/v3/pkg/util/encoding" "github.com/grafana/loki/v3/pkg/util/mempool" @@ -14,13 +14,11 @@ import ( // smallBlockOpts returns a set of block options that are suitable for testing // characterized by small page sizes -func smallBlockOpts(v Version, enc chunkenc.Encoding) BlockOptions { +func smallBlockOpts(v Version, enc compression.Encoding) BlockOptions { return BlockOptions{ Schema: Schema{ - version: v, - encoding: enc, - nGramLength: 4, - nGramSkip: 0, + version: v, + encoding: enc, }, SeriesPageSize: 100, BloomPageSize: 2 << 10, @@ -28,72 +26,58 @@ } } -func setup(v Version) (BlockOptions, []SeriesWithLiteralBlooms, BlockWriter, BlockReader) { +func setup(v Version) (BlockOptions, []SeriesWithBlooms, BlockWriter, BlockReader) { numSeries := 100 - data, _ := MkBasicSeriesWithLiteralBlooms(numSeries, 0, 0xffff, 0, 10000) + data, _ := MkBasicSeriesWithBlooms(numSeries, 0, 0xffff, 0, 10000) indexBuf := bytes.NewBuffer(nil) bloomsBuf := bytes.NewBuffer(nil) writer := NewMemoryBlockWriter(indexBuf, bloomsBuf) reader := NewByteReader(indexBuf, bloomsBuf) - return smallBlockOpts(v, chunkenc.EncNone), data, writer, reader + return smallBlockOpts(v, compression.EncNone), data, writer, reader } func TestV3Roundtrip(t *testing.T) { - opts, data, writer, reader := setup(V3) + opts, sourceData, writer, reader := setup(V3) - data, err := v2.Collect( - v2.NewMapIter[SeriesWithLiteralBlooms, SeriesWithLiteralBlooms]( - v2.NewSliceIter(data), - func(swlb SeriesWithLiteralBlooms) SeriesWithLiteralBlooms { - return SeriesWithLiteralBlooms{ - Series: swlb.Series, - // hack(owen-d): data currently only creates one bloom per series, but I want to test multiple. - // we're not checking the contents here, so ensuring the same bloom is used twice is fine. - Blooms: []*Bloom{swlb.Blooms[0], swlb.Blooms[0]}, - } - }, - ), - ) - require.NoError(t, err) + // SeriesWithBlooms holds an iterator of blooms, + // which will be exhausted after being consumed by the block builder; + // therefore we need a deep copy of the original data, or - and that's easier to achieve - + // we simply create the same data twice.
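// To make the single-use behavior concrete: Blooms is an iter.SizedIterator,
// so once BuildFrom has drained it there is nothing left to compare against.
// A hedged sketch, assuming some value `swb` of type SeriesWithBlooms:
//
//	for swb.Blooms.Next() { _ = swb.Blooms.At() } // drained, e.g. by the builder
//	ok := swb.Blooms.Next()                       // ok is now always false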
+ _, unmodifiedData, _, _ := setup(V3) b, err := NewBlockBuilderV3(opts, writer) require.NoError(t, err) - mapped := v2.NewMapIter[SeriesWithLiteralBlooms]( - v2.NewSliceIter(data), - func(s SeriesWithLiteralBlooms) SeriesWithBlooms { - return s.SeriesWithBlooms() - }, - ) - - _, err = b.BuildFrom(mapped) + _, err = b.BuildFrom(v2.NewSliceIter(sourceData)) require.NoError(t, err) // Ensure Equality block := NewBlock(reader, NewMetrics(nil)) querier := NewBlockQuerier(block, &mempool.SimpleHeapAllocator{}, DefaultMaxPageSize).Iter() - CompareIterators[SeriesWithLiteralBlooms, *SeriesWithBlooms]( + CompareIterators[SeriesWithBlooms, *SeriesWithBlooms]( t, - func(t *testing.T, a SeriesWithLiteralBlooms, b *SeriesWithBlooms) { - require.Equal(t, a.Series, b.Series) // ensure series equality - bs, err := v2.Collect(b.Blooms) + func(t *testing.T, a SeriesWithBlooms, b *SeriesWithBlooms) { + require.Equal(t, a.Series.Series.Fingerprint, b.Series.Series.Fingerprint) + require.ElementsMatch(t, a.Series.Series.Chunks, b.Series.Series.Chunks) + bloomsA, err := v2.Collect(a.Blooms) + require.NoError(t, err) + bloomsB, err := v2.Collect(b.Blooms) require.NoError(t, err) - // ensure we only have one bloom in v1 - require.Equal(t, 2, len(a.Blooms)) - require.Equal(t, 2, len(bs)) + require.Equal(t, 2, len(bloomsA)) + require.Equal(t, 2, len(bloomsB)) var encA, encB encoding.Encbuf - for i := range a.Blooms { - require.NoError(t, a.Blooms[i].Encode(&encA)) - require.NoError(t, bs[i].Encode(&encB)) + for i := range bloomsA { + require.NoError(t, bloomsA[i].Encode(&encA)) + require.NoError(t, bloomsB[i].Encode(&encB)) require.Equal(t, encA.Get(), encB.Get()) encA.Reset() encB.Reset() } }, - v2.NewSliceIter(data), + v2.NewSliceIter(unmodifiedData), querier, ) } diff --git a/pkg/storage/chunk/cache/cache_test.go b/pkg/storage/chunk/cache/cache_test.go index c6ab61666b88..2f236c1f40e4 100644 --- a/pkg/storage/chunk/cache/cache_test.go +++ b/pkg/storage/chunk/cache/cache_test.go @@ -15,6 +15,7 @@ import ( "github.com/stretchr/testify/require" "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/storage/chunk" "github.com/grafana/loki/v3/pkg/storage/chunk/cache" @@ -34,7 +35,7 @@ func fillCache(t *testing.T, scfg config.SchemaConfig, cache cache.Cache) ([]str for i := 0; i < 111; i++ { ts := model.TimeFromUnix(int64(i * chunkLen)) - cs := chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, chunkenc.EncGZIP, chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt, 256*1024, 0) + cs := chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, compression.EncGZIP, chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt, 256*1024, 0) _, err := cs.Append(&logproto.Entry{ Timestamp: ts.Time(), diff --git a/pkg/storage/chunk/chunk.go b/pkg/storage/chunk/chunk.go index 6f050f8cbd01..aadfe6ea937b 100644 --- a/pkg/storage/chunk/chunk.go +++ b/pkg/storage/chunk/chunk.go @@ -5,7 +5,6 @@ import ( "encoding/binary" "fmt" "hash/crc32" - "reflect" "strconv" "strings" "sync" @@ -215,11 +214,7 @@ func readOneHexPart(hex []byte) (part []byte, i int) { } func unsafeGetBytes(s string) []byte { - var buf []byte - p := unsafe.Pointer(&buf) - *(*string)(p) = s - (*reflect.SliceHeader)(p).Cap = len(s) - return buf + return unsafe.Slice(unsafe.StringData(s), len(s)) } func unsafeGetString(buf []byte) string { diff --git a/pkg/storage/chunk/client/alibaba/oss_object_client.go b/pkg/storage/chunk/client/alibaba/oss_object_client.go index 
423a7348086e..9d7f4cc48ce1 100644 --- a/pkg/storage/chunk/client/alibaba/oss_object_client.go +++ b/pkg/storage/chunk/client/alibaba/oss_object_client.go @@ -73,16 +73,27 @@ func (s *OssObjectClient) Stop() { } func (s *OssObjectClient) ObjectExists(ctx context.Context, objectKey string) (bool, error) { + exists, _, err := s.ObjectExistsWithSize(ctx, objectKey) + return exists, err +} + +func (s *OssObjectClient) ObjectExistsWithSize(ctx context.Context, objectKey string) (bool, int64, error) { var options []oss.Option + var objectSize int64 err := instrument.CollectedRequest(ctx, "OSS.ObjectExists", ossRequestDuration, instrument.ErrorCode, func(_ context.Context) error { - _, requestErr := s.defaultBucket.GetObjectMeta(objectKey, options...) - return requestErr + headers, requestErr := s.defaultBucket.GetObjectMeta(objectKey, options...) + if requestErr != nil { + return requestErr + } + + objectSize, _ = strconv.ParseInt(headers.Get(oss.HTTPHeaderContentLength), 10, 64) + return nil }) if err != nil { - return false, err + return false, 0, err } - return true, nil + return true, objectSize, nil } // GetObject returns a reader and the size for the specified object key from the configured OSS bucket. diff --git a/pkg/storage/chunk/client/aws/s3_storage_client.go b/pkg/storage/chunk/client/aws/s3_storage_client.go index 12fea874e311..26c2807e120e 100644 --- a/pkg/storage/chunk/client/aws/s3_storage_client.go +++ b/pkg/storage/chunk/client/aws/s3_storage_client.go @@ -21,12 +21,15 @@ import ( "github.com/aws/aws-sdk-go/service/s3" "github.com/aws/aws-sdk-go/service/s3/s3iface" awscommon "github.com/grafana/dskit/aws" + "github.com/grafana/dskit/backoff" "github.com/grafana/dskit/flagext" "github.com/grafana/dskit/instrument" "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" + amnet "k8s.io/apimachinery/pkg/util/net" + bucket_s3 "github.com/grafana/loki/v3/pkg/storage/bucket/s3" "github.com/grafana/loki/v3/pkg/storage/chunk/client" "github.com/grafana/loki/v3/pkg/storage/chunk/client/hedging" @@ -307,37 +310,49 @@ func buckets(cfg S3Config) ([]string, error) { func (a *S3ObjectClient) Stop() {} func (a *S3ObjectClient) ObjectExists(ctx context.Context, objectKey string) (bool, error) { + exists, _, err := a.ObjectExistsWithSize(ctx, objectKey) + return exists, err +} + +func (a *S3ObjectClient) ObjectExistsWithSize(ctx context.Context, objectKey string) (bool, int64, error) { var lastErr error + var objectSize int64 retries := backoff.New(ctx, a.cfg.BackoffConfig) for retries.Ongoing() { if ctx.Err() != nil { - return false, errors.Wrap(ctx.Err(), "ctx related error during s3 objectExists") + return false, 0, errors.Wrap(ctx.Err(), "ctx related error during s3 objectExists") } lastErr = instrument.CollectedRequest(ctx, "S3.ObjectExists", s3RequestDuration, instrument.ErrorCode, func(_ context.Context) error { headObjectInput := &s3.HeadObjectInput{ Bucket: aws.String(a.bucketFromKey(objectKey)), Key: aws.String(objectKey), } - _, requestErr := a.S3.HeadObject(headObjectInput) - return requestErr + headOutput, requestErr := a.S3.HeadObject(headObjectInput) + if requestErr != nil { + return requestErr + } + if headOutput != nil && headOutput.ContentLength != nil { + objectSize = *headOutput.ContentLength + } + return nil }) if lastErr == nil { - return true, nil + return true, objectSize, nil } if a.IsObjectNotFoundErr(lastErr) { - return false, lastErr + return false, 0, lastErr } retries.Wait() } if lastErr != nil { - return false, lastErr + return false, 0, lastErr } - return 
true, nil + return true, objectSize, nil } // DeleteObject deletes the specified objectKey from the appropriate S3 bucket @@ -532,5 +547,63 @@ func (a *S3ObjectClient) IsObjectNotFoundErr(err error) bool { return false } -// TODO(dannyk): implement for client -func (a *S3ObjectClient) IsRetryableErr(error) bool { return false } +func isTimeoutError(err error) bool { + var netErr net.Error + return errors.As(err, &netErr) && netErr.Timeout() +} + +func isContextErr(err error) bool { + return errors.Is(err, context.DeadlineExceeded) || + errors.Is(err, context.Canceled) +} + +// IsStorageTimeoutErr returns true if error means that object cannot be retrieved right now due to server-side timeouts. +func (a *S3ObjectClient) IsStorageTimeoutErr(err error) bool { + // TODO(dannyk): move these out to be generic + // context errors are all client-side + if isContextErr(err) { + // Go 1.23 changed the type of the error returned by the http client when a timeout occurs + // while waiting for headers. This is a server side timeout. + return strings.Contains(err.Error(), "Client.Timeout") + } + + // connection misconfiguration, or writing on a closed connection + // do NOT retry; this is not a server-side issue + if errors.Is(err, net.ErrClosed) || amnet.IsConnectionRefused(err) { + return false + } + + // this is a server-side timeout + if isTimeoutError(err) { + return true + } + + // connection closed (closed before established) or reset (closed after established) + // this is a server-side issue + if errors.Is(err, io.EOF) || amnet.IsConnectionReset(err) { + return true + } + + if rerr, ok := err.(awserr.RequestFailure); ok { + // https://docs.aws.amazon.com/sdkref/latest/guide/feature-retry-behavior.html + return rerr.StatusCode() == http.StatusRequestTimeout || + rerr.StatusCode() == http.StatusGatewayTimeout + } + + return false +} + +// IsStorageThrottledErr returns true if error means that object cannot be retrieved right now due to throttling. 
+func (a *S3ObjectClient) IsStorageThrottledErr(err error) bool { + if rerr, ok := err.(awserr.RequestFailure); ok { + + // https://docs.aws.amazon.com/sdkref/latest/guide/feature-retry-behavior.html + return rerr.StatusCode() == http.StatusTooManyRequests || + (rerr.StatusCode()/100 == 5) // all 5xx errors are retryable + } + + return false +} +func (a *S3ObjectClient) IsRetryableErr(err error) bool { + return a.IsStorageTimeoutErr(err) || a.IsStorageThrottledErr(err) +} diff --git a/pkg/storage/chunk/client/aws/s3_storage_client_test.go b/pkg/storage/chunk/client/aws/s3_storage_client_test.go index 3a2c1e8dc33c..e18160a3fa00 100644 --- a/pkg/storage/chunk/client/aws/s3_storage_client_test.go +++ b/pkg/storage/chunk/client/aws/s3_storage_client_test.go @@ -6,9 +6,11 @@ import ( "errors" "fmt" "io" + "net" "net/http" "net/http/httptest" "strings" + "syscall" "testing" "time" @@ -73,6 +75,108 @@ func TestIsObjectNotFoundErr(t *testing.T) { } } +func TestIsRetryableErr(t *testing.T) { + tests := []struct { + err error + expected bool + name string + }{ + { + name: "IsStorageThrottledErr - Too Many Requests", + err: awserr.NewRequestFailure( + awserr.New("TooManyRequests", "TooManyRequests", nil), 429, "reqId", + ), + expected: true, + }, + { + name: "IsStorageThrottledErr - 500", + err: awserr.NewRequestFailure( + awserr.New("500", "500", nil), 500, "reqId", + ), + expected: true, + }, + { + name: "IsStorageThrottledErr - 5xx", + err: awserr.NewRequestFailure( + awserr.New("501", "501", nil), 501, "reqId", + ), + expected: true, + }, + { + name: "IsStorageTimeoutErr - Request Timeout", + err: awserr.NewRequestFailure( + awserr.New("Request Timeout", "Request Timeout", nil), 408, "reqId", + ), + expected: true, + }, + { + name: "IsStorageTimeoutErr - Gateway Timeout", + err: awserr.NewRequestFailure( + awserr.New("Gateway Timeout", "Gateway Timeout", nil), 504, "reqId", + ), + expected: true, + }, + { + name: "IsStorageTimeoutErr - EOF", + err: io.EOF, + expected: true, + }, + { + name: "IsStorageTimeoutErr - Connection Reset", + err: syscall.ECONNRESET, + expected: true, + }, + { + name: "IsStorageTimeoutErr - Timeout Error", + err: awserr.NewRequestFailure( + awserr.New("RequestCanceled", "request canceled due to timeout", nil), 408, "request-id", + ), + expected: true, + }, + { + name: "IsStorageTimeoutErr - Closed", + err: net.ErrClosed, + expected: false, + }, + { + name: "IsStorageTimeoutErr - Connection Refused", + err: syscall.ECONNREFUSED, + expected: false, + }, + { + name: "IsStorageTimeoutErr - Context Deadline Exceeded", + err: context.DeadlineExceeded, + expected: false, + }, + { + name: "IsStorageTimeoutErr - Context Canceled", + err: context.Canceled, + expected: false, + }, + { + name: "Not a retryable error", + err: syscall.EINVAL, + expected: false, + }, + { + name: "Not found 404", + err: awserr.NewRequestFailure( + awserr.New("404", "404", nil), 404, "reqId", + ), + expected: false, + }, + } + + for _, tt := range tests { + t.Run(tt.name, func(t *testing.T) { + client, err := NewS3ObjectClient(S3Config{BucketNames: "mybucket"}, hedging.Config{}) + require.NoError(t, err) + + require.Equal(t, tt.expected, client.IsRetryableErr(tt.err)) + }) + } +} + func TestRequestMiddleware(t *testing.T) { ts := httptest.NewServer(http.HandlerFunc(func(w http.ResponseWriter, r *http.Request) { fmt.Fprintln(w, r.Header.Get("echo-me")) @@ -230,6 +334,15 @@ func Test_RetryLogic(t *testing.T) { return err }, }, + { + "object exists with size with retries", + 3, + true, + func(c 
*S3ObjectClient) error { + _, _, err := c.ObjectExistsWithSize(context.Background(), "foo") + return err + }, + }, { "object doesn't exist with retries", 3, @@ -239,6 +352,15 @@ func Test_RetryLogic(t *testing.T) { return err }, }, + { + "object doesn't exist (with size) with retries", + 3, + false, + func(c *S3ObjectClient) error { + _, _, err := c.ObjectExistsWithSize(context.Background(), "foo") + return err + }, + }, } { t.Run(tc.name, func(t *testing.T) { callCount := atomic.NewInt32(0) diff --git a/pkg/storage/chunk/client/azure/blob_storage_client.go b/pkg/storage/chunk/client/azure/blob_storage_client.go index 0a9d6300b163..2a7014c29898 100644 --- a/pkg/storage/chunk/client/azure/blob_storage_client.go +++ b/pkg/storage/chunk/client/azure/blob_storage_client.go @@ -220,20 +220,35 @@ func NewBlobStorage(cfg *BlobStorageConfig, metrics BlobStorageMetrics, hedgingC func (b *BlobStorage) Stop() {} func (b *BlobStorage) ObjectExists(ctx context.Context, objectKey string) (bool, error) { + exists, _, err := b.ObjectExistsWithSize(ctx, objectKey) + return exists, err +} + +func (b *BlobStorage) ObjectExistsWithSize(ctx context.Context, objectKey string) (bool, int64, error) { + var objectSize int64 err := loki_instrument.TimeRequest(ctx, "azure.ObjectExists", instrument.NewHistogramCollector(b.metrics.requestDuration), instrument.ErrorCode, func(ctx context.Context) error { blockBlobURL, err := b.getBlobURL(objectKey, false) if err != nil { return err } - _, err = blockBlobURL.GetProperties(ctx, azblob.BlobAccessConditions{}, azblob.ClientProvidedKeyOptions{}) - return err + response, err := blockBlobURL.GetProperties(ctx, azblob.BlobAccessConditions{}, azblob.ClientProvidedKeyOptions{}) + if err != nil { + return err + } + if response != nil { + rawResponse := response.Response() + if rawResponse != nil { + objectSize = rawResponse.ContentLength + } + } + return nil }) if err != nil { - return false, err + return false, 0, err } - return true, nil + return true, objectSize, nil } // GetObject returns a reader and the size for the specified object key. 
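The retry classification introduced for the S3 client a few hunks above boils down to a small decision tree: context cancellations are client-side and never retried; genuine network timeouts, connection resets, and EOFs are treated as server-side and retried; and HTTP 408, 429, and 5xx responses are retried as well. The following is a condensed, self-contained sketch of that logic, not the Loki implementation: the function name and the explicit statusCode parameter are made up for illustration, and the real code additionally exempts net.ErrClosed and connection-refused errors, string-matches "Client.Timeout" for the Go 1.23 header-timeout case, and extracts the status code from awserr.RequestFailure.

package main

import (
	"context"
	"errors"
	"fmt"
	"io"
	"net"
	"net/http"
	"syscall"
)

// retryable sketches the retry decision: it reports whether an operation
// that failed with err (and, if an HTTP response was received, statusCode)
// is worth retrying against the object store.
func retryable(err error, statusCode int) bool {
	// context cancellation and deadline expiry are client-side: do not retry
	if errors.Is(err, context.DeadlineExceeded) || errors.Is(err, context.Canceled) {
		return false
	}
	// a network-level timeout is a server-side condition: retry
	var netErr net.Error
	if errors.As(err, &netErr) && netErr.Timeout() {
		return true
	}
	// connection closed (EOF) or reset after being established: retry
	if errors.Is(err, io.EOF) || errors.Is(err, syscall.ECONNRESET) {
		return true
	}
	// HTTP 408, 429 and all 5xx responses are retryable
	return statusCode == http.StatusRequestTimeout ||
		statusCode == http.StatusTooManyRequests ||
		statusCode/100 == 5
}

func main() {
	fmt.Println(retryable(io.EOF, 0))                  // true
	fmt.Println(retryable(context.Canceled, 0))        // false
	fmt.Println(retryable(nil, http.StatusBadGateway)) // true
}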
diff --git a/pkg/storage/chunk/client/baidubce/bos_storage_client.go b/pkg/storage/chunk/client/baidubce/bos_storage_client.go index b76db38e47c6..cc76b2162429 100644 --- a/pkg/storage/chunk/client/baidubce/bos_storage_client.go +++ b/pkg/storage/chunk/client/baidubce/bos_storage_client.go @@ -91,16 +91,27 @@ func (b *BOSObjectStorage) PutObject(ctx context.Context, objectKey string, obje } func (b *BOSObjectStorage) ObjectExists(ctx context.Context, objectKey string) (bool, error) { + exists, _, err := b.ObjectExistsWithSize(ctx, objectKey) + return exists, err +} + +func (b *BOSObjectStorage) ObjectExistsWithSize(ctx context.Context, objectKey string) (bool, int64, error) { + var objectSize int64 err := instrument.CollectedRequest(ctx, "BOS.ObjectExists", bosRequestDuration, instrument.ErrorCode, func(_ context.Context) error { - var requestErr error - _, requestErr = b.client.GetObjectMeta(b.cfg.BucketName, objectKey) - return requestErr + metaResult, requestErr := b.client.GetObjectMeta(b.cfg.BucketName, objectKey) + if requestErr != nil { + return requestErr + } + if metaResult != nil { + objectSize = metaResult.ContentLength + } + return nil }) if err != nil { - return false, err + return false, 0, err } - return true, nil + return true, objectSize, nil } func (b *BOSObjectStorage) GetObject(ctx context.Context, objectKey string) (io.ReadCloser, int64, error) { diff --git a/pkg/storage/chunk/client/congestion/controller.go b/pkg/storage/chunk/client/congestion/controller.go index 1e3e2ee0dcb3..1c69ef16139f 100644 --- a/pkg/storage/chunk/client/congestion/controller.go +++ b/pkg/storage/chunk/client/congestion/controller.go @@ -145,6 +145,10 @@ func (a *AIMDController) ObjectExists(ctx context.Context, objectKey string) (bo return a.inner.ObjectExists(ctx, objectKey) } +func (a *AIMDController) ObjectExistsWithSize(ctx context.Context, objectKey string) (bool, int64, error) { + return a.inner.ObjectExistsWithSize(ctx, objectKey) +} + func (a *AIMDController) DeleteObject(ctx context.Context, objectKey string) error { return a.inner.DeleteObject(ctx, objectKey) } @@ -212,6 +216,9 @@ func NewNoopController(Config) *NoopController { return &NoopController{} } +func (n *NoopController) ObjectExistsWithSize(context.Context, string) (bool, int64, error) { + return true, 0, nil +} func (n *NoopController) ObjectExists(context.Context, string) (bool, error) { return true, nil } func (n *NoopController) PutObject(context.Context, string, io.Reader) error { return nil } func (n *NoopController) GetObject(context.Context, string) (io.ReadCloser, int64, error) { diff --git a/pkg/storage/chunk/client/congestion/controller_test.go b/pkg/storage/chunk/client/congestion/controller_test.go index a46466ebfc54..6d17573248f5 100644 --- a/pkg/storage/chunk/client/congestion/controller_test.go +++ b/pkg/storage/chunk/client/congestion/controller_test.go @@ -267,6 +267,10 @@ func (m *mockObjectClient) ObjectExists(context.Context, string) (bool, error) { panic("not implemented") } +func (m *mockObjectClient) ObjectExistsWithSize(context.Context, string) (bool, int64, error) { + panic("not implemented") +} + func (m *mockObjectClient) List(context.Context, string, string) ([]client.StorageObject, []client.StorageCommonPrefix, error) { panic("not implemented") } diff --git a/pkg/storage/chunk/client/gcp/gcs_object_client.go b/pkg/storage/chunk/client/gcp/gcs_object_client.go index d1289a61e771..c161705ecf7f 100644 --- a/pkg/storage/chunk/client/gcp/gcs_object_client.go +++ 
b/pkg/storage/chunk/client/gcp/gcs_object_client.go @@ -7,6 +7,7 @@ import ( "io" "net" "net/http" + "strings" "time" "cloud.google.com/go/storage" @@ -126,12 +127,20 @@ func (s *GCSObjectClient) Stop() { } func (s *GCSObjectClient) ObjectExists(ctx context.Context, objectKey string) (bool, error) { - _, err := s.getsBuckets.Object(objectKey).Attrs(ctx) + exists, _, err := s.ObjectExistsWithSize(ctx, objectKey) + return exists, err +} + +func (s *GCSObjectClient) ObjectExistsWithSize(ctx context.Context, objectKey string) (bool, int64, error) { + attrs, err := s.getsBuckets.Object(objectKey).Attrs(ctx) if err != nil { - return false, err + return false, 0, err } - return true, nil + if attrs != nil { + return true, attrs.Size, nil + } + return true, 0, nil } // GetObject returns a reader and the size for the specified object key from the configured GCS bucket. @@ -269,7 +278,9 @@ func (s *GCSObjectClient) IsStorageTimeoutErr(err error) bool { // TODO(dannyk): move these out to be generic // context errors are all client-side if isContextErr(err) { - return false + // Go 1.23 changed the type of the error returned by the http client when a timeout occurs + // while waiting for headers. This is a server side timeout. + return strings.Contains(err.Error(), "Client.Timeout") } // connection misconfiguration, or writing on a closed connection diff --git a/pkg/storage/chunk/client/grpc/grpc_client_test.go b/pkg/storage/chunk/client/grpc/grpc_client_test.go index b0bcffce91eb..d40d825a9442 100644 --- a/pkg/storage/chunk/client/grpc/grpc_client_test.go +++ b/pkg/storage/chunk/client/grpc/grpc_client_test.go @@ -8,6 +8,7 @@ import ( "github.com/stretchr/testify/require" "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/storage/chunk" "github.com/grafana/loki/v3/pkg/storage/config" @@ -81,7 +82,7 @@ func TestGrpcStore(t *testing.T) { newChunkData := func() chunk.Data { return chunkenc.NewFacade( chunkenc.NewMemChunk( - chunkenc.ChunkFormatV3, chunkenc.EncNone, chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt, 256*1024, 0, + chunkenc.ChunkFormatV3, compression.EncNone, chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt, 256*1024, 0, ), 0, 0) } diff --git a/pkg/storage/chunk/client/ibmcloud/cos_object_client.go b/pkg/storage/chunk/client/ibmcloud/cos_object_client.go index d43207129305..51a9c9fd9308 100644 --- a/pkg/storage/chunk/client/ibmcloud/cos_object_client.go +++ b/pkg/storage/chunk/client/ibmcloud/cos_object_client.go @@ -320,20 +320,31 @@ func (c *COSObjectClient) DeleteObject(ctx context.Context, objectKey string) er } func (c *COSObjectClient) ObjectExists(ctx context.Context, objectKey string) (bool, error) { + exists, _, err := c.ObjectExistsWithSize(ctx, objectKey) + return exists, err +} + +func (c *COSObjectClient) ObjectExistsWithSize(ctx context.Context, objectKey string) (bool, int64, error) { bucket := c.bucketFromKey(objectKey) + var objectSize int64 err := instrument.CollectedRequest(ctx, "COS.GetObject", cosRequestDuration, instrument.ErrorCode, func(_ context.Context) error { - var requestErr error - _, requestErr = c.hedgedCOS.HeadObject(&cos.HeadObjectInput{ + headOutput, requestErr := c.hedgedCOS.HeadObject(&cos.HeadObjectInput{ Bucket: ibm.String(bucket), Key: ibm.String(objectKey), }) - return requestErr + if requestErr != nil { + return requestErr + } + if headOutput != nil && headOutput.ContentLength != nil { + objectSize = *headOutput.ContentLength + } 
+ return nil }) if err != nil { - return false, err + return false, 0, err } - return true, nil + return true, objectSize, nil } // GetObject returns a reader and the size for the specified object key from the configured S3 bucket. diff --git a/pkg/storage/chunk/client/local/fs_object_client.go b/pkg/storage/chunk/client/local/fs_object_client.go index 0eb027e9fd3c..671b5df28587 100644 --- a/pkg/storage/chunk/client/local/fs_object_client.go +++ b/pkg/storage/chunk/client/local/fs_object_client.go @@ -67,14 +67,19 @@ func NewFSObjectClient(cfg FSConfig) (*FSObjectClient, error) { // Stop implements ObjectClient func (FSObjectClient) Stop() {} -func (f *FSObjectClient) ObjectExists(_ context.Context, objectKey string) (bool, error) { +func (f *FSObjectClient) ObjectExists(ctx context.Context, objectKey string) (bool, error) { + exists, _, err := f.ObjectExistsWithSize(ctx, objectKey) + return exists, err +} + +func (f *FSObjectClient) ObjectExistsWithSize(_ context.Context, objectKey string) (bool, int64, error) { fullPath := filepath.Join(f.cfg.Directory, filepath.FromSlash(objectKey)) - _, err := os.Lstat(fullPath) + fi, err := os.Lstat(fullPath) if err != nil { - return false, err + return false, 0, err } - return true, nil + return true, fi.Size(), nil } // GetObject from the store diff --git a/pkg/storage/chunk/client/local/fs_object_client_test.go b/pkg/storage/chunk/client/local/fs_object_client_test.go index 2dc059b3f5f1..15ad96425dc9 100644 --- a/pkg/storage/chunk/client/local/fs_object_client_test.go +++ b/pkg/storage/chunk/client/local/fs_object_client_test.go @@ -156,6 +156,11 @@ func TestFSObjectClient_List_and_ObjectExists(t *testing.T) { ok, err := bucketClient.ObjectExists(context.Background(), "outer-file2") require.NoError(t, err) require.True(t, ok) + + ok, objectSize, err := bucketClient.ObjectExistsWithSize(context.Background(), "outer-file2") + require.NoError(t, err) + require.True(t, ok) + require.EqualValues(t, len("outer-file2"), objectSize) } func TestFSObjectClient_DeleteObject(t *testing.T) { diff --git a/pkg/storage/chunk/client/object_client.go b/pkg/storage/chunk/client/object_client.go index 225f5025b1d5..95672c286ad0 100644 --- a/pkg/storage/chunk/client/object_client.go +++ b/pkg/storage/chunk/client/object_client.go @@ -19,6 +19,7 @@ import ( // ObjectClient is used to store arbitrary data in Object Store (S3/GCS/Azure/...) type ObjectClient interface { ObjectExists(ctx context.Context, objectKey string) (bool, error) + ObjectExistsWithSize(ctx context.Context, objectKey string) (bool, int64, error) PutObject(ctx context.Context, objectKey string, object io.Reader) error // NOTE: The consumer of GetObject should always call the Close method when it is done reading which otherwise could cause a resource leak. 
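With ObjectExistsWithSize added to the ObjectClient interface, a single metadata request now yields both existence and size, so a caller no longer needs a separate attributes lookup before deciding what to do with an object. A hypothetical caller, not part of this change, that uses the combined call to gate downloads (the helper name and the maxSize parameter are made up for this sketch):

package example

import (
	"context"
	"fmt"
	"io"

	"github.com/grafana/loki/v3/pkg/storage/chunk/client"
)

// fetchIfSmall is a hypothetical helper: it uses the combined
// existence+size lookup to skip downloads of missing or oversized objects
// without issuing a second metadata round trip.
func fetchIfSmall(ctx context.Context, c client.ObjectClient, key string, maxSize int64) (io.ReadCloser, error) {
	exists, size, err := c.ObjectExistsWithSize(ctx, key)
	if err != nil {
		return nil, err
	}
	if !exists {
		return nil, fmt.Errorf("object %s does not exist", key)
	}
	if size > maxSize {
		return nil, fmt.Errorf("object %s is %d bytes, larger than limit %d", key, size, maxSize)
	}
	rc, _, err := c.GetObject(ctx, key)
	return rc, err
}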
diff --git a/pkg/storage/chunk/client/openstack/swift_object_client.go b/pkg/storage/chunk/client/openstack/swift_object_client.go index 951a4d652a5a..d3d978cd5ef7 100644 --- a/pkg/storage/chunk/client/openstack/swift_object_client.go +++ b/pkg/storage/chunk/client/openstack/swift_object_client.go @@ -125,12 +125,17 @@ func (s *SwiftObjectClient) Stop() { } func (s *SwiftObjectClient) ObjectExists(ctx context.Context, objectKey string) (bool, error) { - _, _, err := s.hedgingConn.Object(ctx, s.cfg.Config.ContainerName, objectKey) + exists, _, err := s.ObjectExistsWithSize(ctx, objectKey) + return exists, err +} + +func (s *SwiftObjectClient) ObjectExistsWithSize(ctx context.Context, objectKey string) (bool, int64, error) { + info, _, err := s.hedgingConn.Object(ctx, s.cfg.Config.ContainerName, objectKey) if err != nil { - return false, err + return false, 0, err } - return true, nil + return true, info.Bytes, nil } // GetObject returns a reader and the size for the specified object key from the configured swift container. diff --git a/pkg/storage/chunk/client/prefixed_object_client.go b/pkg/storage/chunk/client/prefixed_object_client.go index 899dcd2b2112..5a5bda762770 100644 --- a/pkg/storage/chunk/client/prefixed_object_client.go +++ b/pkg/storage/chunk/client/prefixed_object_client.go @@ -23,6 +23,10 @@ func (p PrefixedObjectClient) ObjectExists(ctx context.Context, objectKey string) (bool, error) { return p.downstreamClient.ObjectExists(ctx, p.prefix+objectKey) } +func (p PrefixedObjectClient) ObjectExistsWithSize(ctx context.Context, objectKey string) (bool, int64, error) { + return p.downstreamClient.ObjectExistsWithSize(ctx, p.prefix+objectKey) +} + func (p PrefixedObjectClient) GetObject(ctx context.Context, objectKey string) (io.ReadCloser, int64, error) { return p.downstreamClient.GetObject(ctx, p.prefix+objectKey) } diff --git a/pkg/storage/chunk/client/testutils/inmemory_storage_client.go b/pkg/storage/chunk/client/testutils/inmemory_storage_client.go index da65a35d5317..d937cd5fdfd4 100644 --- a/pkg/storage/chunk/client/testutils/inmemory_storage_client.go +++ b/pkg/storage/chunk/client/testutils/inmemory_storage_client.go @@ -392,20 +392,25 @@ func (m *MockStorage) query(ctx context.Context, query index.Query, callback fun } // ObjectExists implements client.ObjectClient -func (m *InMemoryObjectClient) ObjectExists(_ context.Context, objectKey string) (bool, error) { +func (m *InMemoryObjectClient) ObjectExists(ctx context.Context, objectKey string) (bool, error) { + exists, _, err := m.ObjectExistsWithSize(ctx, objectKey) + return exists, err +} + +func (m *InMemoryObjectClient) ObjectExistsWithSize(_ context.Context, objectKey string) (bool, int64, error) { m.mtx.RLock() defer m.mtx.RUnlock() if m.mode == MockStorageModeWriteOnly { - return false, errPermissionDenied + return false, 0, errPermissionDenied } _, ok := m.objects[objectKey] if !ok { - return false, nil + return false, 0, nil } - - return true, nil + objectSize := len(m.objects[objectKey]) + return true, int64(objectSize), nil } // GetObject implements client.ObjectClient. 
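Every ObjectClient implementation touched by this diff follows the same two-step recipe the in-memory test client above shows most clearly: implement ObjectExistsWithSize once against the backend's metadata primitive, then derive ObjectExists from it by discarding the size. A minimal, hypothetical map-backed client illustrating that shape (the type and field names are made up for this sketch):

package main

import (
	"context"
	"fmt"
)

// mapClient is a hypothetical in-memory store used only to illustrate the
// delegation pattern; it is not one of the clients in this PR.
type mapClient struct {
	objects map[string][]byte
}

// ObjectExistsWithSize answers both questions from one map lookup, the way
// the real clients answer both from one HEAD/metadata request.
func (m *mapClient) ObjectExistsWithSize(_ context.Context, key string) (bool, int64, error) {
	b, ok := m.objects[key]
	if !ok {
		return false, 0, nil
	}
	return true, int64(len(b)), nil
}

// ObjectExists keeps the old API by delegating and dropping the size.
func (m *mapClient) ObjectExists(ctx context.Context, key string) (bool, error) {
	exists, _, err := m.ObjectExistsWithSize(ctx, key)
	return exists, err
}

func main() {
	c := &mapClient{objects: map[string][]byte{"foo": []byte("bar")}}
	exists, size, _ := c.ObjectExistsWithSize(context.Background(), "foo")
	fmt.Println(exists, size) // true 3
}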
diff --git a/pkg/storage/chunk/client/testutils/testutils.go b/pkg/storage/chunk/client/testutils/testutils.go index b34e75a6a166..e436c1335f21 100644 --- a/pkg/storage/chunk/client/testutils/testutils.go +++ b/pkg/storage/chunk/client/testutils/testutils.go @@ -13,6 +13,7 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/ingester/client" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/storage/chunk" @@ -86,7 +87,7 @@ func CreateChunks(scfg config.SchemaConfig, startIndex, batchSize int, from mode } func DummyChunkFor(from, through model.Time, metric labels.Labels) chunk.Chunk { - cs := chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, chunkenc.EncGZIP, chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt, 256*1024, 0) + cs := chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, compression.EncGZIP, chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt, 256*1024, 0) for ts := from; ts <= through; ts = ts.Add(15 * time.Second) { _, err := cs.Append(&logproto.Entry{Timestamp: ts.Time(), Line: fmt.Sprintf("line ts=%d", ts)}) diff --git a/pkg/storage/chunk/client/util/reader.go b/pkg/storage/chunk/client/util/reader.go index 2459b1e9ea43..88cd408f9e37 100644 --- a/pkg/storage/chunk/client/util/reader.go +++ b/pkg/storage/chunk/client/util/reader.go @@ -2,6 +2,7 @@ package util import ( "bytes" + "fmt" "io" ) @@ -11,7 +12,7 @@ func ReadSeeker(r io.Reader) (io.ReadSeeker, error) { } data, err := io.ReadAll(r) if err != nil { - return nil, err + return nil, fmt.Errorf("Error in ReadSeeker ReadAll(): %w", err) } return bytes.NewReader(data), nil } diff --git a/pkg/storage/chunk/fetcher/fetcher_test.go b/pkg/storage/chunk/fetcher/fetcher_test.go index 03efc9afdc80..58123957919b 100644 --- a/pkg/storage/chunk/fetcher/fetcher_test.go +++ b/pkg/storage/chunk/fetcher/fetcher_test.go @@ -13,6 +13,7 @@ import ( "golang.org/x/exp/slices" "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/storage/chunk" "github.com/grafana/loki/v3/pkg/storage/chunk/cache" @@ -311,7 +312,7 @@ func makeChunks(now time.Time, tpls ...c) []chunk.Chunk { from := int(chk.from) / int(time.Hour) // This is only here because it's helpful for debugging. // This isn't even the write format for Loki but we dont' care for the sake of these tests. - memChk := chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, chunkenc.EncNone, chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt, 256*1024, 0) + memChk := chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, compression.EncNone, chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt, 256*1024, 0) // To make sure the fetcher doesn't swap keys and buffers each chunk is built with different, but deterministic data for i := 0; i < from; i++ { _, _ = memChk.Append(&logproto.Entry{ diff --git a/pkg/storage/detected/fields.go b/pkg/storage/detected/fields.go index 9d6a699bc1e1..8dd9dd1a1512 100644 --- a/pkg/storage/detected/fields.go +++ b/pkg/storage/detected/fields.go @@ -44,6 +44,9 @@ func (f *UnmarshaledDetectedField) Merge(df *logproto.DetectedField) error { f.Parsers = append(f.Parsers, df.Parsers...) 
slices.Sort(f.Parsers) f.Parsers = slices.Compact(f.Parsers) + if len(f.Parsers) == 0 { + f.Parsers = nil + } return f.Sketch.Merge(sketch) } diff --git a/pkg/storage/hack/main.go b/pkg/storage/hack/main.go index 74257a8ba6ad..b2d01d2e41e0 100644 --- a/pkg/storage/hack/main.go +++ b/pkg/storage/hack/main.go @@ -15,6 +15,7 @@ import ( "github.com/prometheus/prometheus/model/labels" "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/ingester/client" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/logql/syntax" @@ -103,7 +104,7 @@ func fillStore(cm storage.ClientMetrics) error { labelsBuilder.Set(labels.MetricName, "logs") metric := labelsBuilder.Labels() fp := client.Fingerprint(lbs) - chunkEnc := chunkenc.NewMemChunk(chunkfmt, chunkenc.EncLZ4_4M, headfmt, 262144, 1572864) + chunkEnc := chunkenc.NewMemChunk(chunkfmt, compression.EncLZ4_4M, headfmt, 262144, 1572864) for ts := start.UnixNano(); ts < start.UnixNano()+time.Hour.Nanoseconds(); ts = ts + time.Millisecond.Nanoseconds() { entry := &logproto.Entry{ Timestamp: time.Unix(0, ts), @@ -126,7 +127,7 @@ func fillStore(cm storage.ClientMetrics) error { if flushCount >= maxChunks { return } - chunkEnc = chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, chunkenc.EncLZ4_64k, chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt, 262144, 1572864) + chunkEnc = chunkenc.NewMemChunk(chunkenc.ChunkFormatV4, compression.EncLZ4_64k, chunkenc.UnorderedWithStructuredMetadataHeadBlockFmt, 262144, 1572864) } } }(i) diff --git a/pkg/storage/store_test.go b/pkg/storage/store_test.go index 101c906b8b4f..b1493089750a 100644 --- a/pkg/storage/store_test.go +++ b/pkg/storage/store_test.go @@ -13,6 +13,7 @@ import ( "testing" "time" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/storage/types" "github.com/grafana/loki/v3/pkg/util/httpreq" @@ -1893,6 +1894,7 @@ func TestStore_BoltdbTsdbSameIndexPrefix(t *testing.T) { // recreate the store because boltdb-shipper now runs queriers on snapshots which are created every 1 min and during startup. 
store.Stop() + ResetBoltDBIndexClientsWithShipper() // there should be 2 index tables in the object storage indexTables, err := os.ReadDir(filepath.Join(cfg.FSConfig.Directory, "index")) @@ -2035,7 +2037,7 @@ func TestQueryReferencingStructuredMetadata(t *testing.T) { metric := labelsBuilder.Labels() fp := client.Fingerprint(lbs) - chunkEnc := chunkenc.NewMemChunk(chunkfmt, chunkenc.EncLZ4_4M, headfmt, 262144, 1572864) + chunkEnc := chunkenc.NewMemChunk(chunkfmt, compression.EncLZ4_4M, headfmt, 262144, 1572864) for ts := chkFrom; !ts.After(chkThrough); ts = ts.Add(time.Second) { entry := logproto.Entry{ Timestamp: ts, diff --git a/pkg/storage/stores/series/series_store_test.go b/pkg/storage/stores/series/series_store_test.go index 553ea945f94f..3bd136cb3b61 100644 --- a/pkg/storage/stores/series/series_store_test.go +++ b/pkg/storage/stores/series/series_store_test.go @@ -18,6 +18,7 @@ import ( "github.com/stretchr/testify/require" "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/ingester/client" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/logqlmodel/stats" @@ -752,7 +753,7 @@ func dummyChunkWithFormat(t testing.TB, now model.Time, metric labels.Labels, fo samples := 1 chunkStart := now.Add(-time.Hour) - chk := chunkenc.NewMemChunk(format, chunkenc.EncGZIP, headfmt, 256*1024, 0) + chk := chunkenc.NewMemChunk(format, compression.EncGZIP, headfmt, 256*1024, 0) for i := 0; i < samples; i++ { ts := time.Duration(i) * 15 * time.Second dup, err := chk.Append(&logproto.Entry{Timestamp: chunkStart.Time().Add(ts), Line: fmt.Sprintf("line %d", i)}) diff --git a/pkg/storage/stores/series_store_write_test.go b/pkg/storage/stores/series_store_write_test.go index f58ec1a730c5..a24608675a3d 100644 --- a/pkg/storage/stores/series_store_write_test.go +++ b/pkg/storage/stores/series_store_write_test.go @@ -9,6 +9,7 @@ import ( "github.com/stretchr/testify/require" "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/logqlmodel/stats" "github.com/grafana/loki/v3/pkg/storage/chunk" "github.com/grafana/loki/v3/pkg/storage/chunk/fetcher" @@ -92,7 +93,7 @@ func TestChunkWriter_PutOne(t *testing.T) { chunkfmt, headfmt, err := periodConfig.ChunkFormat() require.NoError(t, err) - memchk := chunkenc.NewMemChunk(chunkfmt, chunkenc.EncGZIP, headfmt, 256*1024, 0) + memchk := chunkenc.NewMemChunk(chunkfmt, compression.EncGZIP, headfmt, 256*1024, 0) chk := chunk.NewChunk("fake", model.Fingerprint(0), []labels.Label{{Name: "foo", Value: "bar"}}, chunkenc.NewFacade(memchk, 0, 0), 100, 400) for name, tc := range map[string]struct { diff --git a/pkg/storage/stores/shipper/bloomshipper/cache.go b/pkg/storage/stores/shipper/bloomshipper/cache.go index 203d15684502..838866e1dee8 100644 --- a/pkg/storage/stores/shipper/bloomshipper/cache.go +++ b/pkg/storage/stores/shipper/bloomshipper/cache.go @@ -94,15 +94,15 @@ func loadBlockDirectories(root string, logger log.Logger) (keys []string, values return nil } - ref, err := resolver.ParseBlockKey(key(path)) + // The block file extension (.tar) needs to be added so the key can be parsed. + // This is because the extension is stripped off when the tar archive is extracted. 
+ ref, err := resolver.ParseBlockKey(key(path + blockExtension)) if err != nil { return nil } if ok, clean := isBlockDir(path, logger); ok { - // the cache key must not contain the directory prefix - // therefore we use the defaultKeyResolver to resolve the block's address - key := defaultKeyResolver{}.Block(ref).Addr() + key := cacheKey(ref) keys = append(keys, key) values = append(values, NewBlockDirectory(ref, path)) level.Debug(logger).Log("msg", "found block directory", "path", path, "key", key) diff --git a/pkg/storage/stores/shipper/bloomshipper/cache_test.go b/pkg/storage/stores/shipper/bloomshipper/cache_test.go index 941b7fa29e99..763036e56ac7 100644 --- a/pkg/storage/stores/shipper/bloomshipper/cache_test.go +++ b/pkg/storage/stores/shipper/bloomshipper/cache_test.go @@ -12,6 +12,7 @@ import ( "github.com/go-kit/log" "github.com/stretchr/testify/require" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/logqlmodel/stats" "github.com/grafana/loki/v3/pkg/storage/stores/shipper/bloomshipper/config" ) @@ -63,7 +64,8 @@ func Test_LoadBlocksDirIntoCache(t *testing.T) { wd := t.TempDir() // plain file - fp, _ := os.Create(filepath.Join(wd, "regular-file.tar.gz")) + ext := blockExtension + compression.ExtGZIP + fp, _ := os.Create(filepath.Join(wd, "regular-file"+ext)) fp.Close() // invalid directory @@ -99,8 +101,8 @@ func Test_LoadBlocksDirIntoCache(t *testing.T) { require.Equal(t, 1, len(c.entries)) - key := validDir + ".tar.gz" // cache key must not contain directory prefix - elem, found := c.entries[key] + // cache key does neither contain directory prefix nor file extension suffix + elem, found := c.entries[validDir] require.True(t, found) blockDir := elem.Value.(*Entry).Value require.Equal(t, filepath.Join(wd, validDir), blockDir.Path) diff --git a/pkg/storage/stores/shipper/bloomshipper/client.go b/pkg/storage/stores/shipper/bloomshipper/client.go index 2ce0e0a149ee..1390b0d9c52e 100644 --- a/pkg/storage/stores/shipper/bloomshipper/client.go +++ b/pkg/storage/stores/shipper/bloomshipper/client.go @@ -7,7 +7,6 @@ import ( "fmt" "hash" "io" - "strings" "sync" "time" @@ -18,6 +17,7 @@ import ( "github.com/pkg/errors" "github.com/prometheus/common/model" + "github.com/grafana/loki/v3/pkg/compression" v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1" "github.com/grafana/loki/v3/pkg/storage/chunk/client" "github.com/grafana/loki/v3/pkg/storage/chunk/client/util" @@ -73,6 +73,7 @@ func (r Ref) Interval() Interval { type BlockRef struct { Ref + compression.Encoding } func (r BlockRef) String() string { @@ -208,29 +209,31 @@ func (c ClosableReadSeekerAdapter) Close() error { return nil } -func BlockRefFrom(tenant, table string, md v1.BlockMetadata) BlockRef { - return BlockRef{ - Ref: Ref{ - TenantID: tenant, - TableName: table, - Bounds: md.Series.Bounds, - StartTimestamp: md.Series.FromTs, - EndTimestamp: md.Series.ThroughTs, - Checksum: md.Checksum, - }, +func newRefFrom(tenant, table string, md v1.BlockMetadata) Ref { + return Ref{ + TenantID: tenant, + TableName: table, + Bounds: md.Series.Bounds, + StartTimestamp: md.Series.FromTs, + EndTimestamp: md.Series.ThroughTs, + Checksum: md.Checksum, } } -func BlockFrom(tenant, table string, blk *v1.Block) (Block, error) { +func newBlockRefWithEncoding(ref Ref, enc compression.Encoding) BlockRef { + return BlockRef{Ref: ref, Encoding: enc} +} + +func BlockFrom(enc compression.Encoding, tenant, table string, blk *v1.Block) (Block, error) { md, _ := blk.Metadata() - ref := BlockRefFrom(tenant, table, md) + ref 
:= newBlockRefWithEncoding(newRefFrom(tenant, table, md), enc) // TODO(owen-d): pool buf := bytes.NewBuffer(nil) - err := v1.TarGz(buf, blk.Reader()) + err := v1.TarCompress(ref.Encoding, buf, blk.Reader()) if err != nil { - return Block{}, errors.Wrap(err, "archiving+compressing block") + return Block{}, err } reader := bytes.NewReader(buf.Bytes()) @@ -320,15 +323,14 @@ func (b *BloomClient) GetBlock(ctx context.Context, ref BlockRef) (BlockDirector } defer rc.Close() - path := b.fsResolver.Block(ref).LocalPath() - // the block directory should not contain the .tar.gz extension - path = strings.TrimSuffix(path, ".tar.gz") + // the block directory must not contain the .tar(.compression) extension + path := localFilePathWithoutExtension(ref, b.fsResolver) err = util.EnsureDirectory(path) if err != nil { return BlockDirectory{}, fmt.Errorf("failed to create block directory %s: %w", path, err) } - err = v1.UnTarGz(path, rc) + err = v1.UnTarCompress(ref.Encoding, path, rc) if err != nil { return BlockDirectory{}, fmt.Errorf("failed to extract block file %s: %w", key, err) } diff --git a/pkg/storage/stores/shipper/bloomshipper/client_test.go b/pkg/storage/stores/shipper/bloomshipper/client_test.go index dff01dcae50a..13ce7a7c97ae 100644 --- a/pkg/storage/stores/shipper/bloomshipper/client_test.go +++ b/pkg/storage/stores/shipper/bloomshipper/client_test.go @@ -14,12 +14,25 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/require" + "github.com/grafana/loki/v3/pkg/compression" v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1" "github.com/grafana/loki/v3/pkg/storage/chunk/client" "github.com/grafana/loki/v3/pkg/storage/chunk/client/testutils" "github.com/grafana/loki/v3/pkg/storage/config" ) +var supportedCompressions = []compression.Encoding{ + compression.EncNone, + compression.EncGZIP, + compression.EncSnappy, + compression.EncLZ4_64k, + compression.EncLZ4_256k, + compression.EncLZ4_1M, + compression.EncLZ4_4M, + compression.EncFlate, + compression.EncZstd, +} + func parseTime(s string) model.Time { t, err := time.Parse("2006-01-02 15:04", s) if err != nil { @@ -196,18 +209,18 @@ func TestBloomClient_DeleteMetas(t *testing.T) { }) } -func putBlock(t *testing.T, c *BloomClient, tenant string, start model.Time, minFp, maxFp model.Fingerprint) (Block, error) { +func putBlock(t *testing.T, c *BloomClient, tenant string, start model.Time, minFp, maxFp model.Fingerprint, enc compression.Encoding) (Block, error) { step := int64((24 * time.Hour).Seconds()) day := start.Unix() / step tmpDir := t.TempDir() - fp, _ := os.CreateTemp(t.TempDir(), "*.tar.gz") + fp, _ := os.CreateTemp(t.TempDir(), "*"+blockExtension+compression.ToFileExtension(enc)) blockWriter := v1.NewDirectoryBlockWriter(tmpDir) err := blockWriter.Init() require.NoError(t, err) - err = v1.TarGz(fp, v1.NewDirectoryBlockReader(tmpDir)) + err = v1.TarCompress(enc, fp, v1.NewDirectoryBlockReader(tmpDir)) require.NoError(t, err) _, _ = fp.Seek(0, 0) @@ -221,40 +234,48 @@ func putBlock(t *testing.T, c *BloomClient, tenant string, start model.Time, min StartTimestamp: start, EndTimestamp: start.Add(12 * time.Hour), }, + Encoding: enc, }, Data: fp, } - return block, c.client.PutObject(context.Background(), c.Block(block.BlockRef).Addr(), block.Data) + key := c.Block(block.BlockRef).Addr() + t.Logf("PUT block to storage: %s", key) + return block, c.client.PutObject(context.Background(), key, block.Data) } func TestBloomClient_GetBlock(t *testing.T) { - c, _ := newMockBloomClient(t) - ctx := context.Background() - - 
b, err := putBlock(t, c, "tenant", parseTime("2024-02-05 00:00"), 0x0000, 0xffff) - require.NoError(t, err) + for _, enc := range supportedCompressions { + c, _ := newMockBloomClient(t) + ctx := context.Background() - t.Run("exists", func(t *testing.T) { - blockDir, err := c.GetBlock(ctx, b.BlockRef) + b, err := putBlock(t, c, "tenant", parseTime("2024-02-05 00:00"), 0x0000, 0xffff, enc) require.NoError(t, err) - require.Equal(t, b.BlockRef, blockDir.BlockRef) - }) - t.Run("does not exist", func(t *testing.T) { - blockDir, err := c.GetBlock(ctx, BlockRef{}) - require.Error(t, err) - require.True(t, c.client.IsObjectNotFoundErr(err)) - require.Equal(t, blockDir, BlockDirectory{}) - }) + t.Run(enc.String(), func(t *testing.T) { + + t.Run("exists", func(t *testing.T) { + blockDir, err := c.GetBlock(ctx, b.BlockRef) + require.NoError(t, err) + require.Equal(t, b.BlockRef, blockDir.BlockRef) + }) + + t.Run("does not exist", func(t *testing.T) { + blockDir, err := c.GetBlock(ctx, BlockRef{}) + require.Error(t, err) + require.True(t, c.client.IsObjectNotFoundErr(err)) + require.Equal(t, blockDir, BlockDirectory{}) + }) + }) + } } func TestBloomClient_GetBlocks(t *testing.T) { c, _ := newMockBloomClient(t) ctx := context.Background() - b1, err := putBlock(t, c, "tenant", parseTime("2024-02-05 00:00"), 0x0000, 0x0fff) + b1, err := putBlock(t, c, "tenant", parseTime("2024-02-05 00:00"), 0x0000, 0x0fff, compression.EncGZIP) require.NoError(t, err) - b2, err := putBlock(t, c, "tenant", parseTime("2024-02-05 00:00"), 0x1000, 0xffff) + b2, err := putBlock(t, c, "tenant", parseTime("2024-02-05 00:00"), 0x1000, 0xffff, compression.EncNone) require.NoError(t, err) t.Run("exists", func(t *testing.T) { @@ -271,57 +292,62 @@ func TestBloomClient_GetBlocks(t *testing.T) { } func TestBloomClient_PutBlock(t *testing.T) { - c, _ := newMockBloomClient(t) - ctx := context.Background() - - start := parseTime("2024-02-05 12:00") - - tmpDir := t.TempDir() - fp, _ := os.CreateTemp(t.TempDir(), "*.tar.gz") - - blockWriter := v1.NewDirectoryBlockWriter(tmpDir) - err := blockWriter.Init() - require.NoError(t, err) - - err = v1.TarGz(fp, v1.NewDirectoryBlockReader(tmpDir)) - require.NoError(t, err) - - block := Block{ - BlockRef: BlockRef{ - Ref: Ref{ - TenantID: "tenant", - Bounds: v1.NewBounds(0x0000, 0xffff), - TableName: "table_1234", - StartTimestamp: start, - EndTimestamp: start.Add(12 * time.Hour), - }, - }, - Data: fp, + for _, enc := range supportedCompressions { + t.Run(enc.String(), func(t *testing.T) { + c, _ := newMockBloomClient(t) + ctx := context.Background() + + start := parseTime("2024-02-05 12:00") + + tmpDir := t.TempDir() + fp, _ := os.CreateTemp(t.TempDir(), "*"+blockExtension+compression.ToFileExtension(enc)) + + blockWriter := v1.NewDirectoryBlockWriter(tmpDir) + err := blockWriter.Init() + require.NoError(t, err) + + err = v1.TarCompress(enc, fp, v1.NewDirectoryBlockReader(tmpDir)) + require.NoError(t, err) + + block := Block{ + BlockRef: BlockRef{ + Ref: Ref{ + TenantID: "tenant", + Bounds: v1.NewBounds(0x0000, 0xffff), + TableName: "table_1234", + StartTimestamp: start, + EndTimestamp: start.Add(12 * time.Hour), + }, + Encoding: enc, + }, + Data: fp, + } + + err = c.PutBlock(ctx, block) + require.NoError(t, err) + + oc := c.client.(*testutils.InMemoryObjectClient) + stored := oc.Internals() + _, found := stored[c.Block(block.BlockRef).Addr()] + require.True(t, found) + + blockDir, err := c.GetBlock(ctx, block.BlockRef) + require.NoError(t, err) + + require.Equal(t, block.BlockRef, 
blockDir.BlockRef) + }) } - - err = c.PutBlock(ctx, block) - require.NoError(t, err) - - oc := c.client.(*testutils.InMemoryObjectClient) - stored := oc.Internals() - _, found := stored[c.Block(block.BlockRef).Addr()] - require.True(t, found) - - blockDir, err := c.GetBlock(ctx, block.BlockRef) - require.NoError(t, err) - - require.Equal(t, block.BlockRef, blockDir.BlockRef) } func TestBloomClient_DeleteBlocks(t *testing.T) { c, _ := newMockBloomClient(t) ctx := context.Background() - b1, err := putBlock(t, c, "tenant", parseTime("2024-02-05 00:00"), 0x0000, 0xffff) + b1, err := putBlock(t, c, "tenant", parseTime("2024-02-05 00:00"), 0x0000, 0xffff, compression.EncNone) require.NoError(t, err) - b2, err := putBlock(t, c, "tenant", parseTime("2024-02-06 00:00"), 0x0000, 0xffff) + b2, err := putBlock(t, c, "tenant", parseTime("2024-02-06 00:00"), 0x0000, 0xffff, compression.EncGZIP) require.NoError(t, err) - b3, err := putBlock(t, c, "tenant", parseTime("2024-02-07 00:00"), 0x0000, 0xffff) + b3, err := putBlock(t, c, "tenant", parseTime("2024-02-07 00:00"), 0x0000, 0xffff, compression.EncSnappy) require.NoError(t, err) oc := c.client.(*testutils.InMemoryObjectClient) diff --git a/pkg/storage/stores/shipper/bloomshipper/compress_utils.go b/pkg/storage/stores/shipper/bloomshipper/compress_utils.go deleted file mode 100644 index 52de4a4da582..000000000000 --- a/pkg/storage/stores/shipper/bloomshipper/compress_utils.go +++ /dev/null @@ -1,29 +0,0 @@ -package bloomshipper - -import ( - "os" - - "github.com/go-kit/log" - "github.com/go-kit/log/level" - - v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1" -) - -func CompressBloomBlock(ref BlockRef, archivePath, localDst string, logger log.Logger) (Block, error) { - blockToUpload := Block{} - archiveFile, err := os.Create(archivePath) - if err != nil { - return blockToUpload, err - } - - err = v1.TarGz(archiveFile, v1.NewDirectoryBlockReader(localDst)) - if err != nil { - level.Error(logger).Log("msg", "creating bloom block archive file", "err", err) - return blockToUpload, err - } - - blockToUpload.BlockRef = ref - blockToUpload.Data = archiveFile - - return blockToUpload, nil -} diff --git a/pkg/storage/stores/shipper/bloomshipper/compress_utils_test.go b/pkg/storage/stores/shipper/bloomshipper/compress_utils_test.go deleted file mode 100644 index f0b1598dadf9..000000000000 --- a/pkg/storage/stores/shipper/bloomshipper/compress_utils_test.go +++ /dev/null @@ -1,49 +0,0 @@ -package bloomshipper - -import ( - "bytes" - "io" - "os" - "path/filepath" - "testing" - - "github.com/google/uuid" - "github.com/stretchr/testify/require" - - v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1" -) - -func directoryDoesNotExist(path string) bool { - _, err := os.Lstat(path) - return err != nil -} - -const testArchiveFileName = "test-block-archive" - -func createBlockArchive(t *testing.T) (string, io.Reader, string, string) { - dir := t.TempDir() - mockBlockDir := filepath.Join(dir, "mock-block-dir") - err := os.MkdirAll(mockBlockDir, 0777) - require.NoError(t, err) - bloomFile, err := os.Create(filepath.Join(mockBlockDir, v1.BloomFileName)) - require.NoError(t, err) - bloomFileContent := uuid.NewString() - _, err = io.Copy(bloomFile, bytes.NewReader([]byte(bloomFileContent))) - require.NoError(t, err) - - seriesFile, err := os.Create(filepath.Join(mockBlockDir, v1.SeriesFileName)) - require.NoError(t, err) - seriesFileContent := uuid.NewString() - _, err = io.Copy(seriesFile, bytes.NewReader([]byte(seriesFileContent))) - require.NoError(t, err) - - 
blockFilePath := filepath.Join(dir, testArchiveFileName) - file, err := os.OpenFile(blockFilePath, os.O_CREATE|os.O_RDWR, 0700) - require.NoError(t, err) - err = v1.TarGz(file, v1.NewDirectoryBlockReader(mockBlockDir)) - require.NoError(t, err) - - _, _ = file.Seek(0, 0) - - return blockFilePath, file, bloomFileContent, seriesFileContent -} diff --git a/pkg/storage/stores/shipper/bloomshipper/fetcher.go b/pkg/storage/stores/shipper/bloomshipper/fetcher.go index 42d8d116b64a..053078180547 100644 --- a/pkg/storage/stores/shipper/bloomshipper/fetcher.go +++ b/pkg/storage/stores/shipper/bloomshipper/fetcher.go @@ -5,7 +5,6 @@ import ( "encoding/json" "os" "path/filepath" - "strings" "sync" "time" @@ -240,7 +239,7 @@ func (f *Fetcher) FetchBlocks(ctx context.Context, refs []BlockRef, opts ...Fetc var enqueueTime time.Duration for i := 0; i < n; i++ { - key := f.client.Block(refs[i]).Addr() + key := cacheKey(refs[i]) dir, isFound, err := f.fromCache(ctx, key) if err != nil { return results, err @@ -346,7 +345,7 @@ func (f *Fetcher) processTask(ctx context.Context, task downloadRequest[BlockRef return } - key := f.client.Block(result.BlockRef).Addr() + key := cacheKey(result.BlockRef) if task.async { // put item into cache err = f.blocksCache.Put(ctx, key, result) @@ -407,10 +406,9 @@ func (f *Fetcher) loadBlocksFromFS(_ context.Context, refs []BlockRef) ([]BlockD missing := make([]BlockRef, 0, len(refs)) for _, ref := range refs { - path := f.localFSResolver.Block(ref).LocalPath() - // the block directory does not contain the .tar.gz extension + // the block directory does not contain the .tar(.compression) extension // since it is stripped when the archive is extracted into a folder - path = strings.TrimSuffix(path, ".tar.gz") + path := localFilePathWithoutExtension(ref, f.localFSResolver) if ok, clean := f.isBlockDir(path); ok { blockDirs = append(blockDirs, NewBlockDirectory(ref, path)) } else { diff --git a/pkg/storage/stores/shipper/bloomshipper/fetcher_test.go b/pkg/storage/stores/shipper/bloomshipper/fetcher_test.go index e7723b6d2653..9361c35e90eb 100644 --- a/pkg/storage/stores/shipper/bloomshipper/fetcher_test.go +++ b/pkg/storage/stores/shipper/bloomshipper/fetcher_test.go @@ -6,7 +6,6 @@ import ( "fmt" "os" "path/filepath" - "strings" "testing" "time" @@ -15,6 +14,7 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/require" + "github.com/grafana/loki/v3/pkg/compression" v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1" "github.com/grafana/loki/v3/pkg/storage/chunk/cache" "github.com/grafana/loki/v3/pkg/storage/chunk/client/local" @@ -329,16 +329,16 @@ func TestFetcher_LoadBlocksFromFS(t *testing.T) { refs := []BlockRef{ // no directory for block - {Ref: Ref{TenantID: "tenant", TableName: "12345", Bounds: v1.NewBounds(0x0000, 0x0fff)}}, + {Ref: Ref{TenantID: "tenant", TableName: "12345", Bounds: v1.NewBounds(0x0000, 0x0fff)}, Encoding: compression.EncNone}, // invalid directory for block - {Ref: Ref{TenantID: "tenant", TableName: "12345", Bounds: v1.NewBounds(0x1000, 0x1fff)}}, + {Ref: Ref{TenantID: "tenant", TableName: "12345", Bounds: v1.NewBounds(0x1000, 0x1fff)}, Encoding: compression.EncSnappy}, // valid directory for block - {Ref: Ref{TenantID: "tenant", TableName: "12345", Bounds: v1.NewBounds(0x2000, 0x2fff)}}, + {Ref: Ref{TenantID: "tenant", TableName: "12345", Bounds: v1.NewBounds(0x2000, 0x2fff)}, Encoding: compression.EncGZIP}, } dirs := []string{ - strings.TrimSuffix(resolver.Block(refs[0]).LocalPath(), ".tar.gz"), - 
strings.TrimSuffix(resolver.Block(refs[1]).LocalPath(), ".tar.gz"), - strings.TrimSuffix(resolver.Block(refs[2]).LocalPath(), ".tar.gz"), + localFilePathWithoutExtension(refs[0], resolver), + localFilePathWithoutExtension(refs[1], resolver), + localFilePathWithoutExtension(refs[2], resolver), } createBlockDir(t, dirs[1]) @@ -360,7 +360,7 @@ func TestFetcher_LoadBlocksFromFS(t *testing.T) { require.Len(t, found, 1) require.Len(t, missing, 2) - require.Equal(t, refs[2], found[0].BlockRef) + require.Equal(t, refs[2].Ref, found[0].Ref) require.ElementsMatch(t, refs[0:2], missing) } diff --git a/pkg/storage/stores/shipper/bloomshipper/resolver.go b/pkg/storage/stores/shipper/bloomshipper/resolver.go index 8f86ce7cb09e..3115f731fe13 100644 --- a/pkg/storage/stores/shipper/bloomshipper/resolver.go +++ b/pkg/storage/stores/shipper/bloomshipper/resolver.go @@ -9,6 +9,7 @@ import ( "strconv" "strings" + "github.com/grafana/loki/v3/pkg/compression" v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1" ) @@ -17,8 +18,8 @@ const ( MetasPrefix = "metas" BlocksPrefix = "blocks" - extTarGz = ".tar.gz" - extJSON = ".json" + metaExtension = ".json" + blockExtension = v1.ExtTar ) // KeyResolver is an interface for resolving keys to locations. @@ -44,7 +45,7 @@ func (defaultKeyResolver) Meta(ref MetaRef) Location { fmt.Sprintf("%v", ref.TableName), ref.TenantID, MetasPrefix, - fmt.Sprintf("%v-%x%s", ref.Bounds, ref.Checksum, extJSON), + fmt.Sprintf("%v-%x%s", ref.Bounds, ref.Checksum, metaExtension), } } @@ -58,7 +59,7 @@ func (defaultKeyResolver) ParseMetaKey(loc Location) (MetaRef, error) { if err != nil { return MetaRef{}, fmt.Errorf("failed to parse bounds of meta key %s : %w", loc, err) } - withoutExt := strings.TrimSuffix(fnParts[2], extJSON) + withoutExt := strings.TrimSuffix(fnParts[2], metaExtension) checksum, err := strconv.ParseUint(withoutExt, 16, 64) if err != nil { return MetaRef{}, fmt.Errorf("failed to parse checksum of meta key %s : %w", loc, err) @@ -80,28 +81,44 @@ func (defaultKeyResolver) ParseMetaKey(loc Location) (MetaRef, error) { } func (defaultKeyResolver) Block(ref BlockRef) Location { + ext := blockExtension + compression.ToFileExtension(ref.Encoding) return simpleLocation{ BloomPrefix, fmt.Sprintf("%v", ref.TableName), ref.TenantID, BlocksPrefix, ref.Bounds.String(), - fmt.Sprintf("%d-%d-%x%s", ref.StartTimestamp, ref.EndTimestamp, ref.Checksum, extTarGz), + fmt.Sprintf("%d-%d-%x%s", ref.StartTimestamp, ref.EndTimestamp, ref.Checksum, ext), } } func (defaultKeyResolver) ParseBlockKey(loc Location) (BlockRef, error) { dir, fn := path.Split(loc.Addr()) + + ext, enc := path.Ext(fn), compression.EncNone + if ext != "" && ext != blockExtension { + // trim compression extension + fn = strings.TrimSuffix(fn, ext) + enc = compression.FromFileExtension(ext) + ext = path.Ext(fn) + if ext != "" && ext != blockExtension { + return BlockRef{}, fmt.Errorf("failed to parse block. 
invalid block extension: %s, expected %s", ext, blockExtension) + } + } + // trim tar extension + fn = strings.TrimSuffix(fn, ext) + fnParts := strings.Split(fn, "-") if len(fnParts) != 3 { return BlockRef{}, fmt.Errorf("failed to split filename parts of block key %s : len must be 3, but was %d", loc, len(fnParts)) } + interval, err := ParseIntervalFromParts(fnParts[0], fnParts[1]) if err != nil { return BlockRef{}, fmt.Errorf("failed to parse bounds of meta key %s : %w", loc, err) } - withoutExt := strings.TrimSuffix(fnParts[2], extTarGz) - checksum, err := strconv.ParseUint(withoutExt, 16, 64) + + checksum, err := strconv.ParseUint(fnParts[2], 16, 64) if err != nil { return BlockRef{}, fmt.Errorf("failed to parse checksum of meta key %s : %w", loc, err) } @@ -125,6 +142,7 @@ func (defaultKeyResolver) ParseBlockKey(loc Location) (BlockRef, error) { EndTimestamp: interval.End, Checksum: uint32(checksum), }, + Encoding: enc, }, nil } @@ -266,3 +284,11 @@ func (ls locations) LocalPath() string { return filepath.Join(xs...) } + +func cacheKey(ref BlockRef) string { + return strings.TrimSuffix(defaultKeyResolver{}.Block(ref).Addr(), blockExtension+compression.ToFileExtension(ref.Encoding)) +} + +func localFilePathWithoutExtension(ref BlockRef, res KeyResolver) string { + return strings.TrimSuffix(res.Block(ref).LocalPath(), blockExtension+compression.ToFileExtension(ref.Encoding)) +} diff --git a/pkg/storage/stores/shipper/bloomshipper/resolver_test.go b/pkg/storage/stores/shipper/bloomshipper/resolver_test.go index ba45845ea9ba..259bf7b2db3a 100644 --- a/pkg/storage/stores/shipper/bloomshipper/resolver_test.go +++ b/pkg/storage/stores/shipper/bloomshipper/resolver_test.go @@ -5,6 +5,7 @@ import ( "github.com/stretchr/testify/require" + "github.com/grafana/loki/v3/pkg/compression" v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1" ) @@ -31,27 +32,50 @@ func TestResolver_ParseMetaKey(t *testing.T) { } func TestResolver_ParseBlockKey(t *testing.T) { - r := defaultKeyResolver{} - ref := BlockRef{ - Ref: Ref{ - TenantID: "tenant", - TableName: "table_1", - Bounds: v1.NewBounds(0x0000, 0xffff), - StartTimestamp: 0, - EndTimestamp: 3600000, - Checksum: 43981, - }, - } + for _, tc := range []struct { + srcEnc, dstEnc compression.Encoding + }{ + {compression.EncNone, compression.EncNone}, + {compression.EncGZIP, compression.EncGZIP}, + {compression.EncSnappy, compression.EncSnappy}, + {compression.EncLZ4_64k, compression.EncLZ4_4M}, + {compression.EncLZ4_256k, compression.EncLZ4_4M}, + {compression.EncLZ4_1M, compression.EncLZ4_4M}, + {compression.EncLZ4_4M, compression.EncLZ4_4M}, + {compression.EncFlate, compression.EncFlate}, + {compression.EncZstd, compression.EncZstd}, + } { + t.Run(tc.srcEnc.String(), func(t *testing.T) { + r := defaultKeyResolver{} + ref := BlockRef{ + Ref: Ref{ + TenantID: "tenant", + TableName: "table_1", + Bounds: v1.NewBounds(0x0000, 0xffff), + StartTimestamp: 0, + EndTimestamp: 3600000, + Checksum: 43981, + }, + Encoding: tc.srcEnc, + } - // encode block ref as string - loc := r.Block(ref) - path := loc.LocalPath() - require.Equal(t, "bloom/table_1/tenant/blocks/0000000000000000-000000000000ffff/0-3600000-abcd.tar.gz", path) + // encode block ref as string + loc := r.Block(ref) + path := loc.LocalPath() + fn := "bloom/table_1/tenant/blocks/0000000000000000-000000000000ffff/0-3600000-abcd" + require.Equal(t, fn+blockExtension+compression.ToFileExtension(tc.srcEnc), path) + + // parse encoded string into block ref + parsed, err := r.ParseBlockKey(key(path)) + 
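The resolver changes above give block keys the shape `<start>-<end>-<checksum>.tar[.<compression>]`. As a quick aside, here is a self-contained sketch of that naming scheme (hypothetical `blockExt`/`splitKey` names, not Loki's API): build a key with an optional compression extension, then parse by trimming the compression extension first and the `.tar` extension second. Note the test table above expects every LZ4 flavour to parse back to the single canonical LZ4 encoding, since all of them share one file extension.

```go
// Hypothetical, minimal re-statement of the block key scheme; not Loki's API.
package main

import (
	"fmt"
	"path"
	"strings"
)

const blockExt = ".tar"

// blockKey renders "<start>-<end>-<checksum-hex>.tar[.<compression>]".
func blockKey(start, end int64, checksum uint32, compressionExt string) string {
	return fmt.Sprintf("%d-%d-%x%s%s", start, end, checksum, blockExt, compressionExt)
}

// splitKey trims the optional compression extension, then the ".tar" extension.
func splitKey(key string) (base, compressionExt string, err error) {
	ext := path.Ext(key)
	if ext != "" && ext != blockExt {
		compressionExt = ext // e.g. ".gz", ".zst", ".lz4"
		key = strings.TrimSuffix(key, ext)
		ext = path.Ext(key)
	}
	if ext != blockExt {
		return "", "", fmt.Errorf("invalid block extension: %s, expected %s", ext, blockExt)
	}
	return strings.TrimSuffix(key, blockExt), compressionExt, nil
}

func main() {
	k := blockKey(0, 3600000, 0xabcd, ".gz")
	fmt.Println(k) // 0-3600000-abcd.tar.gz
	base, enc, _ := splitKey(k)
	fmt.Println(base, enc) // 0-3600000-abcd .gz
}
```

This same trimming is what the new `cacheKey` and `localFilePathWithoutExtension` helpers rely on, so cache keys and extracted directory paths stay stable regardless of which encoding a block was written with.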
require.NoError(t, err) + expected := BlockRef{ + Ref: ref.Ref, + Encoding: tc.dstEnc, + } + require.Equal(t, expected, parsed) + }) + } - // parse encoded string into block ref - parsed, err := r.ParseBlockKey(key(path)) - require.NoError(t, err) - require.Equal(t, ref, parsed) } func TestResolver_ShardedPrefixedResolver(t *testing.T) { @@ -87,7 +111,7 @@ func TestResolver_ShardedPrefixedResolver(t *testing.T) { loc := r.Meta(metaRef) require.Equal(t, "prefix/bloom/table_1/tenant/metas/0000000000000000-000000000000ffff-abcd.json", loc.LocalPath()) loc = r.Block(blockRef) - require.Equal(t, "prefix/bloom/table_1/tenant/blocks/0000000000000000-000000000000ffff/0-3600000-bcde.tar.gz", loc.LocalPath()) + require.Equal(t, "prefix/bloom/table_1/tenant/blocks/0000000000000000-000000000000ffff/0-3600000-bcde.tar", loc.LocalPath()) }) t.Run("multiple prefixes", func(t *testing.T) { @@ -96,6 +120,6 @@ func TestResolver_ShardedPrefixedResolver(t *testing.T) { loc := r.Meta(metaRef) require.Equal(t, "b/bloom/table_1/tenant/metas/0000000000000000-000000000000ffff-abcd.json", loc.LocalPath()) loc = r.Block(blockRef) - require.Equal(t, "d/bloom/table_1/tenant/blocks/0000000000000000-000000000000ffff/0-3600000-bcde.tar.gz", loc.LocalPath()) + require.Equal(t, "d/bloom/table_1/tenant/blocks/0000000000000000-000000000000ffff/0-3600000-bcde.tar", loc.LocalPath()) }) } diff --git a/pkg/storage/stores/shipper/bloomshipper/store_test.go b/pkg/storage/stores/shipper/bloomshipper/store_test.go index 6a6705f8f0be..674e0c02a506 100644 --- a/pkg/storage/stores/shipper/bloomshipper/store_test.go +++ b/pkg/storage/stores/shipper/bloomshipper/store_test.go @@ -15,6 +15,7 @@ import ( "github.com/prometheus/common/model" "github.com/stretchr/testify/require" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/storage" v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1" "github.com/grafana/loki/v3/pkg/storage/chunk/cache" @@ -109,13 +110,14 @@ func createMetaInStorage(store *BloomStore, tenant string, start model.Time, min func createBlockInStorage(t *testing.T, store *BloomStore, tenant string, start model.Time, minFp, maxFp model.Fingerprint) (Block, error) { tmpDir := t.TempDir() - fp, _ := os.CreateTemp(t.TempDir(), "*.tar.gz") + fp, _ := os.CreateTemp(t.TempDir(), "*.tar") blockWriter := v1.NewDirectoryBlockWriter(tmpDir) err := blockWriter.Init() require.NoError(t, err) - err = v1.TarGz(fp, v1.NewDirectoryBlockReader(tmpDir)) + enc := compression.EncGZIP + err = v1.TarCompress(enc, fp, v1.NewDirectoryBlockReader(tmpDir)) require.NoError(t, err) _, _ = fp.Seek(0, 0) @@ -128,6 +130,7 @@ func createBlockInStorage(t *testing.T, store *BloomStore, tenant string, start StartTimestamp: start, EndTimestamp: start.Add(12 * time.Hour), }, + Encoding: enc, }, Data: fp, } diff --git a/pkg/storage/stores/shipper/indexshipper/boltdb/compactor/util.go b/pkg/storage/stores/shipper/indexshipper/boltdb/compactor/util.go index 6f1b0326a5cc..a7ea7af3b05e 100644 --- a/pkg/storage/stores/shipper/indexshipper/boltdb/compactor/util.go +++ b/pkg/storage/stores/shipper/indexshipper/boltdb/compactor/util.go @@ -11,6 +11,7 @@ import ( "github.com/stretchr/testify/require" "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" ingesterclient "github.com/grafana/loki/v3/pkg/ingester/client" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/storage/chunk" @@ -31,7 +32,7 @@ func createChunk(t testing.TB, chunkFormat byte, headBlockFmt chunkenc.HeadBlock 
labelsBuilder.Set(labels.MetricName, "logs") metric := labelsBuilder.Labels() fp := ingesterclient.Fingerprint(lbs) - chunkEnc := chunkenc.NewMemChunk(chunkFormat, chunkenc.EncSnappy, headBlockFmt, blockSize, targetSize) + chunkEnc := chunkenc.NewMemChunk(chunkFormat, compression.EncSnappy, headBlockFmt, blockSize, targetSize) for ts := from; !ts.After(through); ts = ts.Add(1 * time.Minute) { dup, err := chunkEnc.Append(&logproto.Entry{ diff --git a/pkg/storage/stores/shipper/indexshipper/downloads/index_set.go b/pkg/storage/stores/shipper/indexshipper/downloads/index_set.go index 8edd121071c5..971dcb0fb65b 100644 --- a/pkg/storage/stores/shipper/indexshipper/downloads/index_set.go +++ b/pkg/storage/stores/shipper/indexshipper/downloads/index_set.go @@ -283,6 +283,11 @@ func (t *indexSet) cleanupDB(fileName string) error { } func (t *indexSet) Sync(ctx context.Context) (err error) { + if !t.indexMtx.isReady() { + level.Info(t.logger).Log("msg", "skip sync since the index set is not ready") + return nil + } + return t.syncWithRetry(ctx, true, false) } diff --git a/pkg/storage/stores/shipper/indexshipper/downloads/table.go b/pkg/storage/stores/shipper/indexshipper/downloads/table.go index 1bae83c51e0e..5b9f29c3a0c1 100644 --- a/pkg/storage/stores/shipper/indexshipper/downloads/table.go +++ b/pkg/storage/stores/shipper/indexshipper/downloads/table.go @@ -13,6 +13,7 @@ import ( "github.com/go-kit/log/level" "github.com/grafana/dskit/concurrency" "github.com/pkg/errors" + "golang.org/x/exp/maps" "golang.org/x/sync/errgroup" "github.com/grafana/loki/v3/pkg/storage/chunk/client/util" @@ -271,9 +272,22 @@ func (t *table) Sync(ctx context.Context) error { level.Debug(t.logger).Log("msg", fmt.Sprintf("syncing files for table %s", t.name)) t.indexSetsMtx.RLock() - defer t.indexSetsMtx.RUnlock() + users := maps.Keys(t.indexSets) + t.indexSetsMtx.RUnlock() + + for _, userID := range users { + if err := ctx.Err(); err != nil { + return err + } + + t.indexSetsMtx.RLock() + indexSet, ok := t.indexSets[userID] + t.indexSetsMtx.RUnlock() + + if !ok { + continue + } - for userID, indexSet := range t.indexSets { if err := indexSet.Sync(ctx); err != nil { return errors.Wrap(err, fmt.Sprintf("failed to sync index set %s for table %s", userID, t.name)) } diff --git a/pkg/storage/stores/shipper/indexshipper/downloads/table_manager.go b/pkg/storage/stores/shipper/indexshipper/downloads/table_manager.go index 6b6927259378..3b4bc4bfb3fc 100644 --- a/pkg/storage/stores/shipper/indexshipper/downloads/table_manager.go +++ b/pkg/storage/stores/shipper/indexshipper/downloads/table_manager.go @@ -14,6 +14,7 @@ import ( "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" "github.com/prometheus/common/model" + "golang.org/x/exp/maps" "github.com/grafana/loki/v3/pkg/compactor/deletion" "github.com/grafana/loki/v3/pkg/storage/chunk/client/util" @@ -180,6 +181,10 @@ func (tm *tableManager) ForEach(ctx context.Context, tableName, userID string, c } func (tm *tableManager) getOrCreateTable(tableName string) (Table, error) { + if tm.ctx.Err() != nil { + return nil, errors.New("table manager is stopping") + } + // if table is already there, use it. 
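The `Sync` rework above replaces `defer RUnlock` with a snapshot-and-reacquire loop. A minimal sketch of the pattern, using a toy `registry` type rather than Loki's, shows why: the mutex is never held across a slow, network-bound `Sync` call, cancellation is honoured between items, and entries removed concurrently are simply skipped.

```go
// Toy illustration of the locking pattern; not Loki's actual types.
package main

import (
	"context"
	"sync"

	"golang.org/x/exp/maps"
)

type syncer interface {
	Sync(ctx context.Context) error
}

type registry struct {
	mtx   sync.RWMutex
	items map[string]syncer
}

// syncAll snapshots the keys under the read lock, releases it, then
// re-acquires it briefly per item so no lock is held during Sync itself.
func (r *registry) syncAll(ctx context.Context) error {
	r.mtx.RLock()
	keys := maps.Keys(r.items)
	r.mtx.RUnlock()

	for _, k := range keys {
		if err := ctx.Err(); err != nil {
			return err // honour cancellation between items
		}

		r.mtx.RLock()
		item, ok := r.items[k]
		r.mtx.RUnlock()
		if !ok {
			continue // removed concurrently; nothing to sync
		}

		if err := item.Sync(ctx); err != nil {
			return err
		}
	}
	return nil
}

func main() {
	_ = (&registry{items: map[string]syncer{}}).syncAll(context.Background())
}
```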
start := time.Now() tm.tablesMtx.RLock() @@ -214,7 +219,8 @@ func (tm *tableManager) getOrCreateTable(tableName string) (Table, error) { func (tm *tableManager) syncTables(ctx context.Context) error { tm.tablesMtx.RLock() - defer tm.tablesMtx.RUnlock() + tables := maps.Keys(tm.tables) + tm.tablesMtx.RUnlock() start := time.Now() var err error @@ -231,11 +237,24 @@ func (tm *tableManager) syncTables(ctx context.Context) error { level.Info(tm.logger).Log("msg", "syncing tables") - for name, table := range tm.tables { + for _, name := range tables { + if err := ctx.Err(); err != nil { + return err + } + level.Debug(tm.logger).Log("msg", "syncing table", "table", name) start := time.Now() + + tm.tablesMtx.RLock() + table, ok := tm.tables[name] + tm.tablesMtx.RUnlock() + + if !ok { + continue + } + err := table.Sync(ctx) - duration := float64(time.Since(start)) + duration := time.Since(start).Seconds() if err != nil { tm.metrics.tableSyncLatency.WithLabelValues(name, statusFailure).Observe(duration) return errors.Wrapf(err, "failed to sync table '%s'", name) diff --git a/pkg/storage/stores/shipper/indexshipper/downloads/util.go b/pkg/storage/stores/shipper/indexshipper/downloads/util.go index 457f76b3433d..4c5fcfee1674 100644 --- a/pkg/storage/stores/shipper/indexshipper/downloads/util.go +++ b/pkg/storage/stores/shipper/indexshipper/downloads/util.go @@ -23,6 +23,15 @@ func (m *mtxWithReadiness) markReady() { close(m.ready) } +func (m *mtxWithReadiness) isReady() bool { + select { + case <-m.ready: + return true + default: + return false + } +} + func (m *mtxWithReadiness) awaitReady(ctx context.Context) error { ctx, cancel := context.WithTimeoutCause(ctx, 30*time.Second, errors.New("exceeded 30 seconds in awaitReady")) defer cancel() diff --git a/pkg/storage/stores/shipper/indexshipper/tsdb/index/index.go b/pkg/storage/stores/shipper/indexshipper/tsdb/index/index.go index f3cb7653cbe9..7a34ecfdeb35 100644 --- a/pkg/storage/stores/shipper/indexshipper/tsdb/index/index.go +++ b/pkg/storage/stores/shipper/indexshipper/tsdb/index/index.go @@ -1347,44 +1347,37 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) { // Earlier V1 formats don't have a sorted postings offset table, so // load the whole offset table into memory. r.postingsV1 = map[string]map[string]uint64{} - if err := ReadOffsetTable(r.b, r.toc.PostingsTable, func(key []string, off uint64, _ int) error { - if len(key) != 2 { - return errors.Errorf("unexpected key length for posting table %d", len(key)) + if err := ReadOffsetTable(r.b, r.toc.PostingsTable, func(name, value []byte, off uint64, _ int) error { + if _, ok := r.postingsV1[string(name)]; !ok { + r.postingsV1[string(name)] = map[string]uint64{} + r.postings[string(name)] = nil // Used to get a list of labelnames in places. } - if _, ok := r.postingsV1[key[0]]; !ok { - r.postingsV1[key[0]] = map[string]uint64{} - r.postings[key[0]] = nil // Used to get a list of labelnames in places. - } - r.postingsV1[key[0]][key[1]] = off + r.postingsV1[string(name)][string(value)] = off return nil }); err != nil { return nil, errors.Wrap(err, "read postings table") } } else { - var lastKey []string + var lastName, lastValue []byte lastOff := 0 valueCount := 0 // For the postings offset table we keep every label name but only every nth // label value (plus the first and last one), to save memory. 
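To make the sampling comment above concrete, here is a toy illustration (assuming a factor of 4, analogous to `symbolFactor`; single label name only, whereas the reader resets its counter per name): keep the first value, every nth value, and always the last, so a binary search can bracket any value between two retained entries.

```go
// Toy sketch of the "every nth value, plus first and last" sampling rule.
package main

import "fmt"

func sample(values []string, factor int) []string {
	kept := make([]string, 0, len(values)/factor+2)
	var last string
	haveLast := false
	for i, v := range values {
		if i%factor == 0 {
			kept = append(kept, v) // first value and every nth value
			haveLast = false
		} else {
			last, haveLast = v, true
		}
	}
	if haveLast {
		kept = append(kept, last) // always include the last value
	}
	return kept
}

func main() {
	vals := []string{"a", "b", "c", "d", "e", "f", "g", "h", "i"}
	fmt.Println(sample(vals, 4)) // [a e i]
}
```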
- if err := ReadOffsetTable(r.b, r.toc.PostingsTable, func(key []string, _ uint64, off int) error { - if len(key) != 2 { - return errors.Errorf("unexpected key length for posting table %d", len(key)) - } - if _, ok := r.postings[key[0]]; !ok { + if err := ReadOffsetTable(r.b, r.toc.PostingsTable, func(name, value []byte, _ uint64, off int) error { + if _, ok := r.postings[string(name)]; !ok { // Next label name. - r.postings[key[0]] = []postingOffset{} - if lastKey != nil { + r.postings[string(name)] = []postingOffset{} + if lastName != nil { // Always include last value for each label name. - r.postings[lastKey[0]] = append(r.postings[lastKey[0]], postingOffset{value: lastKey[1], off: lastOff}) + r.postings[string(lastName)] = append(r.postings[string(lastName)], postingOffset{value: string(lastValue), off: lastOff}) } - lastKey = nil valueCount = 0 } if valueCount%symbolFactor == 0 { - r.postings[key[0]] = append(r.postings[key[0]], postingOffset{value: key[1], off: off}) - lastKey = nil + r.postings[string(name)] = append(r.postings[string(name)], postingOffset{value: string(value), off: off}) + lastName, lastValue = nil, nil } else { - lastKey = key + lastName, lastValue = name, value lastOff = off } valueCount++ @@ -1392,8 +1385,8 @@ func newReader(b ByteSlice, c io.Closer) (*Reader, error) { }); err != nil { return nil, errors.Wrap(err, "read postings table") } - if lastKey != nil { - r.postings[lastKey[0]] = append(r.postings[lastKey[0]], postingOffset{value: lastKey[1], off: lastOff}) + if lastName != nil { + r.postings[string(lastName)] = append(r.postings[string(lastName)], postingOffset{value: string(lastValue), off: lastOff}) } // Trim any extra space in the slices. for k, v := range r.postings { @@ -1443,15 +1436,12 @@ type Range struct { // for all postings lists. func (r *Reader) PostingsRanges() (map[labels.Label]Range, error) { m := map[labels.Label]Range{} - if err := ReadOffsetTable(r.b, r.toc.PostingsTable, func(key []string, off uint64, _ int) error { - if len(key) != 2 { - return errors.Errorf("unexpected key length for posting table %d", len(key)) - } + if err := ReadOffsetTable(r.b, r.toc.PostingsTable, func(name, value []byte, off uint64, _ int) error { d := encoding.DecWrap(tsdb_enc.NewDecbufAt(r.b, int(off), castagnoliTable)) if d.Err() != nil { return d.Err() } - m[labels.Label{Name: key[0], Value: key[1]}] = Range{ + m[labels.Label{Name: string(name), Value: string(value)}] = Range{ Start: int64(off) + 4, End: int64(off) + 4 + int64(d.Len()), } @@ -1606,27 +1596,24 @@ func (s symbolsIter) Err() error { return s.err } // ReadOffsetTable reads an offset table and at the given position calls f for each // found entry. If f returns an error it stops decoding and returns the received error. -func ReadOffsetTable(bs ByteSlice, off uint64, f func([]string, uint64, int) error) error { +func ReadOffsetTable(bs ByteSlice, off uint64, f func(name, value []byte, postingsOffset uint64, labelOffset int) error) error { d := encoding.DecWrap(tsdb_enc.NewDecbufAt(bs, int(off), castagnoliTable)) startLen := d.Len() cnt := d.Be32() for d.Err() == nil && d.Len() > 0 && cnt > 0 { offsetPos := startLen - d.Len() - keyCount := d.Uvarint() - // The Postings offset table takes only 2 keys per entry (name and value of label), - // and the LabelIndices offset table takes only 1 key per entry (a label name). - // Hence setting the size to max of both, i.e. 2. 
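The switch above from `[]string` keys to raw `name`/`value` byte slices leans on a Go compiler guarantee: a map access keyed by `string(b)` does not allocate a new string, so callers that only probe maps pay nothing, and the conversion cost is paid exactly once at insertion. A small sketch of the pattern, mirroring how the reader indexes `postingsV1`:

```go
// Sketch of the zero-allocation map probe enabled by the []byte callback.
package main

import "fmt"

func main() {
	postings := map[string]map[string]uint64{}
	name, value := []byte("app"), []byte("loki")

	// Lookup keyed by string(name): the compiler recognises this pattern
	// and does not allocate a string just for the probe.
	if _, ok := postings[string(name)]; !ok {
		// Insertion is where the deliberate string conversions happen.
		postings[string(name)] = map[string]uint64{string(value): 42}
	}
	fmt.Println(postings)
}
```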
- keys := make([]string, 0, 2) - - for i := 0; i < keyCount; i++ { - keys = append(keys, d.UvarintStr()) + if keyCount := d.Uvarint(); keyCount != 2 { + return fmt.Errorf("unexpected number of keys for postings offset table %d", keyCount) } + + name := d.UvarintBytes() + value := d.UvarintBytes() o := d.Uvarint64() if d.Err() != nil { break } - if err := f(keys, o, offsetPos); err != nil { + if err := f(name, value, o, offsetPos); err != nil { return err } cnt-- diff --git a/pkg/storage/stores/shipper/indexshipper/tsdb/index/index_test.go b/pkg/storage/stores/shipper/indexshipper/tsdb/index/index_test.go index 2f8576b82564..2a1b4f4d58dc 100644 --- a/pkg/storage/stores/shipper/indexshipper/tsdb/index/index_test.go +++ b/pkg/storage/stores/shipper/indexshipper/tsdb/index/index_test.go @@ -17,6 +17,7 @@ import ( "context" "fmt" "hash/crc32" + "io" "math" "math/rand" "os" @@ -198,28 +199,30 @@ func TestIndexRW_Postings(t *testing.T) { require.NoError(t, p.Err()) // The label indices are no longer used, so test them by hand here. - labelIndices := map[string][]string{} - require.NoError(t, ReadOffsetTable(ir.b, ir.toc.LabelIndicesTable, func(key []string, off uint64, _ int) error { - if len(key) != 1 { - return errors.Errorf("unexpected key length for label indices table %d", len(key)) - } + labelValuesOffsets := map[string]uint64{} + d := tsdb_enc.NewDecbufAt(ir.b, int(ir.toc.LabelIndicesTable), castagnoliTable) + cnt := d.Be32() + + for d.Err() == nil && d.Len() > 0 && cnt > 0 { + require.Equal(t, 1, d.Uvarint(), "Unexpected number of keys for label indices table") + lbl := d.UvarintStr() + off := d.Uvarint64() + labelValuesOffsets[lbl] = off + cnt-- + } + require.NoError(t, d.Err()) + labelIndices := map[string][]string{} + for lbl, off := range labelValuesOffsets { d := tsdb_enc.NewDecbufAt(ir.b, int(off), castagnoliTable) - vals := []string{} - nc := d.Be32int() - if nc != 1 { - return errors.Errorf("unexpected number of label indices table names %d", nc) - } - for i := d.Be32(); i > 0; i-- { + require.Equal(t, 1, d.Be32int(), "Unexpected number of label indices table names") + for i := d.Be32(); i > 0 && d.Err() == nil; i-- { v, err := ir.lookupSymbol(d.Be32()) - if err != nil { - return err - } - vals = append(vals, v) + require.NoError(t, err) + labelIndices[lbl] = append(labelIndices[lbl], v) } - labelIndices[key[0]] = vals - return d.Err() - })) + require.NoError(t, d.Err()) + } require.Equal(t, map[string][]string{ "a": {"1"}, "b": {"1", "2", "3", "4"}, @@ -940,3 +943,71 @@ func TestChunkSamples_getChunkSampleForQueryStarting(t *testing.T) { }) } } + +func BenchmarkInitReader_ReadOffsetTable(b *testing.B) { + dir := b.TempDir() + idxFile := filepath.Join(dir, IndexFilename) + + lbls, err := labels.ReadLabels(filepath.Join("..", "testdata", "20kseries.json"), 1000) + require.NoError(b, err) + + // Sort labels as the index writer expects series in sorted order by fingerprint. + sort.Slice(lbls, func(i, j int) bool { + return lbls[i].Hash() < lbls[j].Hash() + }) + + symbols := map[string]struct{}{} + for _, lset := range lbls { + for _, l := range lset { + symbols[l.Name] = struct{}{} + symbols[l.Value] = struct{}{} + } + } + + var input indexWriterSeriesSlice + + // Generate ChunkMetas for every label set. 
+ for _, lset := range lbls { + input = append(input, &indexWriterSeries{ + labels: lset, + chunks: []ChunkMeta{ + { + MinTime: 0, + MaxTime: 1, + Checksum: rand.Uint32(), + }, + }, + }) + } + + iw, err := NewWriter(context.Background(), FormatV3, idxFile) + require.NoError(b, err) + + var syms []string + for s := range symbols { + syms = append(syms, s) + } + sort.Strings(syms) + for _, s := range syms { + require.NoError(b, iw.AddSymbol(s)) + } + + for i, s := range input { + err = iw.AddSeries(storage.SeriesRef(i), s.labels, model.Fingerprint(s.labels.Hash()), s.chunks...) + require.NoError(b, err) + } + + err = iw.Close() + require.NoError(b, err) + + bs, err := os.ReadFile(idxFile) + require.NoError(b, err) + + b.ResetTimer() + b.ReportAllocs() + for i := 0; i < b.N; i++ { + r, err := newReader(RealByteSlice(bs), io.NopCloser(nil)) + require.NoError(b, err) + require.NoError(b, r.Close()) + } +} diff --git a/pkg/storage/stores/shipper/indexshipper/uploads/index_set.go b/pkg/storage/stores/shipper/indexshipper/uploads/index_set.go index 19bf88842b02..36dc13850956 100644 --- a/pkg/storage/stores/shipper/indexshipper/uploads/index_set.go +++ b/pkg/storage/stores/shipper/indexshipper/uploads/index_set.go @@ -11,7 +11,7 @@ import ( "github.com/go-kit/log" "github.com/go-kit/log/level" - "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/storage/stores/shipper/indexshipper/index" "github.com/grafana/loki/v3/pkg/storage/stores/shipper/indexshipper/storage" util_log "github.com/grafana/loki/v3/pkg/util/log" @@ -145,8 +145,9 @@ func (t *indexSet) uploadIndex(ctx context.Context, idx index.Index) error { } }() - compressedWriter := chunkenc.Gzip.GetWriter(f) - defer chunkenc.Gzip.PutWriter(compressedWriter) + gzipPool := compression.GetWriterPool(compression.EncGZIP) + compressedWriter := gzipPool.GetWriter(f) + defer gzipPool.PutWriter(compressedWriter) idxReader, err := idx.Reader() if err != nil { diff --git a/pkg/storage/util_test.go b/pkg/storage/util_test.go index 5ef02e74b1ca..dd535197afb3 100644 --- a/pkg/storage/util_test.go +++ b/pkg/storage/util_test.go @@ -12,6 +12,7 @@ import ( "github.com/stretchr/testify/assert" "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/ingester/client" "github.com/grafana/loki/v3/pkg/logproto" "github.com/grafana/loki/v3/pkg/logql/syntax" @@ -108,7 +109,7 @@ func newChunk(chunkFormat byte, headBlockFmt chunkenc.HeadBlockFmt, stream logpr lbs = builder.Labels() } from, through := loki_util.RoundToMilliseconds(stream.Entries[0].Timestamp, stream.Entries[len(stream.Entries)-1].Timestamp) - chk := chunkenc.NewMemChunk(chunkFormat, chunkenc.EncGZIP, headBlockFmt, 256*1024, 0) + chk := chunkenc.NewMemChunk(chunkFormat, compression.EncGZIP, headBlockFmt, 256*1024, 0) for _, e := range stream.Entries { _, _ = chk.Append(&e) } diff --git a/pkg/tool/audit/audit_test.go b/pkg/tool/audit/audit_test.go index c9afa7b34f35..d591fca7e1dd 100644 --- a/pkg/tool/audit/audit_test.go +++ b/pkg/tool/audit/audit_test.go @@ -17,11 +17,16 @@ type testObjClient struct { client.ObjectClient } -func (t testObjClient) ObjectExists(_ context.Context, object string) (bool, error) { +func (t testObjClient) ObjectExistsWithSize(_ context.Context, object string) (bool, int64, error) { if strings.Contains(object, "missing") { - return false, nil + return false, 0, nil } - return true, nil + return true, 0, nil +} + +func (t testObjClient) 
ObjectExists(ctx context.Context, object string) (bool, error) { + exists, _, err := t.ObjectExistsWithSize(ctx, object) + return exists, err } type testCompactedIdx struct { diff --git a/pkg/util/metrics_helper.go b/pkg/util/metrics_helper.go index e4572b4e4a15..7bf7d3029a26 100644 --- a/pkg/util/metrics_helper.go +++ b/pkg/util/metrics_helper.go @@ -5,8 +5,10 @@ import ( "errors" "fmt" "math" + "strings" "sync" + humanize "github.com/dustin/go-humanize" "github.com/go-kit/log/level" "github.com/prometheus/client_golang/prometheus" dto "github.com/prometheus/client_model/go" @@ -841,3 +843,8 @@ func RegisterCounterVec(registerer prometheus.Registerer, namespace, name, help } return vec } + +// HumanizeBytes returns a human readable string representation of the given byte value and removes all whitespaces. +func HumanizeBytes(val uint64) string { + return strings.Replace(humanize.Bytes(val), " ", "", 1) +} diff --git a/pkg/util/metrics_helper_test.go b/pkg/util/metrics_helper_test.go index 7ca74ab7b022..09e80a2afa58 100644 --- a/pkg/util/metrics_helper_test.go +++ b/pkg/util/metrics_helper_test.go @@ -1158,3 +1158,18 @@ func verifyLabels(t *testing.T, m prometheus.Collector, filter map[string]string require.Equal(t, expectedLabels, result) } + +func TestHumanizeBytes(t *testing.T) { + tests := map[uint64]string{ + 1024: "1.0kB", + 1024 * 1000: "1.0MB", + 1024 * 1000 * 1000: "1.0GB", + 10: "10B", + } + + for bytes, humanizedBytes := range tests { + t.Run(fmt.Sprintf("%d", bytes), func(t *testing.T) { + require.Equal(t, humanizedBytes, HumanizeBytes(bytes)) + }) + } +} diff --git a/pkg/util/server/error.go b/pkg/util/server/error.go index 8ff7457a605b..3044a58f61e2 100644 --- a/pkg/util/server/error.go +++ b/pkg/util/server/error.go @@ -48,6 +48,10 @@ func WriteError(err error, w http.ResponseWriter) { // ClientHTTPStatusAndError returns error and http status that is "safe" to return to client without // exposing any implementation details. 
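For reference, the `HumanizeBytes` helper added above can be exercised standalone; this is just a runnable restatement of the added code, using the values from its test (go-humanize uses SI units, so 1024 bytes renders as "1.0kB" once the space is stripped):

```go
// Standalone restatement of the HumanizeBytes helper and its test values.
package main

import (
	"fmt"
	"strings"

	humanize "github.com/dustin/go-humanize"
)

// humanizeBytes wraps humanize.Bytes and removes the single space it
// inserts, yielding compact strings like "1.0GB".
func humanizeBytes(val uint64) string {
	return strings.Replace(humanize.Bytes(val), " ", "", 1)
}

func main() {
	fmt.Println(humanizeBytes(1024))               // 1.0kB
	fmt.Println(humanizeBytes(1024 * 1000 * 1000)) // 1.0GB
}
```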
func ClientHTTPStatusAndError(err error) (int, error) { + if err == nil { + return http.StatusOK, nil + } + var ( queryErr storage_errors.QueryError promErr promql.ErrStorage diff --git a/pkg/validation/limits.go b/pkg/validation/limits.go index 75128607b882..153073b74e6c 100644 --- a/pkg/validation/limits.go +++ b/pkg/validation/limits.go @@ -19,8 +19,8 @@ import ( "golang.org/x/time/rate" "gopkg.in/yaml.v2" - "github.com/grafana/loki/v3/pkg/chunkenc" "github.com/grafana/loki/v3/pkg/compactor/deletionmode" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/distributor/shardstreams" "github.com/grafana/loki/v3/pkg/loghttp/push" "github.com/grafana/loki/v3/pkg/logql" @@ -207,12 +207,9 @@ type Limits struct { BloomBuildTaskMaxRetries int `yaml:"bloom_build_task_max_retries" json:"bloom_build_task_max_retries" category:"experimental"` BloomBuilderResponseTimeout time.Duration `yaml:"bloom_build_builder_response_timeout" json:"bloom_build_builder_response_timeout" category:"experimental"` - BloomCreationEnabled bool `yaml:"bloom_creation_enabled" json:"bloom_creation_enabled" category:"experimental"` - BloomSplitSeriesKeyspaceBy int `yaml:"bloom_split_series_keyspace_by" json:"bloom_split_series_keyspace_by" category:"experimental"` - BloomNGramLength int `yaml:"bloom_ngram_length" json:"bloom_ngram_length" category:"experimental"` - BloomNGramSkip int `yaml:"bloom_ngram_skip" json:"bloom_ngram_skip" category:"experimental"` - BloomFalsePositiveRate float64 `yaml:"bloom_false_positive_rate" json:"bloom_false_positive_rate" category:"experimental"` - BloomBlockEncoding string `yaml:"bloom_block_encoding" json:"bloom_block_encoding" category:"experimental"` + BloomCreationEnabled bool `yaml:"bloom_creation_enabled" json:"bloom_creation_enabled" category:"experimental"` + BloomSplitSeriesKeyspaceBy int `yaml:"bloom_split_series_keyspace_by" json:"bloom_split_series_keyspace_by" category:"experimental"` + BloomBlockEncoding string `yaml:"bloom_block_encoding" json:"bloom_block_encoding" category:"experimental"` BloomMaxBlockSize flagext.ByteSize `yaml:"bloom_max_block_size" json:"bloom_max_block_size" category:"experimental"` BloomMaxBloomSize flagext.ByteSize `yaml:"bloom_max_bloom_size" json:"bloom_max_bloom_size" category:"experimental"` @@ -379,9 +376,6 @@ func (l *Limits) RegisterFlags(f *flag.FlagSet) { f.BoolVar(&l.BloomGatewayEnabled, "bloom-gateway.enable-filtering", false, "Experimental. Whether to use the bloom gateway component in the read path to filter chunks.") f.DurationVar(&l.BloomGatewayCacheKeyInterval, "bloom-gateway.cache-key-interval", 15*time.Minute, "Experimental. Interval for computing the cache key in the Bloom Gateway.") - f.IntVar(&l.BloomNGramLength, "bloom-build.ngram-length", 4, "Experimental. Length of the n-grams created when computing blooms from log lines.") - f.IntVar(&l.BloomNGramSkip, "bloom-build.ngram-skip", 1, "Experimental. Skip factor for the n-grams created when computing blooms from log lines.") - f.Float64Var(&l.BloomFalsePositiveRate, "bloom-build.false-positive-rate", 0.01, "Experimental. Scalable Bloom Filter desired false-positive rate.") f.StringVar(&l.BloomBlockEncoding, "bloom-build.block-encoding", "none", "Experimental. 
Compression algorithm for bloom block pages.")

 	_ = l.BloomMaxBlockSize.Set(defaultBloomBuildMaxBlockSize)
@@ -496,7 +490,7 @@ func (l *Limits) Validate() error {
 		return errors.Wrap(err, "invalid tsdb sharding strategy")
 	}

-	if _, err := chunkenc.ParseEncoding(l.BloomBlockEncoding); err != nil {
+	if _, err := compression.ParseEncoding(l.BloomBlockEncoding); err != nil {
 		return err
 	}

@@ -1010,14 +1004,6 @@ func (o *Overrides) BloomTaskMaxRetries(userID string) int {
 	return o.getOverridesForUser(userID).BloomBuildTaskMaxRetries
 }

-func (o *Overrides) BloomNGramLength(userID string) int {
-	return o.getOverridesForUser(userID).BloomNGramLength
-}
-
-func (o *Overrides) BloomNGramSkip(userID string) int {
-	return o.getOverridesForUser(userID).BloomNGramSkip
-}
-
 func (o *Overrides) BloomMaxBlockSize(userID string) int {
 	return o.getOverridesForUser(userID).BloomMaxBlockSize.Val()
 }
@@ -1026,10 +1012,6 @@ func (o *Overrides) BloomMaxBloomSize(userID string) int {
 	return o.getOverridesForUser(userID).BloomMaxBloomSize.Val()
 }

-func (o *Overrides) BloomFalsePositiveRate(userID string) float64 {
-	return o.getOverridesForUser(userID).BloomFalsePositiveRate
-}
-
 func (o *Overrides) BloomBlockEncoding(userID string) string {
 	return o.getOverridesForUser(userID).BloomBlockEncoding
 }
diff --git a/pkg/validation/limits_test.go b/pkg/validation/limits_test.go
index 2d4457c2a119..87fab6837029 100644
--- a/pkg/validation/limits_test.go
+++ b/pkg/validation/limits_test.go
@@ -12,8 +12,8 @@ import (
 	"github.com/stretchr/testify/require"
 	"gopkg.in/yaml.v2"

-	"github.com/grafana/loki/v3/pkg/chunkenc"
 	"github.com/grafana/loki/v3/pkg/compactor/deletionmode"
+	"github.com/grafana/loki/v3/pkg/compression"
 	"github.com/grafana/loki/v3/pkg/loghttp/push"
 	"github.com/grafana/loki/v3/pkg/logql"
 )
@@ -339,7 +339,7 @@ func TestLimitsValidation(t *testing.T) {
 		},
 		{
 			limits:   Limits{DeletionMode: "disabled", BloomBlockEncoding: "unknown"},
-			expected: fmt.Errorf("invalid encoding: unknown, supported: %s", chunkenc.SupportedEncoding()),
+			expected: fmt.Errorf("invalid encoding: unknown, supported: %s", compression.SupportedEncoding()),
 		},
 	} {
 		desc := fmt.Sprintf("%s/%s", tc.limits.DeletionMode, tc.limits.BloomBlockEncoding)
diff --git a/production/helm/loki/CHANGELOG.md b/production/helm/loki/CHANGELOG.md
index a213acf8e8a2..36b91c68ff24 100644
--- a/production/helm/loki/CHANGELOG.md
+++ b/production/helm/loki/CHANGELOG.md
@@ -13,10 +13,30 @@ Entries should include a reference to the pull request that introduced the chang
 [//]: # ( : do not remove this line. This locator is used by the CI pipeline to automatically create a changelog entry for each new Loki release. Add other chart versions and respective changelog entries below this line.)

-## 6.12.1
+## 6.15.1

 - [BUGFIX] Added missing `loki.storage.azure.chunkDelimiter` parameter to Helm chart.

+## 6.15.0
+
+- [ENHANCEMENT] Allow setting annotations for memberlist and query-scheduler-discovery services.
+
+## 6.14.1
+
+- [BUGFIX] Fixed Memcached persistence options.
+
+## 6.14.0
+
+- [FEATURE] Add additional service annotations for components in distributed mode.
+- [FIX] Rename loki/templates/query-frontend/poddisruptionbudget-query-frontent.yaml to poddisruptionbudget-query-frontend.yaml to fix a spelling mistake.
+
+## 6.13.0
+
+- [CHANGE] Correctly wrap ClusterRoleBinding around `rbac/namespaced` conditional.
+- [FIX] Do not create bloom planner, bloom builder, bloom gateway Deployment/Statefulset if their replica count is 0.
+- [FIX] Configure (ephemeral) storage for the bloom builder working directory.
+- [ENHANCEMENT] Automatically configure bloom planner address for bloom builders and bloom gateway addresses for bloom gateway clients.
+
 ## 6.12.0

 - [ENHANCEMENT] Replace Bloom Compactor component with Bloom Planner and Bloom Builder. These are the new components to build bloom blocks.
diff --git a/production/helm/loki/Chart.yaml b/production/helm/loki/Chart.yaml
index b282e580f908..fe93a4b4ae39 100644
--- a/production/helm/loki/Chart.yaml
+++ b/production/helm/loki/Chart.yaml
@@ -3,7 +3,7 @@ name: loki
 description: Helm chart for Grafana Loki and Grafana Enterprise Logs supporting both simple, scalable and distributed modes.
 type: application
 appVersion: 3.1.1
-version: 6.12.1
+version: 6.15.1
 home: https://grafana.github.io/helm-charts
 sources:
   - https://github.com/grafana/loki
diff --git a/production/helm/loki/README.md b/production/helm/loki/README.md
index 86622a974397..3897b4face73 100644
--- a/production/helm/loki/README.md
+++ b/production/helm/loki/README.md
@@ -1,6 +1,6 @@
 # loki

-![Version: 6.12.1](https://img.shields.io/badge/Version-6.12.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 3.1.1](https://img.shields.io/badge/AppVersion-3.1.1-informational?style=flat-square)
+![Version: 6.15.1](https://img.shields.io/badge/Version-6.15.1-informational?style=flat-square) ![Type: application](https://img.shields.io/badge/Type-application-informational?style=flat-square) ![AppVersion: 3.1.1](https://img.shields.io/badge/AppVersion-3.1.1-informational?style=flat-square)

 Helm chart for Grafana Loki and Grafana Enterprise Logs supporting both simple, scalable and distributed modes.
diff --git a/production/helm/loki/scenarios/README.md b/production/helm/loki/scenarios/README.md
new file mode 100644
index 000000000000..1ec869261861
--- /dev/null
+++ b/production/helm/loki/scenarios/README.md
@@ -0,0 +1,19 @@
+These scenarios are used by the GitHub workflow [Publish Rendered Helm Chart Diff](../../../../.github/workflows/helm-loki-ci.yml).
+
+Each scenario is used as the values file for the Loki Helm chart to render the Kubernetes manifests on both the `base` branch and the PR's branch, compare the content, and report the diff on the pull request as a comment ([example](https://github.com/grafana/loki/pull/14127#issuecomment-2348360828)). This lets the reviewer understand how the changes in the chart modify the resulting manifests.
+
+![img.png](images/img.png)
+
+The workflow reports three types of changes for each scenario:
+
+1. Added files - manifests that are added in the current PR and did not exist on the `base` branch.
+
+![added.png](images/added.png)
+
+
+2. Modified files - manifests that exist on both branches but are modified by the PR's changes.
+![modified.png](images/modified.png)
+
+3. Removed files - manifests that exist on the `base` branch but not on the PR's branch.
+ +![removed.png](images/removed.png) \ No newline at end of file diff --git a/production/helm/loki/scenarios/default-single-binary-values.yaml b/production/helm/loki/scenarios/default-single-binary-values.yaml new file mode 100644 index 000000000000..78a1f111cd24 --- /dev/null +++ b/production/helm/loki/scenarios/default-single-binary-values.yaml @@ -0,0 +1,71 @@ +--- +loki: + schemaConfig: + configs: + - from: 2024-04-01 + store: tsdb + object_store: s3 + schema: v13 + index: + prefix: loki_index_ + period: 24h + ingester: + chunk_encoding: snappy + tracing: + enabled: true + querier: + # Default is 4, if you have enough memory and CPU you can increase, reduce if OOMing + max_concurrent: 4 + +#gateway: +# ingress: +# enabled: true +# hosts: +# - host: FIXME +# paths: +# - path: / +# pathType: Prefix + +deploymentMode: Distributed + +ingester: + replicas: 3 +querier: + replicas: 3 + maxUnavailable: 2 +queryFrontend: + replicas: 2 + maxUnavailable: 1 +queryScheduler: + replicas: 2 +distributor: + replicas: 3 + maxUnavailable: 2 +compactor: + replicas: 1 +indexGateway: + replicas: 2 + maxUnavailable: 1 + +# optional experimental components +bloomPlanner: + replicas: 0 +bloomBuilder: + replicas: 0 +bloomGateway: + replicas: 0 + +# Enable minio for storage +minio: + enabled: true + +# Zero out replica counts of other deployment modes +backend: + replicas: 0 +read: + replicas: 0 +write: + replicas: 0 + +singleBinary: + replicas: 0 diff --git a/production/helm/loki/scenarios/default-values.yaml b/production/helm/loki/scenarios/default-values.yaml new file mode 100644 index 000000000000..a79baee50398 --- /dev/null +++ b/production/helm/loki/scenarios/default-values.yaml @@ -0,0 +1,16 @@ +--- +loki: + commonConfig: + replication_factor: 1 + useTestSchema: true + storage: + bucketNames: + chunks: chunks + ruler: ruler + admin: admin +read: + replicas: 1 +write: + replicas: 1 +backend: + replicas: 1 diff --git a/production/helm/loki/scenarios/images/added.png b/production/helm/loki/scenarios/images/added.png new file mode 100644 index 000000000000..ced9f9554a8f Binary files /dev/null and b/production/helm/loki/scenarios/images/added.png differ diff --git a/production/helm/loki/scenarios/images/img.png b/production/helm/loki/scenarios/images/img.png new file mode 100644 index 000000000000..81ba701da26a Binary files /dev/null and b/production/helm/loki/scenarios/images/img.png differ diff --git a/production/helm/loki/scenarios/images/modified.png b/production/helm/loki/scenarios/images/modified.png new file mode 100644 index 000000000000..39a25bae35b2 Binary files /dev/null and b/production/helm/loki/scenarios/images/modified.png differ diff --git a/production/helm/loki/scenarios/images/removed.png b/production/helm/loki/scenarios/images/removed.png new file mode 100644 index 000000000000..219d64c32c98 Binary files /dev/null and b/production/helm/loki/scenarios/images/removed.png differ diff --git a/production/helm/loki/scenarios/ingress-values.yaml b/production/helm/loki/scenarios/ingress-values.yaml new file mode 100644 index 000000000000..ff5ff1efd9ce --- /dev/null +++ b/production/helm/loki/scenarios/ingress-values.yaml @@ -0,0 +1,30 @@ +--- +gateway: + ingress: + enabled: true + annotations: {} + hosts: + - host: gateway.loki.example.com + paths: + - path: / + pathType: Prefix +loki: + commonConfig: + replication_factor: 1 + useTestSchema: true + storage: + bucketNames: + chunks: chunks + ruler: ruler + admin: admin +read: + replicas: 1 +write: + replicas: 1 +backend: + replicas: 1 +monitoring: 
+ lokiCanary: + enabled: false +test: + enabled: false diff --git a/production/helm/loki/scenarios/legacy-monitoring-values.yaml b/production/helm/loki/scenarios/legacy-monitoring-values.yaml new file mode 100644 index 000000000000..ad520e57f2f4 --- /dev/null +++ b/production/helm/loki/scenarios/legacy-monitoring-values.yaml @@ -0,0 +1,27 @@ +--- +loki: + commonConfig: + replication_factor: 1 + useTestSchema: true + storage: + bucketNames: + chunks: chunks + ruler: ruler + admin: admin +read: + replicas: 1 +write: + replicas: 1 +backend: + replicas: 1 +monitoring: + enabled: true + selfMonitoring: + enabled: true + grafanaAgent: + installOperator: true + serviceMonitor: + labels: + release: "prometheus" +test: + prometheusAddress: "http://prometheus-kube-prometheus-prometheus.prometheus.svc.cluster.local.:9090" diff --git a/production/helm/loki/scenarios/simple-scalable-aws-kube-irsa-values.yaml b/production/helm/loki/scenarios/simple-scalable-aws-kube-irsa-values.yaml new file mode 100644 index 000000000000..28c6c3bbe916 --- /dev/null +++ b/production/helm/loki/scenarios/simple-scalable-aws-kube-irsa-values.yaml @@ -0,0 +1,67 @@ +loki: + # -- Storage config. Providing this will automatically populate all necessary storage configs in the templated config. + storage: + # Loki requires a bucket for chunks and the ruler. GEL requires a third bucket for the admin API. + # Please provide these values if you are using object storage. + bucketNames: + chunks: aws-s3-chunks-bucket + ruler: aws-s3-ruler-bucket + admin: aws-s3-admin-bucket + type: s3 + s3: + region: eu-central-1 + # -- Check https://grafana.com/docs/loki/latest/configuration/#schema_config for more info on how to configure schemas + schemaConfig: + configs: + - from: "2023-09-19" + index: + period: 1d + prefix: tsdb_index_ + object_store: s3 + schema: v13 + store: tsdb +###################################################################################################################### +# +# Enterprise Loki Configs +# +###################################################################################################################### + +# -- Configuration for running Enterprise Loki +enterprise: + # Enable enterprise features, license must be provided + enabled: true + # -- Grafana Enterprise Logs license + license: + contents: "content of licence" + tokengen: + annotations: { + eks.amazonaws.com/role-arn: arn:aws:iam::2222222:role/test-role + } + # -- Configuration for `provisioner` target + provisioner: + # -- Additional annotations for the `provisioner` Job + annotations: { + eks.amazonaws.com/role-arn: arn:aws:iam::2222222:role/test-role + } +###################################################################################################################### +# +# Service Accounts and Kubernetes RBAC +# +###################################################################################################################### +serviceAccount: + # -- Annotations for the service account + annotations: + eks.amazonaws.com/role-arn: arn:aws:iam::2222222:role/test-role + +# Configuration for the write pod(s) +write: + persistence: + storageClass: gp2 +# -- Configuration for the read pod(s) +read: + persistence: + storageClass: gp2 +# -- Configuration for the backend pod(s) +backend: + persistence: + storageClass: gp2 diff --git a/production/helm/loki/src/alerts.yaml.tpl b/production/helm/loki/src/alerts.yaml.tpl index 144e263f7061..0aa37b708b52 100644 --- a/production/helm/loki/src/alerts.yaml.tpl +++ 
b/production/helm/loki/src/alerts.yaml.tpl @@ -52,7 +52,7 @@ groups: message: | {{`{{`}} $labels.cluster {{`}}`}} {{`{{`}} $labels.namespace {{`}}`}} has had {{`{{`}} printf "%.0f" $value {{`}}`}} compactors running for more than 5m. Only one compactor should run at a time. expr: | - sum(loki_boltdb_shipper_compactor_running) by (namespace, cluster) > 1 + sum(loki_boltdb_shipper_compactor_running) by (cluster, namespace) > 1 for: "5m" labels: severity: "warning" diff --git a/production/helm/loki/src/helm-test/Dockerfile b/production/helm/loki/src/helm-test/Dockerfile index bb71f28b98ed..9645b206b105 100644 --- a/production/helm/loki/src/helm-test/Dockerfile +++ b/production/helm/loki/src/helm-test/Dockerfile @@ -1,4 +1,4 @@ -ARG GO_VERSION=1.22 +ARG GO_VERSION=1.23 FROM golang:${GO_VERSION} as build # build via Makefile target helm-test-image in root diff --git a/production/helm/loki/templates/_helpers.tpl b/production/helm/loki/templates/_helpers.tpl index 4302f6f09e68..f302bc5a621a 100644 --- a/production/helm/loki/templates/_helpers.tpl +++ b/production/helm/loki/templates/_helpers.tpl @@ -1050,6 +1050,34 @@ enableServiceLinks: false {{- printf "%s" $idxGatewayAddress }} {{- end }} +{{/* Determine bloom-planner address */}} +{{- define "loki.bloomPlannerAddress" -}} +{{- $bloomPlannerAddress := ""}} +{{- $isDistributed := eq (include "loki.deployment.isDistributed" .) "true" -}} +{{- $isScalable := eq (include "loki.deployment.isScalable" .) "true" -}} +{{- if $isDistributed -}} +{{- $bloomPlannerAddress = printf "%s-headless.%s.svc.%s:%s" (include "loki.bloomPlannerFullname" .) .Release.Namespace .Values.global.clusterDomain (.Values.loki.server.grpc_listen_port | toString) -}} +{{- end -}} +{{- if $isScalable -}} +{{- $bloomPlannerAddress = printf "%s-headless.%s.svc.%s:%s" (include "loki.backendFullname" .) .Release.Namespace .Values.global.clusterDomain (.Values.loki.server.grpc_listen_port | toString) -}} +{{- end -}} +{{- printf "%s" $bloomPlannerAddress}} +{{- end }} + +{{/* Determine bloom-gateway address */}} +{{- define "loki.bloomGatewayAddresses" -}} +{{- $bloomGatewayAddresses := ""}} +{{- $isDistributed := eq (include "loki.deployment.isDistributed" .) "true" -}} +{{- $isScalable := eq (include "loki.deployment.isScalable" .) "true" -}} +{{- if $isDistributed -}} +{{- $bloomGatewayAddresses = printf "dnssrvnoa+_grpc._tcp.%s-headless.%s.svc.%s" (include "loki.bloomGatewayFullname" .) .Release.Namespace .Values.global.clusterDomain -}} +{{- end -}} +{{- if $isScalable -}} +{{- $bloomGatewayAddresses = printf "dnssrvnoa+_grpc._tcp.%s-headless.%s.svc.%s" (include "loki.backendFullname" .) .Release.Namespace .Values.global.clusterDomain -}} +{{- end -}} +{{- printf "%s" $bloomGatewayAddresses}} +{{- end }} + {{- define "loki.config.checksum" -}} checksum/config: {{ include (print .Template.BasePath "/config.yaml") . | sha256sum }} {{- end -}} diff --git a/production/helm/loki/templates/backend/clusterrole.yaml b/production/helm/loki/templates/backend/clusterrole.yaml index e8631c35a501..36c8a0fe0e80 100644 --- a/production/helm/loki/templates/backend/clusterrole.yaml +++ b/production/helm/loki/templates/backend/clusterrole.yaml @@ -1,5 +1,4 @@ -{{- $isSimpleScalable := eq (include "loki.deployment.isScalable" .) 
"true" -}} -{{- if and $isSimpleScalable (not .Values.rbac.namespaced) (not .Values.rbac.useExistingRole) }} +{{- if and (not .Values.rbac.namespaced) (not .Values.rbac.useExistingRole) }} kind: ClusterRole apiVersion: rbac.authorization.k8s.io/v1 metadata: diff --git a/production/helm/loki/templates/backend/clusterrolebinding.yaml b/production/helm/loki/templates/backend/clusterrolebinding.yaml index 619b70260cd4..92f86a47d4f1 100644 --- a/production/helm/loki/templates/backend/clusterrolebinding.yaml +++ b/production/helm/loki/templates/backend/clusterrolebinding.yaml @@ -1,5 +1,5 @@ {{- $isSimpleScalable := eq (include "loki.deployment.isScalable" .) "true" -}} -{{- if and $isSimpleScalable (not .Values.rbac.namespaced) }} +{{- if (not .Values.rbac.namespaced) }} kind: ClusterRoleBinding apiVersion: rbac.authorization.k8s.io/v1 metadata: diff --git a/production/helm/loki/templates/backend/query-scheduler-discovery.yaml b/production/helm/loki/templates/backend/query-scheduler-discovery.yaml index 14bca1fa041f..4c357e53a431 100644 --- a/production/helm/loki/templates/backend/query-scheduler-discovery.yaml +++ b/production/helm/loki/templates/backend/query-scheduler-discovery.yaml @@ -9,6 +9,13 @@ metadata: labels: {{- include "loki.backendSelectorLabels" . | nindent 4 }} prometheus.io/service-monitor: "false" + annotations: + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.backend.service.annotations }} + {{- toYaml . | nindent 4}} + {{- end }} spec: type: ClusterIP clusterIP: None diff --git a/production/helm/loki/templates/bloom-builder/deployment-bloom-builder.yaml b/production/helm/loki/templates/bloom-builder/deployment-bloom-builder.yaml index 5735de5da23d..c04b3ae5ae25 100644 --- a/production/helm/loki/templates/bloom-builder/deployment-bloom-builder.yaml +++ b/production/helm/loki/templates/bloom-builder/deployment-bloom-builder.yaml @@ -1,5 +1,5 @@ {{- $isDistributed := eq (include "loki.deployment.isDistributed" .) "true" -}} -{{- if $isDistributed -}} +{{- if (and $isDistributed (gt (int .Values.bloomPlanner.replicas) 0)) -}} apiVersion: apps/v1 kind: Deployment metadata: @@ -101,6 +101,10 @@ spec: - name: license mountPath: /etc/loki/license {{- end }} + - name: temp + mountPath: /tmp + - name: data + mountPath: /var/loki {{- with .Values.bloomBuilder.extraVolumeMounts }} {{- toYaml . | nindent 12 }} {{- end }} @@ -136,6 +140,10 @@ spec: secretName: enterprise-logs-license {{- end }} {{- end }} + - name: temp + emptyDir: {} + - name: data + emptyDir: {} {{- with .Values.bloomBuilder.extraVolumes }} {{- toYaml . | nindent 8 }} {{- end }} diff --git a/production/helm/loki/templates/bloom-builder/service-bloom-builder-headless.yaml b/production/helm/loki/templates/bloom-builder/service-bloom-builder-headless.yaml index e089d4d2de40..938925291a44 100644 --- a/production/helm/loki/templates/bloom-builder/service-bloom-builder-headless.yaml +++ b/production/helm/loki/templates/bloom-builder/service-bloom-builder-headless.yaml @@ -1,5 +1,5 @@ {{- $isDistributed := eq (include "loki.deployment.isDistributed" .) "true" -}} -{{- if $isDistributed -}} +{{- if (and $isDistributed (or (gt (int .Values.bloomBuilder.replicas) 0)) .Values.bloomBuilder.autoscaling.enabled) -}} apiVersion: v1 kind: Service metadata: @@ -11,10 +11,13 @@ metadata: {{- toYaml . | nindent 4 }} {{- end }} prometheus.io/service-monitor: "false" - {{- with .Values.loki.serviceAnnotations }} annotations: - {{- toYaml . 
| nindent 4 }} - {{- end }} + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.bloomBuilder.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} spec: clusterIP: None type: ClusterIP diff --git a/production/helm/loki/templates/bloom-builder/service-bloom-builder.yaml b/production/helm/loki/templates/bloom-builder/service-bloom-builder.yaml index aab082d72293..b3debb08893a 100644 --- a/production/helm/loki/templates/bloom-builder/service-bloom-builder.yaml +++ b/production/helm/loki/templates/bloom-builder/service-bloom-builder.yaml @@ -1,5 +1,5 @@ {{- $isDistributed := eq (include "loki.deployment.isDistributed" .) "true" -}} -{{- if $isDistributed -}} +{{- if (and $isDistributed (gt (int .Values.bloomBuilder.replicas) 0)) -}} apiVersion: v1 kind: Service metadata: @@ -10,10 +10,13 @@ metadata: {{- with .Values.bloomBuilder.serviceLabels }} {{- toYaml . | nindent 4 }} {{- end }} - {{- with .Values.loki.serviceAnnotations }} annotations: - {{- toYaml . | nindent 4 }} - {{- end }} + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.bloomBuilder.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} spec: type: ClusterIP publishNotReadyAddresses: true diff --git a/production/helm/loki/templates/bloom-gateway/service-bloom-gateway-headless.yaml b/production/helm/loki/templates/bloom-gateway/service-bloom-gateway-headless.yaml index daa61c64ab5e..852e4cb10006 100644 --- a/production/helm/loki/templates/bloom-gateway/service-bloom-gateway-headless.yaml +++ b/production/helm/loki/templates/bloom-gateway/service-bloom-gateway-headless.yaml @@ -11,10 +11,13 @@ metadata: {{- with .Values.bloomGateway.serviceLabels }} {{- toYaml . | nindent 4 }} {{- end }} - {{- with .Values.loki.serviceAnnotations }} annotations: - {{- toYaml . | nindent 4 }} - {{- end }} + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.bloomGateway.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} spec: type: ClusterIP clusterIP: None diff --git a/production/helm/loki/templates/bloom-gateway/statefulset-bloom-gateway.yaml b/production/helm/loki/templates/bloom-gateway/statefulset-bloom-gateway.yaml index e2ceefbeafb5..7e97b8e93ece 100644 --- a/production/helm/loki/templates/bloom-gateway/statefulset-bloom-gateway.yaml +++ b/production/helm/loki/templates/bloom-gateway/statefulset-bloom-gateway.yaml @@ -1,6 +1,5 @@ {{- $isDistributed := eq (include "loki.deployment.isDistributed" .) "true" -}} -{{- if $isDistributed }} -{{- if (gt (int .Values.bloomGateway.replicas) 0) -}} +{{- if (and $isDistributed (gt (int .Values.bloomGateway.replicas) 0)) -}} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -180,4 +179,3 @@ spec: {{- end }} {{- end }} {{- end -}} -{{- end -}} \ No newline at end of file diff --git a/production/helm/loki/templates/bloom-planner/service-bloom-planner-headless.yaml b/production/helm/loki/templates/bloom-planner/service-bloom-planner-headless.yaml index fd02c64acd50..78e26336f39f 100644 --- a/production/helm/loki/templates/bloom-planner/service-bloom-planner-headless.yaml +++ b/production/helm/loki/templates/bloom-planner/service-bloom-planner-headless.yaml @@ -1,6 +1,5 @@ {{- $isDistributed := eq (include "loki.deployment.isDistributed" .) 
"true" -}} -{{- if $isDistributed -}} -{{- if (gt (int .Values.bloomPlanner.replicas) 0) -}} +{{- if (and $isDistributed (gt (int .Values.bloomPlanner.replicas) 0)) -}} apiVersion: v1 kind: Service metadata: @@ -11,10 +10,13 @@ metadata: {{- with .Values.bloomPlanner.serviceLabels }} {{- toYaml . | nindent 4 }} {{- end }} - {{- with .Values.loki.serviceAnnotations }} annotations: - {{- toYaml . | nindent 4 }} - {{- end }} + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.bloomPlanner.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} spec: type: ClusterIP clusterIP: None @@ -33,4 +35,3 @@ spec: selector: {{- include "loki.bloomPlannerSelectorLabels" . | nindent 4 }} {{- end -}} -{{- end -}} diff --git a/production/helm/loki/templates/bloom-planner/statefulset-bloom-planner.yaml b/production/helm/loki/templates/bloom-planner/statefulset-bloom-planner.yaml index 8d9a9f23998a..8406542dabaa 100644 --- a/production/helm/loki/templates/bloom-planner/statefulset-bloom-planner.yaml +++ b/production/helm/loki/templates/bloom-planner/statefulset-bloom-planner.yaml @@ -1,6 +1,5 @@ {{- $isDistributed := eq (include "loki.deployment.isDistributed" .) "true" -}} -{{- if $isDistributed }} -{{- if (gt (int .Values.bloomPlanner.replicas) 0) -}} +{{- if (and $isDistributed (gt (int .Values.bloomPlanner.replicas) 0)) -}} apiVersion: apps/v1 kind: StatefulSet metadata: @@ -180,4 +179,3 @@ spec: {{- end }} {{- end }} {{- end -}} -{{- end -}} diff --git a/production/helm/loki/templates/compactor/service-compactor.yaml b/production/helm/loki/templates/compactor/service-compactor.yaml index c75e1cee5ae1..f118b6cc9b82 100644 --- a/production/helm/loki/templates/compactor/service-compactor.yaml +++ b/production/helm/loki/templates/compactor/service-compactor.yaml @@ -11,10 +11,13 @@ metadata: {{- toYaml . | nindent 4 }} {{- end }} app.kubernetes.io/component: compactor - {{- with .Values.loki.serviceAnnotations }} annotations: - {{- toYaml . | nindent 4 }} - {{- end }} + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.compactor.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} spec: type: ClusterIP ports: diff --git a/production/helm/loki/templates/distributor/service-distributor-headless.yaml b/production/helm/loki/templates/distributor/service-distributor-headless.yaml index c69bb0add37e..650b62959d97 100644 --- a/production/helm/loki/templates/distributor/service-distributor-headless.yaml +++ b/production/helm/loki/templates/distributor/service-distributor-headless.yaml @@ -12,10 +12,13 @@ metadata: {{- end }} variant: headless prometheus.io/service-monitor: "false" - {{- with .Values.loki.serviceAnnotations }} annotations: - {{- toYaml . | nindent 4 }} - {{- end }} + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.distributor.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} spec: type: ClusterIP clusterIP: None diff --git a/production/helm/loki/templates/distributor/service-distributor.yaml b/production/helm/loki/templates/distributor/service-distributor.yaml index 8145834d3509..6a8995677c14 100644 --- a/production/helm/loki/templates/distributor/service-distributor.yaml +++ b/production/helm/loki/templates/distributor/service-distributor.yaml @@ -10,10 +10,13 @@ metadata: {{- with .Values.distributor.serviceLabels }} {{- toYaml . 
| nindent 4 }} {{- end }} - {{- with .Values.loki.serviceAnnotations }} annotations: - {{- toYaml . | nindent 4 }} - {{- end }} + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.distributor.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} spec: type: ClusterIP ports: diff --git a/production/helm/loki/templates/index-gateway/service-index-gateway-headless.yaml b/production/helm/loki/templates/index-gateway/service-index-gateway-headless.yaml index b0c90dc35fd9..06506582f9e3 100644 --- a/production/helm/loki/templates/index-gateway/service-index-gateway-headless.yaml +++ b/production/helm/loki/templates/index-gateway/service-index-gateway-headless.yaml @@ -7,6 +7,13 @@ metadata: labels: {{- include "loki.indexGatewaySelectorLabels" . | nindent 4 }} prometheus.io/service-monitor: "false" + annotations: + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.indexGateway.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} spec: type: ClusterIP clusterIP: None diff --git a/production/helm/loki/templates/index-gateway/service-index-gateway.yaml b/production/helm/loki/templates/index-gateway/service-index-gateway.yaml index 2d43bb0ed5e9..822a0ce692d3 100644 --- a/production/helm/loki/templates/index-gateway/service-index-gateway.yaml +++ b/production/helm/loki/templates/index-gateway/service-index-gateway.yaml @@ -9,10 +9,13 @@ metadata: {{- with .Values.indexGateway.serviceLabels }} {{- toYaml . | nindent 4 }} {{- end }} - {{- with .Values.loki.serviceAnnotations }} annotations: - {{- toYaml . | nindent 4 }} - {{- end }} + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.indexGateway.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} spec: type: ClusterIP ports: diff --git a/production/helm/loki/templates/ingester/service-ingester-headless.yaml b/production/helm/loki/templates/ingester/service-ingester-headless.yaml index e83dcf7be4fe..8a8b92f2ebc5 100644 --- a/production/helm/loki/templates/ingester/service-ingester-headless.yaml +++ b/production/helm/loki/templates/ingester/service-ingester-headless.yaml @@ -8,10 +8,13 @@ metadata: labels: {{- include "loki.ingesterSelectorLabels" . | nindent 4 }} prometheus.io/service-monitor: "false" - {{- with .Values.loki.serviceAnnotations }} annotations: - {{- toYaml . | nindent 4 }} - {{- end }} + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.ingester.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} spec: type: ClusterIP clusterIP: None diff --git a/production/helm/loki/templates/ingester/service-ingester-zone-a-headless.yaml b/production/helm/loki/templates/ingester/service-ingester-zone-a-headless.yaml index 478ea8c89eff..03add3b286fc 100644 --- a/production/helm/loki/templates/ingester/service-ingester-zone-a-headless.yaml +++ b/production/helm/loki/templates/ingester/service-ingester-zone-a-headless.yaml @@ -10,10 +10,13 @@ metadata: {{- with .Values.ingester.serviceLabels }} {{- toYaml . | nindent 4 }} {{- end }} - {{- with .Values.loki.serviceAnnotations }} annotations: - {{- toYaml . | nindent 4 }} - {{- end }} + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.ingester.serviceAnnotations }} + {{- toYaml . 
| nindent 4}} + {{- end }} spec: clusterIP: None ports: diff --git a/production/helm/loki/templates/ingester/service-ingester-zone-b-headless.yaml b/production/helm/loki/templates/ingester/service-ingester-zone-b-headless.yaml index c19ed4cb1f65..607221922a66 100644 --- a/production/helm/loki/templates/ingester/service-ingester-zone-b-headless.yaml +++ b/production/helm/loki/templates/ingester/service-ingester-zone-b-headless.yaml @@ -10,10 +10,13 @@ metadata: {{- with .Values.ingester.serviceLabels }} {{- toYaml . | nindent 4 }} {{- end }} - {{- with .Values.loki.serviceAnnotations }} annotations: - {{- toYaml . | nindent 4 }} - {{- end }} + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.ingester.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} spec: clusterIP: None ports: diff --git a/production/helm/loki/templates/ingester/service-ingester-zone-c-headless.yaml b/production/helm/loki/templates/ingester/service-ingester-zone-c-headless.yaml index 2757fcef9400..554144746ae0 100644 --- a/production/helm/loki/templates/ingester/service-ingester-zone-c-headless.yaml +++ b/production/helm/loki/templates/ingester/service-ingester-zone-c-headless.yaml @@ -10,10 +10,13 @@ metadata: {{- with .Values.ingester.serviceLabels }} {{- toYaml . | nindent 4 }} {{- end }} - {{- with .Values.loki.serviceAnnotations }} annotations: - {{- toYaml . | nindent 4 }} - {{- end }} + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.ingester.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} spec: clusterIP: None ports: diff --git a/production/helm/loki/templates/ingester/service-ingester.yaml b/production/helm/loki/templates/ingester/service-ingester.yaml index d762cbf65d95..94d6f835332b 100644 --- a/production/helm/loki/templates/ingester/service-ingester.yaml +++ b/production/helm/loki/templates/ingester/service-ingester.yaml @@ -10,10 +10,13 @@ metadata: {{- with .Values.ingester.serviceLabels }} {{- toYaml . | nindent 4 }} {{- end }} - {{- with .Values.loki.serviceAnnotations }} annotations: - {{- toYaml . | nindent 4 }} - {{- end }} + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.ingester.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} spec: type: ClusterIP ports: diff --git a/production/helm/loki/templates/memcached/_memcached-statefulset.tpl b/production/helm/loki/templates/memcached/_memcached-statefulset.tpl index ce490ee6cd71..0664ba43c6a2 100644 --- a/production/helm/loki/templates/memcached/_memcached-statefulset.tpl +++ b/production/helm/loki/templates/memcached/_memcached-statefulset.tpl @@ -102,9 +102,11 @@ spec: ports: - containerPort: {{ .port }} name: client + {{- /* Calculate storage size as round(.persistence.storageSize * 0.9). But with integer built-in operators. */}} + {{- $persistenceSize := (div (mul (trimSuffix "Gi" .persistence.storageSize | trimSuffix "G") 9) 10 ) }} args: - -m {{ .allocatedMemory }} - - --extended=modern,track_sizes{{ if .persistence.enabled }},ext_path={{ .persistence.mountPath }}/file:{{ .persistence.storageSize }}{{ end }}{{ with .extraExtendedOptions }},{{ . }}{{ end }} + - --extended=modern,track_sizes{{ if .persistence.enabled }},ext_path={{ .persistence.mountPath }}/file:{{ $persistenceSize }}G,ext_wbuf_size=16{{ end }}{{ with .extraExtendedOptions }},{{ . 
}}{{ end }} - -I {{ .maxItemMemory }}m - -c {{ .connectionLimit }} - -v diff --git a/production/helm/loki/templates/provisioner/role-provisioner.yaml b/production/helm/loki/templates/provisioner/role-provisioner.yaml index e1a636ef7c1f..1335b0f315a5 100644 --- a/production/helm/loki/templates/provisioner/role-provisioner.yaml +++ b/production/helm/loki/templates/provisioner/role-provisioner.yaml @@ -1,4 +1,4 @@ -{{ if and .Values.enterprise.provisioner.enabled .Values.enterprise.enabled }} +{{ if and (and .Values.enterprise.provisioner.enabled .Values.enterprise.enabled) (not .Values.rbac.namespaced)}} apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole metadata: diff --git a/production/helm/loki/templates/provisioner/rolebinding-provisioner.yaml b/production/helm/loki/templates/provisioner/rolebinding-provisioner.yaml index e681e97a74be..d87874dc9307 100644 --- a/production/helm/loki/templates/provisioner/rolebinding-provisioner.yaml +++ b/production/helm/loki/templates/provisioner/rolebinding-provisioner.yaml @@ -1,4 +1,4 @@ -{{ if and .Values.enterprise.provisioner.enabled .Values.enterprise.enabled }} +{{ if and (and .Values.enterprise.provisioner.enabled .Values.enterprise.enabled) (not .Values.rbac.namespaced)}} --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding diff --git a/production/helm/loki/templates/querier/service-querier.yaml b/production/helm/loki/templates/querier/service-querier.yaml index ca5a23bbffb2..15c9c6a06c98 100644 --- a/production/helm/loki/templates/querier/service-querier.yaml +++ b/production/helm/loki/templates/querier/service-querier.yaml @@ -10,10 +10,13 @@ metadata: {{- with .Values.querier.serviceLabels }} {{- toYaml . | nindent 4 }} {{- end }} - {{- with .Values.loki.serviceAnnotations }} annotations: - {{- toYaml . | nindent 4 }} - {{- end }} + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.querier.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} spec: type: ClusterIP ports: diff --git a/production/helm/loki/templates/query-frontend/poddisruptionbudget-query-frontent.yaml b/production/helm/loki/templates/query-frontend/poddisruptionbudget-query-frontend.yaml similarity index 100% rename from production/helm/loki/templates/query-frontend/poddisruptionbudget-query-frontent.yaml rename to production/helm/loki/templates/query-frontend/poddisruptionbudget-query-frontend.yaml diff --git a/production/helm/loki/templates/query-frontend/service-query-frontend-headless.yaml b/production/helm/loki/templates/query-frontend/service-query-frontend-headless.yaml index b168ce6ce952..8da905415597 100644 --- a/production/helm/loki/templates/query-frontend/service-query-frontend-headless.yaml +++ b/production/helm/loki/templates/query-frontend/service-query-frontend-headless.yaml @@ -11,10 +11,13 @@ metadata: {{- toYaml . | nindent 4 }} {{- end }} prometheus.io/service-monitor: "false" - {{- with .Values.loki.serviceAnnotations }} annotations: - {{- toYaml . | nindent 4 }} - {{- end }} + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.queryFrontend.serviceAnnotations }} + {{- toYaml . 
| nindent 4}} + {{- end }} spec: clusterIP: None type: ClusterIP diff --git a/production/helm/loki/templates/query-frontend/service-query-frontend.yaml b/production/helm/loki/templates/query-frontend/service-query-frontend.yaml index b017c5d54aaf..a2396950d94d 100644 --- a/production/helm/loki/templates/query-frontend/service-query-frontend.yaml +++ b/production/helm/loki/templates/query-frontend/service-query-frontend.yaml @@ -10,10 +10,13 @@ metadata: {{- with .Values.queryFrontend.serviceLabels }} {{- toYaml . | nindent 4 }} {{- end }} - {{- with .Values.loki.serviceAnnotations }} annotations: - {{- toYaml . | nindent 4 }} - {{- end }} + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.queryFrontend.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} spec: type: ClusterIP publishNotReadyAddresses: true diff --git a/production/helm/loki/templates/query-scheduler/service-query-scheduler.yaml b/production/helm/loki/templates/query-scheduler/service-query-scheduler.yaml index 2b3f1b230060..746c7bdfdfb2 100644 --- a/production/helm/loki/templates/query-scheduler/service-query-scheduler.yaml +++ b/production/helm/loki/templates/query-scheduler/service-query-scheduler.yaml @@ -10,10 +10,13 @@ metadata: {{- with .Values.queryScheduler.serviceLabels }} {{- toYaml . | nindent 4 }} {{- end }} - {{- with .Values.loki.serviceAnnotations }} annotations: - {{- toYaml . | nindent 4 }} - {{- end }} + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.queryScheduler.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} spec: type: ClusterIP clusterIP: None diff --git a/production/helm/loki/templates/ruler/service-ruler.yaml b/production/helm/loki/templates/ruler/service-ruler.yaml index 1a1f0f4d2e91..4d58ec85b42a 100644 --- a/production/helm/loki/templates/ruler/service-ruler.yaml +++ b/production/helm/loki/templates/ruler/service-ruler.yaml @@ -9,10 +9,13 @@ metadata: {{- with .Values.ruler.serviceLabels }} {{- toYaml . | nindent 4 }} {{- end }} - {{- with .Values.loki.serviceAnnotations }} annotations: - {{- toYaml . | nindent 4 }} - {{- end }} + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.ruler.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} spec: type: ClusterIP clusterIP: None diff --git a/production/helm/loki/templates/service-memberlist.yaml b/production/helm/loki/templates/service-memberlist.yaml index cacb5b1e872b..3d46f234d4ea 100644 --- a/production/helm/loki/templates/service-memberlist.yaml +++ b/production/helm/loki/templates/service-memberlist.yaml @@ -6,6 +6,13 @@ metadata: namespace: {{ $.Release.Namespace }} labels: {{- include "loki.labels" . | nindent 4 }} + annotations: + {{- with .Values.loki.serviceAnnotations }} + {{- toYaml . | nindent 4}} + {{- end }} + {{- with .Values.memberlist.service.annotations }} + {{- toYaml . 
| nindent 4}} + {{- end }} spec: type: ClusterIP clusterIP: None diff --git a/production/helm/loki/templates/single-binary/statefulset.yaml b/production/helm/loki/templates/single-binary/statefulset.yaml index 7bd2b9813f60..5e28902e5677 100644 --- a/production/helm/loki/templates/single-binary/statefulset.yaml +++ b/production/helm/loki/templates/single-binary/statefulset.yaml @@ -79,6 +79,75 @@ spec: {{- end }} {{- end }} containers: + {{- if .Values.sidecar.rules.enabled }} + - name: loki-sc-rules + {{- if .Values.sidecar.image.sha }} + image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}@sha256:{{ .Values.sidecar.image.sha }}" + {{- else }} + image: "{{ .Values.sidecar.image.repository }}:{{ .Values.sidecar.image.tag }}" + {{- end }} + imagePullPolicy: {{ .Values.sidecar.image.pullPolicy }} + env: + - name: METHOD + value: {{ .Values.sidecar.rules.watchMethod }} + - name: LABEL + value: "{{ .Values.sidecar.rules.label }}" + {{- if .Values.sidecar.rules.labelValue }} + - name: LABEL_VALUE + value: {{ quote .Values.sidecar.rules.labelValue }} + {{- end }} + - name: FOLDER + value: "{{ .Values.sidecar.rules.folder }}" + - name: RESOURCE + value: {{ quote .Values.sidecar.rules.resource }} + {{- if .Values.sidecar.enableUniqueFilenames }} + - name: UNIQUE_FILENAMES + value: "{{ .Values.sidecar.enableUniqueFilenames }}" + {{- end }} + {{- if .Values.sidecar.rules.searchNamespace }} + - name: NAMESPACE + value: "{{ .Values.sidecar.rules.searchNamespace | join "," }}" + {{- end }} + {{- if .Values.sidecar.skipTlsVerify }} + - name: SKIP_TLS_VERIFY + value: "{{ .Values.sidecar.skipTlsVerify }}" + {{- end }} + {{- if .Values.sidecar.rules.script }} + - name: SCRIPT + value: "{{ .Values.sidecar.rules.script }}" + {{- end }} + {{- if .Values.sidecar.rules.watchServerTimeout }} + - name: WATCH_SERVER_TIMEOUT + value: "{{ .Values.sidecar.rules.watchServerTimeout }}" + {{- end }} + {{- if .Values.sidecar.rules.watchClientTimeout }} + - name: WATCH_CLIENT_TIMEOUT + value: "{{ .Values.sidecar.rules.watchClientTimeout }}" + {{- end }} + {{- if .Values.sidecar.rules.logLevel }} + - name: LOG_LEVEL + value: "{{ .Values.sidecar.rules.logLevel }}" + {{- end }} + {{- if .Values.sidecar.livenessProbe }} + livenessProbe: + {{- toYaml .Values.sidecar.livenessProbe | nindent 12 }} + {{- end }} + {{- if .Values.sidecar.readinessProbe }} + readinessProbe: + {{- toYaml .Values.sidecar.readinessProbe | nindent 12 }} + {{- end }} + {{- if .Values.sidecar.resources }} + resources: + {{- toYaml .Values.sidecar.resources | nindent 12 }} + {{- end }} + {{- if .Values.sidecar.securityContext }} + securityContext: + {{- toYaml .Values.sidecar.securityContext | nindent 12 }} + {{- end }} + volumeMounts: + - name: sc-rules-volume + mountPath: {{ .Values.sidecar.rules.folder | quote }} + {{- end}} - name: loki image: {{ include "loki.image" . }} imagePullPolicy: {{ .Values.loki.image.pullPolicy }} @@ -125,6 +194,10 @@ spec: - name: license mountPath: /etc/loki/license {{- end }} + {{- if .Values.sidecar.rules.enabled }} + - name: sc-rules-volume + mountPath: {{ .Values.sidecar.rules.folder | quote }} + {{- end}} {{- with .Values.singleBinary.extraVolumeMounts }} {{- toYaml . 
| nindent 12 }} {{- end }} @@ -166,6 +239,15 @@ spec: secretName: enterprise-logs-license {{- end }} {{- end }} + {{- if .Values.sidecar.rules.enabled }} + - name: sc-rules-volume + {{- if .Values.sidecar.rules.sizeLimit }} + emptyDir: + sizeLimit: {{ .Values.sidecar.rules.sizeLimit }} + {{- else }} + emptyDir: {} + {{- end -}} + {{- end -}} {{- with .Values.singleBinary.extraVolumes }} {{- toYaml . | nindent 8 }} {{- end }} diff --git a/production/helm/loki/templates/tokengen/clusterrole-tokengen.yaml b/production/helm/loki/templates/tokengen/clusterrole-tokengen.yaml index 19dad8804bd6..c67cec886471 100644 --- a/production/helm/loki/templates/tokengen/clusterrole-tokengen.yaml +++ b/production/helm/loki/templates/tokengen/clusterrole-tokengen.yaml @@ -1,4 +1,4 @@ -{{ if and .Values.enterprise.tokengen.enabled .Values.enterprise.enabled }} +{{ if and (and .Values.enterprise.tokengen.enabled .Values.enterprise.enabled) (not .Values.rbac.namespaced)}} --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRole diff --git a/production/helm/loki/templates/tokengen/clusterrolebinding-tokengen.yaml b/production/helm/loki/templates/tokengen/clusterrolebinding-tokengen.yaml index 248337ea2e8f..deb368f29936 100644 --- a/production/helm/loki/templates/tokengen/clusterrolebinding-tokengen.yaml +++ b/production/helm/loki/templates/tokengen/clusterrolebinding-tokengen.yaml @@ -1,4 +1,4 @@ -{{ if and .Values.enterprise.tokengen.enabled .Values.enterprise.enabled }} +{{ if and (and .Values.enterprise.tokengen.enabled .Values.enterprise.enabled) (not .Values.rbac.namespaced)}} --- apiVersion: rbac.authorization.k8s.io/v1 kind: ClusterRoleBinding diff --git a/production/helm/loki/values.yaml b/production/helm/loki/values.yaml index b4c01bfaa3f7..f9bd31de9498 100644 --- a/production/helm/loki/values.yaml +++ b/production/helm/loki/values.yaml @@ -279,6 +279,16 @@ loki: tracing: enabled: {{ .Values.loki.tracing.enabled }} + + {{- with .Values.loki.bloom_build }} + bloom_build: + {{- tpl (. | toYaml) $ | nindent 4 }} + {{- end }} + + {{- with .Values.loki.bloom_gateway }} + bloom_gateway: + {{- tpl (. | toYaml) $ | nindent 4 }} + {{- end }} # Should authentication be enabled auth_enabled: true # -- memberlist configuration (overrides embedded default) @@ -411,6 +421,8 @@ loki: tsdb_shipper: index_gateway_client: server_address: '{{ include "loki.indexGatewayAddress" . }}' + bloom_shipper: + working_directory: /var/loki/data/bloomshipper hedging: at: "250ms" max_per_second: 20 @@ -443,8 +455,12 @@ loki: enabled: false bloom_build: enabled: false + builder: + planner_address: '{{ include "loki.bloomPlannerAddress" . }}' bloom_gateway: enabled: false + client: + addresses: '{{ include "loki.bloomGatewayAddresses" . }}' ###################################################################################################################### # # Enterprise Loki Configs @@ -779,6 +795,7 @@ networkPolicy: memberlist: service: publishNotReadyAddresses: false + annotations: {} ###################################################################################################################### # # adminAPI configuration, enterprise only. 
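The template changes above all follow one pattern: each component Service now renders an annotations: block that concatenates the chart-wide .Values.loki.serviceAnnotations with the matching per-component serviceAnnotations map. A minimal values sketch of how the two levels combine (the annotation keys here are hypothetical, purely for illustration):

    loki:
      # rendered into the annotations of every Loki service
      serviceAnnotations:
        example.com/owner: "logging-team"
    ingester:
      # rendered only into the ingester services, after the global map
      serviceAnnotations:
        example.com/internal-lb: "true"

Because both maps are simply toYaml-ed in sequence under annotations:, a key present in both would be emitted twice; keeping the global and per-component keys distinct avoids relying on parser behavior for duplicate YAML keys.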
@@ -1718,7 +1735,9 @@ ingester: # -- The name of the PriorityClass for ingester pods priorityClassName: null # -- Labels for ingester service serviceLabels: {} + # -- Annotations for ingester service + serviceAnnotations: {} # -- Additional CLI args for the ingester extraArgs: [] # -- Environment variables to add to the ingester pods @@ -1893,6 +1912,8 @@ distributor: podAnnotations: {} # -- Labels for distributor service serviceLabels: {} + # -- Annotations for distributor service + serviceAnnotations: {} # -- Additional CLI args for the distributor extraArgs: [] # -- Environment variables to add to the distributor pods @@ -1983,6 +2004,8 @@ querier: podAnnotations: {} # -- Labels for querier service serviceLabels: {} + # -- Annotations for querier service + serviceAnnotations: {} # -- Additional CLI args for the querier extraArgs: [] # -- Environment variables to add to the querier pods @@ -2099,6 +2122,8 @@ queryFrontend: podAnnotations: {} # -- Labels for query-frontend service serviceLabels: {} + # -- Annotations for query-frontend service + serviceAnnotations: {} # -- Additional CLI args for the query-frontend extraArgs: [] # -- Environment variables to add to the query-frontend pods @@ -2160,6 +2185,8 @@ queryScheduler: podAnnotations: {} # -- Labels for query-scheduler service serviceLabels: {} + # -- Annotations for query-scheduler service + serviceAnnotations: {} # -- Additional CLI args for the query-scheduler extraArgs: [] # -- Environment variables to add to the query-scheduler pods @@ -2220,6 +2247,8 @@ indexGateway: podAnnotations: {} # -- Labels for index-gateway service serviceLabels: {} + # -- Annotations for index-gateway service + serviceAnnotations: {} # -- Additional CLI args for the index-gateway extraArgs: [] # -- Environment variables to add to the index-gateway pods @@ -2310,6 +2339,8 @@ compactor: topologyKey: kubernetes.io/hostname # -- Labels for compactor service serviceLabels: {} + # -- Annotations for compactor service + serviceAnnotations: {} # -- Additional CLI args for the compactor extraArgs: [] # -- Environment variables to add to the compactor pods @@ -2416,6 +2447,8 @@ bloomGateway: topologyKey: kubernetes.io/hostname # -- Labels for bloom-gateway service serviceLabels: {} + # -- Annotations for bloom-gateway service + serviceAnnotations: {} # -- Additional CLI args for the bloom-gateway extraArgs: [] # -- Environment variables to add to the bloom-gateway pods @@ -2448,20 +2481,13 @@ bloomGateway: persistence: # -- Enable creating PVCs for the bloom-gateway enabled: false - # -- Size of persistent disk - size: 10Gi - # -- Storage class to be used. - # If defined, storageClassName: <storageClass>. - # If set to "-", storageClassName: "", which disables dynamic provisioning. - # If empty or set to null, no storageClassName spec is - # set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS, and OpenStack). - storageClass: null # -- Annotations for bloom-gateway PVCs annotations: {} # -- List of the bloom-gateway PVCs # @notationType -- list claims: - name: data + # -- Size of persistent disk size: 10Gi # -- Storage class to be used. # If defined, storageClassName: <storageClass>. @@ -2469,8 +2495,6 @@ bloomGateway: # If empty or set to null, no storageClassName spec is # set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS, and OpenStack). 
storageClass: null - # - name: wal - # size: 150Gi # -- Enable StatefulSetAutoDeletePVC feature enableStatefulSetAutoDeletePVC: false whenDeleted: Retain @@ -2522,6 +2546,8 @@ bloomPlanner: topologyKey: kubernetes.io/hostname # -- Labels for bloom-planner service serviceLabels: {} + # -- Annotations for bloom-planner service + serviceAnnotations: {} # -- Additional CLI args for the bloom-planner extraArgs: [] # -- Environment variables to add to the bloom-planner pods @@ -2554,19 +2580,20 @@ bloomPlanner: persistence: # -- Enable creating PVCs for the bloom-planner enabled: false - # -- Size of persistent disk - size: 10Gi - # -- Storage class to be used. - # If defined, storageClassName: <storageClass>. - # If set to "-", storageClassName: "", which disables dynamic provisioning. - # If empty or set to null, no storageClassName spec is - # set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS, and OpenStack). - storageClass: null # -- Annotations for bloom-planner PVCs annotations: {} # -- List of the bloom-planner PVCs # @notationType -- list - claims: [] + claims: + - name: data + # -- Size of persistent disk + size: 10Gi + # -- Storage class to be used. + # If defined, storageClassName: <storageClass>. + # If set to "-", storageClassName: "", which disables dynamic provisioning. + # If empty or set to null, no storageClassName spec is + # set, choosing the default provisioner (gp2 on AWS, standard on GKE, AWS, and OpenStack). + storageClass: null # -- Enable StatefulSetAutoDeletePVC feature enableStatefulSetAutoDeletePVC: false whenDeleted: Retain @@ -2636,6 +2663,8 @@ bloomBuilder: podAnnotations: {} # -- Labels for bloom-builder service serviceLabels: {} + # -- Annotations for bloom-builder service + serviceAnnotations: {} # -- Additional CLI args for the bloom-builder extraArgs: [] # -- Environment variables to add to the bloom-builder pods @@ -2706,6 +2735,8 @@ patternIngester: topologyKey: kubernetes.io/hostname # -- Labels for pattern ingester service serviceLabels: {} + # -- Annotations for pattern ingester service + serviceAnnotations: {} # -- Additional CLI args for the pattern ingester extraArgs: [] # -- Environment variables to add to the pattern ingester pods @@ -2805,6 +2836,8 @@ ruler: podAnnotations: {} # -- Labels for ruler service serviceLabels: {} + # -- Annotations for ruler service + serviceAnnotations: {} # -- Additional CLI args for the ruler extraArgs: [] # -- Environment variables to add to the ruler pods @@ -2931,7 +2964,11 @@ memcached: # -- Memcached Docker image pull policy pullPolicy: IfNotPresent # -- The SecurityContext override for memcached pods - podSecurityContext: {} + podSecurityContext: + runAsNonRoot: true + runAsUser: 11211 + runAsGroup: 11211 + fsGroup: 11211 # -- The name of the PriorityClass for memcached pods priorityClassName: null # -- The SecurityContext for memcached containers @@ -3054,7 +3091,7 @@ resultsCache: persistence: # -- Enable creating PVCs for the results-cache enabled: false - # -- Size of persistent disk + # -- Size of persistent disk, must be in G or Gi storageSize: 10G # -- Storage class to be used. # If defined, storageClassName: <storageClass>. @@ -3156,7 +3193,7 @@ chunksCache: persistence: # -- Enable creating PVCs for the chunks-cache enabled: false - # -- Size of persistent disk + # -- Size of persistent disk, must be in G or Gi storageSize: 10G # -- Storage class to be used. # If defined, storageClassName: <storageClass>. 
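The "must be in G or Gi" notes above exist because of the memcached statefulset change earlier in this diff, which reserves the extstore file at roughly 90% of persistence.storageSize using only Helm's integer builtins (div truncates, so this is a floor rather than a true round). A sketch of how that expression evaluates for the default storageSize: 10G, assuming a hypothetical mountPath of /data:

    {{- /* "10G" | trimSuffix "Gi" -> "10G" | trimSuffix "G" -> "10"; mul 9 -> 90; div 10 -> 9 */}}
    {{- $persistenceSize := (div (mul (trimSuffix "Gi" .persistence.storageSize | trimSuffix "G") 9) 10 ) }}
    {{- /* rendered flag: --extended=modern,track_sizes,ext_path=/data/file:9G,ext_wbuf_size=16 */}}

A value in other units, say 500M, would survive both trimSuffix calls unchanged and would likely cast to 0 in the integer math, silently rendering ext_path=/data/file:0G — which is exactly what the unit restriction in the comments guards against.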
diff --git a/production/ksonnet/loki/multi-zone.libsonnet b/production/ksonnet/loki/multi-zone.libsonnet index 606f70099d0f..a3d48f21a96d 100644 --- a/production/ksonnet/loki/multi-zone.libsonnet +++ b/production/ksonnet/loki/multi-zone.libsonnet @@ -100,8 +100,8 @@ local rolloutOperator = import 'rollout-operator.libsonnet'; }, )), - newIngesterZoneStatefulSet(zone, container):: - local name = '%(prefix)s-%(zone)s' % { prefix: $._config.multi_zone_ingester_name_prefix, zone: zone }; + newIngesterZoneStatefulSet(zone, container, name_prefix=''):: + local name = '%(prefix)s-%(zone)s' % { prefix: if name_prefix == '' then $._config.multi_zone_ingester_name_prefix else name_prefix, zone: zone }; self.newIngesterStatefulSet(name, container, with_anti_affinity=false) + statefulSet.mixin.metadata.withLabels({ 'rollout-group': 'ingester' }) + diff --git a/production/loki-mixin-compiled-ssd/alerts.yaml b/production/loki-mixin-compiled-ssd/alerts.yaml index 7c0825d8580d..09b9b6f54341 100644 --- a/production/loki-mixin-compiled-ssd/alerts.yaml +++ b/production/loki-mixin-compiled-ssd/alerts.yaml @@ -4,12 +4,12 @@ groups: - alert: LokiRequestErrors annotations: description: | - {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors. + {{ $labels.cluster }} {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors. summary: Loki request error rate is high. expr: | - 100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (namespace, job, route) + 100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (cluster, namespace, job, route) / - sum(rate(loki_request_duration_seconds_count[2m])) by (namespace, job, route) + sum(rate(loki_request_duration_seconds_count[2m])) by (cluster, namespace, job, route) > 10 for: 15m labels: @@ -17,16 +17,16 @@ groups: - alert: LokiRequestPanics annotations: description: | - {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics. + {{ $labels.cluster }} {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics. summary: Loki requests are causing code panics. expr: | - sum(increase(loki_panic_total[10m])) by (namespace, job) > 0 + sum(increase(loki_panic_total[10m])) by (cluster, namespace, job) > 0 labels: severity: critical - alert: LokiRequestLatency annotations: description: | - {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. + {{ $labels.cluster }} {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. summary: Loki request error latency is high. expr: | cluster_namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1 @@ -39,7 +39,7 @@ groups: {{ $labels.cluster }} {{ $labels.namespace }} has had {{ printf "%.0f" $value }} compactors running for more than 5m. Only one compactor should run at a time. summary: Loki deployment is running more than one compactor. 
expr: | - sum(loki_boltdb_shipper_compactor_running) by (namespace, cluster) > 1 + sum(loki_boltdb_shipper_compactor_running) by (cluster, namespace) > 1 for: 5m labels: severity: warning diff --git a/production/loki-mixin-compiled-ssd/dashboards/loki-operational.json b/production/loki-mixin-compiled-ssd/dashboards/loki-operational.json index 245e62959eb9..e4fa8dce793a 100644 --- a/production/loki-mixin-compiled-ssd/dashboards/loki-operational.json +++ b/production/loki-mixin-compiled-ssd/dashboards/loki-operational.json @@ -3429,6 +3429,417 @@ "title": "Read Path", "type": "row" }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 64, + "panels": [ + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 32 + }, + "hiddenSeries": false, + "id": 68, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": true, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"(loki.*|enterprise-logs)-read.*\"}[$__rate_interval]))", + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "binBps" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 39 + }, + "hiddenSeries": false, + "id": 69, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": true, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"(loki.*|enterprise-logs)-backend.*\"}", + "instant": false, + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": 
null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": { }, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$loki_datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 3, + "w": 18, + "x": 12, + "y": 32 + }, + "hiddenSeries": false, + "id": 65, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "{}", + "color": "#F2495C" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki.*|enterprise-logs)-backend\"} | logfmt | level=\"error\"[$__auto]))", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Error Log Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": "$loki_datasource", + "gridPos": { + "h": 18, + "w": 18, + "x": 12, + "y": 35 + }, + "id": 66, + "options": { + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "panels": [ ], + "targets": [ + { + "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki.*|enterprise-logs)-backend\"} |= \"level=error\"", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Logs", + "type": "logs" + }, + { + "aliasColors": { }, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 46 + }, + "hiddenSeries": false, + "id": 70, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [ ] + }, + "panels": [ ], + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [ ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki.*|enterprise-logs)-backend\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=~\"($namespace)/(loki.*|enterprise-logs)-backend\"}[$__rate_interval])) by (route) > 0", + "interval": 
"", + "intervalFactor": 1, + "legendFormat": "{{route}}", + "refId": "A" + } + ], + "thresholds": [ ], + "timeFrom": null, + "timeRegions": [ ], + "timeShift": null, + "title": "Success Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [ ] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "targets": [ ], + "title": "Backend Path", + "type": "row" + }, { "collapsed": true, "datasource": null, diff --git a/production/loki-mixin-compiled-ssd/dashboards/loki-reads-resources.json b/production/loki-mixin-compiled-ssd/dashboards/loki-reads-resources.json deleted file mode 100644 index ed5fc3dd245d..000000000000 --- a/production/loki-mixin-compiled-ssd/dashboards/loki-reads-resources.json +++ /dev/null @@ -1,657 +0,0 @@ -{ - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "loki" - ], - "targetBlank": false, - "title": "Loki Dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - "collapsed": false, - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, - "gridPos": { }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-read.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-read.*\", resource=\"cpu\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-read.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-read.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "CPU", - 
"tooltip": { - "sort": 2 - }, - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, - "gridPos": { }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-read.*\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-read.*\", resource=\"memory\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-read.*\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "tooltip": { - "sort": 2 - }, - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "gridPos": { }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki.*|enterprise-logs)-read\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "tooltip": { - "sort": 2 - }, - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Read path", - "titleSize": "h6", - "type": "row" - }, - { - "collapse": false, - "height": "250px", - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": 
"#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\", resource=\"cpu\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "CPU", - "tooltip": { - "sort": 2 - }, - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\", resource=\"memory\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "tooltip": { - "sort": 2 - }, - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 
10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "span": 4, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki.*|enterprise-logs)-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "tooltip": { - "sort": 2 - }, - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Ingester", - "titleSize": "h6" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "loki" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(loki_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - "6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Loki / Reads Resources", - "uid": "reads-resources", - "version": 0 - } \ No newline at end of file diff --git a/production/loki-mixin-compiled-ssd/dashboards/loki-resources-overview.json b/production/loki-mixin-compiled-ssd/dashboards/loki-resources-overview.json new file mode 100644 index 000000000000..3b3d7e7c0a40 --- /dev/null +++ b/production/loki-mixin-compiled-ssd/dashboards/loki-resources-overview.json @@ -0,0 +1,1276 @@ +{ + "annotations": { + "list": [ ] + }, + "editable": true, + "gnetId": null, + "graphTooltip": 0, + "hideControls": false, + "links": [ + { + "asDropdown": true, + "icon": "external link", + "includeVars": true, + "keepTime": true, + "tags": [ + "loki" + ], + "targetBlank": false, + "title": "Loki Dashboards", + "type": "dashboards" + } + ], + "refresh": "10s", + "rows": [ + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + 
} + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + } + ] + }, + "id": 1, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-read.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-read.*\", resource=\"cpu\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-read.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-read.*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "CPU", + "tooltip": { + "sort": 2 + }, + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + } + ] + }, + "id": 2, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-read.*\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-read.*\", resource=\"memory\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-read.*\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": 
null + } + ], + "title": "Memory (workingset)", + "tooltip": { + "sort": 2 + }, + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 3, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki.*|enterprise-logs)-read\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "tooltip": { + "sort": 2 + }, + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Read path", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ ] + }, + "id": 4, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (loki_write_memory_streams{cluster=~\"$cluster\", job=~\"($namespace)/(loki.*|enterprise-logs)-write\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "In-memory streams", + "tooltip": { + "sort": 2 + }, + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + } + ] + }, + "id": 5, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\", resource=\"cpu\"} > 0)", + "format": "time_series", + 
"legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "CPU", + "tooltip": { + "sort": 2 + }, + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + } + ] + }, + "id": 6, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\", resource=\"memory\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "tooltip": { + "sort": 2 + }, + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 7, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 3, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki.*|enterprise-logs)-write\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "tooltip": { + "sort": 2 + }, + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Write path", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { 
+ "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 8, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk Writes", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 9, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk Reads", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 10, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*(loki.*|enterprise-logs)-write.*\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*(loki.*|enterprise-logs)-write.*\"})", + "format": "time_series", + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "title": "Disk Space Utilization", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + 
"showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + } + ] + }, + "id": 11, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\"}[$__rate_interval]))", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\", resource=\"cpu\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\"})", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "CPU", + "tooltip": { + "sort": 2 + }, + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ + { + "matcher": { + "id": "byName", + "options": "request" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#FFC000", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + }, + { + "matcher": { + "id": "byName", + "options": "limit" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "#E02F44", + "mode": "fixed" + } + }, + { + "id": "custom.fillOpacity", + "value": 0 + } + ] + } + ] + }, + "id": 12, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + }, + { + "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\", resource=\"memory\"} > 0)", + "format": "time_series", + "legendFormat": "request", + "legendLink": null + }, + { + "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", 
pod=~\"(loki.*|enterprise-logs)-backend.*\"} > 0)", + "format": "time_series", + "legendFormat": "limit", + "legendLink": null + } + ], + "title": "Memory (workingset)", + "tooltip": { + "sort": 2 + }, + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "bytes" + }, + "overrides": [ ] + }, + "id": 13, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki.*|enterprise-logs)-backend\"})", + "format": "time_series", + "legendFormat": "{{pod}}", + "legendLink": null + } + ], + "title": "Memory (go heap inuse)", + "tooltip": { + "sort": 2 + }, + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "Backend path", + "titleSize": "h6" + }, + { + "collapse": false, + "height": "250px", + "panels": [ + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 14, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk Writes", + "type": "timeseries" + }, + { + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 100, + "lineWidth": 0, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "normal" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "Bps" + }, + "overrides": [ ] + }, + "id": 15, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "sum by(instance, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-backend.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "format": "time_series", + "legendFormat": "{{pod}} - {{device}}", + "legendLink": null + } + ], + "title": "Disk Reads", + "type": "timeseries" + }, + { + "datasource": "$datasource", + 
"fieldConfig": { + "defaults": { + "custom": { + "drawStyle": "line", + "fillOpacity": 10, + "lineWidth": 1, + "pointSize": 5, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + } + }, + "thresholds": { + "mode": "absolute", + "steps": [ ] + }, + "unit": "percentunit" + }, + "overrides": [ ] + }, + "id": 16, + "links": [ ], + "options": { + "legend": { + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "span": 4, + "targets": [ + { + "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*(loki.*|enterprise-logs)-backend.*\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*(loki.*|enterprise-logs)-backend.*\"})", + "format": "time_series", + "legendFormat": "{{persistentvolumeclaim}}", + "legendLink": null + } + ], + "title": "Disk Space Utilization", + "type": "timeseries" + } + ], + "repeat": null, + "repeatIteration": null, + "repeatRowId": null, + "showTitle": true, + "title": "", + "titleSize": "h6" + } + ], + "schemaVersion": 14, + "style": "dark", + "tags": [ + "loki" + ], + "templating": { + "list": [ + { + "current": { + "text": "default", + "value": "default" + }, + "hide": 0, + "label": "Data source", + "name": "datasource", + "options": [ ], + "query": "prometheus", + "refresh": 1, + "regex": "", + "type": "datasource" + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "cluster", + "multi": false, + "name": "cluster", + "options": [ ], + "query": "label_values(loki_build_info, cluster)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + }, + { + "allValue": null, + "current": { + "text": "prod", + "value": "prod" + }, + "datasource": "$datasource", + "hide": 0, + "includeAll": false, + "label": "namespace", + "multi": false, + "name": "namespace", + "options": [ ], + "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", + "refresh": 1, + "regex": "", + "sort": 2, + "tagValuesQuery": "", + "tags": [ ], + "tagsQuery": "", + "type": "query", + "useTags": false + } + ] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": { + "refresh_intervals": [ + "5s", + "10s", + "30s", + "1m", + "5m", + "15m", + "30m", + "1h", + "2h", + "1d" + ], + "time_options": [ + "5m", + "15m", + "1h", + "6h", + "12h", + "24h", + "2d", + "7d", + "30d" + ] + }, + "timezone": "utc", + "title": "Loki / Resources Overview", + "uid": "resources-overview", + "version": 0 + } \ No newline at end of file diff --git a/production/loki-mixin-compiled-ssd/dashboards/loki-writes-resources.json b/production/loki-mixin-compiled-ssd/dashboards/loki-writes-resources.json deleted file mode 100644 index dd366ff201df..000000000000 --- a/production/loki-mixin-compiled-ssd/dashboards/loki-writes-resources.json +++ /dev/null @@ -1,584 +0,0 @@ -{ - "annotations": { - "list": [ ] - }, - "editable": true, - "gnetId": null, - "graphTooltip": 0, - "hideControls": false, - "links": [ - { - "asDropdown": true, - "icon": "external link", - "includeVars": true, - "keepTime": true, - "tags": [ - "loki" - ], - "targetBlank": false, - "title": "Loki Dashboards", - "type": "dashboards" - } - ], - "refresh": "10s", - "rows": [ - { - "collapse": false, - 
"collapsed": false, - "panels": [ - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ ] - }, - "gridPos": { }, - "id": 1, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "expr": "sum by(pod) (loki_ingester_memory_streams{cluster=~\"$cluster\", job=~\"($namespace)/(loki.*|enterprise-logs)-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "In-memory streams", - "tooltip": { - "sort": 2 - }, - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "short" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - }, - { - "matcher": { - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, - "gridPos": { }, - "id": 2, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\"}[$__rate_interval]))", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\", resource=\"cpu\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_cpu_quota{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\"} / container_spec_cpu_period{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\"})", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "CPU", - "tooltip": { - "sort": 2 - }, - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ - { - "matcher": { - "id": "byName", - "options": "request" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#FFC000", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - }, - { - "matcher": 
{ - "id": "byName", - "options": "limit" - }, - "properties": [ - { - "id": "color", - "value": { - "fixedColor": "#E02F44", - "mode": "fixed" - } - }, - { - "id": "custom.fillOpacity", - "value": 0 - } - ] - } - ] - }, - "gridPos": { }, - "id": 3, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - }, - { - "expr": "min(kube_pod_container_resource_requests{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\", resource=\"memory\"} > 0)", - "format": "time_series", - "legendFormat": "request", - "legendLink": null - }, - { - "expr": "min(container_spec_memory_limit_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\"} > 0)", - "format": "time_series", - "legendFormat": "limit", - "legendLink": null - } - ], - "title": "Memory (workingset)", - "tooltip": { - "sort": 2 - }, - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "bytes" - }, - "overrides": [ ] - }, - "gridPos": { }, - "id": 4, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/(loki.*|enterprise-logs)-write\"})", - "format": "time_series", - "legendFormat": "{{pod}}", - "legendLink": null - } - ], - "title": "Memory (go heap inuse)", - "tooltip": { - "sort": 2 - }, - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "gridPos": { }, - "id": 5, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk Writes", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 100, - "lineWidth": 0, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "normal" - } - }, - 
"thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "Bps" - }, - "overrides": [ ] - }, - "gridPos": { }, - "id": 6, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"loki\", pod=~\"(loki.*|enterprise-logs)-write.*\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", - "format": "time_series", - "legendFormat": "{{pod}} - {{device}}", - "legendLink": null - } - ], - "title": "Disk Reads", - "type": "timeseries" - }, - { - "datasource": "$datasource", - "fieldConfig": { - "defaults": { - "custom": { - "drawStyle": "line", - "fillOpacity": 10, - "lineWidth": 1, - "pointSize": 5, - "showPoints": "never", - "spanNulls": false, - "stacking": { - "group": "A", - "mode": "none" - } - }, - "thresholds": { - "mode": "absolute", - "steps": [ ] - }, - "unit": "percentunit" - }, - "overrides": [ ] - }, - "gridPos": { }, - "id": 7, - "links": [ ], - "options": { - "legend": { - "showLegend": true - }, - "tooltip": { - "mode": "single", - "sort": "none" - } - }, - "targets": [ - { - "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"(loki.*|enterprise-logs)-write.*\"})", - "format": "time_series", - "legendFormat": "{{persistentvolumeclaim}}", - "legendLink": null - } - ], - "title": "Disk Space Utilization", - "type": "timeseries" - } - ], - "repeat": null, - "repeatIteration": null, - "repeatRowId": null, - "showTitle": true, - "title": "Write path", - "titleSize": "h6", - "type": "row" - } - ], - "schemaVersion": 14, - "style": "dark", - "tags": [ - "loki" - ], - "templating": { - "list": [ - { - "current": { - "text": "default", - "value": "default" - }, - "hide": 0, - "label": "Data source", - "name": "datasource", - "options": [ ], - "query": "prometheus", - "refresh": 1, - "regex": "", - "type": "datasource" - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "cluster", - "multi": false, - "name": "cluster", - "options": [ ], - "query": "label_values(loki_build_info, cluster)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - }, - { - "allValue": null, - "current": { - "text": "prod", - "value": "prod" - }, - "datasource": "$datasource", - "hide": 0, - "includeAll": false, - "label": "namespace", - "multi": false, - "name": "namespace", - "options": [ ], - "query": "label_values(loki_build_info{cluster=~\"$cluster\"}, namespace)", - "refresh": 1, - "regex": "", - "sort": 2, - "tagValuesQuery": "", - "tags": [ ], - "tagsQuery": "", - "type": "query", - "useTags": false - } - ] - }, - "time": { - "from": "now-1h", - "to": "now" - }, - "timepicker": { - "refresh_intervals": [ - "5s", - "10s", - "30s", - "1m", - "5m", - "15m", - "30m", - "1h", - "2h", - "1d" - ], - "time_options": [ - "5m", - "15m", - "1h", - 
"6h", - "12h", - "24h", - "2d", - "7d", - "30d" - ] - }, - "timezone": "utc", - "title": "Loki / Writes Resources", - "uid": "writes-resources", - "version": 0 - } \ No newline at end of file diff --git a/production/loki-mixin-compiled/alerts.yaml b/production/loki-mixin-compiled/alerts.yaml index 7c0825d8580d..09b9b6f54341 100644 --- a/production/loki-mixin-compiled/alerts.yaml +++ b/production/loki-mixin-compiled/alerts.yaml @@ -4,12 +4,12 @@ groups: - alert: LokiRequestErrors annotations: description: | - {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors. + {{ $labels.cluster }} {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors. summary: Loki request error rate is high. expr: | - 100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (namespace, job, route) + 100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (cluster, namespace, job, route) / - sum(rate(loki_request_duration_seconds_count[2m])) by (namespace, job, route) + sum(rate(loki_request_duration_seconds_count[2m])) by (cluster, namespace, job, route) > 10 for: 15m labels: @@ -17,16 +17,16 @@ groups: - alert: LokiRequestPanics annotations: description: | - {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics. + {{ $labels.cluster }} {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics. summary: Loki requests are causing code panics. expr: | - sum(increase(loki_panic_total[10m])) by (namespace, job) > 0 + sum(increase(loki_panic_total[10m])) by (cluster, namespace, job) > 0 labels: severity: critical - alert: LokiRequestLatency annotations: description: | - {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. + {{ $labels.cluster }} {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. summary: Loki request error latency is high. expr: | cluster_namespace_job_route:loki_request_duration_seconds:99quantile{route!~"(?i).*tail.*|/schedulerpb.SchedulerForQuerier/QuerierLoop"} > 1 @@ -39,7 +39,7 @@ groups: {{ $labels.cluster }} {{ $labels.namespace }} has had {{ printf "%.0f" $value }} compactors running for more than 5m. Only one compactor should run at a time. summary: Loki deployment is running more than one compactor. 
expr: | - sum(loki_boltdb_shipper_compactor_running) by (namespace, cluster) > 1 + sum(loki_boltdb_shipper_compactor_running) by (cluster, namespace) > 1 for: 5m labels: severity: warning diff --git a/production/loki-mixin-compiled/dashboards/loki-reads-resources.json b/production/loki-mixin-compiled/dashboards/loki-reads-resources.json index ed3965e5d7a2..29c124021665 100644 --- a/production/loki-mixin-compiled/dashboards/loki-reads-resources.json +++ b/production/loki-mixin-compiled/dashboards/loki-reads-resources.json @@ -552,7 +552,7 @@ }, { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "datasource": "$datasource", @@ -617,7 +617,6 @@ } ] }, - "gridPos": { }, "id": 7, "links": [ ], "options": { @@ -629,6 +628,7 @@ "sort": "none" } }, + "span": 2, "targets": [ { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"}[$__rate_interval]))", @@ -718,7 +718,6 @@ } ] }, - "gridPos": { }, "id": 8, "links": [ ], "options": { @@ -730,6 +729,7 @@ "sort": "none" } }, + "span": 2, "targets": [ { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"querier\"})", @@ -780,7 +780,6 @@ }, "overrides": [ ] }, - "gridPos": { }, "id": 9, "links": [ ], "options": { @@ -792,6 +791,7 @@ "sort": "none" } }, + "span": 2, "targets": [ { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/querier\"})", @@ -830,7 +830,6 @@ }, "overrides": [ ] }, - "gridPos": { }, "id": 10, "links": [ ], "options": { @@ -842,9 +841,10 @@ "sort": "none" } }, + "span": 2, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -877,7 +877,6 @@ }, "overrides": [ ] }, - "gridPos": { }, "id": 11, "links": [ ], "options": { @@ -889,9 +888,10 @@ "sort": "none" } }, + "span": 2, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance,device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"querier\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -924,7 +924,6 @@ }, "overrides": [ ] }, - "gridPos": { }, "id": 12, "links": [ ], "options": { @@ -936,9 
+935,10 @@ "sort": "none" } }, + "span": 2, "targets": [ { - "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"querier.*\"})", + "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*querier.*\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*querier.*\"})", "format": "time_series", "legendFormat": "{{persistentvolumeclaim}}", "legendLink": null @@ -953,12 +953,11 @@ "repeatRowId": null, "showTitle": true, "title": "Querier", - "titleSize": "h6", - "type": "row" + "titleSize": "h6" }, { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "datasource": "$datasource", @@ -1023,7 +1022,6 @@ } ] }, - "gridPos": { }, "id": 13, "links": [ ], "options": { @@ -1035,6 +1033,7 @@ "sort": "none" } }, + "span": 2, "targets": [ { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"}[$__rate_interval]))", @@ -1124,7 +1123,6 @@ } ] }, - "gridPos": { }, "id": 14, "links": [ ], "options": { @@ -1136,6 +1134,7 @@ "sort": "none" } }, + "span": 2, "targets": [ { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\"})", @@ -1186,7 +1185,6 @@ }, "overrides": [ ] }, - "gridPos": { }, "id": 15, "links": [ ], "options": { @@ -1198,6 +1196,7 @@ "sort": "none" } }, + "span": 2, "targets": [ { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/index-gateway\"})", @@ -1236,7 +1235,6 @@ }, "overrides": [ ] }, - "gridPos": { }, "id": 16, "links": [ ], "options": { @@ -1248,9 +1246,10 @@ "sort": "none" } }, + "span": 2, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -1283,7 +1282,6 @@ }, "overrides": [ ] }, - "gridPos": { }, "id": 17, "links": [ ], "options": { @@ -1295,9 +1293,10 @@ "sort": "none" } }, + "span": 2, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + 
ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"index-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -1330,7 +1329,6 @@ }, "overrides": [ ] }, - "gridPos": { }, "id": 18, "links": [ ], "options": { @@ -1342,9 +1340,10 @@ "sort": "none" } }, + "span": 2, "targets": [ { - "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"index-gateway.*\"})", + "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*index-gateway.*\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*index-gateway.*\"})", "format": "time_series", "legendFormat": "{{persistentvolumeclaim}}", "legendLink": null @@ -1359,12 +1358,11 @@ "repeatRowId": null, "showTitle": true, "title": "Index Gateway", - "titleSize": "h6", - "type": "row" + "titleSize": "h6" }, { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "datasource": "$datasource", @@ -1429,7 +1427,6 @@ } ] }, - "gridPos": { }, "id": 19, "links": [ ], "options": { @@ -1441,6 +1438,7 @@ "sort": "none" } }, + "span": 2, "targets": [ { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"}[$__rate_interval]))", @@ -1530,7 +1528,6 @@ } ] }, - "gridPos": { }, "id": 20, "links": [ ], "options": { @@ -1542,6 +1539,7 @@ "sort": "none" } }, + "span": 2, "targets": [ { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"bloom-gateway\"})", @@ -1592,7 +1590,6 @@ }, "overrides": [ ] }, - "gridPos": { }, "id": 21, "links": [ ], "options": { @@ -1604,6 +1601,7 @@ "sort": "none" } }, + "span": 2, "targets": [ { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/bloom-gateway\"})", @@ -1642,7 +1640,6 @@ }, "overrides": [ ] }, - "gridPos": { }, "id": 22, "links": [ ], "options": { @@ -1654,9 +1651,10 @@ "sort": "none" } }, + "span": 2, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -1689,7 +1687,6 @@ }, "overrides": [ ] }, - "gridPos": { }, "id": 23, "links": [ ], "options": { @@ -1701,9 +1698,10 @@ "sort": 
"none" } }, + "span": 2, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"bloom-gateway\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -1736,7 +1734,6 @@ }, "overrides": [ ] }, - "gridPos": { }, "id": 24, "links": [ ], "options": { @@ -1748,9 +1745,10 @@ "sort": "none" } }, + "span": 2, "targets": [ { - "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"bloom-gateway.*\"})", + "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*bloom-gateway.*\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*bloom-gateway.*\"})", "format": "time_series", "legendFormat": "{{persistentvolumeclaim}}", "legendLink": null @@ -1765,8 +1763,7 @@ "repeatRowId": null, "showTitle": true, "title": "Bloom Gateway", - "titleSize": "h6", - "type": "row" + "titleSize": "h6" }, { "collapse": false, @@ -2034,7 +2031,7 @@ }, { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "datasource": "$datasource", @@ -2060,7 +2057,6 @@ }, "overrides": [ ] }, - "gridPos": { }, "id": 28, "links": [ ], "options": { @@ -2072,6 +2068,7 @@ "sort": "none" } }, + "span": 3, "targets": [ { "expr": "sum by(pod) (loki_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"}) or sum by(pod) (cortex_prometheus_rule_group_rules{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})", @@ -2146,7 +2143,6 @@ } ] }, - "gridPos": { }, "id": 29, "links": [ ], "options": { @@ -2158,6 +2154,7 @@ "sort": "none" } }, + "span": 3, "targets": [ { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"}[$__rate_interval]))", @@ -2247,7 +2244,6 @@ } ] }, - "gridPos": { }, "id": 30, "links": [ ], "options": { @@ -2259,6 +2255,7 @@ "sort": "none" } }, + "span": 3, "targets": [ { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=~\"ruler\"})", @@ -2309,7 +2306,6 @@ }, "overrides": [ ] }, - "gridPos": { }, "id": 31, "links": [ ], "options": { @@ -2321,6 +2317,7 @@ "sort": "none" } }, + "span": 3, "targets": [ { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ruler\"})", @@ -2341,8 +2338,7 @@ "repeatRowId": null, "showTitle": true, "title": "Ruler", - "titleSize": "h6", - "type": "row" + "titleSize": "h6" } ], "schemaVersion": 14, diff --git 
a/production/loki-mixin-compiled/dashboards/loki-writes-resources.json b/production/loki-mixin-compiled/dashboards/loki-writes-resources.json index 40e21484ee54..b99fcf145cd3 100644 --- a/production/loki-mixin-compiled/dashboards/loki-writes-resources.json +++ b/production/loki-mixin-compiled/dashboards/loki-writes-resources.json @@ -288,7 +288,7 @@ }, { "collapse": false, - "collapsed": false, + "height": "250px", "panels": [ { "datasource": "$datasource", @@ -314,7 +314,6 @@ }, "overrides": [ ] }, - "gridPos": { }, "id": 4, "links": [ ], "options": { @@ -326,6 +325,7 @@ "sort": "none" } }, + "span": 1, "targets": [ { "expr": "sum by(pod) (loki_ingester_memory_streams{cluster=~\"$cluster\", job=~\"($namespace)/ingester.*\"})", @@ -403,7 +403,6 @@ } ] }, - "gridPos": { }, "id": 5, "links": [ ], "options": { @@ -415,6 +414,7 @@ "sort": "none" } }, + "span": 1, "targets": [ { "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"}[$__rate_interval]))", @@ -504,7 +504,6 @@ } ] }, - "gridPos": { }, "id": 6, "links": [ ], "options": { @@ -516,6 +515,7 @@ "sort": "none" } }, + "span": 1, "targets": [ { "expr": "max by(pod) (container_memory_working_set_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\"})", @@ -566,7 +566,6 @@ }, "overrides": [ ] }, - "gridPos": { }, "id": 7, "links": [ ], "options": { @@ -578,6 +577,7 @@ "sort": "none" } }, + "span": 1, "targets": [ { "expr": "sum by(pod) (go_memstats_heap_inuse_bytes{cluster=~\"$cluster\", job=~\"($namespace)/ingester.*\"})", @@ -616,7 +616,6 @@ }, "overrides": [ ] }, - "gridPos": { }, "id": 8, "links": [ ], "options": { @@ -628,9 +627,10 @@ "sort": "none" } }, + "span": 1, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -663,7 +663,6 @@ }, "overrides": [ ] }, - "gridPos": { }, "id": 9, "links": [ ], "options": { @@ -675,9 +674,10 @@ "sort": "none" } }, + "span": 1, "targets": [ { - "expr": "sum by(instance, pod, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", + "expr": "sum by(instance, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + ignoring(pod) group_right() (label_replace(count by(instance, pod, device) (container_fs_writes_bytes_total{cluster=~\"$cluster\", namespace=~\"$namespace\", container=\"ingester\", device!~\".*sda.*\"}), \"device\", \"$1\", \"device\", \"/dev/(.*)\") * 0)\n", "format": "time_series", "legendFormat": "{{pod}} - {{device}}", "legendLink": null @@ -710,7 +710,6 @@ }, 
"overrides": [ ] }, - "gridPos": { }, "id": 10, "links": [ ], "options": { @@ -722,9 +721,10 @@ "sort": "none" } }, + "span": 1, "targets": [ { - "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\"}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{cluster=~\"$cluster\", namespace=~\"$namespace\",label_name=~\"ingester.*.*\"})", + "expr": "max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*ingester.*.*\"} / kubelet_volume_stats_capacity_bytes{cluster=~\"$cluster\", namespace=~\"$namespace\", persistentvolumeclaim=~\".*ingester.*.*\"})", "format": "time_series", "legendFormat": "{{persistentvolumeclaim}}", "legendLink": null @@ -739,8 +739,7 @@ "repeatRowId": null, "showTitle": true, "title": "Ingester", - "titleSize": "h6", - "type": "row" + "titleSize": "h6" } ], "schemaVersion": 14, diff --git a/production/loki-mixin/alerts.libsonnet b/production/loki-mixin/alerts.libsonnet index 5bff18e72c6e..9261dbccecf9 100644 --- a/production/loki-mixin/alerts.libsonnet +++ b/production/loki-mixin/alerts.libsonnet @@ -6,36 +6,36 @@ rules: [ { alert: 'LokiRequestErrors', - expr: ||| - 100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (namespace, job, route) + expr: std.strReplace(||| + 100 * sum(rate(loki_request_duration_seconds_count{status_code=~"5.."}[2m])) by (cluster, namespace, job, route) / - sum(rate(loki_request_duration_seconds_count[2m])) by (namespace, job, route) + sum(rate(loki_request_duration_seconds_count[2m])) by (cluster, namespace, job, route) > 10 - |||, + |||, 'cluster', $._config.per_cluster_label), 'for': '15m', labels: { severity: 'critical', }, annotations: { summary: 'Loki request error rate is high.', - description: ||| - {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors. - |||, + description: std.strReplace(||| + {{ $labels.cluster }} {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}% errors. + |||, 'cluster', $._config.per_cluster_label), }, }, { alert: 'LokiRequestPanics', expr: ||| - sum(increase(loki_panic_total[10m])) by (namespace, job) > 0 - |||, + sum(increase(loki_panic_total[10m])) by (%s, namespace, job) > 0 + ||| % $._config.per_cluster_label, labels: { severity: 'critical', }, annotations: { summary: 'Loki requests are causing code panics.', - description: ||| - {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics. - |||, + description: std.strReplace(||| + {{ $labels.cluster }} {{ $labels.job }} is experiencing {{ printf "%.2f" $value }}% increase of panics. + |||, 'cluster', $._config.per_cluster_label), }, }, { @@ -49,15 +49,15 @@ }, annotations: { summary: 'Loki request error latency is high.', - description: ||| - {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. - |||, + description: std.strReplace(||| + {{ $labels.cluster }} {{ $labels.job }} {{ $labels.route }} is experiencing {{ printf "%.2f" $value }}s 99th percentile latency. 
+ |||, 'cluster', $._config.per_cluster_label), }, }, { alert: 'LokiTooManyCompactorsRunning', expr: ||| - sum(loki_boltdb_shipper_compactor_running) by (namespace, %s) > 1 + sum(loki_boltdb_shipper_compactor_running) by (%s, namespace) > 1 ||| % $._config.per_cluster_label, 'for': '5m', labels: { diff --git a/production/loki-mixin/config.libsonnet b/production/loki-mixin/config.libsonnet index 98d002ee8496..eeea227c8f06 100644 --- a/production/loki-mixin/config.libsonnet +++ b/production/loki-mixin/config.libsonnet @@ -15,11 +15,35 @@ // Enable dashboard and panels for Grafana Labs internal components. internal_components: false, + blooms: { + // Whether or not to include blooms specific dashboards + enabled: true, + }, + promtail: { // Whether or not to include promtail specific dashboards enabled: true, }, + operational: { + // Whether or not to include memcached in the operational dashboard + memcached: true, + // Whether or not to include consul in the operational dashboard + consul: true, + // Whether or not to include big table in the operational dashboard + bigTable: true, + // Whether or not to include dynamo in the operational dashboard + dynamo: true, + // Whether or not to include gcs in the operational dashboard + gcs: true, + // Whether or not to include s3 in the operational dashboard + s3: true, + // Whether or not to include azure blob in the operational dashboard + azureBlob: true, + // Whether or not to include bolt db in the operational dashboard + boltDB: true, + }, + // Enable TSDB specific dashboards tsdb: true, diff --git a/production/loki-mixin/dashboards.libsonnet b/production/loki-mixin/dashboards.libsonnet index 33f3b136d5ad..8b1ced08f3d1 100644 --- a/production/loki-mixin/dashboards.libsonnet +++ b/production/loki-mixin/dashboards.libsonnet @@ -3,10 +3,11 @@ (import 'dashboards/loki-chunks.libsonnet') + (import 'dashboards/loki-logs.libsonnet') + (import 'dashboards/loki-operational.libsonnet') + +(import 'dashboards/loki-resources-overview.libsonnet') + (import 'dashboards/loki-reads.libsonnet') + +(import 'dashboards/loki-reads-resources.libsonnet') + (import 'dashboards/loki-writes.libsonnet') + (import 'dashboards/loki-writes-resources.libsonnet') + -(import 'dashboards/loki-reads-resources.libsonnet') + (import 'dashboards/loki-deletion.libsonnet') + (import 'dashboards/loki-canary-dashboard.libsonnet') + (import 'dashboards/recording-rules.libsonnet') + diff --git a/production/loki-mixin/dashboards/dashboard-loki-operational.json b/production/loki-mixin/dashboards/dashboard-loki-operational.json index ef52297d40cd..d31cc205539e 100644 --- a/production/loki-mixin/dashboards/dashboard-loki-operational.json +++ b/production/loki-mixin/dashboards/dashboard-loki-operational.json @@ -3894,6 +3894,411 @@ "title": "Querier", "type": "row" }, + { + "collapsed": true, + "datasource": null, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 31 + }, + "id": 64, + "panels": [ + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 32 + }, + "hiddenSeries": false, + "id": 68, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + 
"spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum by(pod) (rate(container_cpu_usage_seconds_total{cluster=~\"$cluster\", namespace=~\"$namespace\", pod=~\"querier.*\"}[$__rate_interval]))", + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "CPU Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fieldConfig": { + "defaults": { + "unit": "binBps" + } + }, + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 39 + }, + "hiddenSeries": false, + "id": 69, + "legend": { + "avg": false, + "current": false, + "hideEmpty": false, + "hideZero": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": true, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "go_memstats_heap_inuse_bytes{cluster=\"$cluster\", namespace=\"$namespace\", pod=~\"backend.*\"}", + "instant": false, + "intervalFactor": 3, + "legendFormat": "{{pod}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Memory Usage", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "bytes", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "aliasColors": {}, + "bars": true, + "dashLength": 10, + "dashes": false, + "datasource": "$loki_datasource", + "fill": 1, + "fillGradient": 0, + "gridPos": { + "h": 3, + "w": 18, + "x": 12, + "y": 32 + }, + "hiddenSeries": false, + "id": 65, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": false, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 2, + "points": false, + "renderer": "flot", + "seriesOverrides": [ + { + "alias": "{}", + "color": "#F2495C" + } + ], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate({cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/backend\"} | logfmt | level=\"error\"[$__auto]))", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Error Log Rate", + "tooltip": { + "shared": true, + 
"sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": false, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": false + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + }, + { + "datasource": "$loki_datasource", + "gridPos": { + "h": 18, + "w": 18, + "x": 12, + "y": 35 + }, + "id": 66, + "options": { + "showLabels": false, + "showTime": false, + "sortOrder": "Descending", + "wrapLogMessage": true + }, + "targets": [ + { + "expr": "{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/backend\"} |= \"level=error\"", + "refId": "A" + } + ], + "timeFrom": null, + "timeShift": null, + "title": "Logs", + "type": "logs" + }, + { + "aliasColors": {}, + "bars": false, + "dashLength": 10, + "dashes": false, + "datasource": "$datasource", + "fill": 0, + "fillGradient": 0, + "gridPos": { + "h": 7, + "w": 6, + "x": 0, + "y": 46 + }, + "hiddenSeries": false, + "id": 70, + "legend": { + "avg": false, + "current": false, + "max": false, + "min": false, + "show": false, + "total": false, + "values": false + }, + "lines": true, + "linewidth": 1, + "nullPointMode": "null", + "options": { + "dataLinks": [] + }, + "percentage": false, + "pointradius": 1, + "points": false, + "renderer": "flot", + "seriesOverrides": [], + "spaceLength": 10, + "stack": false, + "steppedLine": false, + "targets": [ + { + "expr": "sum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/backend\", status_code!~\"5[0-9]{2}\"}[$__rate_interval])) by (route)\n/\nsum(rate(loki_request_duration_seconds_count{cluster=\"$cluster\", namespace=\"$namespace\", job=\"$namespace/backend\"}[$__rate_interval])) by (route) > 0", + "interval": "", + "intervalFactor": 1, + "legendFormat": "{{route}}", + "refId": "A" + } + ], + "thresholds": [], + "timeFrom": null, + "timeRegions": [], + "timeShift": null, + "title": "Success Rate", + "tooltip": { + "shared": true, + "sort": 2, + "value_type": "individual" + }, + "type": "timeseries", + "xaxis": { + "buckets": null, + "mode": "time", + "name": null, + "show": true, + "values": [] + }, + "yaxes": [ + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + }, + { + "format": "short", + "label": null, + "logBase": 1, + "max": null, + "min": null, + "show": true + } + ], + "yaxis": { + "align": false, + "alignLevel": null + } + } + ], + "title": "Backend Path", + "type": "row" + }, { "collapsed": true, "datasource": null, diff --git a/production/loki-mixin/dashboards/dashboard-utils.libsonnet b/production/loki-mixin/dashboards/dashboard-utils.libsonnet index 577f3235eaf5..1ba9127dd8d0 100644 --- a/production/loki-mixin/dashboards/dashboard-utils.libsonnet +++ b/production/loki-mixin/dashboards/dashboard-utils.libsonnet @@ -87,9 +87,6 @@ local utils = import 'mixin-utils/utils.libsonnet'; namespaceMatcher():: $._config.per_cluster_label + '=~"$cluster", namespace=~"$namespace"', - containerLabelMatcher(containerName):: - 'label_name=~"%s.*"' % containerName, - logPanel(title, selector, datasource='$loki_datasource'):: { title: title, type: 'logs', @@ -327,7 +324,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; containerDiskSpaceUtilizationPanel(title, containerName):: $.newQueryPanel(title, 
'percentunit') + - $.queryPanel('max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{%s} / kubelet_volume_stats_capacity_bytes{%s}) and count by(persistentvolumeclaim) (kube_persistentvolumeclaim_labels{%s,%s})' % [$.namespaceMatcher(), $.namespaceMatcher(), $.namespaceMatcher(), $.containerLabelMatcher(containerName)], '{{persistentvolumeclaim}}'), + $.queryPanel('max by(persistentvolumeclaim) (kubelet_volume_stats_used_bytes{%s, persistentvolumeclaim=~".*%s.*"} / kubelet_volume_stats_capacity_bytes{%s, persistentvolumeclaim=~".*%s.*"})' % [$.namespaceMatcher(), containerName, $.namespaceMatcher(), containerName], '{{persistentvolumeclaim}}'), local latencyPanelWithExtraGrouping(metricName, selector, multiplier='1e3', extra_grouping='') = { nullPointMode: 'null as zero', diff --git a/production/loki-mixin/dashboards/loki-bloom-compactor.libsonnet b/production/loki-mixin/dashboards/loki-bloom-compactor.libsonnet index 236ebff8666d..fc78becb2713 100644 --- a/production/loki-mixin/dashboards/loki-bloom-compactor.libsonnet +++ b/production/loki-mixin/dashboards/loki-bloom-compactor.libsonnet @@ -6,67 +6,66 @@ local raw = (import './dashboard-bloom-compactor.json'); // 2. Copy the JSON into `dashboard-bloom-compactor.json` // 3. Delete the `id` and `templating` fields from the JSON (import 'dashboard-utils.libsonnet') { - grafanaDashboards+: - { - 'loki-bloom-compactor.json': - raw - { - local replaceClusterMatchers(expr) = - // Replace the recording rules cluster label with the per-cluster label + grafanaDashboards+:: if !$._config.blooms.enabled then {} else { + 'loki-bloom-compactor.json': + raw + { + local replaceClusterMatchers(expr) = + // Replace the recording rules cluster label with the per-cluster label + std.strReplace( + // Replace the cluster label for equality matchers with the per-cluster label std.strReplace( - // Replace the cluster label for equality matchers with the per-cluster label + // Replace the cluster label for regex matchers with the per-cluster label std.strReplace( - // Replace the cluster label for regex matchers with the per-cluster label - std.strReplace( - expr, - 'cluster=~"$cluster"', - $._config.per_cluster_label + '=~"$cluster"' - ), - 'cluster="$cluster"', - $._config.per_cluster_label + '="$cluster"' + expr, + 'cluster=~"$cluster"', + $._config.per_cluster_label + '=~"$cluster"' ), - 'cluster_job', - $._config.per_cluster_label + '_job' + 'cluster="$cluster"', + $._config.per_cluster_label + '="$cluster"' ), + 'cluster_job', + $._config.per_cluster_label + '_job' + ), - panels: [ - p { - targets: if std.objectHas(p, 'targets') then [ - e { - expr: replaceClusterMatchers(e.expr), - } - for e in p.targets - ] else [], - panels: if std.objectHas(p, 'panels') then [ - sp { - targets: if std.objectHas(sp, 'targets') then [ - spe { - expr: replaceClusterMatchers(spe.expr), - } - for spe in sp.targets - ] else [], - panels: if std.objectHas(sp, 'panels') then [ - ssp { - targets: if std.objectHas(ssp, 'targets') then [ - sspe { - expr: replaceClusterMatchers(sspe.expr), - } - for sspe in ssp.targets - ] else [], - } - for ssp in sp.panels - ] else [], - } - for sp in p.panels - ] else [], - } - for p in super.panels - ], - } - + $.dashboard('Loki / Bloom Compactor', uid='bloom-compactor') - .addCluster() - .addNamespace() - .addLog() - .addTag(), - }, + panels: [ + p { + targets: if std.objectHas(p, 'targets') then [ + e { + expr: replaceClusterMatchers(e.expr), + } + for e in p.targets + ] else [], + panels: if std.objectHas(p, 'panels') then [ + sp 
{ + targets: if std.objectHas(sp, 'targets') then [ + spe { + expr: replaceClusterMatchers(spe.expr), + } + for spe in sp.targets + ] else [], + panels: if std.objectHas(sp, 'panels') then [ + ssp { + targets: if std.objectHas(ssp, 'targets') then [ + sspe { + expr: replaceClusterMatchers(sspe.expr), + } + for sspe in ssp.targets + ] else [], + } + for ssp in sp.panels + ] else [], + } + for sp in p.panels + ] else [], + } + for p in super.panels + ], + } + + $.dashboard('Loki / Bloom Compactor', uid='bloom-compactor') + .addCluster() + .addNamespace() + .addLog() + .addTag(), + }, } diff --git a/production/loki-mixin/dashboards/loki-bloom-gateway.libsonnet b/production/loki-mixin/dashboards/loki-bloom-gateway.libsonnet index db9c90783911..40e1644ac029 100644 --- a/production/loki-mixin/dashboards/loki-bloom-gateway.libsonnet +++ b/production/loki-mixin/dashboards/loki-bloom-gateway.libsonnet @@ -6,67 +6,66 @@ local raw = (import './dashboard-bloom-gateway.json'); // 2. Copy the JSON into `dashboard-bloom-gateway.json` // 3. Delete the `id` and `templating` fields from the JSON (import 'dashboard-utils.libsonnet') { - grafanaDashboards+: - { - 'loki-bloom-gateway.json': - raw - { - local replaceClusterMatchers(expr) = - // Replace the recording rules cluster label with the per-cluster label + grafanaDashboards+:: if !$._config.blooms.enabled then {} else { + 'loki-bloom-gateway.json': + raw + { + local replaceClusterMatchers(expr) = + // Replace the recording rules cluster label with the per-cluster label + std.strReplace( + // Replace the cluster label for equality matchers with the per-cluster label std.strReplace( - // Replace the cluster label for equality matchers with the per-cluster label + // Replace the cluster label for regex matchers with the per-cluster label std.strReplace( - // Replace the cluster label for regex matchers with the per-cluster label - std.strReplace( - expr, - 'cluster=~"$cluster"', - $._config.per_cluster_label + '=~"$cluster"' - ), - 'cluster="$cluster"', - $._config.per_cluster_label + '="$cluster"' + expr, + 'cluster=~"$cluster"', + $._config.per_cluster_label + '=~"$cluster"' ), - 'cluster_job', - $._config.per_cluster_label + '_job' + 'cluster="$cluster"', + $._config.per_cluster_label + '="$cluster"' ), + 'cluster_job', + $._config.per_cluster_label + '_job' + ), - panels: [ - p { - targets: if std.objectHas(p, 'targets') then [ - e { - expr: replaceClusterMatchers(e.expr), - } - for e in p.targets - ] else [], - panels: if std.objectHas(p, 'panels') then [ - sp { - targets: if std.objectHas(sp, 'targets') then [ - spe { - expr: replaceClusterMatchers(spe.expr), - } - for spe in sp.targets - ] else [], - panels: if std.objectHas(sp, 'panels') then [ - ssp { - targets: if std.objectHas(ssp, 'targets') then [ - sspe { - expr: replaceClusterMatchers(sspe.expr), - } - for sspe in ssp.targets - ] else [], - } - for ssp in sp.panels - ] else [], - } - for sp in p.panels - ] else [], - } - for p in super.panels - ], - } - + $.dashboard('Loki / Bloom Gateway', uid='bloom-gateway') - .addCluster() - .addNamespace() - .addLog() - .addTag(), - }, + panels: [ + p { + targets: if std.objectHas(p, 'targets') then [ + e { + expr: replaceClusterMatchers(e.expr), + } + for e in p.targets + ] else [], + panels: if std.objectHas(p, 'panels') then [ + sp { + targets: if std.objectHas(sp, 'targets') then [ + spe { + expr: replaceClusterMatchers(spe.expr), + } + for spe in sp.targets + ] else [], + panels: if std.objectHas(sp, 'panels') then [ + ssp { + targets: if 
std.objectHas(ssp, 'targets') then [ + sspe { + expr: replaceClusterMatchers(sspe.expr), + } + for sspe in ssp.targets + ] else [], + } + for ssp in sp.panels + ] else [], + } + for sp in p.panels + ] else [], + } + for p in super.panels + ], + } + + $.dashboard('Loki / Bloom Gateway', uid='bloom-gateway') + .addCluster() + .addNamespace() + .addLog() + .addTag(), + }, } diff --git a/production/loki-mixin/dashboards/loki-operational.libsonnet b/production/loki-mixin/dashboards/loki-operational.libsonnet index c6944487ce8e..b926f38814c8 100644 --- a/production/loki-mixin/dashboards/loki-operational.libsonnet +++ b/production/loki-mixin/dashboards/loki-operational.libsonnet @@ -14,8 +14,16 @@ local utils = import 'mixin-utils/utils.libsonnet'; hiddenRows:: [ 'Cassandra', - ] + if !$._config.ssd.enabled then [] else [ - 'Ingester', + if $._config.ssd.enabled then 'Ingester', + if !$._config.ssd.enabled then 'Backend Path', + if !$._config.operational.memcached then 'Memcached', + if !$._config.operational.consul then 'Consul', + if !$._config.operational.bigTable then 'Big Table', + if !$._config.operational.dynamo then 'Dynamo', + if !$._config.operational.gcs then 'GCS', + if !$._config.operational.s3 then 'S3', + if !$._config.operational.azureBlob then 'Azure Blob', + if !$._config.operational.boltDB then 'BoltDB Shipper', ], hiddenPanels:: if $._config.promtail.enabled then [] else [ @@ -36,6 +44,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; queryFrontend: if $._config.meta_monitoring.enabled then [utils.selector.re('job', '($namespace)/(query-frontend|%s-read|loki-single-binary)' % $._config.ssd.pod_prefix_matcher)] else [utils.selector.re('job', '($namespace)/%s' % (if $._config.ssd.enabled then '%s-read' % $._config.ssd.pod_prefix_matcher else 'query-frontend'))], + backend: [utils.selector.re('job', '($namespace)/%s-backend' % $._config.ssd.pod_prefix_matcher)], }, podMatchers:: { @@ -49,6 +58,7 @@ local utils = import 'mixin-utils/utils.libsonnet'; querier: if $._config.meta_monitoring.enabled then [utils.selector.re('pod', '(querier|%s-read|loki-single-binary)' % $._config.ssd.pod_prefix_matcher)] else [utils.selector.re('pod', '%s' % (if $._config.ssd.enabled then '%s-read.*' % $._config.ssd.pod_prefix_matcher else 'querier.*'))], + backend: [utils.selector.re('pod', '%s-backend.*' % $._config.ssd.pod_prefix_matcher)], }, } + lokiOperational + { @@ -106,6 +116,36 @@ local utils = import 'mixin-utils/utils.libsonnet'; '' ), + local replaceBackendMatchers(expr) = + std.strReplace( + std.strReplace( + std.strReplace( + expr, + 'pod=~"backend.*"', + matcherStr('backend', matcher='pod', sep='') + ), + 'job="$namespace/backend",', + matcherStr('backend') + ), + 'job="$namespace/backend"', + std.rstripChars(matcherStr('backend'), ',') + ), + + local replaceQuerierMatchers(expr) = + std.strReplace( + std.strReplace( + std.strReplace( + expr, + 'pod=~"querier.*"', + matcherStr('querier', matcher='pod', sep='') + ), + 'job="$namespace/querier",', + matcherStr('querier') + ), + 'job="$namespace/querier"', + std.rstripChars(matcherStr('querier'), ',') + ), + local replaceMatchers(expr) = std.strReplace( std.strReplace( @@ -119,59 +159,50 @@ local utils = import 'mixin-utils/utils.libsonnet'; std.strReplace( std.strReplace( std.strReplace( - std.strReplace( - std.strReplace( - std.strReplace( - expr, - 'pod=~"querier.*"', - matcherStr('querier', matcher='pod', sep='') - ), - 'pod=~"ingester.*"', - matcherStr('ingester', matcher='pod', sep='') - ), - 'pod=~"distributor.*"', - 
matcherStr('distributor', matcher='pod', sep='') - ), - 'job="$namespace/cortex-gw",', - matcherStr('cortexgateway') + expr, + 'pod=~"ingester.*"', + matcherStr('ingester', matcher='pod', sep='') ), - 'job="$namespace/cortex-gw"', - std.rstripChars(matcherStr('cortexgateway'), ',') + 'pod=~"distributor.*"', + matcherStr('distributor', matcher='pod', sep='') ), - 'job=~"($namespace)/cortex-gw",', + 'job="$namespace/cortex-gw",', matcherStr('cortexgateway') ), - 'job="$namespace/distributor",', - matcherStr('distributor') + 'job="$namespace/cortex-gw"', + std.rstripChars(matcherStr('cortexgateway'), ',') ), - 'job="$namespace/distributor"', - std.rstripChars(matcherStr('distributor'), ',') + 'job=~"($namespace)/cortex-gw",', + matcherStr('cortexgateway') ), - 'job=~"($namespace)/distributor",', + 'job="$namespace/distributor",', matcherStr('distributor') ), - 'job=~"($namespace)/distributor"', + 'job="$namespace/distributor"', std.rstripChars(matcherStr('distributor'), ',') ), - 'job="$namespace/ingester",', - matcherStr('ingester') + 'job=~"($namespace)/distributor",', + matcherStr('distributor') ), - 'job="$namespace/ingester"', - std.rstripChars(matcherStr('ingester'), ',') + 'job=~"($namespace)/distributor"', + std.rstripChars(matcherStr('distributor'), ',') ), - 'job=~"($namespace)/ingester",', - matcherStr('ingester'), + 'job="$namespace/ingester",', + matcherStr('ingester') ), - 'job="$namespace/querier",', - matcherStr('querier') + 'job="$namespace/ingester"', + std.rstripChars(matcherStr('ingester'), ',') ), - 'job="$namespace/querier"', - std.rstripChars(matcherStr('querier'), ',') + 'job=~"($namespace)/ingester",', + matcherStr('ingester'), ), - local replaceAllMatchers(expr) = - replaceMatchers(expr), + replaceBackendMatchers( + replaceQuerierMatchers( + replaceMatchers(expr) + ) + ), local selectDatasource(ds) = if ds == null || ds == '' then ds diff --git a/production/loki-mixin/dashboards/loki-reads-resources.libsonnet b/production/loki-mixin/dashboards/loki-reads-resources.libsonnet index 0ec22e131edb..b300051961f1 100644 --- a/production/loki-mixin/dashboards/loki-reads-resources.libsonnet +++ b/production/loki-mixin/dashboards/loki-reads-resources.libsonnet @@ -1,209 +1,185 @@ -local grafana = import 'grafonnet/grafana.libsonnet'; -local utils = import 'mixin-utils/utils.libsonnet'; - (import 'dashboard-utils.libsonnet') { local index_gateway_pod_matcher = if $._config.meta_monitoring.enabled - then 'container=~"loki|index-gateway", pod=~"(index-gateway.*|%s-read.*|loki-single-binary)"' % $._config.ssd.pod_prefix_matcher - else if $._config.ssd.enabled then 'container="loki", pod=~"%s-read.*"' % $._config.ssd.pod_prefix_matcher else 'container="index-gateway"', + then 'container=~"loki|index-gateway", pod=~"(index-gateway.*|loki-single-binary)"' + else 'container="index-gateway"', local index_gateway_job_matcher = if $._config.meta_monitoring.enabled - then '(index-gateway.*|%s-read.*|loki-single-binary)' % $._config.ssd.pod_prefix_matcher - else if $._config.ssd.enabled then '%s-read' % $._config.ssd.pod_prefix_matcher else 'index-gateway', + then '(index-gateway.*|loki-single-binary)' + else 'index-gateway', local ingester_pod_matcher = if $._config.meta_monitoring.enabled - then 'container=~"loki|ingester", pod=~"(ingester.*|%s-write.*|loki-single-binary)"' % $._config.ssd.pod_prefix_matcher - else if $._config.ssd.enabled then 'container="loki", pod=~"%s-write.*"' % $._config.ssd.pod_prefix_matcher else 'container="ingester"', + then 'container=~"loki|ingester", 
pod=~"(ingester.*|loki-single-binary)"' + else 'container="ingester"', local ingester_job_matcher = if $._config.meta_monitoring.enabled - then '(ingester.+|%s-write|loki-single-binary)' % $._config.ssd.pod_prefix_matcher - else if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester.+', + then '(ingester.+|loki-single-binary)' + else 'ingester.+', - grafanaDashboards+:: - { - 'loki-reads-resources.json': - ($.dashboard('Loki / Reads Resources', uid='reads-resources')) - .addCluster() - .addNamespace() - .addTag() - .addRowIf( - $._config.internal_components, - $.row('Gateway') - .addPanel( - $.containerCPUUsagePanel('CPU', 'cortex-gw(-internal)?'), - ) - .addPanel( - $.containerMemoryWorkingSetPanel('Memory (workingset)', 'cortex-gw(-internal)?'), - ) - .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', 'cortex-gw(-internal)?'), - ) - ) - .addRowIf( - !$._config.ssd.enabled, - $.row('Query Frontend') - .addPanel( - $.containerCPUUsagePanel('CPU', 'query-frontend'), - ) - .addPanel( - $.containerMemoryWorkingSetPanel('Memory (workingset)', 'query-frontend'), - ) - .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', 'query-frontend'), - ) - ) - .addRowIf( - !$._config.ssd.enabled, - $.row('Query Scheduler') - .addPanel( - $.containerCPUUsagePanel('CPU', 'query-scheduler'), - ) - .addPanel( - $.containerMemoryWorkingSetPanel('Memory (workingset)', 'query-scheduler'), - ) - .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', 'query-scheduler'), - ) - ) - .addRowIf( - !$._config.ssd.enabled, - grafana.row.new('Querier') - .addPanel( - $.containerCPUUsagePanel('CPU', 'querier'), - ) - .addPanel( - $.containerMemoryWorkingSetPanel('Memory (workingset)', 'querier'), - ) - .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', 'querier'), - ) - .addPanel( - $.newQueryPanel('Disk Writes', 'Bps') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('querier')], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.withStacking, - ) - .addPanel( - $.newQueryPanel('Disk Reads', 'Bps') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('querier')], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.withStacking, - ) - .addPanel( - $.containerDiskSpaceUtilizationPanel('Disk Space Utilization', 'querier'), - ) - ) - // Add the read path for single scalable deployment only. The read path should not display disk utilization as the index gateway is present in the backend pods. 
- .addRowIf( - $._config.ssd.enabled, - grafana.row.new('Read path') - .addPanel( - $.CPUUsagePanel('CPU', index_gateway_pod_matcher), - ) - .addPanel( - $.memoryWorkingSetPanel('Memory (workingset)', index_gateway_pod_matcher), - ) - .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', index_gateway_job_matcher), - ) - ) - // Otherwise we add the index gateway information - .addRowIf( - !$._config.ssd.enabled, - grafana.row.new('Index Gateway') - .addPanel( - $.CPUUsagePanel('CPU', index_gateway_pod_matcher), - ) - .addPanel( - $.memoryWorkingSetPanel('Memory (workingset)', index_gateway_pod_matcher), - ) - .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', index_gateway_job_matcher), - ) - .addPanel( - $.newQueryPanel('Disk Writes', 'Bps') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDisk(index_gateway_pod_matcher)], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.withStacking, - ) - .addPanel( - $.newQueryPanel('Disk Reads', 'Bps') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDisk(index_gateway_pod_matcher)], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.withStacking, - ) - .addPanel( - $.containerDiskSpaceUtilizationPanel('Disk Space Utilization', index_gateway_job_matcher), - ) - ) - .addRowIf( - !$._config.ssd.enabled, - grafana.row.new('Bloom Gateway') - .addPanel( - $.containerCPUUsagePanel('CPU', 'bloom-gateway'), - ) - .addPanel( - $.containerMemoryWorkingSetPanel('Memory (workingset)', 'bloom-gateway'), - ) - .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', 'bloom-gateway'), - ) - .addPanel( - $.newQueryPanel('Disk Writes', 'Bps') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('bloom-gateway')], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.withStacking, - ) - .addPanel( - $.newQueryPanel('Disk Reads', 'Bps') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDiskContainer('bloom-gateway')], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.withStacking, - ) - .addPanel( - $.containerDiskSpaceUtilizationPanel('Disk Space Utilization', 'bloom-gateway'), - ) - ) - .addRow( - $.row('Ingester') - .addPanel( - $.CPUUsagePanel('CPU', ingester_pod_matcher), - ) - .addPanel( - $.memoryWorkingSetPanel('Memory (workingset)', ingester_pod_matcher), - ) - .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', ingester_job_matcher), - ) - ) - .addRowIf( - !$._config.ssd.enabled, - grafana.row.new('Ruler') - .addPanel( - $.newQueryPanel('Rules') + - $.queryPanel( - 'sum by(%(label)s) (loki_prometheus_rule_group_rules{%(matcher)s}) or sum by(%(label)s) (cortex_prometheus_rule_group_rules{%(matcher)s})' % { label: $._config.per_instance_label, matcher: $.jobMatcher('ruler') }, - '{{%s}}' % $._config.per_instance_label - ), - ) - .addPanel( - $.containerCPUUsagePanel('CPU', 'ruler'), - ) - .addPanel( - $.containerMemoryWorkingSetPanel('Memory (workingset)', 'ruler'), - ) - .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', 'ruler'), - ) - ), - }, + grafanaDashboards+:: if 
$._config.ssd.enabled then {} else { + 'loki-reads-resources.json': + ($.dashboard('Loki / Reads Resources', uid='reads-resources')) + .addCluster() + .addNamespace() + .addTag() + .addRowIf( + $._config.internal_components, + $.row('Gateway') + .addPanel( + $.containerCPUUsagePanel('CPU', 'cortex-gw(-internal)?'), + ) + .addPanel( + $.containerMemoryWorkingSetPanel('Memory (workingset)', 'cortex-gw(-internal)?'), + ) + .addPanel( + $.goHeapInUsePanel('Memory (go heap inuse)', 'cortex-gw(-internal)?'), + ) + ) + .addRow( + $.row('Query Frontend') + .addPanel( + $.containerCPUUsagePanel('CPU', 'query-frontend'), + ) + .addPanel( + $.containerMemoryWorkingSetPanel('Memory (workingset)', 'query-frontend'), + ) + .addPanel( + $.goHeapInUsePanel('Memory (go heap inuse)', 'query-frontend'), + ) + ) + .addRow( + $.row('Query Scheduler') + .addPanel( + $.containerCPUUsagePanel('CPU', 'query-scheduler'), + ) + .addPanel( + $.containerMemoryWorkingSetPanel('Memory (workingset)', 'query-scheduler'), + ) + .addPanel( + $.goHeapInUsePanel('Memory (go heap inuse)', 'query-scheduler'), + ) + ) + .addRow( + $.row('Querier') + .addPanel( + $.containerCPUUsagePanel('CPU', 'querier'), + ) + .addPanel( + $.containerMemoryWorkingSetPanel('Memory (workingset)', 'querier'), + ) + .addPanel( + $.goHeapInUsePanel('Memory (go heap inuse)', 'querier'), + ) + .addPanel( + $.newQueryPanel('Disk Writes', 'Bps') + + $.queryPanel( + 'sum by(%s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $.filterNodeDiskContainer('querier')], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + + $.withStacking, + ) + .addPanel( + $.newQueryPanel('Disk Reads', 'Bps') + + $.queryPanel( + 'sum by(%s,device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $.filterNodeDiskContainer('querier')], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + + $.withStacking, + ) + .addPanel( + $.containerDiskSpaceUtilizationPanel('Disk Space Utilization', 'querier'), + ) + ) + // Otherwise we add the index gateway information + .addRow( + $.row('Index Gateway') + .addPanel( + $.CPUUsagePanel('CPU', index_gateway_pod_matcher), + ) + .addPanel( + $.memoryWorkingSetPanel('Memory (workingset)', index_gateway_pod_matcher), + ) + .addPanel( + $.goHeapInUsePanel('Memory (go heap inuse)', index_gateway_job_matcher), + ) + .addPanel( + $.newQueryPanel('Disk Writes', 'Bps') + + $.queryPanel( + 'sum by(%s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $.filterNodeDisk(index_gateway_pod_matcher)], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + + $.withStacking, + ) + .addPanel( + $.newQueryPanel('Disk Reads', 'Bps') + + $.queryPanel( + 'sum by(%s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $.filterNodeDisk(index_gateway_pod_matcher)], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + + $.withStacking, + ) + .addPanel( + $.containerDiskSpaceUtilizationPanel('Disk Space Utilization', index_gateway_job_matcher), + ) + ) + .addRow( + $.row('Bloom Gateway') + .addPanel( + $.containerCPUUsagePanel('CPU', 'bloom-gateway'), + ) + .addPanel( + $.containerMemoryWorkingSetPanel('Memory (workingset)', 'bloom-gateway'), + ) + .addPanel( + $.goHeapInUsePanel('Memory (go heap inuse)', 'bloom-gateway'), + ) + .addPanel( + $.newQueryPanel('Disk Writes', 'Bps') + + $.queryPanel( + 'sum by(%s, device) 
(rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $.filterNodeDiskContainer('bloom-gateway')], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + + $.withStacking, + ) + .addPanel( + $.newQueryPanel('Disk Reads', 'Bps') + + $.queryPanel( + 'sum by(%s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $.filterNodeDiskContainer('bloom-gateway')], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + + $.withStacking, + ) + .addPanel( + $.containerDiskSpaceUtilizationPanel('Disk Space Utilization', 'bloom-gateway'), + ) + ) + .addRow( + $.row('Ingester') + .addPanel( + $.CPUUsagePanel('CPU', ingester_pod_matcher), + ) + .addPanel( + $.memoryWorkingSetPanel('Memory (workingset)', ingester_pod_matcher), + ) + .addPanel( + $.goHeapInUsePanel('Memory (go heap inuse)', ingester_job_matcher), + ) + ) + .addRow( + $.row('Ruler') + .addPanel( + $.newQueryPanel('Rules') + + $.queryPanel( + 'sum by(%(label)s) (loki_prometheus_rule_group_rules{%(matcher)s}) or sum by(%(label)s) (cortex_prometheus_rule_group_rules{%(matcher)s})' % { label: $._config.per_instance_label, matcher: $.jobMatcher('ruler') }, + '{{%s}}' % $._config.per_instance_label + ), + ) + .addPanel( + $.containerCPUUsagePanel('CPU', 'ruler'), + ) + .addPanel( + $.containerMemoryWorkingSetPanel('Memory (workingset)', 'ruler'), + ) + .addPanel( + $.goHeapInUsePanel('Memory (go heap inuse)', 'ruler'), + ) + ), + }, } diff --git a/production/loki-mixin/dashboards/loki-resources-overview.libsonnet b/production/loki-mixin/dashboards/loki-resources-overview.libsonnet new file mode 100644 index 000000000000..b0f7500a4945 --- /dev/null +++ b/production/loki-mixin/dashboards/loki-resources-overview.libsonnet @@ -0,0 +1,113 @@ +(import 'dashboard-utils.libsonnet') { + local read_pod_matcher = 'container="loki", pod=~"%s-read.*"' % $._config.ssd.pod_prefix_matcher, + local read_job_matcher = '%s-read' % $._config.ssd.pod_prefix_matcher, + + local write_pod_matcher = 'container="loki", pod=~"%s-write.*"' % $._config.ssd.pod_prefix_matcher, + local write_job_matcher = '%s-write' % $._config.ssd.pod_prefix_matcher, + + local backend_pod_matcher = 'container="loki", pod=~"%s-backend.*"' % $._config.ssd.pod_prefix_matcher, + local backend_job_matcher = '%s-backend' % $._config.ssd.pod_prefix_matcher, + + // This dashboard is for the single scalable deployment only and it : + // - replaces the loki-reads-resources dashboards + // - replaces the loki-write-resources dashboards + // - adds backend pods resources + grafanaDashboards+:: if !$._config.ssd.enabled then {} else { + 'loki-resources-overview.json': + ($.dashboard('Loki / Resources Overview', uid='resources-overview')) + .addCluster() + .addNamespace() + .addTag() + .addRow( + // The read path does not display disk utilization as the index gateway is present in the backend pods. + $.row('Read path') + .addPanel( + $.CPUUsagePanel('CPU', read_pod_matcher), + ) + .addPanel( + $.memoryWorkingSetPanel('Memory (workingset)', read_pod_matcher), + ) + .addPanel( + $.goHeapInUsePanel('Memory (go heap inuse)', read_job_matcher), + ) + ) + .addRow( + $.row('Write path') + .addPanel( + $.newQueryPanel('In-memory streams') + + $.queryPanel( + 'sum by(%s) (loki_write_memory_streams{%s})' % [$._config.per_instance_label, $.jobMatcher(write_job_matcher)], + '{{%s}}' % $._config.per_instance_label + ) + + { + tooltip: { sort: 2 }, // Sort descending. 
+ } + ) + .addPanel( + $.CPUUsagePanel('CPU', write_pod_matcher), + ) + .addPanel( + $.memoryWorkingSetPanel('Memory (workingset)', write_pod_matcher), + ) + .addPanel( + $.goHeapInUsePanel('Memory (go heap inuse)', write_job_matcher), + ) + ) + .addRow( + $.row('') + .addPanel( + $.newQueryPanel('Disk Writes', 'Bps') + + $.queryPanel( + 'sum by(%s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $.filterNodeDisk(write_pod_matcher)], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + + $.withStacking, + ) + .addPanel( + $.newQueryPanel('Disk Reads', 'Bps') + + $.queryPanel( + 'sum by(%s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $.filterNodeDisk(write_pod_matcher)], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + + $.withStacking, + ) + .addPanel( + $.containerDiskSpaceUtilizationPanel('Disk Space Utilization', write_job_matcher), + ) + ) + .addRow( + $.row('Backend path') + .addPanel( + $.CPUUsagePanel('CPU', backend_pod_matcher), + ) + .addPanel( + $.memoryWorkingSetPanel('Memory (workingset)', backend_pod_matcher), + ) + .addPanel( + $.goHeapInUsePanel('Memory (go heap inuse)', backend_job_matcher), + ) + ) + .addRow( + $.row('') + .addPanel( + $.newQueryPanel('Disk Writes', 'Bps') + + $.queryPanel( + 'sum by(%s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $.filterNodeDisk(backend_pod_matcher)], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + + $.withStacking, + ) + .addPanel( + $.newQueryPanel('Disk Reads', 'Bps') + + $.queryPanel( + 'sum by(%s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $.filterNodeDisk(backend_pod_matcher)], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + + $.withStacking, + ) + .addPanel( + $.containerDiskSpaceUtilizationPanel('Disk Space Utilization', backend_job_matcher), + ) + ), + }, +} diff --git a/production/loki-mixin/dashboards/loki-retention.libsonnet b/production/loki-mixin/dashboards/loki-retention.libsonnet index c2e461c35f4c..2a1c4777a293 100644 --- a/production/loki-mixin/dashboards/loki-retention.libsonnet +++ b/production/loki-mixin/dashboards/loki-retention.libsonnet @@ -1,5 +1,3 @@ -local utils = import 'mixin-utils/utils.libsonnet'; - (import 'dashboard-utils.libsonnet') { local compactor_pod_matcher = if $._config.meta_monitoring.enabled then 'pod=~"(compactor.*|%s-backend.*|loki-single-binary)"' % $._config.ssd.pod_prefix_matcher diff --git a/production/loki-mixin/dashboards/loki-writes-resources.libsonnet b/production/loki-mixin/dashboards/loki-writes-resources.libsonnet index 1d4c693a9b9b..2d78776422ed 100644 --- a/production/loki-mixin/dashboards/loki-writes-resources.libsonnet +++ b/production/loki-mixin/dashboards/loki-writes-resources.libsonnet @@ -1,87 +1,82 @@ -local grafana = import 'grafonnet/grafana.libsonnet'; -local utils = import 'mixin-utils/utils.libsonnet'; - (import 'dashboard-utils.libsonnet') { local ingester_pod_matcher = if $._config.meta_monitoring.enabled - then 'container=~"loki|ingester", pod=~"(ingester.*|%s-write.*|loki-single-binary)"' % $._config.ssd.pod_prefix_matcher - else if $._config.ssd.enabled then 'container="loki", pod=~"%s-write.*"' % $._config.ssd.pod_prefix_matcher else 'container="ingester"', + then 'container=~"loki|ingester", pod=~"(ingester.*loki-single-binary)"' + else 'container="ingester"', local ingester_job_matcher = if 
$._config.meta_monitoring.enabled - then '(ingester.*|%s-write|loki-single-binary)' % $._config.ssd.pod_prefix_matcher - else if $._config.ssd.enabled then '%s-write' % $._config.ssd.pod_prefix_matcher else 'ingester.*', + then '(ingester.*|loki-single-binary)' + else 'ingester.*', - grafanaDashboards+:: - { - 'loki-writes-resources.json': - ($.dashboard('Loki / Writes Resources', uid='writes-resources')) - .addCluster() - .addNamespace() - .addTag() - .addRowIf( - $._config.internal_components, - $.row('Gateway') - .addPanel( - $.containerCPUUsagePanel('CPU', 'cortex-gw(-internal)?'), - ) - .addPanel( - $.containerMemoryWorkingSetPanel('Memory (workingset)', 'cortex-gw(-internal)?'), - ) - .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', 'cortex-gw(-internal)?'), - ) + grafanaDashboards+:: if $._config.ssd.enabled then {} else { + 'loki-writes-resources.json': + ($.dashboard('Loki / Writes Resources', uid='writes-resources')) + .addCluster() + .addNamespace() + .addTag() + .addRowIf( + $._config.internal_components, + $.row('Gateway') + .addPanel( + $.containerCPUUsagePanel('CPU', 'cortex-gw(-internal)?'), + ) + .addPanel( + $.containerMemoryWorkingSetPanel('Memory (workingset)', 'cortex-gw(-internal)?'), + ) + .addPanel( + $.goHeapInUsePanel('Memory (go heap inuse)', 'cortex-gw(-internal)?'), + ) + ) + .addRow( + $.row('Distributor') + .addPanel( + $.containerCPUUsagePanel('CPU', 'distributor'), + ) + .addPanel( + $.containerMemoryWorkingSetPanel('Memory (workingset)', 'distributor'), + ) + .addPanel( + $.goHeapInUsePanel('Memory (go heap inuse)', 'distributor'), + ) + ) + .addRow( + $.row('Ingester') + .addPanel( + $.newQueryPanel('In-memory streams') + + $.queryPanel( + 'sum by(%s) (loki_ingester_memory_streams{%s})' % [$._config.per_instance_label, $.jobMatcher(ingester_job_matcher)], + '{{%s}}' % $._config.per_instance_label + ) + + { + tooltip: { sort: 2 }, // Sort descending. 
+ }, + ) + .addPanel( + $.CPUUsagePanel('CPU', ingester_pod_matcher), + ) + .addPanel( + $.memoryWorkingSetPanel('Memory (workingset)', ingester_pod_matcher), + ) + .addPanel( + $.goHeapInUsePanel('Memory (go heap inuse)', ingester_job_matcher), + ) + .addPanel( + $.newQueryPanel('Disk Writes', 'Bps') + + $.queryPanel( + 'sum by(%s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $.filterNodeDisk(ingester_pod_matcher)], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + + $.withStacking, + ) + .addPanel( + $.newQueryPanel('Disk Reads', 'Bps') + + $.queryPanel( + 'sum by(%s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $.filterNodeDisk(ingester_pod_matcher)], + '{{%s}} - {{device}}' % $._config.per_instance_label + ) + + $.withStacking, ) - .addRowIf( - !$._config.ssd.enabled, - $.row('Distributor') - .addPanel( - $.containerCPUUsagePanel('CPU', 'distributor'), - ) - .addPanel( - $.containerMemoryWorkingSetPanel('Memory (workingset)', 'distributor'), - ) - .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', 'distributor'), - ) + .addPanel( + $.containerDiskSpaceUtilizationPanel('Disk Space Utilization', ingester_job_matcher), ) - .addRow( - grafana.row.new(if $._config.ssd.enabled then 'Write path' else 'Ingester') - .addPanel( - $.newQueryPanel('In-memory streams') + - $.queryPanel( - 'sum by(%s) (loki_ingester_memory_streams{%s})' % [$._config.per_instance_label, $.jobMatcher(ingester_job_matcher)], - '{{%s}}' % $._config.per_instance_label - ) + - { - tooltip: { sort: 2 }, // Sort descending. - }, - ) - .addPanel( - $.CPUUsagePanel('CPU', ingester_pod_matcher), - ) - .addPanel( - $.memoryWorkingSetPanel('Memory (workingset)', ingester_pod_matcher), - ) - .addPanel( - $.goHeapInUsePanel('Memory (go heap inuse)', ingester_job_matcher), - ) - .addPanel( - $.newQueryPanel('Disk Writes', 'Bps') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_written_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDisk(ingester_pod_matcher)], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.withStacking, - ) - .addPanel( - $.newQueryPanel('Disk Reads', 'Bps') + - $.queryPanel( - 'sum by(%s, %s, device) (rate(node_disk_read_bytes_total[$__rate_interval])) + %s' % [$._config.per_node_label, $._config.per_instance_label, $.filterNodeDisk(ingester_pod_matcher)], - '{{%s}} - {{device}}' % $._config.per_instance_label - ) + - $.withStacking, - ) - .addPanel( - $.containerDiskSpaceUtilizationPanel('Disk Space Utilization', ingester_job_matcher), - ) - ), - }, + ), + }, } diff --git a/production/loki-mixin/mixin.libsonnet b/production/loki-mixin/mixin.libsonnet index 7e21657b2e61..53584824c633 100644 --- a/production/loki-mixin/mixin.libsonnet +++ b/production/loki-mixin/mixin.libsonnet @@ -2,4 +2,6 @@ (import 'alerts.libsonnet') + (import 'recording_rules.libsonnet') + { grafanaDashboardFolder: 'Loki', + // Without this, configs is not taken into account + _config+:: {}, } diff --git a/tools/bloom/inspector/main.go b/tools/bloom/inspector/main.go index 6d1be5b0beb6..9bc193526a9b 100644 --- a/tools/bloom/inspector/main.go +++ b/tools/bloom/inspector/main.go @@ -3,6 +3,7 @@ package main import ( "fmt" "os" + "strings" v1 "github.com/grafana/loki/v3/pkg/storage/bloom/v1" "github.com/grafana/loki/v3/pkg/util/mempool" @@ -13,8 +14,8 @@ func main() { fmt.Println("Usage: go run main.go BLOCK_DIRECTORY") os.Exit(2) } - 
path := os.Args[1] + fmt.Printf("Block: %s\n", path) r := v1.NewDirectoryBlockReader(path) @@ -30,45 +31,38 @@ func main() { fmt.Printf("Checksum: 0x%x\n", md.Checksum) fmt.Printf("Series: %+v\n", md.Series) fmt.Printf("Options: %+v\n", md.Options) - - fmt.Println("-----------------------------") + fmt.Println("") count := 0 for qIter.Next() { swb := qIter.At() series := swb.Series + fmt.Printf( + "%s chunks=%d fields=%+v\n", + series.Fingerprint, + series.Chunks.Len(), + series.Meta.Fields.Items(), + ) p := 0 for swb.Blooms.Next() { bloom := swb.Blooms.At() fmt.Printf( - "fp=%s page=%d chunks=%d size=%vB fill=%v count=%v\n", - series.Fingerprint, + "%s page=%d size=%v count=%v fill=%v\n", + strings.Repeat(" ", 16), // padding p, - series.Chunks.Len(), bloom.Capacity()/8, - bloom.FillRatio(), bloom.Count(), + bloom.FillRatio(), ) p++ } count++ } - fmt.Printf("Stream count: %4d\n", count) - // q.Reset() - - // fmt.Println("-----------------------------") - - // count = 0 - // for q.Next() { - // swb := q.At() - // series := swb.Series - // fmt.Printf("%s (%3d) %v\n", series.Fingerprint, series.Chunks.Len(), swb.Meta.Fields.Items()) - // count++ - // } - // fmt.Printf("Stream count: %4d\n", count) - - if q.Err() != nil { + if qIter.Err() != nil { fmt.Printf("error: %s\n", q.Err()) } + + fmt.Println("") + fmt.Printf("Stream count: %4d\n", count) } diff --git a/tools/dev/loki-tsdb-storage-s3/dev.dockerfile b/tools/dev/loki-tsdb-storage-s3/dev.dockerfile index d9cc7d0dab5f..d8526a3d9385 100644 --- a/tools/dev/loki-tsdb-storage-s3/dev.dockerfile +++ b/tools/dev/loki-tsdb-storage-s3/dev.dockerfile @@ -1,8 +1,8 @@ -FROM golang:1.22.6 +FROM golang:1.23 ENV CGO_ENABLED=0 RUN go install github.com/go-delve/delve/cmd/dlv@v1.22.1 -FROM alpine:3.20.2 +FROM alpine:3.20.3 RUN mkdir /loki WORKDIR /loki diff --git a/tools/lambda-promtail/go.mod b/tools/lambda-promtail/go.mod index c548f6149945..42e8066c9436 100644 --- a/tools/lambda-promtail/go.mod +++ b/tools/lambda-promtail/go.mod @@ -10,7 +10,7 @@ require ( github.com/go-kit/log v0.2.1 github.com/gogo/protobuf v1.3.2 github.com/golang/snappy v0.0.4 - github.com/grafana/dskit v0.0.0-20240905221822-931a021fb06b + github.com/grafana/dskit v0.0.0-20240925193654-7c41a4057319 github.com/grafana/loki/v3 v3.0.0-20240809103847-9315b3d03d79 github.com/prometheus/common v0.55.0 github.com/stretchr/testify v1.9.0 diff --git a/tools/lambda-promtail/go.sum b/tools/lambda-promtail/go.sum index c1088c8692cc..1729b4d8e00c 100644 --- a/tools/lambda-promtail/go.sum +++ b/tools/lambda-promtail/go.sum @@ -216,8 +216,8 @@ github.com/google/uuid v1.6.0 h1:NIvaJDMOsjHA8n1jAhLSgzrAzy1Hgr+hNrb57e+94F0= github.com/google/uuid v1.6.0/go.mod h1:TIyPZe4MgqvfeYDBFedMoGGpEw/LqOeaOT+nhxU+yHo= github.com/gorilla/mux v1.8.1 h1:TuBL49tXwgrFYWhqrNgrUNEY92u81SPhu7sTdzQEiWY= github.com/gorilla/mux v1.8.1/go.mod h1:AKf9I4AEqPTmMytcMc0KkNouC66V3BtZ4qD5fmWSiMQ= -github.com/grafana/dskit v0.0.0-20240905221822-931a021fb06b h1:x2HCzk29I0o5pRPfqWP/qwhXaPGlcz8pohq5kO1NZoE= -github.com/grafana/dskit v0.0.0-20240905221822-931a021fb06b/go.mod h1:SPLNCARd4xdjCkue0O6hvuoveuS1dGJjDnfxYe405YQ= +github.com/grafana/dskit v0.0.0-20240925193654-7c41a4057319 h1:KACpOOTqA4WqyyKF2fFPQFiaSOpZdOT5f5gg0qkPLiU= +github.com/grafana/dskit v0.0.0-20240925193654-7c41a4057319/go.mod h1:SPLNCARd4xdjCkue0O6hvuoveuS1dGJjDnfxYe405YQ= github.com/grafana/gomemcache v0.0.0-20240229205252-cd6a66d6fb56 h1:X8IKQ0wu40wpvYcKfBcc5T4QnhdQjUhtUtB/1CY89lE= github.com/grafana/gomemcache v0.0.0-20240229205252-cd6a66d6fb56/go.mod 
h1:PGk3RjYHpxMM8HFPhKKo+vve3DdlPUELZLSDEFehPuU= github.com/grafana/jsonparser v0.0.0-20240425183733-ea80629e1a32 h1:NznuPwItog+rwdVg8hAuGKP29ndRSzJAwhxKldkP8oQ= diff --git a/tools/tsdb/migrate-versions/main.go b/tools/tsdb/migrate-versions/main.go index 8469cd560711..e4fb39e69a4f 100644 --- a/tools/tsdb/migrate-versions/main.go +++ b/tools/tsdb/migrate-versions/main.go @@ -17,7 +17,7 @@ import ( "github.com/pkg/errors" "github.com/prometheus/client_golang/prometheus" - "github.com/grafana/loki/v3/pkg/chunkenc" + "github.com/grafana/loki/v3/pkg/compression" "github.com/grafana/loki/v3/pkg/loki" "github.com/grafana/loki/v3/pkg/storage" "github.com/grafana/loki/v3/pkg/storage/chunk/client/util" @@ -257,8 +257,9 @@ func uploadFile(idx shipperindex.Index, indexStorageClient shipperstorage.Client } }() - compressedWriter := chunkenc.Gzip.GetWriter(f) - defer chunkenc.Gzip.PutWriter(compressedWriter) + gzipPool := compression.GetWriterPool(compression.EncGZIP) + compressedWriter := gzipPool.GetWriter(f) + defer gzipPool.PutWriter(compressedWriter) idxReader, err := idx.Reader() if err != nil { diff --git a/vendor/github.com/grafana/dskit/grpcclient/grpcclient.go b/vendor/github.com/grafana/dskit/grpcclient/grpcclient.go index 751899047154..a8f728c61e29 100644 --- a/vendor/github.com/grafana/dskit/grpcclient/grpcclient.go +++ b/vendor/github.com/grafana/dskit/grpcclient/grpcclient.go @@ -2,6 +2,8 @@ package grpcclient import ( "flag" + "slices" + "strings" "time" "github.com/pkg/errors" @@ -40,6 +42,9 @@ type Config struct { Middleware []grpc.UnaryClientInterceptor `yaml:"-"` StreamMiddleware []grpc.StreamClientInterceptor `yaml:"-"` + + // CustomCompressors allows configuring custom compressors. + CustomCompressors []string `yaml:"-"` } // RegisterFlags registers flags. @@ -55,9 +60,19 @@ func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { cfg.InitialStreamWindowSize = defaultInitialWindowSize cfg.InitialConnectionWindowSize = defaultInitialWindowSize + var supportedCompressors strings.Builder + supportedCompressors.WriteString("Use compression when sending messages. Supported values are: 'gzip', 'snappy'") + for _, cmp := range cfg.CustomCompressors { + supportedCompressors.WriteString(", ") + supportedCompressors.WriteString("'") + supportedCompressors.WriteString(cmp) + supportedCompressors.WriteString("'") + } + supportedCompressors.WriteString(" and '' (disable compression)") + f.IntVar(&cfg.MaxRecvMsgSize, prefix+".grpc-max-recv-msg-size", 100<<20, "gRPC client max receive message size (bytes).") f.IntVar(&cfg.MaxSendMsgSize, prefix+".grpc-max-send-msg-size", 100<<20, "gRPC client max send message size (bytes).") - f.StringVar(&cfg.GRPCCompression, prefix+".grpc-compression", "", "Use compression when sending messages. 
Supported values are: 'gzip', 'snappy' and '' (disable compression)") + f.StringVar(&cfg.GRPCCompression, prefix+".grpc-compression", "", supportedCompressors.String()) f.Float64Var(&cfg.RateLimit, prefix+".grpc-client-rate-limit", 0., "Rate limit for gRPC client; 0 means disabled.") f.IntVar(&cfg.RateLimitBurst, prefix+".grpc-client-rate-limit-burst", 0, "Rate limit burst for gRPC client.") f.BoolVar(&cfg.BackoffOnRatelimits, prefix+".backoff-on-ratelimits", false, "Enable backoff and retry when we hit rate limits.") @@ -74,11 +89,10 @@ func (cfg *Config) RegisterFlagsWithPrefix(prefix string, f *flag.FlagSet) { } func (cfg *Config) Validate() error { - switch cfg.GRPCCompression { - case gzip.Name, snappy.Name, "": - // valid - default: - return errors.Errorf("unsupported compression type: %s", cfg.GRPCCompression) + supportedCompressors := []string{gzip.Name, snappy.Name, ""} + supportedCompressors = append(supportedCompressors, cfg.CustomCompressors...) + if !slices.Contains(supportedCompressors, cfg.GRPCCompression) { + return errors.Errorf("unsupported compression type: %q", cfg.GRPCCompression) } return nil } diff --git a/vendor/github.com/grafana/dskit/kv/memberlist/memberlist_client.go b/vendor/github.com/grafana/dskit/kv/memberlist/memberlist_client.go index a7eefe92fc22..1d96363fe3fa 100644 --- a/vendor/github.com/grafana/dskit/kv/memberlist/memberlist_client.go +++ b/vendor/github.com/grafana/dskit/kv/memberlist/memberlist_client.go @@ -552,7 +552,7 @@ func (m *KV) fastJoinMembersOnStartup(ctx context.Context) { for toJoin > 0 && len(nodes) > 0 && ctx.Err() == nil { reached, err := m.memberlist.Join(nodes[0:1]) // Try to join single node only. if err != nil { - level.Debug(m.logger).Log("msg", "fast-joining node failed", "node", nodes[0], "err", err) + level.Info(m.logger).Log("msg", "fast-joining node failed", "node", nodes[0], "err", err) } totalJoined += reached @@ -1018,14 +1018,16 @@ func (m *KV) trySingleCas(key string, codec codec.Codec, f func(in interface{}) } // Don't even try - r, ok := out.(Mergeable) - if !ok || r == nil { + incomingValue, ok := out.(Mergeable) + if !ok || incomingValue == nil { return nil, 0, retry, fmt.Errorf("invalid type: %T, expected Mergeable", out) } // To support detection of removed items from value, we will only allow CAS operation to // succeed if version hasn't changed, i.e. state hasn't changed since running 'f'. - change, newver, err := m.mergeValueForKey(key, r, ver, codec) + // Supplied function may have kept a reference to the returned "incoming value". + // If KV store will keep this value as well, it needs to make a clone. + change, newver, err := m.mergeValueForKey(key, incomingValue, true, ver, codec) if err == errVersionMismatch { return nil, 0, retry, err } @@ -1379,14 +1381,15 @@ func (m *KV) mergeBytesValueForKey(key string, incomingData []byte, codec codec. return nil, 0, fmt.Errorf("expected Mergeable, got: %T", decodedValue) } - return m.mergeValueForKey(key, incomingValue, 0, codec) + // No need to clone this "incomingValue", since we have just decoded it from bytes, and won't be using it. + return m.mergeValueForKey(key, incomingValue, false, 0, codec) } // Merges incoming value with value we have in our store. Returns "a change" that can be sent to other // cluster members to update their state, and new version of the value. // If CAS version is specified, then merging will fail if state has changed already, and errVersionMismatch is reported. // If no modification occurred, new version is 0. 
-func (m *KV) mergeValueForKey(key string, incomingValue Mergeable, casVersion uint, codec codec.Codec) (Mergeable, uint, error) { +func (m *KV) mergeValueForKey(key string, incomingValue Mergeable, incomingValueRequiresClone bool, casVersion uint, codec codec.Codec) (Mergeable, uint, error) { m.storeMu.Lock() defer m.storeMu.Unlock() @@ -1398,7 +1401,7 @@ func (m *KV) mergeValueForKey(key string, incomingValue Mergeable, casVersion ui if casVersion > 0 && curr.Version != casVersion { return nil, 0, errVersionMismatch } - result, change, err := computeNewValue(incomingValue, curr.value, casVersion > 0) + result, change, err := computeNewValue(incomingValue, incomingValueRequiresClone, curr.value, casVersion > 0) if err != nil { return nil, 0, err } @@ -1441,8 +1444,16 @@ func (m *KV) mergeValueForKey(key string, incomingValue Mergeable, casVersion ui } // returns [result, change, error] -func computeNewValue(incoming Mergeable, oldVal Mergeable, cas bool) (Mergeable, Mergeable, error) { +func computeNewValue(incoming Mergeable, incomingValueRequiresClone bool, oldVal Mergeable, cas bool) (Mergeable, Mergeable, error) { if oldVal == nil { + // It's OK to return the same value twice (once as result, once as change), because "change" will be cloned + // in mergeValueForKey if needed. + + if incomingValueRequiresClone { + clone := incoming.Clone() + return clone, clone, nil + } + return incoming, incoming, nil } diff --git a/vendor/github.com/grafana/dskit/ring/ring_status.gohtml b/vendor/github.com/grafana/dskit/ring/ring_status.gohtml index 5270b457c625..157f8d89e637 100644 --- a/vendor/github.com/grafana/dskit/ring/ring_status.gohtml +++ b/vendor/github.com/grafana/dskit/ring/ring_status.gohtml @@ -38,8 +38,13 @@ {{ .State }} {{ .Address }} {{ .RegisteredTimestamp | timeOrEmptyString }} + {{ if .ReadOnly }} {{ .ReadOnly }} + {{ .ReadOnlyUpdatedTimestamp | durationSince }} ago ({{ .ReadOnlyUpdatedTimestamp.Format "15:04:05.999" }}) + {{ else }} + {{ .ReadOnlyUpdatedTimestamp | timeOrEmptyString }} + {{ end }} {{ .HeartbeatTimestamp | durationSince }} ago ({{ .HeartbeatTimestamp.Format "15:04:05.999" }}) {{ .NumTokens }} {{ .Ownership | humanFloat }}% diff --git a/vendor/github.com/grafana/loki/pkg/push/types.go b/vendor/github.com/grafana/loki/pkg/push/types.go index d0fc6d6cb835..7ab5ab2aeb5b 100644 --- a/vendor/github.com/grafana/loki/pkg/push/types.go +++ b/vendor/github.com/grafana/loki/pkg/push/types.go @@ -571,8 +571,6 @@ func (m *Entry) Unmarshal(dAtA []byte) error { } // Unmarshal a LabelAdapter, implements proto.Unmarshaller. -// NB this is a copy of the autogenerated code to unmarshal a LabelPair, -// with the byte copying replaced with a yoloString. 
func (m *LabelAdapter) Unmarshal(dAtA []byte) error { l := len(dAtA) iNdEx := 0 @@ -632,7 +630,7 @@ func (m *LabelAdapter) Unmarshal(dAtA []byte) error { if postIndex > l { return io.ErrUnexpectedEOF } - m.Name = yoloString(dAtA[iNdEx:postIndex]) + m.Name = string(dAtA[iNdEx:postIndex]) iNdEx = postIndex case 2: if wireType != 2 { @@ -664,7 +662,7 @@ func (m *LabelAdapter) Unmarshal(dAtA []byte) error { if postIndex > l { return io.ErrUnexpectedEOF } - m.Value = yoloString(dAtA[iNdEx:postIndex]) + m.Value = string(dAtA[iNdEx:postIndex]) iNdEx = postIndex default: iNdEx = preIndex diff --git a/vendor/modules.txt b/vendor/modules.txt index 9705d54b3fb3..b49a55d88cae 100644 --- a/vendor/modules.txt +++ b/vendor/modules.txt @@ -980,7 +980,7 @@ github.com/gorilla/websocket # github.com/grafana/cloudflare-go v0.0.0-20230110200409-c627cf6792f2 ## explicit; go 1.17 github.com/grafana/cloudflare-go -# github.com/grafana/dskit v0.0.0-20240905221822-931a021fb06b +# github.com/grafana/dskit v0.0.0-20240925193654-7c41a4057319 ## explicit; go 1.21 github.com/grafana/dskit/aws github.com/grafana/dskit/backoff @@ -1034,7 +1034,7 @@ github.com/grafana/gomemcache/memcache # github.com/grafana/jsonparser v0.0.0-20240425183733-ea80629e1a32 ## explicit; go 1.13 github.com/grafana/jsonparser -# github.com/grafana/loki/pkg/push v0.0.0-20231124142027-e52380921608 => ./pkg/push +# github.com/grafana/loki/pkg/push v0.0.0-20240924133635-758364c7775f => ./pkg/push ## explicit; go 1.19 github.com/grafana/loki/pkg/push # github.com/grafana/pyroscope-go/godeltaprof v0.1.8
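
The loki-operational.libsonnet hunk above rewrites hard-coded job/pod matchers in the imported dashboard JSON by chaining std.strReplace calls (replaceBackendMatchers, replaceQuerierMatchers, replaceMatchers). The same rewrite pattern, sketched in Go with strings.NewReplacer; the right-hand replacement values are hypothetical stand-ins for what matcherStr would produce in an SSD deployment, not taken from the mixin:

package main

import (
	"fmt"
	"strings"
)

func main() {
	// Pairs are tried in argument order at each position, so the variant
	// with the trailing comma must come before the bare variant, mirroring
	// the ordering of the nested std.strReplace calls in the hunk above.
	// The right-hand sides are hypothetical matcherStr outputs.
	replaceBackendMatchers := strings.NewReplacer(
		`pod=~"backend.*"`, `pod=~"loki-backend.*"`,
		`job="$namespace/backend",`, `job=~"($namespace)/loki-backend",`,
		`job="$namespace/backend"`, `job=~"($namespace)/loki-backend"`,
	)

	expr := `go_memstats_heap_inuse_bytes{cluster="$cluster", namespace="$namespace", pod=~"backend.*"}`
	fmt.Println(replaceBackendMatchers.Replace(expr))
}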
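
tools/tsdb/migrate-versions/main.go above moves from chunkenc.Gzip.GetWriter/PutWriter to compression.GetWriterPool(compression.EncGZIP). Below is a generic sketch of the pooled-writer pattern such an API typically wraps, built only from the standard library; it is an assumption about the shape of the pool, not Loki's implementation:

package main

import (
	"bytes"
	"compress/gzip"
	"fmt"
	"sync"
)

// gzipPool illustrates the pooled-writer pattern behind GetWriterPool in the
// hunk above: writers are reused across calls via Reset instead of being
// reallocated. This is a stdlib sketch, not Loki's compression package.
var gzipPool = sync.Pool{
	New: func() any { return gzip.NewWriter(nil) },
}

func compress(dst *bytes.Buffer, src []byte) error {
	w := gzipPool.Get().(*gzip.Writer)
	defer gzipPool.Put(w)
	w.Reset(dst) // point the reused writer at the new destination
	if _, err := w.Write(src); err != nil {
		return err
	}
	return w.Close() // flush; the writer stays safe to Reset and reuse
}

func main() {
	var buf bytes.Buffer
	if err := compress(&buf, []byte("hello hello hello")); err != nil {
		panic(err)
	}
	fmt.Println(buf.Len(), "compressed bytes")
}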
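
The dskit grpcclient hunk above replaces Validate's hard-coded switch with a slice lookup so that names registered via the new CustomCompressors field are accepted, and folds the same names into the -grpc-compression flag help text. The validation half, reduced to a standalone sketch ("lz4" is a made-up custom compressor name used only for illustration):

package main

import (
	"fmt"
	"slices"
)

// validateCompression mirrors the pattern in the hunk above: the set of
// valid values is the built-in compressors plus any custom ones, and
// membership is checked with slices.Contains instead of a switch.
func validateCompression(compression string, custom []string) error {
	supported := []string{"gzip", "snappy", ""} // "" disables compression
	supported = append(supported, custom...)
	if !slices.Contains(supported, compression) {
		return fmt.Errorf("unsupported compression type: %q", compression)
	}
	return nil
}

func main() {
	fmt.Println(validateCompression("lz4", nil))             // error: unsupported
	fmt.Println(validateCompression("lz4", []string{"lz4"})) // nil: registered
}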
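
In the memberlist hunk above, mergeValueForKey gains an incomingValueRequiresClone flag: a value returned from a user-supplied CAS callback may still be referenced (and later mutated) by that caller, whereas a value freshly decoded from bytes in mergeBytesValueForKey cannot be, so only the former needs a defensive Clone before being stored. A stripped-down sketch of the hazard and the guard, with Mergeable reduced to the one method this needs (the real dskit interface also has Merge, MergeContent, RemoveTombstones, and more):

package main

import "fmt"

// Mergeable is cut down to the single method this sketch uses.
type Mergeable interface{ Clone() Mergeable }

// set is a toy Mergeable: a string set the CAS callback can mutate in place.
type set map[string]struct{}

func (s set) Clone() Mergeable {
	c := make(set, len(s))
	for k := range s {
		c[k] = struct{}{}
	}
	return c
}

// store keeps the value, cloning when the caller may retain a reference,
// mirroring computeNewValue's oldVal == nil branch in the hunk above.
func store(incoming Mergeable, requiresClone bool) Mergeable {
	if requiresClone {
		return incoming.Clone()
	}
	return incoming
}

func main() {
	v := set{"a": {}}
	kept := store(v, true) // the KV store's copy
	v["b"] = struct{}{}    // caller mutates its retained reference afterwards
	fmt.Println(len(kept.(set))) // 1: the stored clone is unaffected
}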
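
ring_status.gohtml above starts rendering ReadOnlyUpdatedTimestamp through the existing durationSince helper when an instance is read-only. For reference, helpers like that are wired into Go templates via a FuncMap; a minimal sketch with an assumed durationSince implementation (dskit's actual helper is not shown in this diff):

package main

import (
	"html/template"
	"os"
	"time"
)

func main() {
	funcs := template.FuncMap{
		// Assumed shape of the helper used by ring_status.gohtml.
		"durationSince": func(t time.Time) time.Duration {
			return time.Since(t).Truncate(time.Second)
		},
	}
	tmpl := template.Must(template.New("row").Funcs(funcs).Parse(
		`read-only since {{ .ReadOnlyUpdatedTimestamp | durationSince }} ago`))
	_ = tmpl.Execute(os.Stdout, struct{ ReadOnlyUpdatedTimestamp time.Time }{
		time.Now().Add(-90 * time.Second),
	})
}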
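
Finally, the vendored push/types.go hunk above drops yoloString in favor of plain string conversions when unmarshalling label names and values. The trade-off, sketched below: a zero-copy string aliases the decode buffer, so reusing that buffer silently rewrites the label, while string(b) pays one allocation for an independent copy. The unsafe.String form is an assumed reconstruction of the removed helper, whose body does not appear in this diff:

package main

import (
	"fmt"
	"unsafe"
)

// yoloString returns a string that shares memory with b (no copy).
// Assumed reconstruction of the helper removed in the hunk above.
func yoloString(b []byte) string {
	return unsafe.String(unsafe.SliceData(b), len(b))
}

func main() {
	buf := []byte("name")
	aliased := yoloString(buf) // view over buf's bytes
	copied := string(buf)      // independent copy, one allocation

	buf[0] = 'N' // simulate the protobuf decode buffer being reused
	fmt.Println(aliased) // "Name": changed under the caller's feet
	fmt.Println(copied)  // "name": stable, which is why the diff copies
}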