From 448ce648bec90e882d20eebda910347c261b5735 Mon Sep 17 00:00:00 2001 From: Daniel Hillman Date: Fri, 18 Oct 2024 11:03:43 -0400 Subject: [PATCH] enhancement(sample transform): add stratified sampling capability (#21274) * (enhancement sample transform): add stratified sampling capability * Convert group_by key to Template * Generate documentation * Add changelog * Fix typo in docs * more documentation clean up * Fix formatting to pass linter, fix punctuation in docs --------- Co-authored-by: Jesse Szwedko --- ...ampling_to_sample_transform.enhancement.md | 3 + src/transforms/sample/config.rs | 14 ++++ src/transforms/sample/transform.rs | 82 +++++++++++++++++-- .../components/transforms/base/sample.cue | 13 +++ 4 files changed, 106 insertions(+), 6 deletions(-) create mode 100644 changelog.d/20921_add_stratified_sampling_to_sample_transform.enhancement.md diff --git a/changelog.d/20921_add_stratified_sampling_to_sample_transform.enhancement.md b/changelog.d/20921_add_stratified_sampling_to_sample_transform.enhancement.md new file mode 100644 index 0000000000000..5fad60cda3d94 --- /dev/null +++ b/changelog.d/20921_add_stratified_sampling_to_sample_transform.enhancement.md @@ -0,0 +1,3 @@ +The `sample` transform now accepts a `group_by` configuration option, which allows logs with distinct values for the given template to be sampled independently. This can reduce the complexity of the topology, since users no longer need to create separate samplers with similar configuration to handle different log streams. 
+ +authors: hillmandj diff --git a/src/transforms/sample/config.rs b/src/transforms/sample/config.rs index d05338493ef80..532bc2b5d0bbd 100644 --- a/src/transforms/sample/config.rs +++ b/src/transforms/sample/config.rs @@ -10,6 +10,7 @@ use crate::{ TransformOutput, }, schema, + template::Template, transforms::Transform, }; @@ -44,6 +45,16 @@ pub struct SampleConfig { #[configurable(metadata(docs::examples = "message"))] pub key_field: Option, + /// The value to group events into separate buckets to be sampled independently. + /// + /// If left unspecified, or if the event doesn't have `group_by`, then the event is not + /// sampled separately. + #[configurable(metadata( + docs::examples = "{{ service }}", + docs::examples = "{{ hostname }}-{{ service }}" + ))] + pub group_by: Option