From 186b27c122d791f802270fe817b373053f189ed7 Mon Sep 17 00:00:00 2001 From: Martin Linzmayer Date: Tue, 10 Jun 2025 16:05:23 +0200 Subject: [PATCH 1/8] feat(middleware): add middleware to filter out tags --- src/middleware/filter_tag.rs | 137 +++++++++++++++++++++++++++++++++++ src/middleware/mod.rs | 1 + 2 files changed, 138 insertions(+) create mode 100644 src/middleware/filter_tag.rs diff --git a/src/middleware/filter_tag.rs b/src/middleware/filter_tag.rs new file mode 100644 index 0000000..05fca1e --- /dev/null +++ b/src/middleware/filter_tag.rs @@ -0,0 +1,137 @@ +use anyhow::Error; +use crate::middleware::Middleware; +use crate::types::Metric; + +/// Different types of operations that can be used to filter out a metric by name. +pub enum FilterType { + /// The metric starts with the specified string. + StartsWith(String), + /// The metric ends with the specified string. + EndsWith(String) +} + +impl FilterType { + /// Returns `true` if the metric name (in bytes) matches the given filter operation. + pub fn matches(&self, value: &[u8]) -> bool { + match self { + Self::StartsWith(starts_with) => value.starts_with(starts_with.as_bytes()), + Self::EndsWith(ends_with) => value.ends_with(ends_with.as_bytes()) + } + } +} + +/// A middleware that filters metric tags based on configurable filter rules. +/// +/// This middleware allows you to selectively filter out tags from metrics based on predefined +/// filter rules. It's particularly useful when you want to: +/// - Apply consistent tag filtering across multiple metric calls +/// - Manage metric cardinality by filtering out certain tags +/// - Configure tag filtering at a central location rather than in individual metric calls +/// +/// This middleware is particularly useful for managing metric cardinality. For example, you can +/// filter out high-cardinality tags (like user IDs) in certain environments while keeping them +/// in others, all without modifying the metric emission code. +pub struct FilterTag { + /// A list of filter rules that determine which tags should be filtered out. + filters: Vec, + /// The next middleware in the chain. + next: M +} + +impl FilterTag where M:Middleware { + pub fn new(filters: Vec, next: M) -> FilterTag { + Self { + filters, next + } + } +} + +impl Middleware for FilterTag where M:Middleware { + fn join(&mut self) -> Result<(), Error> { + self.next.join() + } + fn poll(&mut self) { + self.next.poll() + } + + fn submit(&mut self, metric: &mut Metric) { + let has_filtered_tags = metric + .tags_iter() + .any(|t| self.filters.iter().any(|filters| filters.matches(t.name()))); + + if has_filtered_tags { + let mut new_metric = metric.clone(); + new_metric.set_tags_from_iter( + metric + .tags_iter() + .filter(|t| !self.filters.iter().any(|filters| filters.matches(t.name()))), + ); + self.next.submit(&mut new_metric); + } else { + self.next.submit(metric); + } + } +} + +#[cfg(test)] +mod tests { + use std::cell::RefCell; + use crate::middleware::filter_tag::{FilterTag, FilterType}; + use crate::middleware::Middleware; + use crate::testutils::FnStep; + use crate::types::Metric; + + #[test] + fn test_filter_starts_with() { + let results = RefCell::new(Vec::new()); + let next = FnStep(|metric: &mut Metric| { + results.borrow_mut().push(metric.clone()); + }); + let mut filter = FilterTag::new(vec![FilterType::StartsWith("hc_".to_owned())], next); + filter.submit(&mut Metric::new( + b"foo.bar:1|c|#abc.tag:test,hc_project:1000".to_vec(), + )); + + assert_eq!( + results.borrow()[0], + Metric::new(b"foo.bar:1|c|#abc.tag:test".to_vec()) + ); + } + + #[test] + fn test_filter_ends_with() { + let results = RefCell::new(Vec::new()); + let next = FnStep(|metric: &mut Metric| { + results.borrow_mut().push(metric.clone()); + }); + let mut filter = FilterTag::new(vec![FilterType::EndsWith("_hc".to_owned())], next); + filter.submit(&mut Metric::new( + b"foo.bar:1|c|#abc.tag:test,project_hc:1000".to_vec(), + )); + + assert_eq!( + results.borrow()[0], + Metric::new(b"foo.bar:1|c|#abc.tag:test".to_vec()) + ); + } + + #[test] + fn test_multiple_filters() { + let results = RefCell::new(Vec::new()); + let next = FnStep(|metric: &mut Metric| { + results.borrow_mut().push(metric.clone()); + }); + let mut filter = FilterTag::new(vec![ + FilterType::StartsWith("hc_".to_owned()), + FilterType::EndsWith("_with_ending".to_owned()) + ], next); + filter.submit(&mut Metric::new( + b"foo.bar:1|c|#abc.tag:test,hc_project:1000,metric_with_ending:12".to_vec(), + )); + + assert_eq!( + results.borrow()[0], + Metric::new(b"foo.bar:1|c|#abc.tag:test".to_vec()) + ); + } +} \ No newline at end of file diff --git a/src/middleware/mod.rs b/src/middleware/mod.rs index 78f7df7..9440191 100644 --- a/src/middleware/mod.rs +++ b/src/middleware/mod.rs @@ -7,6 +7,7 @@ pub mod aggregate; pub mod allow_tag; pub mod cardinality_limit; pub mod deny_tag; +pub mod filter_tag; pub mod mirror; pub mod sample; pub mod tag_cardinality_limit; From b0602da46e3f610109e2721b594f2b30c107876d Mon Sep 17 00:00:00 2001 From: Martin Linzmayer Date: Tue, 10 Jun 2025 16:37:44 +0200 Subject: [PATCH 2/8] rename to StripTag --- src/middleware/mod.rs | 2 +- .../{filter_tag.rs => strip_tag.rs} | 30 +++++++++---------- 2 files changed, 16 insertions(+), 16 deletions(-) rename src/middleware/{filter_tag.rs => strip_tag.rs} (78%) diff --git a/src/middleware/mod.rs b/src/middleware/mod.rs index 9440191..a0499cd 100644 --- a/src/middleware/mod.rs +++ b/src/middleware/mod.rs @@ -7,7 +7,7 @@ pub mod aggregate; pub mod allow_tag; pub mod cardinality_limit; pub mod deny_tag; -pub mod filter_tag; +pub mod strip_tag; pub mod mirror; pub mod sample; pub mod tag_cardinality_limit; diff --git a/src/middleware/filter_tag.rs b/src/middleware/strip_tag.rs similarity index 78% rename from src/middleware/filter_tag.rs rename to src/middleware/strip_tag.rs index 05fca1e..d72a278 100644 --- a/src/middleware/filter_tag.rs +++ b/src/middleware/strip_tag.rs @@ -2,11 +2,11 @@ use anyhow::Error; use crate::middleware::Middleware; use crate::types::Metric; -/// Different types of operations that can be used to filter out a metric by name. +/// Different types of operations that can be used to strip out a metric tag by name. pub enum FilterType { - /// The metric starts with the specified string. + /// The metric tag starts with the specified string. StartsWith(String), - /// The metric ends with the specified string. + /// The metric tag ends with the specified string. EndsWith(String) } @@ -20,33 +20,33 @@ impl FilterType { } } -/// A middleware that filters metric tags based on configurable filter rules. +/// A middleware that strips metric tags based on configurable filter rules. /// -/// This middleware allows you to selectively filter out tags from metrics based on predefined +/// This middleware allows you to selectively strip tags from metrics based on predefined /// filter rules. It's particularly useful when you want to: /// - Apply consistent tag filtering across multiple metric calls /// - Manage metric cardinality by filtering out certain tags /// - Configure tag filtering at a central location rather than in individual metric calls /// -/// This middleware is particularly useful for managing metric cardinality. For example, you can +/// A common use case is managing metric cardinality. For example, you can /// filter out high-cardinality tags (like user IDs) in certain environments while keeping them /// in others, all without modifying the metric emission code. -pub struct FilterTag { - /// A list of filter rules that determine which tags should be filtered out. +pub struct StripTag { + /// A list of filter rules that determine which tags should be stripped out. filters: Vec, /// The next middleware in the chain. next: M } -impl FilterTag where M:Middleware { - pub fn new(filters: Vec, next: M) -> FilterTag { +impl StripTag where M:Middleware { + pub fn new(filters: Vec, next: M) -> StripTag { Self { filters, next } } } -impl Middleware for FilterTag where M:Middleware { +impl Middleware for StripTag where M:Middleware { fn join(&mut self) -> Result<(), Error> { self.next.join() } @@ -76,7 +76,7 @@ impl Middleware for FilterTag where M:Middleware { #[cfg(test)] mod tests { use std::cell::RefCell; - use crate::middleware::filter_tag::{FilterTag, FilterType}; + use crate::middleware::strip_tag::{StripTag, FilterType}; use crate::middleware::Middleware; use crate::testutils::FnStep; use crate::types::Metric; @@ -87,7 +87,7 @@ mod tests { let next = FnStep(|metric: &mut Metric| { results.borrow_mut().push(metric.clone()); }); - let mut filter = FilterTag::new(vec![FilterType::StartsWith("hc_".to_owned())], next); + let mut filter = StripTag::new(vec![FilterType::StartsWith("hc_".to_owned())], next); filter.submit(&mut Metric::new( b"foo.bar:1|c|#abc.tag:test,hc_project:1000".to_vec(), )); @@ -104,7 +104,7 @@ mod tests { let next = FnStep(|metric: &mut Metric| { results.borrow_mut().push(metric.clone()); }); - let mut filter = FilterTag::new(vec![FilterType::EndsWith("_hc".to_owned())], next); + let mut filter = StripTag::new(vec![FilterType::EndsWith("_hc".to_owned())], next); filter.submit(&mut Metric::new( b"foo.bar:1|c|#abc.tag:test,project_hc:1000".to_vec(), )); @@ -121,7 +121,7 @@ mod tests { let next = FnStep(|metric: &mut Metric| { results.borrow_mut().push(metric.clone()); }); - let mut filter = FilterTag::new(vec![ + let mut filter = StripTag::new(vec![ FilterType::StartsWith("hc_".to_owned()), FilterType::EndsWith("_with_ending".to_owned()) ], next); From 2342ebf0122544d71b55bf08f344e5057d7f25e8 Mon Sep 17 00:00:00 2001 From: Martin Linzmayer Date: Tue, 10 Jun 2025 17:40:28 +0200 Subject: [PATCH 3/8] tests and documentation --- README.md | 1 + example.yaml | 5 +++++ src/config.rs | 33 +++++++++++++++++++++++++++++++++ src/main.rs | 9 +++++++++ 4 files changed, 48 insertions(+) diff --git a/README.md b/README.md index 680fe86..d21b791 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ A proxy for transforming, pre-aggregating and routing statsd metrics, like Currently supports the following transformations: * Deny- or allow-listing of specific tag keys or metric names +* Strip tags based on prefix or suffix * Adding hardcoded tags to all metrics * Basic cardinality limiting, tracking the number of distinct tag values per key or the number of overall timeseries (=combinations of metrics and tags). diff --git a/example.yaml b/example.yaml index a07d708..a40519e 100644 --- a/example.yaml +++ b/example.yaml @@ -16,6 +16,11 @@ middlewares: - type: allow-tag tags: [x, y, z] + # Removes all tags that either start with "foo" or end with "bar" + - type: strip-tag + starts_with: [foo] + ends_with: [bar] + # Apply a limit on the number of timeseries that can be passed through. # Multiple limits with different windows can be specified. - type: cardinality-limit diff --git a/src/config.rs b/src/config.rs index 975d72f..4b0134d 100644 --- a/src/config.rs +++ b/src/config.rs @@ -28,6 +28,7 @@ impl Config { pub enum MiddlewareConfig { DenyTag(DenyTagConfig), AllowTag(AllowTagConfig), + StripTag(StripTagConfig), CardinalityLimit(CardinalityLimitConfig), AggregateMetrics(AggregateMetricsConfig), Sample(SampleConfig), @@ -47,6 +48,14 @@ pub struct AllowTagConfig { pub tags: Vec, } +#[cfg_attr(feature = "cli", derive(Deserialize))] +#[derive(Debug, Default, PartialEq)] +#[serde(default)] +pub struct StripTagConfig { + pub starts_with: Vec, + pub ends_with: Vec, +} + #[cfg_attr(feature = "cli", derive(Deserialize))] #[derive(Debug, PartialEq)] pub struct LimitConfig { @@ -167,6 +176,20 @@ mod tests { assert!(config.is_err()); } + #[test] + fn test_empty_strip_config() { + let yaml = r#" + middlewares: + - type: strip-tag + "#; + let config = serde_yaml::from_str::(yaml).unwrap(); + let empty_config = MiddlewareConfig::StripTag(StripTagConfig { + starts_with: Vec::new(), + ends_with: Vec::new(), + }); + assert_eq!(config.middlewares[0], empty_config); + } + #[test] fn config() { let config = Config::new("example.yaml").unwrap(); @@ -191,6 +214,16 @@ mod tests { ], }, ), + StripTag( + StripTagConfig { + starts_with: [ + "foo", + ], + ends_with: [ + "bar", + ], + }, + ), CardinalityLimit( CardinalityLimitConfig { limits: [ diff --git a/src/main.rs b/src/main.rs index ea8864c..34e4502 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,6 +5,7 @@ use clap::Parser; use statsdproxy::config; use statsdproxy::middleware::{self, server::Server, upstream::Upstream}; +use statsdproxy::middleware::strip_tag::FilterType; #[derive(Parser, Debug)] #[command(author, version, about, long_about = None)] @@ -47,6 +48,14 @@ fn main() -> Result<(), Error> { config::MiddlewareConfig::DenyTag(config) => { client = Box::new(middleware::deny_tag::DenyTag::new(config, client)); } + config::MiddlewareConfig::StripTag(config) => { + let filters = config.starts_with.into_iter() + .map(|s| FilterType::StartsWith(s)) + .chain(config.ends_with.into_iter() + .map(|s| FilterType::EndsWith(s))) + .collect(); + client = Box::new(middleware::strip_tag::StripTag::new(filters, client)); + } config::MiddlewareConfig::CardinalityLimit(config) => { client = Box::new(middleware::cardinality_limit::CardinalityLimit::new( config, client, From 47ab9ec3ba6f0508eb3c9aba795ee55fa381d1ca Mon Sep 17 00:00:00 2001 From: Martin Linzmayer Date: Tue, 10 Jun 2025 17:44:18 +0200 Subject: [PATCH 4/8] lints --- src/config.rs | 5 ++++- 1 file changed, 4 insertions(+), 1 deletion(-) diff --git a/src/config.rs b/src/config.rs index 4b0134d..329f1ae 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,4 +1,6 @@ +#[cfg(feature = "cli")] use std::fmt::Formatter; +#[cfg(feature = "cli")] use std::time::Duration; #[cfg(feature = "cli")] use serde::de::Visitor; @@ -50,9 +52,10 @@ pub struct AllowTagConfig { #[cfg_attr(feature = "cli", derive(Deserialize))] #[derive(Debug, Default, PartialEq)] -#[serde(default)] pub struct StripTagConfig { + #[cfg_attr(feature = "cli", serde(default))] pub starts_with: Vec, + #[cfg_attr(feature = "cli", serde(default))] pub ends_with: Vec, } From 74bb9cc2085cc6775b04c14890a1b9d920d5ef50 Mon Sep 17 00:00:00 2001 From: Martin Linzmayer Date: Tue, 10 Jun 2025 17:46:26 +0200 Subject: [PATCH 5/8] lints --- src/config.rs | 1 - src/main.rs | 4 ++-- 2 files changed, 2 insertions(+), 3 deletions(-) diff --git a/src/config.rs b/src/config.rs index 329f1ae..1260601 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,6 +1,5 @@ #[cfg(feature = "cli")] use std::fmt::Formatter; -#[cfg(feature = "cli")] use std::time::Duration; #[cfg(feature = "cli")] use serde::de::Visitor; diff --git a/src/main.rs b/src/main.rs index 34e4502..092b2cf 100644 --- a/src/main.rs +++ b/src/main.rs @@ -50,9 +50,9 @@ fn main() -> Result<(), Error> { } config::MiddlewareConfig::StripTag(config) => { let filters = config.starts_with.into_iter() - .map(|s| FilterType::StartsWith(s)) + .map(FilterType::StartsWith) .chain(config.ends_with.into_iter() - .map(|s| FilterType::EndsWith(s))) + .map(FilterType::EndsWith)) .collect(); client = Box::new(middleware::strip_tag::StripTag::new(filters, client)); } From 490ba0e029efad6de29d91618c57a125c557d3ba Mon Sep 17 00:00:00 2001 From: Martin Linzmayer Date: Wed, 11 Jun 2025 14:22:54 +0200 Subject: [PATCH 6/8] merge strip-tags into deny-tags --- README.md | 2 +- example.yaml | 8 +-- src/config.rs | 56 ++++++--------- src/middleware/deny_tag.rs | 137 ++++++++++++++++++++++++++++++++++-- src/middleware/mod.rs | 1 - src/middleware/strip_tag.rs | 137 ------------------------------------ 6 files changed, 157 insertions(+), 184 deletions(-) delete mode 100644 src/middleware/strip_tag.rs diff --git a/README.md b/README.md index d21b791..244a516 100644 --- a/README.md +++ b/README.md @@ -7,7 +7,7 @@ A proxy for transforming, pre-aggregating and routing statsd metrics, like Currently supports the following transformations: * Deny- or allow-listing of specific tag keys or metric names -* Strip tags based on prefix or suffix +* Deny tags based on prefix or suffix * Adding hardcoded tags to all metrics * Basic cardinality limiting, tracking the number of distinct tag values per key or the number of overall timeseries (=combinations of metrics and tags). diff --git a/example.yaml b/example.yaml index a40519e..84cb7f1 100644 --- a/example.yaml +++ b/example.yaml @@ -8,19 +8,17 @@ middlewares: # Remove a list of tag names ("a", "b" and "c") from incoming metrics + # Also removes tags that start or end with certain words ("foo" or "bar") - type: deny-tag tags: [a, b, c] + starts_with: [foo] + ends_with: [bar] # Allow a list of tag names ("a", "b" and "c") from incoming metrics, and # remove all other tags. - type: allow-tag tags: [x, y, z] - # Removes all tags that either start with "foo" or end with "bar" - - type: strip-tag - starts_with: [foo] - ends_with: [bar] - # Apply a limit on the number of timeseries that can be passed through. # Multiple limits with different windows can be specified. - type: cardinality-limit diff --git a/src/config.rs b/src/config.rs index 1260601..7463ff5 100644 --- a/src/config.rs +++ b/src/config.rs @@ -29,7 +29,6 @@ impl Config { pub enum MiddlewareConfig { DenyTag(DenyTagConfig), AllowTag(AllowTagConfig), - StripTag(StripTagConfig), CardinalityLimit(CardinalityLimitConfig), AggregateMetrics(AggregateMetricsConfig), Sample(SampleConfig), @@ -41,6 +40,10 @@ pub enum MiddlewareConfig { #[derive(Debug, PartialEq)] pub struct DenyTagConfig { pub tags: Vec, + #[cfg_attr(feature = "cli", serde(default))] + pub starts_with: Vec, + #[cfg_attr(feature = "cli", serde(default))] + pub ends_with: Vec, } #[cfg_attr(feature = "cli", derive(Deserialize))] @@ -49,15 +52,6 @@ pub struct AllowTagConfig { pub tags: Vec, } -#[cfg_attr(feature = "cli", derive(Deserialize))] -#[derive(Debug, Default, PartialEq)] -pub struct StripTagConfig { - #[cfg_attr(feature = "cli", serde(default))] - pub starts_with: Vec, - #[cfg_attr(feature = "cli", serde(default))] - pub ends_with: Vec, -} - #[cfg_attr(feature = "cli", derive(Deserialize))] #[derive(Debug, PartialEq)] pub struct LimitConfig { @@ -178,19 +172,19 @@ mod tests { assert!(config.is_err()); } - #[test] - fn test_empty_strip_config() { - let yaml = r#" - middlewares: - - type: strip-tag - "#; - let config = serde_yaml::from_str::(yaml).unwrap(); - let empty_config = MiddlewareConfig::StripTag(StripTagConfig { - starts_with: Vec::new(), - ends_with: Vec::new(), - }); - assert_eq!(config.middlewares[0], empty_config); - } + // #[test] + // fn test_empty_strip_config() { + // let yaml = r#" + // middlewares: + // - type: deny-tag + // "#; + // let config = serde_yaml::from_str::(yaml).unwrap(); + // let empty_config = MiddlewareConfig::StripTag(StripTagConfig { + // starts_with: Vec::new(), + // ends_with: Vec::new(), + // }); + // assert_eq!(config.middlewares[0], empty_config); + // } #[test] fn config() { @@ -205,6 +199,12 @@ mod tests { "b", "c", ], + starts_with: [ + "foo", + ], + ends_with: [ + "bar", + ], }, ), AllowTag( @@ -216,16 +216,6 @@ mod tests { ], }, ), - StripTag( - StripTagConfig { - starts_with: [ - "foo", - ], - ends_with: [ - "bar", - ], - }, - ), CardinalityLimit( CardinalityLimitConfig { limits: [ diff --git a/src/middleware/deny_tag.rs b/src/middleware/deny_tag.rs index c2920df..fcc2886 100644 --- a/src/middleware/deny_tag.rs +++ b/src/middleware/deny_tag.rs @@ -1,11 +1,22 @@ +use std::collections::HashSet; use crate::config::DenyTagConfig; use crate::middleware::Middleware; use crate::types::Metric; use anyhow::Error; -use std::collections::HashSet; +/// A middleware that denies metric tags based on configurable filter rules. +/// +/// This middleware allows you to explicitly deny tags from metrics based on predefined +/// filter rules. It's particularly useful when you want to: +/// - Consistently deny specific tags across multiple metric calls +/// - Control metric cardinality by denying high-cardinality tags +/// - Centralize tag denial rules rather than handling them in individual metric calls +/// +/// A common use case is managing metric cardinality. For example, you can +/// deny high-cardinality tags (like user IDs) in certain environments while allowing them +/// in others, all without modifying the metric emission code. pub struct DenyTag { - tags: HashSet>, + filters: HashSet, next: M, } @@ -14,10 +25,14 @@ where M: Middleware, { pub fn new(config: DenyTagConfig, next: M) -> Self { - let tags: HashSet> = - HashSet::from_iter(config.tags.iter().cloned().map(|tag| tag.into_bytes())); + let filters = config.starts_with.into_iter() + .map(DenyType::StartsWith) + .chain(config.ends_with.into_iter() + .map(DenyType::EndsWith)) + .chain(config.tags.into_iter().map(DenyType::Equals)) + .collect(); - Self { tags, next } + Self { filters, next } } } @@ -34,7 +49,7 @@ where let mut rewrite_tags = false; for tag in metric.tags_iter() { - if self.tags.contains(tag.name()) { + if self.filters.iter().any(|f| f.matches(tag.name())) { log::debug!("deny_tag: Dropping tag {:?}", tag.name()); rewrite_tags = true; } else { @@ -56,10 +71,31 @@ where } } +/// Different types of operations that can be used to strip out a metric tag by name. +#[derive(PartialEq, Eq, Hash, Clone, Debug)] +pub enum DenyType { + /// The metric tag starts with the specified string. + StartsWith(String), + /// The metric tag ends with the specified string. + EndsWith(String), + /// The metric tag matches the word exactly. + Equals(String), +} + +impl DenyType { + /// Returns `true` if the metric name (in bytes) matches the given filter operation. + pub fn matches(&self, value: &[u8]) -> bool { + match self { + Self::StartsWith(starts_with) => value.starts_with(starts_with.as_bytes()), + Self::EndsWith(ends_with) => value.ends_with(ends_with.as_bytes()), + Self::Equals(equals) => equals.as_bytes() == value, + } + } +} + #[cfg(test)] mod tests { use std::cell::RefCell; - use super::*; use crate::testutils::FnStep; @@ -67,6 +103,8 @@ mod tests { fn basic() { let config = DenyTagConfig { tags: vec!["nope".to_string()], + starts_with: vec![], + ends_with: vec![] }; let results = RefCell::new(vec![]); @@ -91,4 +129,89 @@ mod tests { Metric::new(b"servers.online:1|c|#country:china,extra_stuff,,".to_vec()) ); } + + #[test] + fn test_filter_starts_with() { + let config = DenyTagConfig { + tags: vec![], + starts_with: vec!["hc_".to_owned()], + ends_with: vec![] + }; + let results = RefCell::new(Vec::new()); + let next = FnStep(|metric: &mut Metric| { + results.borrow_mut().push(metric.clone()); + }); + let mut filter = DenyTag::new(config, next); + filter.submit(&mut Metric::new( + b"foo.bar:1|c|#abc.tag:test,hc_project:1000".to_vec(), + )); + + assert_eq!( + results.borrow()[0], + Metric::new(b"foo.bar:1|c|#abc.tag:test".to_vec()) + ); + } + + #[test] + fn test_filter_ends_with() { + let config = DenyTagConfig { + tags: vec![], + starts_with: vec![], + ends_with: vec!["_hc".to_owned()] + }; + let results = RefCell::new(Vec::new()); + let next = FnStep(|metric: &mut Metric| { + results.borrow_mut().push(metric.clone()); + }); + let mut filter = DenyTag::new(config, next); + filter.submit(&mut Metric::new( + b"foo.bar:1|c|#abc.tag:test,project_hc:1000".to_vec(), + )); + + assert_eq!( + results.borrow()[0], + Metric::new(b"foo.bar:1|c|#abc.tag:test".to_vec()) + ); + } + + #[test] + fn test_multiple_filters() { + let config = DenyTagConfig { + tags: vec![], + starts_with: vec!["hc_".to_owned()], + ends_with: vec!["_with_ending".to_owned()] + }; + let results = RefCell::new(Vec::new()); + let next = FnStep(|metric: &mut Metric| { + results.borrow_mut().push(metric.clone()); + }); + let mut filter = DenyTag::new(config, next); + filter.submit(&mut Metric::new( + b"foo.bar:1|c|#abc.tag:test,hc_project:1000,metric_with_ending:12".to_vec(), + )); + + assert_eq!( + results.borrow()[0], + Metric::new(b"foo.bar:1|c|#abc.tag:test".to_vec()) + ); + } + + #[test] + fn test_deduplication() { + let config = DenyTagConfig { + tags: vec!["test1".to_owned(), "test1".to_owned()], + starts_with: vec!["start1".to_owned(), "start1".to_owned()], + ends_with: vec!["end1".to_owned(), "end1".to_owned()] + }; + let results = RefCell::new(Vec::new()); + let next = FnStep(|metric: &mut Metric| { + results.borrow_mut().push(metric.clone()); + }); + let filter = DenyTag::new(config, next); + let expected = HashSet::from_iter(vec![ + DenyType::Equals("test1".to_owned()), + DenyType::StartsWith("start1".to_owned()), + DenyType::EndsWith("end1".to_owned())].iter().cloned()); + assert_eq!(filter.filters, expected); + } } diff --git a/src/middleware/mod.rs b/src/middleware/mod.rs index a0499cd..78f7df7 100644 --- a/src/middleware/mod.rs +++ b/src/middleware/mod.rs @@ -7,7 +7,6 @@ pub mod aggregate; pub mod allow_tag; pub mod cardinality_limit; pub mod deny_tag; -pub mod strip_tag; pub mod mirror; pub mod sample; pub mod tag_cardinality_limit; diff --git a/src/middleware/strip_tag.rs b/src/middleware/strip_tag.rs deleted file mode 100644 index d72a278..0000000 --- a/src/middleware/strip_tag.rs +++ /dev/null @@ -1,137 +0,0 @@ -use anyhow::Error; -use crate::middleware::Middleware; -use crate::types::Metric; - -/// Different types of operations that can be used to strip out a metric tag by name. -pub enum FilterType { - /// The metric tag starts with the specified string. - StartsWith(String), - /// The metric tag ends with the specified string. - EndsWith(String) -} - -impl FilterType { - /// Returns `true` if the metric name (in bytes) matches the given filter operation. - pub fn matches(&self, value: &[u8]) -> bool { - match self { - Self::StartsWith(starts_with) => value.starts_with(starts_with.as_bytes()), - Self::EndsWith(ends_with) => value.ends_with(ends_with.as_bytes()) - } - } -} - -/// A middleware that strips metric tags based on configurable filter rules. -/// -/// This middleware allows you to selectively strip tags from metrics based on predefined -/// filter rules. It's particularly useful when you want to: -/// - Apply consistent tag filtering across multiple metric calls -/// - Manage metric cardinality by filtering out certain tags -/// - Configure tag filtering at a central location rather than in individual metric calls -/// -/// A common use case is managing metric cardinality. For example, you can -/// filter out high-cardinality tags (like user IDs) in certain environments while keeping them -/// in others, all without modifying the metric emission code. -pub struct StripTag { - /// A list of filter rules that determine which tags should be stripped out. - filters: Vec, - /// The next middleware in the chain. - next: M -} - -impl StripTag where M:Middleware { - pub fn new(filters: Vec, next: M) -> StripTag { - Self { - filters, next - } - } -} - -impl Middleware for StripTag where M:Middleware { - fn join(&mut self) -> Result<(), Error> { - self.next.join() - } - fn poll(&mut self) { - self.next.poll() - } - - fn submit(&mut self, metric: &mut Metric) { - let has_filtered_tags = metric - .tags_iter() - .any(|t| self.filters.iter().any(|filters| filters.matches(t.name()))); - - if has_filtered_tags { - let mut new_metric = metric.clone(); - new_metric.set_tags_from_iter( - metric - .tags_iter() - .filter(|t| !self.filters.iter().any(|filters| filters.matches(t.name()))), - ); - self.next.submit(&mut new_metric); - } else { - self.next.submit(metric); - } - } -} - -#[cfg(test)] -mod tests { - use std::cell::RefCell; - use crate::middleware::strip_tag::{StripTag, FilterType}; - use crate::middleware::Middleware; - use crate::testutils::FnStep; - use crate::types::Metric; - - #[test] - fn test_filter_starts_with() { - let results = RefCell::new(Vec::new()); - let next = FnStep(|metric: &mut Metric| { - results.borrow_mut().push(metric.clone()); - }); - let mut filter = StripTag::new(vec![FilterType::StartsWith("hc_".to_owned())], next); - filter.submit(&mut Metric::new( - b"foo.bar:1|c|#abc.tag:test,hc_project:1000".to_vec(), - )); - - assert_eq!( - results.borrow()[0], - Metric::new(b"foo.bar:1|c|#abc.tag:test".to_vec()) - ); - } - - #[test] - fn test_filter_ends_with() { - let results = RefCell::new(Vec::new()); - let next = FnStep(|metric: &mut Metric| { - results.borrow_mut().push(metric.clone()); - }); - let mut filter = StripTag::new(vec![FilterType::EndsWith("_hc".to_owned())], next); - filter.submit(&mut Metric::new( - b"foo.bar:1|c|#abc.tag:test,project_hc:1000".to_vec(), - )); - - assert_eq!( - results.borrow()[0], - Metric::new(b"foo.bar:1|c|#abc.tag:test".to_vec()) - ); - } - - #[test] - fn test_multiple_filters() { - let results = RefCell::new(Vec::new()); - let next = FnStep(|metric: &mut Metric| { - results.borrow_mut().push(metric.clone()); - }); - let mut filter = StripTag::new(vec![ - FilterType::StartsWith("hc_".to_owned()), - FilterType::EndsWith("_with_ending".to_owned()) - ], next); - filter.submit(&mut Metric::new( - b"foo.bar:1|c|#abc.tag:test,hc_project:1000,metric_with_ending:12".to_vec(), - )); - - assert_eq!( - results.borrow()[0], - Metric::new(b"foo.bar:1|c|#abc.tag:test".to_vec()) - ); - } -} \ No newline at end of file From 4c641349c85aded2629b4421e6de7a5f52c3a986 Mon Sep 17 00:00:00 2001 From: Martin Linzmayer Date: Wed, 11 Jun 2025 14:25:52 +0200 Subject: [PATCH 7/8] fix --- src/main.rs | 9 --------- 1 file changed, 9 deletions(-) diff --git a/src/main.rs b/src/main.rs index 092b2cf..ea8864c 100644 --- a/src/main.rs +++ b/src/main.rs @@ -5,7 +5,6 @@ use clap::Parser; use statsdproxy::config; use statsdproxy::middleware::{self, server::Server, upstream::Upstream}; -use statsdproxy::middleware::strip_tag::FilterType; #[derive(Parser, Debug)] #[command(author, version, about, long_about = None)] @@ -48,14 +47,6 @@ fn main() -> Result<(), Error> { config::MiddlewareConfig::DenyTag(config) => { client = Box::new(middleware::deny_tag::DenyTag::new(config, client)); } - config::MiddlewareConfig::StripTag(config) => { - let filters = config.starts_with.into_iter() - .map(FilterType::StartsWith) - .chain(config.ends_with.into_iter() - .map(FilterType::EndsWith)) - .collect(); - client = Box::new(middleware::strip_tag::StripTag::new(filters, client)); - } config::MiddlewareConfig::CardinalityLimit(config) => { client = Box::new(middleware::cardinality_limit::CardinalityLimit::new( config, client, From e21c1720155cd352bcd25856963280e9188799ac Mon Sep 17 00:00:00 2001 From: Martin Linzmayer Date: Wed, 11 Jun 2025 14:27:23 +0200 Subject: [PATCH 8/8] fix --- src/config.rs | 14 -------------- 1 file changed, 14 deletions(-) diff --git a/src/config.rs b/src/config.rs index 7463ff5..877e5a9 100644 --- a/src/config.rs +++ b/src/config.rs @@ -172,20 +172,6 @@ mod tests { assert!(config.is_err()); } - // #[test] - // fn test_empty_strip_config() { - // let yaml = r#" - // middlewares: - // - type: deny-tag - // "#; - // let config = serde_yaml::from_str::(yaml).unwrap(); - // let empty_config = MiddlewareConfig::StripTag(StripTagConfig { - // starts_with: Vec::new(), - // ends_with: Vec::new(), - // }); - // assert_eq!(config.middlewares[0], empty_config); - // } - #[test] fn config() { let config = Config::new("example.yaml").unwrap();