diff --git a/README.md b/README.md index 680fe86..244a516 100644 --- a/README.md +++ b/README.md @@ -7,6 +7,7 @@ A proxy for transforming, pre-aggregating and routing statsd metrics, like Currently supports the following transformations: * Deny- or allow-listing of specific tag keys or metric names +* Deny tags based on prefix or suffix * Adding hardcoded tags to all metrics * Basic cardinality limiting, tracking the number of distinct tag values per key or the number of overall timeseries (=combinations of metrics and tags). diff --git a/example.yaml b/example.yaml index a07d708..84cb7f1 100644 --- a/example.yaml +++ b/example.yaml @@ -8,8 +8,11 @@ middlewares: # Remove a list of tag names ("a", "b" and "c") from incoming metrics + # Also removes tags that start or end with certain words ("foo" or "bar") - type: deny-tag tags: [a, b, c] + starts_with: [foo] + ends_with: [bar] # Allow a list of tag names ("a", "b" and "c") from incoming metrics, and # remove all other tags. diff --git a/src/config.rs b/src/config.rs index 975d72f..877e5a9 100644 --- a/src/config.rs +++ b/src/config.rs @@ -1,3 +1,4 @@ +#[cfg(feature = "cli")] use std::fmt::Formatter; use std::time::Duration; #[cfg(feature = "cli")] @@ -39,6 +40,10 @@ pub enum MiddlewareConfig { #[derive(Debug, PartialEq)] pub struct DenyTagConfig { pub tags: Vec, + #[cfg_attr(feature = "cli", serde(default))] + pub starts_with: Vec, + #[cfg_attr(feature = "cli", serde(default))] + pub ends_with: Vec, } #[cfg_attr(feature = "cli", derive(Deserialize))] @@ -180,6 +185,12 @@ mod tests { "b", "c", ], + starts_with: [ + "foo", + ], + ends_with: [ + "bar", + ], }, ), AllowTag( diff --git a/src/middleware/deny_tag.rs b/src/middleware/deny_tag.rs index c2920df..fcc2886 100644 --- a/src/middleware/deny_tag.rs +++ b/src/middleware/deny_tag.rs @@ -1,11 +1,22 @@ +use std::collections::HashSet; use crate::config::DenyTagConfig; use crate::middleware::Middleware; use crate::types::Metric; use anyhow::Error; -use std::collections::HashSet; +/// A middleware that denies metric tags based on configurable filter rules. +/// +/// This middleware allows you to explicitly deny tags from metrics based on predefined +/// filter rules. It's particularly useful when you want to: +/// - Consistently deny specific tags across multiple metric calls +/// - Control metric cardinality by denying high-cardinality tags +/// - Centralize tag denial rules rather than handling them in individual metric calls +/// +/// A common use case is managing metric cardinality. For example, you can +/// deny high-cardinality tags (like user IDs) in certain environments while allowing them +/// in others, all without modifying the metric emission code. pub struct DenyTag { - tags: HashSet>, + filters: HashSet, next: M, } @@ -14,10 +25,14 @@ where M: Middleware, { pub fn new(config: DenyTagConfig, next: M) -> Self { - let tags: HashSet> = - HashSet::from_iter(config.tags.iter().cloned().map(|tag| tag.into_bytes())); + let filters = config.starts_with.into_iter() + .map(DenyType::StartsWith) + .chain(config.ends_with.into_iter() + .map(DenyType::EndsWith)) + .chain(config.tags.into_iter().map(DenyType::Equals)) + .collect(); - Self { tags, next } + Self { filters, next } } } @@ -34,7 +49,7 @@ where let mut rewrite_tags = false; for tag in metric.tags_iter() { - if self.tags.contains(tag.name()) { + if self.filters.iter().any(|f| f.matches(tag.name())) { log::debug!("deny_tag: Dropping tag {:?}", tag.name()); rewrite_tags = true; } else { @@ -56,10 +71,31 @@ where } } +/// Different types of operations that can be used to strip out a metric tag by name. +#[derive(PartialEq, Eq, Hash, Clone, Debug)] +pub enum DenyType { + /// The metric tag starts with the specified string. + StartsWith(String), + /// The metric tag ends with the specified string. + EndsWith(String), + /// The metric tag matches the word exactly. + Equals(String), +} + +impl DenyType { + /// Returns `true` if the metric name (in bytes) matches the given filter operation. + pub fn matches(&self, value: &[u8]) -> bool { + match self { + Self::StartsWith(starts_with) => value.starts_with(starts_with.as_bytes()), + Self::EndsWith(ends_with) => value.ends_with(ends_with.as_bytes()), + Self::Equals(equals) => equals.as_bytes() == value, + } + } +} + #[cfg(test)] mod tests { use std::cell::RefCell; - use super::*; use crate::testutils::FnStep; @@ -67,6 +103,8 @@ mod tests { fn basic() { let config = DenyTagConfig { tags: vec!["nope".to_string()], + starts_with: vec![], + ends_with: vec![] }; let results = RefCell::new(vec![]); @@ -91,4 +129,89 @@ mod tests { Metric::new(b"servers.online:1|c|#country:china,extra_stuff,,".to_vec()) ); } + + #[test] + fn test_filter_starts_with() { + let config = DenyTagConfig { + tags: vec![], + starts_with: vec!["hc_".to_owned()], + ends_with: vec![] + }; + let results = RefCell::new(Vec::new()); + let next = FnStep(|metric: &mut Metric| { + results.borrow_mut().push(metric.clone()); + }); + let mut filter = DenyTag::new(config, next); + filter.submit(&mut Metric::new( + b"foo.bar:1|c|#abc.tag:test,hc_project:1000".to_vec(), + )); + + assert_eq!( + results.borrow()[0], + Metric::new(b"foo.bar:1|c|#abc.tag:test".to_vec()) + ); + } + + #[test] + fn test_filter_ends_with() { + let config = DenyTagConfig { + tags: vec![], + starts_with: vec![], + ends_with: vec!["_hc".to_owned()] + }; + let results = RefCell::new(Vec::new()); + let next = FnStep(|metric: &mut Metric| { + results.borrow_mut().push(metric.clone()); + }); + let mut filter = DenyTag::new(config, next); + filter.submit(&mut Metric::new( + b"foo.bar:1|c|#abc.tag:test,project_hc:1000".to_vec(), + )); + + assert_eq!( + results.borrow()[0], + Metric::new(b"foo.bar:1|c|#abc.tag:test".to_vec()) + ); + } + + #[test] + fn test_multiple_filters() { + let config = DenyTagConfig { + tags: vec![], + starts_with: vec!["hc_".to_owned()], + ends_with: vec!["_with_ending".to_owned()] + }; + let results = RefCell::new(Vec::new()); + let next = FnStep(|metric: &mut Metric| { + results.borrow_mut().push(metric.clone()); + }); + let mut filter = DenyTag::new(config, next); + filter.submit(&mut Metric::new( + b"foo.bar:1|c|#abc.tag:test,hc_project:1000,metric_with_ending:12".to_vec(), + )); + + assert_eq!( + results.borrow()[0], + Metric::new(b"foo.bar:1|c|#abc.tag:test".to_vec()) + ); + } + + #[test] + fn test_deduplication() { + let config = DenyTagConfig { + tags: vec!["test1".to_owned(), "test1".to_owned()], + starts_with: vec!["start1".to_owned(), "start1".to_owned()], + ends_with: vec!["end1".to_owned(), "end1".to_owned()] + }; + let results = RefCell::new(Vec::new()); + let next = FnStep(|metric: &mut Metric| { + results.borrow_mut().push(metric.clone()); + }); + let filter = DenyTag::new(config, next); + let expected = HashSet::from_iter(vec![ + DenyType::Equals("test1".to_owned()), + DenyType::StartsWith("start1".to_owned()), + DenyType::EndsWith("end1".to_owned())].iter().cloned()); + assert_eq!(filter.filters, expected); + } }