Skip to content

Commit

Permalink
fix: unit conversion in prom exporter (open-telemetry#1157)
Browse files Browse the repository at this point in the history
* fix: unit conversion in prom exporter

* re-generate && make fmt

* add change log

* fix: proto changes

* use split_once
  • Loading branch information
TommyCpp authored Jul 24, 2023
1 parent e150092 commit b87a827
Show file tree
Hide file tree
Showing 4 changed files with 210 additions and 98 deletions.
2 changes: 1 addition & 1 deletion .gitmodules
Original file line number Diff line number Diff line change
@@ -1,4 +1,4 @@
[submodule "opentelemetry-proto/src/proto/opentelemetry-proto"]
path = opentelemetry-proto/src/proto/opentelemetry-proto
url = https://github.com/open-telemetry/opentelemetry-proto
branch = tags/v0.19.0
branch = tags/v1.0.0
1 change: 1 addition & 0 deletions opentelemetry-prometheus/CHANGELOG.md
Original file line number Diff line number Diff line change
Expand Up @@ -5,6 +5,7 @@
### Added

- Add `with_namespace` option to exporter config.
- Add more units conversions between OTEL metrics and prometheus metrics [#1157](https://github.com/open-telemetry/opentelemetry-rust/pull/1157).
- Add `without_counter_suffixes` option to exporter config.

## v0.12.0
Expand Down
109 changes: 12 additions & 97 deletions opentelemetry-prometheus/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -96,7 +96,7 @@
use once_cell::sync::{Lazy, OnceCell};
use opentelemetry_api::{
global,
metrics::{MetricsError, Result, Unit},
metrics::{MetricsError, Result},
Context, Key, Value,
};
use opentelemetry_sdk::{
Expand Down Expand Up @@ -132,6 +132,7 @@ const SCOPE_INFO_KEYS: [&str; 2] = ["otel_scope_name", "otel_scope_version"];
const COUNTER_SUFFIX: &str = "_total";

mod config;
mod utils;

pub use config::ExporterBuilder;

Expand Down Expand Up @@ -259,14 +260,16 @@ impl Collector {
}

fn get_name(&self, m: &data::Metric) -> Cow<'static, str> {
let name = sanitize_name(&m.name);
match (
&self.namespace,
get_unit_suffixes(&m.unit).filter(|_| !self.without_units),
) {
(Some(namespace), Some(suffix)) => Cow::Owned(format!("{namespace}{name}{suffix}")),
let name = utils::sanitize_name(&m.name);
let unit_suffixes = if self.without_units {
None
} else {
utils::get_unit_suffixes(&m.unit)
};
match (&self.namespace, unit_suffixes) {
(Some(namespace), Some(suffix)) => Cow::Owned(format!("{namespace}{name}_{suffix}")),
(Some(namespace), None) => Cow::Owned(format!("{namespace}{name}")),
(None, Some(suffix)) => Cow::Owned(format!("{name}{suffix}")),
(None, Some(suffix)) => Cow::Owned(format!("{name}_{suffix}")),
(None, None) => name,
}
}
Expand Down Expand Up @@ -378,7 +381,7 @@ impl prometheus::core::Collector for Collector {
fn get_attrs(kvs: &mut dyn Iterator<Item = (&Key, &Value)>, extra: &[LabelPair]) -> Vec<LabelPair> {
let mut keys_map = BTreeMap::<String, Vec<String>>::new();
for (key, value) in kvs {
let key = sanitize_prom_kv(key.as_str());
let key = utils::sanitize_prom_kv(key.as_str());
keys_map
.entry(key)
.and_modify(|v| v.push(value.to_string()))
Expand Down Expand Up @@ -587,63 +590,6 @@ fn create_scope_info_metric(scope: &Scope) -> MetricFamily {
mf
}

fn get_unit_suffixes(unit: &Unit) -> Option<&'static str> {
match unit.as_str() {
"1" => Some("_ratio"),
"By" => Some("_bytes"),
"ms" => Some("_milliseconds"),
_ => None,
}
}

#[allow(clippy::ptr_arg)]
fn sanitize_name(s: &Cow<'static, str>) -> Cow<'static, str> {
// prefix chars to add in case name starts with number
let mut prefix = "";

// Find first invalid char
if let Some((replace_idx, _)) = s.char_indices().find(|(i, c)| {
if *i == 0 && c.is_ascii_digit() {
// first char is number, add prefix and replace reset of chars
prefix = "_";
true
} else {
// keep checking
!c.is_alphanumeric() && *c != '_' && *c != ':'
}
}) {
// up to `replace_idx` have been validated, convert the rest
let (valid, rest) = s.split_at(replace_idx);
Cow::Owned(
prefix
.chars()
.chain(valid.chars())
.chain(rest.chars().map(|c| {
if c.is_ascii_alphanumeric() || c == '_' || c == ':' {
c
} else {
'_'
}
}))
.collect(),
)
} else {
s.clone() // no invalid chars found, return existing
}
}

fn sanitize_prom_kv(s: &str) -> String {
s.chars()
.map(|c| {
if c.is_ascii_alphanumeric() || c == ':' {
c
} else {
'_'
}
})
.collect()
}

trait Numeric: fmt::Debug {
// lossy at large values for u64 and i64 but prometheus only handles floats
fn as_f64(&self) -> f64;
Expand All @@ -666,34 +612,3 @@ impl Numeric for f64 {
*self
}
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn name_sanitization() {
let tests = vec![
("nam€_with_3_width_rune", "nam__with_3_width_rune"),
("`", "_"),
(
r##"! "#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWKYZ[]\^_abcdefghijklmnopqrstuvwkyz{|}~"##,
"________________0123456789:______ABCDEFGHIJKLMNOPQRSTUVWKYZ_____abcdefghijklmnopqrstuvwkyz____",
),

("Avalid_23name", "Avalid_23name"),
("_Avalid_23name", "_Avalid_23name"),
("1valid_23name", "_1valid_23name"),
("avalid_23name", "avalid_23name"),
("Ava:lid_23name", "Ava:lid_23name"),
("a lid_23name", "a_lid_23name"),
(":leading_colon", ":leading_colon"),
("colon:in:the:middle", "colon:in:the:middle"),
("", ""),
];

for (input, want) in tests {
assert_eq!(want, sanitize_name(&input.into()), "input: {input}")
}
}
}
196 changes: 196 additions & 0 deletions opentelemetry-prometheus/src/utils.rs
Original file line number Diff line number Diff line change
@@ -0,0 +1,196 @@
use opentelemetry_api::metrics::Unit;
use std::borrow::Cow;

const NON_APPLICABLE_ON_PER_UNIT: [&str; 8] = ["1", "d", "h", "min", "s", "ms", "us", "ns"];

pub(crate) fn get_unit_suffixes(unit: &Unit) -> Option<Cow<'static, str>> {
// no unit return early
if unit.as_str().is_empty() {
return None;
}

// direct match with known units
if let Some(matched) = get_prom_units(unit.as_str()) {
return Some(Cow::Borrowed(matched));
}

// converting foo/bar to foo_per_bar
// split the string by the first '/'
// if the first part is empty, we just return the second part if it's a match with known per unit
// e.g
// "test/y" => "per_year"
// "km/s" => "kilometers_per_second"
if let Some((first, second)) = unit.as_str().split_once('/') {
return match (
NON_APPLICABLE_ON_PER_UNIT.contains(&first),
get_prom_units(first),
get_prom_per_unit(second),
) {
(true, _, Some(second_part)) | (false, None, Some(second_part)) => {
Some(Cow::Owned(format!("per_{}", second_part)))
}
(false, Some(first_part), Some(second_part)) => {
Some(Cow::Owned(format!("{}_per_{}", first_part, second_part)))
}
_ => None,
};
}

None
}

fn get_prom_units(unit: &str) -> Option<&'static str> {
match unit {
// Time
"d" => Some("days"),
"h" => Some("hours"),
"min" => Some("minutes"),
"s" => Some("seconds"),
"ms" => Some("milliseconds"),
"us" => Some("microseconds"),
"ns" => Some("nanoseconds"),
"By" => Some("bytes"),

// Bytes
"KiBy" => Some("kibibytes"),
"MiBy" => Some("mebibytes"),
"GiBy" => Some("gibibytes"),
"TiBy" => Some("tibibytes"),
"KBy" => Some("kilobytes"),
"MBy" => Some("megabytes"),
"GBy" => Some("gigabytes"),
"TBy" => Some("terabytes"),
"B" => Some("bytes"),
"KB" => Some("kilobytes"),
"MB" => Some("megabytes"),
"GB" => Some("gigabytes"),
"TB" => Some("terabytes"),

// SI
"m" => Some("meters"),
"V" => Some("volts"),
"A" => Some("amperes"),
"J" => Some("joules"),
"W" => Some("watts"),
"g" => Some("grams"),

// Misc
"Cel" => Some("celsius"),
"Hz" => Some("hertz"),
"1" => Some("ratio"),
"%" => Some("percent"),
"$" => Some("dollars"),
_ => None,
}
}

fn get_prom_per_unit(unit: &str) -> Option<&'static str> {
match unit {
"s" => Some("second"),
"m" => Some("minute"),
"h" => Some("hour"),
"d" => Some("day"),
"w" => Some("week"),
"mo" => Some("month"),
"y" => Some("year"),
_ => None,
}
}

#[allow(clippy::ptr_arg)]
pub(crate) fn sanitize_name(s: &Cow<'static, str>) -> Cow<'static, str> {
// prefix chars to add in case name starts with number
let mut prefix = "";

// Find first invalid char
if let Some((replace_idx, _)) = s.char_indices().find(|(i, c)| {
if *i == 0 && c.is_ascii_digit() {
// first char is number, add prefix and replace reset of chars
prefix = "_";
true
} else {
// keep checking
!c.is_alphanumeric() && *c != '_' && *c != ':'
}
}) {
// up to `replace_idx` have been validated, convert the rest
let (valid, rest) = s.split_at(replace_idx);
Cow::Owned(
prefix
.chars()
.chain(valid.chars())
.chain(rest.chars().map(|c| {
if c.is_ascii_alphanumeric() || c == '_' || c == ':' {
c
} else {
'_'
}
}))
.collect(),
)
} else {
s.clone() // no invalid chars found, return existing
}
}

pub(crate) fn sanitize_prom_kv(s: &str) -> String {
s.chars()
.map(|c| {
if c.is_ascii_alphanumeric() || c == ':' {
c
} else {
'_'
}
})
.collect()
}

#[cfg(test)]
mod tests {
use super::*;

#[test]
fn name_sanitization() {
let tests = vec![
("nam€_with_3_width_rune", "nam__with_3_width_rune"),
("`", "_"),
(
r##"! "#$%&'()*+,-./0123456789:;<=>?@ABCDEFGHIJKLMNOPQRSTUVWKYZ[]\^_abcdefghijklmnopqrstuvwkyz{|}~"##,
"________________0123456789:______ABCDEFGHIJKLMNOPQRSTUVWKYZ_____abcdefghijklmnopqrstuvwkyz____",
),

("Avalid_23name", "Avalid_23name"),
("_Avalid_23name", "_Avalid_23name"),
("1valid_23name", "_1valid_23name"),
("avalid_23name", "avalid_23name"),
("Ava:lid_23name", "Ava:lid_23name"),
("a lid_23name", "a_lid_23name"),
(":leading_colon", ":leading_colon"),
("colon:in:the:middle", "colon:in:the:middle"),
("", ""),
];

for (input, want) in tests {
assert_eq!(want, sanitize_name(&input.into()), "input: {input}")
}
}

#[test]
fn test_get_unit_suffixes() {
let test_cases = vec![
// Direct match
("g", Some(Cow::Borrowed("grams"))),
// Per unit
("test/y", Some(Cow::Owned("per_year".to_owned()))),
("1/y", Some(Cow::Owned("per_year".to_owned()))),
("m/s", Some(Cow::Owned("meters_per_second".to_owned()))),
// No match
("invalid", None),
("", None),
];
for (unit_str, expected_suffix) in test_cases {
let unit = Unit::new(unit_str);
assert_eq!(get_unit_suffixes(&unit), expected_suffix);
}
}
}

0 comments on commit b87a827

Please sign in to comment.