Skip to content
New issue

Have a question about this project? Sign up for a free GitHub account to open an issue and contact its maintainers and the community.

By clicking “Sign up for GitHub”, you agree to our terms of service and privacy statement. We’ll occasionally send you account related emails.

Already on GitHub? Sign in to your account

feat: Gathering metrics at development time #1214

Merged
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
2 changes: 1 addition & 1 deletion common/infrastructure/Cargo.toml
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ actix-tls = { workspace = true, features = ["openssl"] }
actix-web = { workspace = true, features = ["openssl"] }
actix-web-extras = { workspace = true }
actix-web-httpauth = { workspace = true }
actix-web-opentelemetry = { workspace = true }
actix-web-opentelemetry = { workspace = true, features = ["metrics"] }
actix-web-prom = { workspace = true }
anyhow = { workspace = true }
bytesize = { workspace = true }
Expand Down
23 changes: 21 additions & 2 deletions common/infrastructure/src/app/http.rs
Original file line number Diff line number Diff line change
@@ -1,7 +1,7 @@
use crate::{
app::{new_app, AppOptions},
endpoint::Endpoint,
tracing::Tracing,
otel::{Metrics as OtelMetrics, Tracing},
};
use actix_cors::Cors;
use actix_tls::{accept::openssl::reexports::SslAcceptor, connect::openssl::reexports::SslMethod};
Expand All @@ -10,7 +10,7 @@ use actix_web::{
web::{self, JsonConfig},
App, HttpResponse, HttpServer,
};
use actix_web_opentelemetry::RequestTracing;
use actix_web_opentelemetry::{RequestMetrics, RequestTracing};
use actix_web_prom::{PrometheusMetrics, PrometheusMetricsBuilder};
use anyhow::{anyhow, Context};
use bytesize::ByteSize;
Expand Down Expand Up @@ -289,6 +289,7 @@ pub struct HttpServerBuilder {
json_limit: Option<usize>,
request_limit: Option<usize>,
tracing: Tracing,
metrics: OtelMetrics,

disable_log: bool,

Expand Down Expand Up @@ -329,6 +330,7 @@ impl HttpServerBuilder {
json_limit: None,
request_limit: None,
tracing: Tracing::default(),
metrics: OtelMetrics::default(),
openapi_info: None,
disable_log: false,
}
Expand Down Expand Up @@ -370,6 +372,11 @@ impl HttpServerBuilder {
self
}

/// Sets the OpenTelemetry metrics mode for this HTTP server builder.
///
/// The stored value is consulted when the application is assembled: `OtelMetrics::Enabled`
/// results in a `RequestMetrics` middleware being handed to the app, `OtelMetrics::Disabled`
/// in none. Returns the builder so calls can be chained.
pub fn metrics_otel(mut self, metrics: OtelMetrics) -> Self {
self.metrics = metrics;
self
}

pub fn openapi_info(mut self, openapi_info: Info) -> Self {
self.openapi_info = Some(openapi_info);
self
Expand Down Expand Up @@ -488,6 +495,17 @@ impl HttpServerBuilder {
tracing_logger.is_some()
);

let otel_metrics = match self.metrics {
OtelMetrics::Disabled => None,
OtelMetrics::Enabled => Some(RequestMetrics::default()),
};

log::debug!(
"Otel Metrics({}) - metrics: {}",
self.metrics,
otel_metrics.is_some()
);

let mut app = new_app(AppOptions {
cors,
metrics: metrics.clone(),
Expand All @@ -498,6 +516,7 @@ impl HttpServerBuilder {
.unwrap_or_else(|| Authorizer::new(None)),
logger,
tracing_logger,
otel_metrics,
})
.app_data(json)
.into_utoipa_app();
Expand Down
5 changes: 4 additions & 1 deletion common/infrastructure/src/app/mod.rs
Original file line number Diff line number Diff line change
Expand Up @@ -9,7 +9,7 @@ use actix_web::{
};
use actix_web_extras::middleware::Condition;
use actix_web_httpauth::{extractors::bearer::BearerAuth, middleware::HttpAuthentication};
use actix_web_opentelemetry::RequestTracing;
use actix_web_opentelemetry::{RequestMetrics, RequestTracing};
use actix_web_prom::PrometheusMetrics;
use futures::{future::LocalBoxFuture, FutureExt};
use std::sync::Arc;
Expand All @@ -23,6 +23,7 @@ pub struct AppOptions {
pub authorizer: Authorizer,
pub logger: Option<Logger>,
pub tracing_logger: Option<RequestTracing>,
pub otel_metrics: Option<RequestMetrics>,
}

/// create a new authenticator
Expand Down Expand Up @@ -78,6 +79,8 @@ pub fn new_app(
.wrap(Condition::from_option(options.cors))
// Next, record metrics for the request (should never fail)
.wrap(Condition::from_option(options.metrics))
// Next, record otel metrics for the request (should never fail)
.wrap(Condition::from_option(options.otel_metrics))
// Compress everything
.wrap(Compress::default())
// First log the request, so that we know what happens (can't fail)
Expand Down
7 changes: 6 additions & 1 deletion common/infrastructure/src/infra.rs
Original file line number Diff line number Diff line change
Expand Up @@ -11,7 +11,7 @@ use futures::future::select_all;
use prometheus::{Registry, TextEncoder};
use tokio::signal;

use crate::tracing::{init_tracing, Tracing};
use crate::otel::{init_metrics, init_tracing, Metrics as OtelMetrics, Tracing};

use crate::health::{Checks, HealthChecks};
#[cfg(unix)]
Expand Down Expand Up @@ -39,6 +39,9 @@ pub struct InfrastructureConfig {
/// Enable tracing
#[arg(long, env, default_value_t = Tracing::Disabled)]
pub tracing: Tracing,
/// Enable metrics
#[arg(long, env, default_value_t = OtelMetrics::Disabled)]
pub metrics: OtelMetrics,
}

impl Default for InfrastructureConfig {
Expand All @@ -48,6 +51,7 @@ impl Default for InfrastructureConfig {
infrastructure_bind: DEFAULT_BIND_ADDR.into(),
infrastructure_workers: 1,
tracing: Tracing::Disabled,
metrics: OtelMetrics::Disabled,
}
}
}
Expand Down Expand Up @@ -222,6 +226,7 @@ impl Infrastructure {
MFut: Future<Output = anyhow::Result<()>>,
{
init_tracing(id, self.config.tracing);
init_metrics(id, self.config.metrics);

let init_data = init(InitContext {
metrics: self.metrics.clone(),
Expand Down
2 changes: 1 addition & 1 deletion common/infrastructure/src/lib.rs
Original file line number Diff line number Diff line change
Expand Up @@ -3,7 +3,7 @@ mod infra;
pub mod app;
pub mod endpoint;
pub mod health;
pub mod tracing;
pub mod otel;

pub use infra::*;

Expand Down
Original file line number Diff line number Diff line change
@@ -1,13 +1,37 @@
use core::fmt;
use opentelemetry::{propagation::Injector, trace::TracerProvider, Context, KeyValue};
use opentelemetry_otlp::SpanExporter;
use opentelemetry_sdk::{trace as sdktrace, Resource};
use opentelemetry::{
global::{
get_text_map_propagator, set_meter_provider, set_text_map_propagator, set_tracer_provider,
},
propagation::Injector,
trace::TracerProvider,
Context, KeyValue,
};
use opentelemetry_otlp::{MetricExporter, SpanExporter};
use opentelemetry_sdk::{
metrics::{PeriodicReader, SdkMeterProvider},
propagation::TraceContextPropagator,
runtime::TokioCurrentThread,
trace::{
self as sdktrace,
Sampler::{self, ParentBased},
},
Resource,
};
use reqwest::RequestBuilder;
use std::sync::Once;
use tracing_subscriber::{
field::MakeExt, layer::SubscriberExt, util::SubscriberInitExt, EnvFilter,
};

/// Switch selecting whether OpenTelemetry metrics are set up.
///
/// Parsed from the command line / environment as the literal values `disabled` and `enabled`.
// NOTE(review): the variants below carry plain `//` comments on purpose — `///` doc comments on
// `ValueEnum` variants are presumably picked up by clap as help text, which would change the
// `--help` output. Confirm before converting them.
#[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq)]
pub enum Metrics {
// Metrics are off; no OTLP metrics pipeline is initialized.
#[clap(name = "disabled")]
Disabled,
// Metrics are on; `init_metrics` sets up the OTLP metrics pipeline.
#[clap(name = "enabled")]
Enabled,
}

#[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq)]
pub enum Tracing {
#[clap(name = "disabled")]
Expand All @@ -16,12 +40,27 @@ pub enum Tracing {
Enabled,
}

impl Default for Metrics {
fn default() -> Self {
Self::Disabled
}
}

impl Default for Tracing {
fn default() -> Self {
Self::Disabled
}
}

impl fmt::Display for Metrics {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Metrics::Disabled => write!(f, "disabled"),
Metrics::Enabled => write!(f, "enabled"),
}
}
}

impl fmt::Display for Tracing {
fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result {
match self {
Expand Down Expand Up @@ -53,7 +92,7 @@ pub trait WithTracing {

impl WithTracing for RequestBuilder {
fn propagate_context(self, cx: &Context) -> Self {
let headers = opentelemetry::global::get_text_map_propagator(|prop| {
let headers = get_text_map_propagator(|prop| {
let mut injector = HeaderInjector::new();
prop.inject_context(cx, &mut injector);
injector.0
Expand Down Expand Up @@ -87,11 +126,11 @@ fn sampling_from_env() -> Option<f64> {
.and_then(|s| s.to_str().and_then(|s| s.parse::<f64>().ok()))
}

fn sampler() -> opentelemetry_sdk::trace::Sampler {
fn sampler() -> Sampler {
if let Some(p) = sampling_from_env() {
opentelemetry_sdk::trace::Sampler::TraceIdRatioBased(p)
Sampler::TraceIdRatioBased(p)
} else {
opentelemetry_sdk::trace::Sampler::TraceIdRatioBased(0.001)
Sampler::TraceIdRatioBased(0.001)
}
}

Expand All @@ -107,26 +146,56 @@ pub fn init_tracing(name: &str, tracing: Tracing) {
}
}

/// Initializes metrics collection according to `metrics`.
///
/// * `name` - service name forwarded to the OTLP metrics setup.
/// * `metrics` - `Metrics::Enabled` sets up the OTLP metrics pipeline; `Metrics::Disabled`
///   runs `init_no_tracing` through the `INIT` guard instead.
// NOTE(review): the disabled arm runs `init_no_tracing`, which by its name looks like the
// fallback for *tracing* being disabled. When tracing is enabled but metrics are disabled this
// may attempt a second subscriber installation — confirm whether this arm should be a no-op.
pub fn init_metrics(name: &str, metrics: Metrics) {
    match metrics {
        Metrics::Enabled => init_otlp_metrics(name),
        Metrics::Disabled => INIT.call_once(init_no_tracing),
    }
}

/// Builds an OTLP metrics pipeline and installs it as the global meter provider.
///
/// The exporter is created with the tonic transport and no explicit endpoint, then wrapped in
/// a periodic reader driven by the `TokioCurrentThread` runtime. The provider's resource
/// carries `service.name` set to `name`.
///
/// # Panics
///
/// Panics if the metrics exporter cannot be built.
fn init_otlp_metrics(name: &str) {
    // `expect` is deliberate: this runs once during start-up and a failure to build the
    // exporter aborts initialization.
    #[allow(clippy::expect_used)]
    let exporter = MetricExporter::builder()
        .with_tonic()
        .build()
        .expect("Unable to build metrics exporter.");

    let reader = PeriodicReader::builder(exporter, TokioCurrentThread).build();

    let resource = Resource::new(vec![KeyValue::new("service.name", name.to_string())]);
    let provider = SdkMeterProvider::builder()
        .with_reader(reader)
        .with_resource(resource)
        .build();

    println!("Using OTEL Collector with Prometheus as the back-end.");
    println!("{:#?}", provider);

    // The provider is not used after this point, so it is moved into the global registry
    // rather than cloned.
    set_meter_provider(provider);
}

fn init_otlp(name: &str) {
opentelemetry::global::set_text_map_propagator(
opentelemetry_sdk::propagation::TraceContextPropagator::new(),
);
set_text_map_propagator(TraceContextPropagator::new());

#[allow(clippy::expect_used)]
let exporter = SpanExporter::builder()
.with_tonic()
.build()
.expect("Unable to build OTEL exporter");
.expect("Unable to build tracing exporter");

let provider = sdktrace::TracerProvider::builder()
.with_resource(Resource::new(vec![KeyValue::new(
"service.name",
name.to_string(),
)]))
.with_batch_exporter(exporter, opentelemetry_sdk::runtime::TokioCurrentThread)
.with_sampler(opentelemetry_sdk::trace::Sampler::ParentBased(Box::new(
sampler(),
)))
.with_batch_exporter(exporter, TokioCurrentThread)
.with_sampler(ParentBased(Box::new(sampler())))
.build();

println!("Using OTEL Collector with Jaeger as the back-end.");
Expand All @@ -142,7 +211,7 @@ fn init_otlp(name: &str) {
{
eprintln!("Error initializing tracing: {:?}", e);
}
opentelemetry::global::set_tracer_provider(provider);
set_tracer_provider(provider);
}

fn init_no_tracing() {
Expand Down
21 changes: 0 additions & 21 deletions docs/design/log_tracing.md
Original file line number Diff line number Diff line change
Expand Up @@ -136,25 +136,4 @@ mean it will panic. For example, the `Option::unwrap_or` function:

![Screenshot of rustdoc for Option::unwrap_or](drawings/log_tracing_2.png)

## Sending traces to OpenTelemetry Collector (devmode)

Jaeger and OTEL Collector:

```shell
podman compose -f etc/dev-traces/compose.yaml up
```

Database:

```shell
podman compose -f etc/deploy/compose/compose.yaml up
```

Trustify with traces:

```shell
OTEL_TRACES_SAMPLER_ARG=1 OTEL_EXPORTER_OTLP_ENDPOINT="http://localhost:4317" cargo run --bin trustd api --db-password trustify --auth-disabled --tracing enabled
```

Access Trustify at [localhost:8080](http://localhost:8080) and analyze the traces using the [Jaeger UI](http://localhost:16686/)

45 changes: 45 additions & 0 deletions docs/otel.md
Original file line number Diff line number Diff line change
@@ -0,0 +1,45 @@
# OpenTelemetry

## Sending traces to OpenTelemetry Collector at development time

Jaeger and OTEL Collector:

```shell
podman compose -f etc/telemetry/compose.yaml up
```

Database:

```shell
podman compose -f etc/deploy/compose/compose.yaml up
```

Trustify with traces:

```shell
OTEL_TRACES_SAMPLER_ARG=1 OTEL_EXPORTER_OTLP_ENDPOINT="http://localhost:4317" cargo run --bin trustd api --db-password trustify --auth-disabled --tracing enabled
```

Access Trustify at [localhost:8080](http://localhost:8080) to generate some requests, then analyze the resulting traces using the [Jaeger UI](http://localhost:16686/).

## Gathering metrics at development time

Prometheus and OTEL Collector:

```shell
podman compose -f etc/telemetry/compose.yaml up
```

Database:

```shell
podman compose -f etc/deploy/compose/compose.yaml up
```

Trustify with metrics:

```shell
cargo run --bin trustd api --db-password trustify --auth-disabled --metrics enabled
```

Access Trustify at [localhost:8080](http://localhost:8080) to generate some requests, then analyze the resulting metrics using the [Prometheus UI](http://localhost:9090/).
Loading