diff --git a/common/infrastructure/Cargo.toml b/common/infrastructure/Cargo.toml index 92eaa4b68..d1aa26125 100644 --- a/common/infrastructure/Cargo.toml +++ b/common/infrastructure/Cargo.toml @@ -11,7 +11,7 @@ actix-tls = { workspace = true, features = ["openssl"] } actix-web = { workspace = true, features = ["openssl"] } actix-web-extras = { workspace = true } actix-web-httpauth = { workspace = true } -actix-web-opentelemetry = { workspace = true } +actix-web-opentelemetry = { workspace = true, features = ["metrics"] } actix-web-prom = { workspace = true } anyhow = { workspace = true } bytesize = { workspace = true } diff --git a/common/infrastructure/src/app/http.rs b/common/infrastructure/src/app/http.rs index f4b694642..6c55e98d4 100644 --- a/common/infrastructure/src/app/http.rs +++ b/common/infrastructure/src/app/http.rs @@ -1,7 +1,7 @@ use crate::{ app::{new_app, AppOptions}, endpoint::Endpoint, - tracing::Tracing, + otel::{Metrics as OtelMetrics, Tracing}, }; use actix_cors::Cors; use actix_tls::{accept::openssl::reexports::SslAcceptor, connect::openssl::reexports::SslMethod}; @@ -10,7 +10,7 @@ use actix_web::{ web::{self, JsonConfig}, App, HttpResponse, HttpServer, }; -use actix_web_opentelemetry::RequestTracing; +use actix_web_opentelemetry::{RequestMetrics, RequestTracing}; use actix_web_prom::{PrometheusMetrics, PrometheusMetricsBuilder}; use anyhow::{anyhow, Context}; use bytesize::ByteSize; @@ -289,6 +289,7 @@ pub struct HttpServerBuilder { json_limit: Option, request_limit: Option, tracing: Tracing, + metrics: OtelMetrics, disable_log: bool, @@ -329,6 +330,7 @@ impl HttpServerBuilder { json_limit: None, request_limit: None, tracing: Tracing::default(), + metrics: OtelMetrics::default(), openapi_info: None, disable_log: false, } @@ -370,6 +372,11 @@ impl HttpServerBuilder { self } + pub fn metrics_otel(mut self, metrics: OtelMetrics) -> Self { + self.metrics = metrics; + self + } + pub fn openapi_info(mut self, openapi_info: Info) -> Self { self.openapi_info = Some(openapi_info); self @@ -488,6 +495,17 @@ impl HttpServerBuilder { tracing_logger.is_some() ); + let otel_metrics = match self.metrics { + OtelMetrics::Disabled => None, + OtelMetrics::Enabled => Some(RequestMetrics::default()), + }; + + log::debug!( + "Otel Metrics({}) - metrics: {}", + self.metrics, + otel_metrics.is_some() + ); + let mut app = new_app(AppOptions { cors, metrics: metrics.clone(), @@ -498,6 +516,7 @@ impl HttpServerBuilder { .unwrap_or_else(|| Authorizer::new(None)), logger, tracing_logger, + otel_metrics, }) .app_data(json) .into_utoipa_app(); diff --git a/common/infrastructure/src/app/mod.rs b/common/infrastructure/src/app/mod.rs index 3630dbcee..d11b6d671 100644 --- a/common/infrastructure/src/app/mod.rs +++ b/common/infrastructure/src/app/mod.rs @@ -9,7 +9,7 @@ use actix_web::{ }; use actix_web_extras::middleware::Condition; use actix_web_httpauth::{extractors::bearer::BearerAuth, middleware::HttpAuthentication}; -use actix_web_opentelemetry::RequestTracing; +use actix_web_opentelemetry::{RequestMetrics, RequestTracing}; use actix_web_prom::PrometheusMetrics; use futures::{future::LocalBoxFuture, FutureExt}; use std::sync::Arc; @@ -23,6 +23,7 @@ pub struct AppOptions { pub authorizer: Authorizer, pub logger: Option, pub tracing_logger: Option, + pub otel_metrics: Option, } /// create a new authenticator @@ -78,6 +79,8 @@ pub fn new_app( .wrap(Condition::from_option(options.cors)) // Next, record metrics for the request (should never fail) .wrap(Condition::from_option(options.metrics)) + // Next, record otel metrics for the request (should never fail) + .wrap(Condition::from_option(options.otel_metrics)) // Compress everything .wrap(Compress::default()) // First log the request, so that we know what happens (can't fail) diff --git a/common/infrastructure/src/infra.rs b/common/infrastructure/src/infra.rs index 82e8b670b..af56d67f9 100644 --- a/common/infrastructure/src/infra.rs +++ b/common/infrastructure/src/infra.rs @@ -11,7 +11,7 @@ use futures::future::select_all; use prometheus::{Registry, TextEncoder}; use tokio::signal; -use crate::tracing::{init_tracing, Tracing}; +use crate::otel::{init_metrics, init_tracing, Metrics as OtelMetrics, Tracing}; use crate::health::{Checks, HealthChecks}; #[cfg(unix)] @@ -39,6 +39,9 @@ pub struct InfrastructureConfig { /// Enable tracing #[arg(long, env, default_value_t = Tracing::Disabled)] pub tracing: Tracing, + /// Enable metrics + #[arg(long, env, default_value_t = OtelMetrics::Disabled)] + pub metrics: OtelMetrics, } impl Default for InfrastructureConfig { @@ -48,6 +51,7 @@ impl Default for InfrastructureConfig { infrastructure_bind: DEFAULT_BIND_ADDR.into(), infrastructure_workers: 1, tracing: Tracing::Disabled, + metrics: OtelMetrics::Disabled, } } } @@ -222,6 +226,7 @@ impl Infrastructure { MFut: Future>, { init_tracing(id, self.config.tracing); + init_metrics(id, self.config.metrics); let init_data = init(InitContext { metrics: self.metrics.clone(), diff --git a/common/infrastructure/src/lib.rs b/common/infrastructure/src/lib.rs index 9ec04003e..5067d7d83 100644 --- a/common/infrastructure/src/lib.rs +++ b/common/infrastructure/src/lib.rs @@ -3,7 +3,7 @@ mod infra; pub mod app; pub mod endpoint; pub mod health; -pub mod tracing; +pub mod otel; pub use infra::*; diff --git a/common/infrastructure/src/tracing.rs b/common/infrastructure/src/otel.rs similarity index 63% rename from common/infrastructure/src/tracing.rs rename to common/infrastructure/src/otel.rs index db973bb1f..53527dae9 100644 --- a/common/infrastructure/src/tracing.rs +++ b/common/infrastructure/src/otel.rs @@ -1,13 +1,37 @@ use core::fmt; -use opentelemetry::{propagation::Injector, trace::TracerProvider, Context, KeyValue}; -use opentelemetry_otlp::SpanExporter; -use opentelemetry_sdk::{trace as sdktrace, Resource}; +use opentelemetry::{ + global::{ + get_text_map_propagator, set_meter_provider, set_text_map_propagator, set_tracer_provider, + }, + propagation::Injector, + trace::TracerProvider, + Context, KeyValue, +}; +use opentelemetry_otlp::{MetricExporter, SpanExporter}; +use opentelemetry_sdk::{ + metrics::{PeriodicReader, SdkMeterProvider}, + propagation::TraceContextPropagator, + runtime::TokioCurrentThread, + trace::{ + self as sdktrace, + Sampler::{self, ParentBased}, + }, + Resource, +}; use reqwest::RequestBuilder; use std::sync::Once; use tracing_subscriber::{ field::MakeExt, layer::SubscriberExt, util::SubscriberInitExt, EnvFilter, }; +#[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq)] +pub enum Metrics { + #[clap(name = "disabled")] + Disabled, + #[clap(name = "enabled")] + Enabled, +} + #[derive(clap::ValueEnum, Clone, Copy, Debug, PartialEq)] pub enum Tracing { #[clap(name = "disabled")] @@ -16,12 +40,27 @@ pub enum Tracing { Enabled, } +impl Default for Metrics { + fn default() -> Self { + Self::Disabled + } +} + impl Default for Tracing { fn default() -> Self { Self::Disabled } } +impl fmt::Display for Metrics { + fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { + match self { + Metrics::Disabled => write!(f, "disabled"), + Metrics::Enabled => write!(f, "enabled"), + } + } +} + impl fmt::Display for Tracing { fn fmt(&self, f: &mut fmt::Formatter<'_>) -> fmt::Result { match self { @@ -53,7 +92,7 @@ pub trait WithTracing { impl WithTracing for RequestBuilder { fn propagate_context(self, cx: &Context) -> Self { - let headers = opentelemetry::global::get_text_map_propagator(|prop| { + let headers = get_text_map_propagator(|prop| { let mut injector = HeaderInjector::new(); prop.inject_context(cx, &mut injector); injector.0 @@ -87,11 +126,11 @@ fn sampling_from_env() -> Option { .and_then(|s| s.to_str().and_then(|s| s.parse::().ok())) } -fn sampler() -> opentelemetry_sdk::trace::Sampler { +fn sampler() -> Sampler { if let Some(p) = sampling_from_env() { - opentelemetry_sdk::trace::Sampler::TraceIdRatioBased(p) + Sampler::TraceIdRatioBased(p) } else { - opentelemetry_sdk::trace::Sampler::TraceIdRatioBased(0.001) + Sampler::TraceIdRatioBased(0.001) } } @@ -107,26 +146,56 @@ pub fn init_tracing(name: &str, tracing: Tracing) { } } +pub fn init_metrics(name: &str, metrics: Metrics) { + match metrics { + Metrics::Disabled => { + INIT.call_once(init_no_tracing); + } + Metrics::Enabled => { + init_otlp_metrics(name); + } + } +} + +fn init_otlp_metrics(name: &str) { + #[allow(clippy::expect_used)] + let exporter = MetricExporter::builder() + .with_tonic() + .build() + .expect("Unable to build metrics exporter."); + + let reader = PeriodicReader::builder(exporter, TokioCurrentThread).build(); + + let provider = SdkMeterProvider::builder() + .with_reader(reader) + .with_resource(Resource::new(vec![KeyValue::new( + "service.name", + name.to_string(), + )])) + .build(); + + println!("Using OTEL Collector with Prometheus as the back-end."); + println!("{:#?}", provider); + + set_meter_provider(provider.clone()); +} + fn init_otlp(name: &str) { - opentelemetry::global::set_text_map_propagator( - opentelemetry_sdk::propagation::TraceContextPropagator::new(), - ); + set_text_map_propagator(TraceContextPropagator::new()); #[allow(clippy::expect_used)] let exporter = SpanExporter::builder() .with_tonic() .build() - .expect("Unable to build OTEL exporter"); + .expect("Unable to build tracing exporter"); let provider = sdktrace::TracerProvider::builder() .with_resource(Resource::new(vec![KeyValue::new( "service.name", name.to_string(), )])) - .with_batch_exporter(exporter, opentelemetry_sdk::runtime::TokioCurrentThread) - .with_sampler(opentelemetry_sdk::trace::Sampler::ParentBased(Box::new( - sampler(), - ))) + .with_batch_exporter(exporter, TokioCurrentThread) + .with_sampler(ParentBased(Box::new(sampler()))) .build(); println!("Using OTEL Collector with Jaeger as the back-end."); @@ -142,7 +211,7 @@ fn init_otlp(name: &str) { { eprintln!("Error initializing tracing: {:?}", e); } - opentelemetry::global::set_tracer_provider(provider); + set_tracer_provider(provider); } fn init_no_tracing() { diff --git a/docs/design/log_tracing.md b/docs/design/log_tracing.md index 6da5d3707..29cffb9d6 100644 --- a/docs/design/log_tracing.md +++ b/docs/design/log_tracing.md @@ -136,25 +136,4 @@ mean it will panic. For example, the `Option::unwrap_or` function: ![Screenshot of rustdoc for Option::unwrap_or](drawings/log_tracing_2.png) -## Sending traces to OpenTelemetry Collector (devmode) - -Jaeger and OTEL Collector: - -```shell -podman compose -f etc/dev-traces/compose.yaml up -``` - -Database: - -```shell -podman compose -f etc/deploy/compose/compose.yaml up -``` - -Trustify with traces: - -```shell -OTEL_TRACES_SAMPLER_ARG=1 OTEL_EXPORTER_OTLP_ENDPOINT="http://localhost:4317" cargo run --bin trustd api --db-password trustify --auth-disabled --tracing enabled -``` - -Access Trustify at [localhost:8080](http://localhost:8080) and analyze the traces using the [Jaeger UI](http://localhost:16686/) diff --git a/docs/otel.md b/docs/otel.md new file mode 100644 index 000000000..e340f52de --- /dev/null +++ b/docs/otel.md @@ -0,0 +1,45 @@ +# OpenTelemetry + +## Sending traces to OpenTelemetry Collector at development time + +Jaeger and OTEL Collector: + +```shell +podman compose -f etc/telemetry/compose.yaml up +``` + +Database: + +```shell +podman compose -f etc/deploy/compose/compose.yaml up +``` + +Trustify with traces: + +```shell +OTEL_TRACES_SAMPLER_ARG=1 OTEL_EXPORTER_OTLP_ENDPOINT="http://localhost:4317" cargo run --bin trustd api --db-password trustify --auth-disabled --tracing enabled +``` + +Access Trustify at [localhost:8080](http://localhost:8080) and analyze the traces using the [Jaeger UI](http://localhost:16686/) + +## Gathering metrics at development time + +Prometheus and OTEL Collector: + +```shell +podman compose -f etc/telemetry/compose.yaml up +``` + +Database: + +```shell +podman compose -f etc/deploy/compose/compose.yaml up +``` + +Trustify with metrics: + +```shell +cargo run --bin trustd api --db-password trustify --auth-disabled --metrics enabled +``` + +Access Trustify at [localhost:8080](http://localhost:8080) and analyze the metrics using the [Prometheus UI](http://localhost:9090/) diff --git a/etc/dev-traces/compose.yaml b/etc/telemetry/compose.yaml similarity index 79% rename from etc/dev-traces/compose.yaml rename to etc/telemetry/compose.yaml index 525de07a9..2fa5a3889 100644 --- a/etc/dev-traces/compose.yaml +++ b/etc/telemetry/compose.yaml @@ -1,4 +1,12 @@ services: + prometheus: + container_name: prometheus + image: prom/prometheus:latest + volumes: + - ./prometheus.yaml:/etc/prometheus/prometheus.yml:z + ports: + - "9090:9090" + - "9464:9464" jaeger-all-in-one: hostname: jaeger-all-in-one image: jaegertracing/all-in-one:1.53.0 # Using this version to align with trustify-helm-charts diff --git a/etc/dev-traces/config.yaml b/etc/telemetry/config.yaml similarity index 74% rename from etc/dev-traces/config.yaml rename to etc/telemetry/config.yaml index aa52cda78..c2cf053c3 100644 --- a/etc/dev-traces/config.yaml +++ b/etc/telemetry/config.yaml @@ -11,12 +11,17 @@ exporters: insecure: true debug: verbosity: detailed + prometheus: + endpoint: "0.0.0.0:9464" processors: batch: {} service: pipelines: + metrics: + receivers: [otlp] + exporters: [debug, prometheus] traces: receivers: [otlp] processors: [batch] diff --git a/etc/telemetry/prometheus.yaml b/etc/telemetry/prometheus.yaml new file mode 100644 index 000000000..be8264ba8 --- /dev/null +++ b/etc/telemetry/prometheus.yaml @@ -0,0 +1,8 @@ +global: + scrape_interval: 10s + +scrape_configs: + - job_name: 'collector' + static_configs: + - targets: ['collector:9464'] + diff --git a/server/src/profile/api.rs b/server/src/profile/api.rs index d17af5c0a..35a46acdd 100644 --- a/server/src/profile/api.rs +++ b/server/src/profile/api.rs @@ -31,7 +31,7 @@ use trustify_infrastructure::{ }, endpoint::Trustify, health::checks::{Local, Probe}, - tracing::Tracing, + otel::{Metrics as OtelMetrics, Tracing}, Infrastructure, InfrastructureConfig, InitContext, Metrics, }; use trustify_module_analysis::service::AnalysisService; @@ -162,6 +162,7 @@ struct InitData { storage: DispatchBackend, http: HttpServerConfig, tracing: Tracing, + metrics: OtelMetrics, swagger_oidc: Option>, #[cfg(feature = "garage-door")] embedded_oidc: Option, @@ -294,6 +295,7 @@ impl InitData { config, http: run.http, tracing: run.infra.tracing, + metrics: run.infra.metrics, swagger_oidc, storage, #[cfg(feature = "garage-door")] @@ -312,6 +314,7 @@ impl InitData { let http = { HttpServerBuilder::try_from(self.http)? .tracing(self.tracing) + .metrics_otel(self.metrics) .metrics(metrics.registry().clone(), SERVICE_ID) .authorizer(self.authorizer) .swagger_ui_oidc(self.swagger_oidc.clone()) diff --git a/server/src/profile/importer.rs b/server/src/profile/importer.rs index dae538ac5..bdbb31413 100644 --- a/server/src/profile/importer.rs +++ b/server/src/profile/importer.rs @@ -29,7 +29,7 @@ use trustify_infrastructure::{ }, endpoint::Trustify, health::checks::{Local, Probe}, - tracing::Tracing, + otel::Tracing, Infrastructure, InfrastructureConfig, InitContext, Metrics, }; use trustify_module_graphql::RootQuery; diff --git a/trustd/src/db.rs b/trustd/src/db.rs index 28154886f..c9a27fc14 100644 --- a/trustd/src/db.rs +++ b/trustd/src/db.rs @@ -6,7 +6,7 @@ use std::process::ExitCode; use std::time::Duration; use trustify_common::config::Database; use trustify_common::db; -use trustify_infrastructure::tracing::{init_tracing, Tracing}; +use trustify_infrastructure::otel::{init_tracing, Tracing}; #[derive(clap::Args, Debug)] pub struct Run {