diff --git a/tests/otel_postgres_metrics_e2e/disabled_metrics_config.yaml b/tests/otel_postgres_metrics_e2e/disabled_metrics_config.yaml
new file mode 100644
index 00000000000..9f52d63b886
--- /dev/null
+++ b/tests/otel_postgres_metrics_e2e/disabled_metrics_config.yaml
@@ -0,0 +1,29 @@
+  metrics:
+    postgresql.blks_hit:
+      enabled: true
+    postgresql.blks_read:
+      enabled: true
+    postgresql.database.locks:
+      enabled: true
+    postgresql.deadlocks:
+      enabled: true
+    postgresql.function.calls:
+      enabled: true
+    postgresql.sequential_scans:
+      enabled: true
+    postgresql.temp.io:
+      enabled: true
+    postgresql.temp_files:
+      enabled: true
+    postgresql.tup_deleted:
+      enabled: true
+    postgresql.tup_fetched:
+      enabled: true
+    postgresql.tup_inserted:
+      enabled: true
+    postgresql.tup_returned:
+      enabled: true
+    postgresql.tup_updated:
+      enabled: true
+    postgresql.wal.delay:
+      enabled: true
\ No newline at end of file
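This snippet matches the output format of `generate_yaml_config` in the fetch script below, and is meant to be pasted under the `postgresql` receiver in the collector config. A minimal sanity check before pasting, a sketch that assumes PyYAML is installed and that it runs from the repository root:

```python
# Sketch: confirm the snippet parses and that every entry flips a metric on.
import yaml

with open("tests/otel_postgres_metrics_e2e/disabled_metrics_config.yaml") as f:
    cfg = yaml.safe_load(f)

assert set(cfg) == {"metrics"}
assert all(v == {"enabled": True} for v in cfg["metrics"].values())
```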
diff --git a/tests/otel_postgres_metrics_e2e/postgres_metrics.json b/tests/otel_postgres_metrics_e2e/postgres_metrics.json
new file mode 100644
index 00000000000..7ad0512827c
--- /dev/null
+++ b/tests/otel_postgres_metrics_e2e/postgres_metrics.json
@@ -0,0 +1,146 @@
+{
+  "postgresql.backends": {
+    "data_type": "Sum",
+    "description": "The number of backends."
+  },
+  "postgresql.bgwriter.buffers.allocated": {
+    "data_type": "Sum",
+    "description": "Number of buffers allocated."
+  },
+  "postgresql.bgwriter.buffers.writes": {
+    "data_type": "Sum",
+    "description": "Number of buffers written."
+  },
+  "postgresql.bgwriter.checkpoint.count": {
+    "data_type": "Sum",
+    "description": "The number of checkpoints performed."
+  },
+  "postgresql.bgwriter.duration": {
+    "data_type": "Sum",
+    "description": "Total time spent writing and syncing files to disk by checkpoints."
+  },
+  "postgresql.bgwriter.maxwritten": {
+    "data_type": "Sum",
+    "description": "Number of times the background writer stopped a cleaning scan because it had written too many buffers."
+  },
+  "postgresql.blks_hit": {
+    "data_type": "Sum",
+    "description": "Number of times disk blocks were found already in the buffer cache."
+  },
+  "postgresql.blks_read": {
+    "data_type": "Sum",
+    "description": "Number of disk blocks read in this database."
+  },
+  "postgresql.blocks_read": {
+    "data_type": "Sum",
+    "description": "The number of blocks read."
+  },
+  "postgresql.commits": {
+    "data_type": "Sum",
+    "description": "The number of commits."
+  },
+  "postgresql.connection.max": {
+    "data_type": "Gauge",
+    "description": "Configured maximum number of client connections allowed"
+  },
+  "postgresql.database.count": {
+    "data_type": "Sum",
+    "description": "Number of user databases."
+  },
+  "postgresql.database.locks": {
+    "data_type": "Gauge",
+    "description": "The number of database locks."
+  },
+  "postgresql.db_size": {
+    "data_type": "Sum",
+    "description": "The database disk usage."
+  },
+  "postgresql.deadlocks": {
+    "data_type": "Sum",
+    "description": "The number of deadlocks."
+  },
+  "postgresql.function.calls": {
+    "data_type": "Sum",
+    "description": "The number of calls made to a function. Requires `track_functions=pl|all` in Postgres config."
+  },
+  "postgresql.index.scans": {
+    "data_type": "Sum",
+    "description": "The number of index scans on a table."
+  },
+  "postgresql.index.size": {
+    "data_type": "Gauge",
+    "description": "The size of the index on disk."
+  },
+  "postgresql.operations": {
+    "data_type": "Sum",
+    "description": "The number of db row operations."
+  },
+  "postgresql.replication.data_delay": {
+    "data_type": "Gauge",
+    "description": "The amount of data delayed in replication."
+  },
+  "postgresql.rollbacks": {
+    "data_type": "Sum",
+    "description": "The number of rollbacks."
+  },
+  "postgresql.rows": {
+    "data_type": "Sum",
+    "description": "The number of rows in the database."
+  },
+  "postgresql.sequential_scans": {
+    "data_type": "Sum",
+    "description": "The number of sequential scans."
+  },
+  "postgresql.table.count": {
+    "data_type": "Sum",
+    "description": "Number of user tables in a database."
+  },
+  "postgresql.table.size": {
+    "data_type": "Sum",
+    "description": "Disk space used by a table."
+  },
+  "postgresql.table.vacuum.count": {
+    "data_type": "Sum",
+    "description": "Number of times a table has manually been vacuumed."
+  },
+  "postgresql.temp.io": {
+    "data_type": "Sum",
+    "description": "Total amount of data written to temporary files by queries."
+  },
+  "postgresql.temp_files": {
+    "data_type": "Sum",
+    "description": "The number of temp files."
+  },
+  "postgresql.tup_deleted": {
+    "data_type": "Sum",
+    "description": "Number of rows deleted by queries in the database."
+  },
+  "postgresql.tup_fetched": {
+    "data_type": "Sum",
+    "description": "Number of rows fetched by queries in the database."
+  },
+  "postgresql.tup_inserted": {
+    "data_type": "Sum",
+    "description": "Number of rows inserted by queries in the database."
+  },
+  "postgresql.tup_returned": {
+    "data_type": "Sum",
+    "description": "Number of rows returned by queries in the database."
+  },
+  "postgresql.tup_updated": {
+    "data_type": "Sum",
+    "description": "Number of rows updated by queries in the database."
+  },
+  "postgresql.wal.age": {
+    "data_type": "Gauge",
+    "description": "Age of the oldest WAL file."
+  },
+  "postgresql.wal.delay": {
+    "data_type": "Gauge",
+    "description": "Time between flushing recent WAL locally and receiving notification that the standby server has completed an operation with it."
+  },
+  "postgresql.wal.lag": {
+    "data_type": "Gauge",
+    "description": "Time between flushing recent WAL locally and receiving notification that the standby server has completed an operation with it."
+  }
+}
\ No newline at end of file
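For a quick look at what the generated file contains (36 metrics at the time of writing), a small inspection sketch, assuming it is run from the repository root:

```python
# Sketch: summarize the generated metrics file.
import json
from collections import Counter

with open("tests/otel_postgres_metrics_e2e/postgres_metrics.json") as f:
    metrics = json.load(f)

print(len(metrics))                                       # 36 entries
print(Counter(m["data_type"] for m in metrics.values()))  # Sum vs. Gauge breakdown
```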
diff --git a/tests/otel_postgres_metrics_e2e/test_postgres_metrics.py b/tests/otel_postgres_metrics_e2e/test_postgres_metrics.py
index 8dbef1ecb60..655ed92cb76 100644
--- a/tests/otel_postgres_metrics_e2e/test_postgres_metrics.py
+++ b/tests/otel_postgres_metrics_e2e/test_postgres_metrics.py
@@ -10,76 +10,36 @@ from utils._context._scenarios.otel_collector import OtelCollectorScenario
 
-# Note that an extra comma was added because there is an inconsistency in the postgres metadata compared to what gets sent
-postgresql_metrics = {
-    # Default metrics
-    "postgresql.connection.max": {
-        "data_type": "Gauge",
-        "description": "Configured maximum number of client connections allowed",
-    },
-    "postgresql.database.count": {"data_type": "Sum", "description": "Number of user databases"},
-    "postgresql.commits": {"data_type": "Sum", "description": "The number of commits"},
-    "postgresql.rollbacks": {"data_type": "Sum", "description": "The number of rollbacks"},
-    "postgresql.db_size": {"data_type": "Sum", "description": "The database disk usage"},
-    "postgresql.table.count": {"data_type": "Sum", "description": "Number of user tables in a database"},
-    # "postgresql.backends": {"data_type": "Sum", "description": "The number of backends"},
-    # "postgresql.bgwriter.buffers.allocated": {"data_type": "Sum", "description": "Number of buffers allocated"},
-    # "postgresql.bgwriter.buffers.writes": {"data_type": "Sum", "description": "Number of buffers written"},
-    # "postgresql.bgwriter.checkpoint.count": {"data_type": "Sum", "description": "The number of checkpoints performed"},
-    # "postgresql.bgwriter.duration": {
-    #     "data_type": "Sum",
-    #     "description": "Total time spent writing and syncing files to disk by checkpoints",
-    # },
-    # "postgresql.bgwriter.maxwritten": {
-    #     "data_type": "Sum",
-    #     "description": "Number of times the background writer stopped a cleaning scan because it had written too many buffers",
-    # },
-    # Optional metrics (enabled in otelcol-config-with-postgres.yaml)
-    # "postgresql.blks_hit": {
-    #     "data_type": "Sum",
-    #     "description": "Number of times disk blocks were found already in the buffer cache",
-    # },
-    # "postgresql.blks_read": {"data_type": "Sum", "description": "Number of disk blocks read in this database"},
-    "postgresql.database.locks": {"data_type": "Gauge", "description": "The number of database locks"},
-    "postgresql.deadlocks": {"data_type": "Sum", "description": "The number of deadlocks"},
-    # "postgresql.temp.io": {
-    #     "data_type": "Sum",
-    #     "description": "Total amount of data written to temporary files by queries",
-    # },
-    "postgresql.temp_files": {"data_type": "Sum", "description": "The number of temp files"},
-    # "postgresql.tup_deleted": {"data_type": "Sum", "description": "Number of rows deleted by queries in the database"},
-    # "postgresql.tup_fetched": {"data_type": "Sum", "description": "Number of rows fetched by queries in the database"},
-    # "postgresql.tup_inserted": {
-    #     "data_type": "Sum",
-    #     "description": "Number of rows inserted by queries in the database",
-    # },
-    # "postgresql.tup_returned": {
-    #     "data_type": "Sum",
-    #     "description": "Number of rows returned by queries in the database",
-    # },
-    # "postgresql.tup_updated": {"data_type": "Sum", "description": "Number of rows updated by queries in the database"},
-    # "postgresql.function.calls": {
-    #     "data_type": "Sum",
-    #     "description": "The number of calls made to a function. Requires `track_functions=pl|all` in Postgres config.",
-    # },
-    "postgresql.sequential_scans": {"data_type": "Sum", "description": "The number of sequential scans"},
-    "postgresql.table.size": {"data_type": "Sum", "description": "Disk space used by a table."},
-    "postgresql.rows": {"data_type": "Sum", "description": "The number of rows in the database"},
-    "postgresql.operations": {"data_type": "Sum", "description": "The number of db row operations"},
-    "postgresql.index.scans": {"data_type": "Sum", "description": "The number of index scans on a table"},
-    "postgresql.index.size": {"data_type": "Gauge", "description": "The size of the index on disk."},
-    "postgresql.blocks_read": {"data_type": "Sum", "description": "The number of blocks read"},
-    "postgresql.table.vacuum.count": {
-        "data_type": "Sum",
-        "description": "Number of times a table has manually been vacuumed",
-    },
-    # Metrics not yet appearing due to needing a replica db
-    # "postgresql.wal.delay": {"data_type": "Gauge", "description": "Time between flushing recent WAL locally and receiving notification that the standby server has completed an operation with it"},
-    # "postgresql.wal.age": {"data_type": "Gauge", "description": "Age of the oldest WAL file"},
-    # "postgresql.replication.data_delay": {"data_type": "Gauge", "description": "The amount of data delayed in replication"},
-    # "postgresql.wal.lag": {"data_type": "Gauge", "description": "Time between flushing recent WAL locally and receiving notification"},
-}
-
+def _read_metrics_file() -> dict:
+    """Load the expected PostgreSQL receiver metrics from postgres_metrics.json.
+
+    TODO: extend this to other metric sources.
+    TODO: parse the OTel Collector config (utils/build/docker/otelcol-config-with-postgres.yaml)
+    to determine dynamically which metrics are enabled under 'receivers.postgresql.metrics',
+    so the exclusions below can be derived from 'enabled: true' instead of being hardcoded.
+    """
+    metrics_file = Path(__file__).parent / "postgres_metrics.json"
+    if not metrics_file.exists():
+        raise FileNotFoundError(f"PostgreSQL metrics file not found: {metrics_file}")
+    with open(metrics_file) as f:
+        metrics = json.load(f)
+
+    # Exclude metrics that require a replica database: they are enabled in the
+    # collector config but will not appear without a replica setup.
+    excluded_metrics = {
+        "postgresql.wal.delay",
+        "postgresql.wal.age",
+        "postgresql.replication.data_delay",
+        "postgresql.wal.lag",
+    }
+
+    return {k: v for k, v in metrics.items() if k not in excluded_metrics}
+
+
+postgresql_metrics = _read_metrics_file()
+
+
 def _get_metrics() -> list[dict]:
     scenario: OtelCollectorScenario = context.scenario  # type: ignore[assignment]
diff --git a/utils/tools/fetch_otel_postgres_metrics.py b/utils/tools/fetch_otel_postgres_metrics.py
new file mode 100644
index 00000000000..51613bb8eca
--- /dev/null
+++ b/utils/tools/fetch_otel_postgres_metrics.py
@@ -0,0 +1,174 @@
+#!/usr/bin/env python3
+"""Fetch PostgreSQL receiver metrics from OpenTelemetry Collector Contrib.
+
+This script:
+1. Fetches metadata.yaml from GitHub
+2. Parses it to extract the metrics
+3. Saves them to postgres_metrics.json in the test directory
+
+Usage:
+    python fetch_otel_postgres_metrics.py
+"""
+import json
+from pathlib import Path
+from typing import Any
+
+import httpx
+import yaml
+
+
+def fetch_postgres_metadata(ref: str = "main") -> dict[str, Any]:
+    """Fetch PostgreSQL receiver metadata from GitHub.
+
+    Args:
+        ref: Git reference (branch, tag, or commit SHA). Defaults to 'main'.
+
+    Returns:
+        Parsed YAML metadata as a dictionary.
+    """
+    url = (
+        f"https://raw.githubusercontent.com/open-telemetry/"
+        f"opentelemetry-collector-contrib/{ref}/"
+        f"receiver/postgresqlreceiver/metadata.yaml"
+    )
+
+    print(f"šŸ“” Fetching from: {url}")
+
+    try:
+        response = httpx.get(url, timeout=30.0, follow_redirects=True)
+        response.raise_for_status()
+        return yaml.safe_load(response.text)
+    except Exception as e:
+        print(f"āŒ Error fetching metadata: {e}")
+        raise
+
+
+def extract_metrics_summary(metadata: dict[str, Any]) -> dict[str, dict[str, str]]:
+    """Extract a metrics summary from the metadata.
+
+    Args:
+        metadata: Full metadata dictionary.
+
+    Returns:
+        Dictionary mapping metric names to {data_type, description}.
+    """
+    metrics = metadata.get("metrics", {})
+    summary = {}
+
+    for name, config in metrics.items():
+        # Determine the metric type (Sum, Gauge, Histogram, ...) from the key present
+        metric_type = "Unknown"
+        if "sum" in config:
+            metric_type = "Sum"
+        elif "gauge" in config:
+            metric_type = "Gauge"
+        elif "histogram" in config:
+            metric_type = "Histogram"
+
+        summary[name] = {
+            "data_type": metric_type,
+            "description": config.get("description", "No description"),
+        }
+
+    return summary
+
+
+def extract_disabled_metrics(metadata: dict[str, Any]) -> list[str]:
+    """Extract the metrics that have enabled: false by default.
+
+    Args:
+        metadata: Full metadata dictionary.
+
+    Returns:
+        Sorted list of metric names that are disabled by default.
+    """
+    metrics = metadata.get("metrics", {})
+    disabled = [name for name, config in metrics.items() if not config.get("enabled", True)]
+    return sorted(disabled)
+
+
+def generate_yaml_config(disabled_metrics: list[str]) -> str:
+    """Generate a YAML configuration snippet enabling the given metrics.
+
+    Args:
+        disabled_metrics: List of metric names that are disabled by default.
+
+    Returns:
+        YAML string to be added to otelcol-config-with-postgres.yaml.
+    """
+    yaml_lines = ["  metrics:"]
+    for metric in disabled_metrics:
+        yaml_lines.append(f"    {metric}:")
+        yaml_lines.append("      enabled: true")
+
+    return "\n".join(yaml_lines)
+
+
+def save_metrics(metrics: dict[str, Any], output_path: Path) -> None:
+    """Save the metrics to a JSON file."""
+    with open(output_path, "w") as f:
+        json.dump(metrics, f, indent=2)
+
+    print(f"šŸ’¾ Saved {len(metrics)} metrics to: {output_path}")
+
+
+def main() -> None:
+    """Fetch the metadata, save the metrics summary, and print the config snippet."""
+    print("=" * 80)
+    print("PostgreSQL Metrics Fetcher")
+    print("=" * 80)
+
+    # Fetch metadata from GitHub
+    metadata = fetch_postgres_metadata(ref="main")
+    print("āœ“ Fetched metadata from OpenTelemetry Collector Contrib")
+
+    # Extract the metrics summary
+    metrics_summary = extract_metrics_summary(metadata)
+    print(f"āœ“ Extracted {len(metrics_summary)} metrics")
+
+    # Save to the test directory
+    script_dir = Path(__file__).parent
+    output_path = script_dir.parent.parent / "tests/otel_postgres_metrics_e2e/postgres_metrics.json"
+    save_metrics(metrics_summary, output_path)
+
+    print("\n" + "=" * 80)
+    print("āœ“ Done! The test file will automatically load this JSON.")
+    print("=" * 80)
+
+    # Show a sample
+    print("\nSample metrics:")
+    for i, (name, info) in enumerate(list(metrics_summary.items())[:3]):
+        print(f"  {name}:")
+        print(f"    data_type: {info['data_type']}")
+        print(f"    description: {info['description']}")
+        if i < 2:
+            print()
+
+    # Generate the YAML config snippet for metrics that are disabled by default
+    print("\n" + "=" * 80)
+    print("Generating YAML config for disabled metrics")
+    print("=" * 80)
+
+    disabled_metrics = extract_disabled_metrics(metadata)
+    print(f"āœ“ Found {len(disabled_metrics)} metrics that are disabled by default")
+
+    yaml_config = generate_yaml_config(disabled_metrics)
+    print("\nšŸ“ Copy this to otelcol-config-with-postgres.yaml:")
+    print("\n" + "-" * 80)
+    print(yaml_config)
+    print("-" * 80)
+
+
+if __name__ == "__main__":
+    main()
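The extraction logic can also be exercised offline against an inline metadata stub (no network needed). A sketch, with `postgresql.demo` as a made-up metric name, assuming the script is importable from `utils/tools`:

```python
# Offline check of the extractors against a hypothetical metadata stub.
from fetch_otel_postgres_metrics import extract_disabled_metrics, extract_metrics_summary

stub = {
    "metrics": {
        "postgresql.demo": {"description": "Demo.", "enabled": False, "sum": {"monotonic": True}},
    }
}
assert extract_metrics_summary(stub) == {"postgresql.demo": {"data_type": "Sum", "description": "Demo."}}
assert extract_disabled_metrics(stub) == ["postgresql.demo"]
```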