Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension

Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
29 changes: 29 additions & 0 deletions tests/otel_postgres_metrics_e2e/disabled_metrics_config.yaml
Original file line number Diff line number Diff line change
@@ -0,0 +1,29 @@
# NOTE(review): this file is named "disabled_metrics_config.yaml", but every
# metric below is `enabled: true` — confirm the filename matches the intent
# (it appears to *enable* optional receiver metrics, not disable them).
metrics:
postgresql.blks_hit:
enabled: true
postgresql.blks_read:
enabled: true
postgresql.database.locks:
enabled: true
postgresql.deadlocks:
enabled: true
postgresql.function.calls:
enabled: true
postgresql.sequential_scans:
enabled: true
postgresql.temp.io:
enabled: true
postgresql.temp_files:
enabled: true
postgresql.tup_deleted:
enabled: true
postgresql.tup_fetched:
enabled: true
postgresql.tup_inserted:
enabled: true
postgresql.tup_returned:
enabled: true
postgresql.tup_updated:
enabled: true
postgresql.wal.delay:
enabled: true
146 changes: 146 additions & 0 deletions tests/otel_postgres_metrics_e2e/postgres_metrics.json
Original file line number Diff line number Diff line change
@@ -0,0 +1,146 @@
{
"postgresql.backends": {
"data_type": "Sum",
"description": "The number of backends."
},
"postgresql.bgwriter.buffers.allocated": {
"data_type": "Sum",
"description": "Number of buffers allocated."
},
"postgresql.bgwriter.buffers.writes": {
"data_type": "Sum",
"description": "Number of buffers written."
},
"postgresql.bgwriter.checkpoint.count": {
"data_type": "Sum",
"description": "The number of checkpoints performed."
},
"postgresql.bgwriter.duration": {
"data_type": "Sum",
"description": "Total time spent writing and syncing files to disk by checkpoints."
},
"postgresql.bgwriter.maxwritten": {
"data_type": "Sum",
"description": "Number of times the background writer stopped a cleaning scan because it had written too many buffers."
},
"postgresql.blks_hit": {
"data_type": "Sum",
"description": "Number of times disk blocks were found already in the buffer cache."
},
"postgresql.blks_read": {
"data_type": "Sum",
"description": "Number of disk blocks read in this database."
},
"postgresql.blocks_read": {
"data_type": "Sum",
"description": "The number of blocks read."
},
"postgresql.commits": {
"data_type": "Sum",
"description": "The number of commits."
},
"postgresql.connection.max": {
"data_type": "Gauge",
"description": "Configured maximum number of client connections allowed"
},
"postgresql.database.count": {
"data_type": "Sum",
"description": "Number of user databases."
},
"postgresql.database.locks": {
"data_type": "Gauge",
"description": "The number of database locks."
},
"postgresql.db_size": {
"data_type": "Sum",
"description": "The database disk usage."
},
"postgresql.deadlocks": {
"data_type": "Sum",
"description": "The number of deadlocks."
},
"postgresql.function.calls": {
"data_type": "Sum",
"description": "The number of calls made to a function. Requires `track_functions=pl|all` in Postgres config."
},
"postgresql.index.scans": {
"data_type": "Sum",
"description": "The number of index scans on a table."
},
"postgresql.index.size": {
"data_type": "Gauge",
"description": "The size of the index on disk."
},
"postgresql.operations": {
"data_type": "Sum",
"description": "The number of db row operations."
},
"postgresql.replication.data_delay": {
"data_type": "Gauge",
"description": "The amount of data delayed in replication."
},
"postgresql.rollbacks": {
"data_type": "Sum",
"description": "The number of rollbacks."
},
"postgresql.rows": {
"data_type": "Sum",
"description": "The number of rows in the database."
},
"postgresql.sequential_scans": {
"data_type": "Sum",
"description": "The number of sequential scans."
},
"postgresql.table.count": {
"data_type": "Sum",
"description": "Number of user tables in a database."
},
"postgresql.table.size": {
"data_type": "Sum",
"description": "Disk space used by a table."
},
"postgresql.table.vacuum.count": {
"data_type": "Sum",
"description": "Number of times a table has manually been vacuumed."
},
"postgresql.temp.io": {
"data_type": "Sum",
"description": "Total amount of data written to temporary files by queries."
},
"postgresql.temp_files": {
"data_type": "Sum",
"description": "The number of temp files."
},
"postgresql.tup_deleted": {
"data_type": "Sum",
"description": "Number of rows deleted by queries in the database."
},
"postgresql.tup_fetched": {
"data_type": "Sum",
"description": "Number of rows fetched by queries in the database."
},
"postgresql.tup_inserted": {
"data_type": "Sum",
"description": "Number of rows inserted by queries in the database."
},
"postgresql.tup_returned": {
"data_type": "Sum",
"description": "Number of rows returned by queries in the database."
},
"postgresql.tup_updated": {
"data_type": "Sum",
"description": "Number of rows updated by queries in the database."
},
"postgresql.wal.age": {
"data_type": "Gauge",
"description": "Age of the oldest WAL file."
},
"postgresql.wal.delay": {
"data_type": "Gauge",
"description": "Time between flushing recent WAL locally and receiving notification that the standby server has completed an operation with it."
},
"postgresql.wal.lag": {
"data_type": "Gauge",
"description": "Time between flushing recent WAL locally and receiving notification that the standby server has completed an operation with it."
}
}
167 changes: 97 additions & 70 deletions tests/otel_postgres_metrics_e2e/test_postgres_metrics.py
Original file line number Diff line number Diff line change
Expand Up @@ -10,76 +10,103 @@
from utils._context._scenarios.otel_collector import OtelCollectorScenario


# Expected metric metadata now lives in postgres_metrics.json and is loaded by
# _read_metrics_file() below. The hardcoded `postgresql_metrics` dict that used
# to be defined here was dead code — it was unconditionally overwritten by the
# file-based loader at module import time — so it, and its fully commented-out
# duplicate, have been removed.

def _read_metrics_file() -> dict:
"""Load PostgreSQL metrics from OpenTelemetry.

TODO: extend this to other types of metrics
TODO: Parse the OTel collector config file (utils/build/docker/otelcol-config-with-postgres.yaml)
to dynamically determine which metrics are enabled via the 'metrics' section under
'receivers.postgresql'. This would allow filtering metrics based on 'enabled: true'
instead of hardcoding exclusions.
"""
metrics_file = Path(__file__).parent / "postgres_metrics.json"
if not metrics_file.exists():
raise FileNotFoundError(f"PostgreSQL metrics file not found: {metrics_file}")
with open(metrics_file) as f:
metrics = json.load(f)

# Exclude metrics that require a replica database
# These metrics are enabled in the config but won't appear without a replica setup
excluded_metrics = {
"postgresql.wal.delay",
"postgresql.wal.age",
"postgresql.replication.data_delay",
"postgresql.wal.lag",
}

return {k: v for k, v in metrics.items() if k not in excluded_metrics}

# Expected PostgreSQL metrics for the assertions below, loaded once at import
# time from postgres_metrics.json (replica-only WAL/replication metrics are
# excluded — see _read_metrics_file).
postgresql_metrics = _read_metrics_file()

def _get_metrics() -> list[dict]:
scenario: OtelCollectorScenario = context.scenario # type: ignore[assignment]
Expand Down
Loading
Loading