diff --git a/docs/how-tos/chain-events/configuration.md b/docs/how-tos/chain-events/configuration.md new file mode 100644 index 0000000..5de71ea --- /dev/null +++ b/docs/how-tos/chain-events/configuration.md @@ -0,0 +1,151 @@ +--- +id: configuration +title: Configuration Reference +sidebar_label: Configuration +sidebar_position: 3 +--- + +# Configuration Reference + +The container is configured via two sources: + +| Source | Purpose | +|---|---| +| `config.json` (volume-mounted) | Chain definitions, webhook URL, server settings | +| Environment variables (`.env` or `-e` flags) | Secrets and deployment-specific overrides | + +`${ENV_VAR}` placeholders inside `config.json` values are interpolated from the process environment at startup — use this to keep secrets out of the file. + +```json +{ + "chains": [...], + "webhook": { + "url": "https://my-system.example.com/events", + "headers": { + "Authorization": "Bearer ${WEBHOOK_API_TOKEN}" + } + } +} +``` + +--- + +## Chain Fields + +Each entry in the `chains` array configures one blockchain connection. + +| Field | Required | Default | Description | +|---|---|---|---| +| `chainKey` | **Yes** | — | Identifies the chain — see [Supported Chains](/docs/how-tos/chain-events/overview#supported-chains) | +| `rpcUrl` | **Yes** | — | WebSocket (`wss://`, `ws://`) or HTTP (`https://`, `http://`) RPC endpoint | +| `registryAddresses` | No | `[]` | EVM addresses of Token Registries to watch; can be managed at runtime via the [Registry API](./registry-api) | +| `replayFromBlock` | No | `0` | Block number where your registry was deployed — replay starts here on first run | +| `replayBatchSize` | No | `2000` | Max blocks per `eth_getLogs` batch — lower this (e.g. 
`500`) on free-tier RPCs with rate limits | +| `replayDelayMs` | No | `0` | Delay between replay batches in ms — add `500`–`1000` on free-tier RPCs | +| `confirmations` | No | `1` | Blocks to wait before delivery (max `12`) — increase to reduce reorg risk on faster chains | +| `pollIntervalMs` | No | chain default | Polling interval for HTTP-transport chains (`stability`, `astron`) — omit for WebSocket chains | + +:::tip WebSocket vs HTTP RPCs +WebSocket URLs (`wss://`) are used for event subscriptions on Ethereum, Polygon, and XDC — they receive new blocks in real time. Stability and Astron use HTTP polling (`https://`) because they do not support WebSocket subscriptions. +::: + +--- + +## Webhook Fields + +| Field | Required | Default | Description | +|---|---|---|---| +| `url` | **Yes** | — | Your downstream HTTP endpoint (must accept POST) | +| `timeoutMs` | No | `10000` | Per-attempt timeout in ms | +| `retryAttempts` | No | `3` | Retries on delivery failure (max `10`) | +| `retryBackoffMs` | No | `1000` | Base backoff in ms — doubles each attempt | +| `headers` | No | none | Extra headers on every delivery (e.g. 
`Authorization`, `X-Api-Key`) | +| `maxConcurrentDeliveries` | No | `10` | Max in-flight POSTs at the same time | +| `maxQueueSize` | No | `10000` | In-memory event buffer — events beyond this are logged and dropped | + +--- + +## Server Fields + +| Field | Required | Default | Description | +|---|---|---|---| +| `port` | No | `8080` | Port for the health check and Registry API | +| `host` | No | `0.0.0.0` | Keep `0.0.0.0` when running inside Docker | +| `workerProcesses` | No | `true` | Spawn each chain in its own OS process for fault isolation — one chain crashing does not affect others | +| `logLevel` | No | `info` | `trace` / `debug` / `info` / `warn` / `error` / `fatal` | + +--- + +## Environment Variables + +| Variable | Required | Description | +|---|---|---| +| `SIGNING_PRIVATE_KEY` | **Yes** | Raw 32-byte Ed25519 seed, base64-encoded — see [Quick Start](./quick-start#step-2--generate-a-signing-key) | +| `CONFIG_PATH` | No | Path to config file inside the container (default: `/app/config.json`) | +| `DB_HOST` | No | Database hostname — enables state persistence and distributed leasing | +| `DB_DIALECT` | No | Database type: `postgres` (default), `mysql`, `mariadb`, or `mssql` | +| `DB_PORT` | No | Database port — defaults to `5432` (postgres), `3306` (mysql/mariadb), `1433` (mssql) | +| `DB_NAME` | No | Database name (default: `trustvc_events`) | +| `DB_USER` | No | Database username (default: `postgres`) | +| `DB_PASSWORD` | No | Database password | +| `DB_POOL_MAX` | No | Connection pool max (default: `5`) | +| `DB_LEASE_TTL_MS` | No | Distributed lease TTL in ms for HA deployments (default: `30000`) | +| `OTEL_ENABLED` | No | Set `true` to enable OpenTelemetry traces and metrics | +| `OTEL_EXPORTER_OTLP_ENDPOINT` | No | OTLP collector endpoint (default: `http://localhost:4318`) | +| `OTEL_SERVICE_NAME` | No | Service name reported in telemetry (default: `trustvc-webhook-events`) | + +--- + +## Full `config.json` Example + +```json +{ + "chains": [ + { + 
"chainKey": "ethereum-sepolia", + "rpcUrl": "wss://eth-sepolia.g.alchemy.com/v2/${ALCHEMY_API_KEY}", + "registryAddresses": [ + "0xe6b5ce7E3691a0927b2806CE6638b35237DFfAc4" + ], + "replayFromBlock": 10896377, + "replayBatchSize": 10000, + "replayDelayMs": 500, + "confirmations": 1 + }, + { + "chainKey": "stability", + "rpcUrl": "https://rpc.stabilityprotocol.com/zgt/${STABILITY_API_KEY}", + "registryAddresses": [ + "0xCB524ba5D1C39f86d87af20B180c01aeD4517DcB" + ], + "pollIntervalMs": 10000, + "replayFromBlock": 35000000, + "replayBatchSize": 5000, + "replayDelayMs": 5000, + "confirmations": 1 + }, + { + "chainKey": "polygon-amoy", + "rpcUrl": "wss://polygon-amoy-bor-rpc.publicnode.com", + "registryAddresses": [], + "replayFromBlock": 39173608, + "replayBatchSize": 5000 + } + ], + "webhook": { + "url": "https://your-system.example.com/trustvc/events", + "timeoutMs": 10000, + "retryAttempts": 3, + "retryBackoffMs": 1000, + "headers": { + "Authorization": "Bearer ${WEBHOOK_SECRET}" + } + }, + "server": { + "port": 8080, + "host": "0.0.0.0", + "workerProcesses": true, + "logLevel": "info" + } +} +``` diff --git a/docs/how-tos/chain-events/opentelemetry.md b/docs/how-tos/chain-events/opentelemetry.md new file mode 100644 index 0000000..8b8165d --- /dev/null +++ b/docs/how-tos/chain-events/opentelemetry.md @@ -0,0 +1,170 @@ +--- +id: opentelemetry +title: OpenTelemetry +sidebar_label: OpenTelemetry +sidebar_position: 7 +--- + +# OpenTelemetry + +`trustvc-chain-events` can export traces and metrics to any [OpenTelemetry](https://opentelemetry.io/)-compatible backend. Point it at your existing OTLP endpoint using environment variables — no changes to `config.json` are required. + +When `OTEL_ENABLED` is not set, all telemetry operations are no-ops with zero overhead. 
+ +--- + +## Configuration + +Add these to your `.env`: + +```bash +OTEL_ENABLED=true +OTEL_SERVICE_NAME=trustvc-chain-events +OTEL_EXPORTER_OTLP_ENDPOINT=https://your-otlp-endpoint +``` + +### Optional Variables + +| Variable | Default | Description | +|---|---|---| +| `OTEL_EXPORTER_OTLP_HEADERS` | — | Auth headers required by your backend (see examples below) | +| `OTEL_INSTANCE_ID` | `-` | Custom instance identifier shown in metrics labels | +| `OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION` | — | Set to `explicit_bucket_histogram` for Prometheus-compatible histograms | + +--- + +## Backend Examples + +### Grafana Cloud + +```bash +OTEL_ENABLED=true +OTEL_SERVICE_NAME=trustvc-chain-events +OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp-gateway-prod-us-central-0.grafana.net/otlp +OTEL_EXPORTER_OTLP_HEADERS=Authorization=Basic YOUR_BASE64_ENCODED_CREDENTIALS +OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION=explicit_bucket_histogram +``` + +### Datadog + +```bash +OTEL_ENABLED=true +OTEL_SERVICE_NAME=trustvc-chain-events +OTEL_EXPORTER_OTLP_ENDPOINT=https://api.datadoghq.com/api/intake/otlp/v1/traces +OTEL_EXPORTER_OTLP_HEADERS=DD-API-KEY=YOUR_DATADOG_API_KEY +``` + +### New Relic + +```bash +OTEL_ENABLED=true +OTEL_SERVICE_NAME=trustvc-chain-events +OTEL_EXPORTER_OTLP_ENDPOINT=https://otlp.nr-data.net +OTEL_EXPORTER_OTLP_HEADERS=api-key=YOUR_NEW_RELIC_LICENSE_KEY +OTEL_EXPORTER_OTLP_METRICS_DEFAULT_HISTOGRAM_AGGREGATION=explicit_bucket_histogram +``` + +### Self-hosted OTLP Collector + +```bash +OTEL_ENABLED=true +OTEL_SERVICE_NAME=trustvc-chain-events +OTEL_EXPORTER_OTLP_ENDPOINT=http://your-collector-host:4318 +``` + +--- + +## Emitted Metrics + +Metrics are exported on a 15-second interval. Prometheus-compatible backends receive them with dots replaced by underscores (e.g. `trustvc.instance.health` → `trustvc_instance_health`). 
+ +### Instance Metrics + +| Metric | Type | Description | +|---|---|---| +| `trustvc.instance.health` | Gauge | `1` = ok/starting, `0` = degraded (at least one chain permanently failed) | +| `trustvc.instance.uptime_seconds` | Gauge | Process uptime in seconds | +| `trustvc.instance.active_chains` | Gauge | Number of chains currently running | +| `trustvc.instance.active_workers` | Gauge | Active child worker processes (`0` when `workerProcesses: false`) | +| `trustvc.instance.total_escrows` | Gauge | Total active TitleEscrow subscriptions across all chains | + +### Chain Metrics + +Labels: `chain`, `transport` + +| Metric | Type | Description | +|---|---|---| +| `trustvc.chain.connected` | Gauge | `1` = RPC connected, `0` = not connected | +| `trustvc.chain.last_seen_block` | Gauge | Latest block number observed | +| `trustvc.chain.active_escrows` | Gauge | Active TitleEscrow subscriptions on this chain | +| `trustvc.chain.reconnect_attempts` | Gauge | Cumulative RPC reconnection attempts | +| `trustvc.chain.events_received` | Counter | On-chain ETR events detected — labels: `chain`, `event_type` | +| `trustvc.chain.state_changes` | Counter | RPC provider state transitions — labels: `chain`, `from_status`, `to_status` | +| `trustvc.rpc.connects` | Counter | Successful RPC connections — labels: `chain`, `transport` | +| `trustvc.rpc.disconnects` | Counter | RPC disconnections — labels: `chain` | + +### Webhook Metrics + +| Metric | Type | Description | +|---|---|---| +| `trustvc.webhook.delivered` | Counter | Successful deliveries — label: `event_type` | +| `trustvc.webhook.failed` | Counter | Deliveries failed after all retries or dropped (queue full) — label: `event_type` | +| `trustvc.webhook.delivery_duration_ms` | Histogram | End-to-end delivery duration including retries — labels: `event_type`, `success` | +| `trustvc.webhook.queue_depth` | Gauge | Events currently waiting in the delivery queue | + +--- + +## Emitted Traces + +| Span | Description | 
Attributes | +|---|---|---| +| `deliver {event.type}` | Top-level span covering one webhook delivery, including all of its attempts | `event.id`, `event.type`, `event.source`, `webhook.url`, `delivery.attempts` | +| `webhook attempt {n}` | Child span for each individual HTTP attempt (the first try and every retry) | `http.attempt`, `http.url`, `http.status_code` | +| `chain.status_changed` | Emitted when the RPC provider state changes | `chain`, `transport`, `from_status`, `to_status`, `instance` | + +--- + +## Grafana Dashboards + +Two pre-built Grafana dashboards are available. Both use a Prometheus data source and can be imported directly into your Grafana instance. + +**To import either dashboard:** + +1. In Grafana, go to **Dashboards → Import** +2. Upload the downloaded JSON file +3. Select your Prometheus data source when prompted +4. Click **Import** + +--- + +### Dashboard 1 — Webhook Events + +Focused on day-to-day operational health: is my webhook delivering events? How fast? Are chains connected and tracking escrows? + +Download Webhook Events Dashboard + +

+ +| Section | Panels | +|---|---| +| **Overview** | Uptime, chains connected, active escrows, total delivered, total failed, queue depth | +| **Webhook Delivery** | Delivery rate per minute, p50/p95/p99 delivery duration, queue depth over time | +| **Chain Status** | Chain status table, active escrows per chain, latest block per chain, escrow replay duration (from traces) | +| **On-Chain Events** | Events detected by type per minute | + +--- + +### Dashboard 2 — Fleet & Chain Health + +Focused on infrastructure health across multiple instances: useful when running more than one container in a high-availability setup. Shows which instances are healthy, which chains are connected, and how state is distributed across the fleet. + +Download Fleet & Chain Health Dashboard + +

+ +| Section | Panels | +|---|---| +| **Fleet Overview** | Active instances, healthy instances, degraded instances, total active chains, total escrows, active worker processes | +| **Instance Health** | Instance status per host, instance uptime | +| **Chain Connectivity** | RPC connection status per chain, reconnect attempts, state transition rate, state transition counts | +| **Chain Activity** | Active escrows per chain, last seen block per chain | diff --git a/docs/how-tos/chain-events/overview.md b/docs/how-tos/chain-events/overview.md new file mode 100644 index 0000000..e070608 --- /dev/null +++ b/docs/how-tos/chain-events/overview.md @@ -0,0 +1,61 @@ +--- +id: overview +title: Chain Events — ETR Webhook Sidecar +sidebar_label: Overview +sidebar_position: 1 +--- + +# Chain Events — ETR Webhook Sidecar + +`trustvc-chain-events` is a self-hosted Docker container that watches your Token Registry contracts on-chain and delivers every ETR lifecycle event — mint, transfer, surrender, burn — to your system as a signed HTTP webhook within seconds of chain finality. + +## How It Works + +![TrustVC Secure Webhook Event Flow](/docs/etr-listener/how-it-works.png) + +Each event arrives as a **[CloudEvents 1.0](https://cloudevents.io/)** JSON payload with an `X-TrustVC-Signature` header you can verify independently. 
+ +## Why Self-Hosted + +| Concern | Sidecar approach | +|---|---| +| Data sovereignty | Events never leave your network | +| Provider flexibility | Use your own Alchemy / QuickNode RPC | +| Compliance | Runs in a private VPC — no outbound except to your RPC and webhook | +| Isolation | Each deployment is fully independent | +| Availability | Decoupled from TrustVC infrastructure | + +## Supported Chains + +| `chainKey` | Network | Transport | +|---|---|---| +| `ethereum` | Ethereum Mainnet | WebSocket | +| `ethereum-sepolia` | Ethereum Sepolia | WebSocket | +| `polygon` | Polygon Mainnet | WebSocket | +| `polygon-amoy` | Polygon Amoy | WebSocket | +| `xdc` | XDC Network | WebSocket | +| `xdc-apothem` | XDC Apothem | WebSocket | +| `stability` | Stability Mainnet | HTTP polling | +| `stability-testnet` | Stability Testnet | HTTP polling | +| `astron` | Astron Mainnet | HTTP polling | +| `astron-testnet` | Astron Testnet | HTTP polling | + +Actual delivery timing depends on your `confirmations` setting in `config.json` — events are held until that many blocks have passed after the transaction. For example, with `"confirmations": 3` on Ethereum, delivery takes roughly 36 seconds (3 × 12 sec). + +## Prerequisites + +- A container runtime or managed service — Docker, AWS ECS, EC2, or any environment that can run a container image +- Access to an RPC endpoint for each chain you want to watch (WebSocket for Ethereum, Polygon, and XDC; HTTP for Stability and Astron) +- A database — PostgreSQL, MySQL, MariaDB, or MSSQL (optional but recommended — enables state persistence and hot restarts) +- An HTTP endpoint on your system that can receive POST requests + +:::tip No database? +The container runs without a database using in-memory state. That state is lost when the container restarts, so it replays history from `replayFromBlock` again rather than resuming where it left off. For production use, connect a database so state survives restarts. +::: + +## Next Steps + +1. [Quick Start](./quick-start) — get running in under 5 minutes +2. 
[Configuration Reference](./configuration) — all `config.json` and environment variable options +3. [Webhook Payload & Verification](./webhook-payload) — event schema and signature verification +4. [Registry API](./registry-api) — add and remove registries at runtime diff --git a/docs/how-tos/chain-events/quick-start.md b/docs/how-tos/chain-events/quick-start.md new file mode 100644 index 0000000..8947620 --- /dev/null +++ b/docs/how-tos/chain-events/quick-start.md @@ -0,0 +1,183 @@ +--- +id: quick-start +title: Quick Start +sidebar_label: Quick Start +sidebar_position: 2 +--- + +# Quick Start + +Get `trustvc-chain-events` running locally in under 5 minutes. + +--- + +## Step 1 — Pull the Image + +**Docker Hub (recommended)** + +```bash +docker pull trustvc/trustvc-chain-events:latest +``` + +**GitHub Container Registry (alternative)** + +```bash +docker pull ghcr.io/trustvc/trustvc-chain-events:latest +``` + +--- + +## Step 2 — Generate a Signing Key + +The container signs every webhook delivery with an **Ed25519** key. You keep the private key; your receiver uses the public key to verify payloads. + +**Option A — random seed (simplest)** + +```bash +openssl rand -base64 32 +# example output: cZejchTTcxHUk8N+sbcOyVHZ3MVxzYQGYDCn+hFa4S4= +# Paste this value as SIGNING_PRIVATE_KEY in your .env +``` + +**Option B — PEM key pair (keep the public key for verification)** + +```bash +openssl genpkey -algorithm ed25519 -out private.pem +openssl pkey -in private.pem -pubout -out public.pem + +# Extract the 32-byte seed the container expects: +openssl pkey -in private.pem -outform DER | tail -c 32 | base64 +# Paste this output as SIGNING_PRIVATE_KEY in your .env +``` + +:::important +`SIGNING_PRIVATE_KEY` must be the **raw 32-byte Ed25519 seed encoded as base64** — not the PEM file itself. Use the extraction command above if you generated a PEM key. +::: + +--- + +## Step 3 — Create `config.json` + +Create a `config.json` in your working directory. 
At a minimum you need a chain, an RPC URL, and a webhook URL. + +Download config.json example + +

+ +```json +{ + "chains": [ + { + "chainKey": "ethereum-sepolia", + "rpcUrl": "wss://eth-sepolia.g.alchemy.com/v2/YOUR_API_KEY", + "registryAddresses": ["0xYourTokenRegistryAddress"], + "replayFromBlock": 6000000 + } + ], + "webhook": { + "url": "https://your-system.example.com/trustvc/events" + } +} +``` + +:::tip Finding your `replayFromBlock` +Set this to the block number at which your Token Registry was deployed. The container replays all events from that block on first start to catch up. Using `0` works but will scan the entire chain history. +::: + +You can leave `registryAddresses` as an empty array `[]` and add them later via the [Registry API](./registry-api) — useful when you do not know addresses at deploy time. + +--- + +## Step 4 — Create `.env` + +```bash +# .env +# Leave the value unquoted — `docker run --env-file` passes quote characters through as part of the value +SIGNING_PRIVATE_KEY=cZejchTTcxHUk8N+sbcOyVHZ3MVxzYQGYDCn+hFa4S4= + +# Optional — PostgreSQL for state persistence +DB_HOST=localhost +DB_PORT=5432 +DB_NAME=trustvcevents +DB_USER=postgres +DB_PASSWORD=secret +``` + +--- + +## Step 5 — Run + +**With an env file (recommended)** + +```bash +docker run -d \ + -v $(pwd)/config.json:/app/config.json:ro \ + --env-file .env \ + -p 8080:8080 \ + --name trustvc-events \ + trustvc/trustvc-chain-events:latest +``` + +**With Docker Compose** + +```yaml title="docker-compose.yml" +services: + trustvc-events: + image: trustvc/trustvc-chain-events:latest + ports: + - "8080:8080" + volumes: + - ./config.json:/app/config.json:ro + env_file: + - .env + restart: unless-stopped +``` + +```bash +docker compose up -d +``` + +--- + +## Step 6 — Verify + +```bash +curl http://localhost:8080/health +``` + +```json +{"status":"ok"} +``` + +Check the logs to confirm chains are connected and escrows are loaded: + +```bash +docker logs trustvc-events +``` + +You should see output similar to: + +```text +INFO [startup]: trustvc-webhook-events starting version: "0.1.0" +INFO [startup]: Database connected +INFO [startup]: Chain worker ready chain: "ethereum-sepolia" escrows: 
22 +INFO [startup]: ✓ Server ready — listening for on-chain events + chains: "ethereum-sepolia (22 escrows)" + webhook: "https://your-system.example.com/trustvc/events" +``` + +:::note Chain worker shows 0 escrows? +If you see `escrows: 0` in the "Chain worker ready" line but the final "Server ready" line shows the correct count, this is a display-only race condition in startup logging — the container is working correctly. The definitive count is always in the final summary. +::: + +--- + +## What Happens Next + +Once running, the container: + +1. **Replays history** — scans from `replayFromBlock` to the current block to catch any events you missed +2. **Subscribes to live blocks** — uses WebSocket subscriptions (or polling for HTTP-transport chains) to receive new events in real time +3. **Signs and delivers** — each event is signed with your Ed25519 key and POSTed to your webhook URL +4. **Retries on failure** — uses exponential backoff (configurable via `retryAttempts` and `retryBackoffMs`) + +See [Webhook Payload & Verification](./webhook-payload) for the full event schema and how to verify signatures on your receiver. diff --git a/docs/how-tos/chain-events/rate-limits.md b/docs/how-tos/chain-events/rate-limits.md new file mode 100644 index 0000000..93d39c0 --- /dev/null +++ b/docs/how-tos/chain-events/rate-limits.md @@ -0,0 +1,148 @@ +--- +id: rate-limits +title: Avoiding RPC Rate Limits +sidebar_label: Rate Limits +sidebar_position: 6 +--- + +# Avoiding RPC Rate Limits + +When the container first starts it replays historical events by scanning blocks in batches using `eth_getLogs`. Free-tier and public RPC endpoints enforce strict rate limits — too many requests too fast will result in `429 Too Many Requests` errors and missed events. + +The following config fields let you tune replay speed to stay within your RPC's limits. 
+ +--- + +## Key Fields + +| Field | Default | What it controls | +|---|---|---| +| `replayBatchSize` | `2000` | Max number of blocks scanned per `eth_getLogs` call | +| `replayDelayMs` | `0` | Pause between each batch in milliseconds | +| `confirmations` | `1` | Blocks to wait after a transaction before delivering the event | +| `pollIntervalMs` | chain default | How often HTTP-polling chains (Stability, Astron) check for new blocks | + +--- + +## How Replay Works + +On startup the container scans from `replayFromBlock` to the current block in chunks of `replayBatchSize`: + +```text +Block 6,000,000 ──► [batch 1: 6,000,000 – 6,002,000] ──wait replayDelayMs──► + [batch 2: 6,002,000 – 6,004,000] ──wait replayDelayMs──► + ... + [current block] ──► switch to live subscription +``` + +Reducing `replayBatchSize` means more requests, each covering fewer blocks. Adding `replayDelayMs` spaces them out so you don't burst past the RPC's per-second limit. + +--- + +## Recommended Settings by RPC Tier + +### Public / free-tier RPCs + +Free public endpoints (e.g. `publicnode.com`, Infura free) typically allow 5–10 requests/second and cap `eth_getLogs` at 2,000 blocks per call. + +```json +{ + "chainKey": "ethereum-sepolia", + "rpcUrl": "wss://ethereum-sepolia-rpc.publicnode.com", + "replayFromBlock": 6000000, + "replayBatchSize": 500, + "replayDelayMs": 1000, + "confirmations": 1 +} +``` + +### Paid RPC (e.g. Alchemy Growth / QuickNode) + +Paid plans support much larger batch sizes and higher throughput. + +```json +{ + "chainKey": "ethereum-sepolia", + "rpcUrl": "wss://eth-sepolia.g.alchemy.com/v2/${ALCHEMY_API_KEY}", + "replayFromBlock": 6000000, + "replayBatchSize": 10000, + "replayDelayMs": 100, + "confirmations": 1 +} +``` + +### HTTP-polling chains (Stability, Astron) + +These chains use polling instead of WebSocket subscriptions. `pollIntervalMs` controls how often new blocks are checked — set it no lower than your RPC's minimum polling window. 
+ +```json +{ + "chainKey": "stability", + "rpcUrl": "https://rpc.stabilityprotocol.com/zgt/${STABILITY_API_KEY}", + "replayFromBlock": 35000000, + "replayBatchSize": 5000, + "replayDelayMs": 5000, + "pollIntervalMs": 10000, + "confirmations": 1 +} +``` + +--- + +## Diagnosing Rate Limit Errors + +Check logs for these patterns: + +| Log message | Cause | Fix | +|---|---|---| +| `missing response` / `timeout` | Batch too large, RPC dropped the connection | Lower `replayBatchSize` | +| `rate limit exceeded` / `429` | Too many requests per second | Increase `replayDelayMs` | +| `could not decode result data` | Wrong registry address, or the RPC returned an empty response | Verify `registryAddresses` and `rpcUrl` | + +Enable debug logging to see each batch request by setting `logLevel` to `debug` in the `server` section of `config.json`: + +```json +{ + "server": { + "logLevel": "debug" + } +} +``` + +--- + +## Full Example — Multi-Chain with Conservative Rate Limits + +```json +{ + "chains": [ + { + "chainKey": "ethereum-sepolia", + "rpcUrl": "wss://ethereum-sepolia-rpc.publicnode.com", + "registryAddresses": ["0xYourRegistryAddress"], + "replayFromBlock": 6000000, + "replayBatchSize": 500, + "replayDelayMs": 1000, + "confirmations": 1 + }, + { + "chainKey": "polygon-amoy", + "rpcUrl": "wss://polygon-amoy-bor-rpc.publicnode.com", + "registryAddresses": [], + "replayFromBlock": 39000000, + "replayBatchSize": 1000, + "replayDelayMs": 500, + "confirmations": 1 + }, + { + "chainKey": "stability", + "rpcUrl": "https://rpc.stabilityprotocol.com/zgt/${STABILITY_API_KEY}", + "registryAddresses": [], + "replayFromBlock": 35000000, + "replayBatchSize": 5000, + "replayDelayMs": 5000, + "pollIntervalMs": 10000, + "confirmations": 1 + } + ], + "webhook": { + "url": "https://your-system.example.com/trustvc/events" + }, + "server": { + "logLevel": "info" + } +} +``` diff --git a/docs/how-tos/chain-events/registry-api.md b/docs/how-tos/chain-events/registry-api.md new file mode 100644 index 0000000..7efd6ff --- /dev/null +++ b/docs/how-tos/chain-events/registry-api.md @@ -0,0 +1,158 @@ +--- +id: registry-api +title: Registry API 
+sidebar_label: Registry API +sidebar_position: 5 +--- + +# Registry API + +The container exposes a REST API on port `8080` that lets you add and remove Token Registry contracts at runtime — without restarting the container or editing `config.json`. + +:::warning Database required +All registry management endpoints require `DB_HOST` to be configured. They return `503 Service Unavailable` if no database is connected. Registries added via the API are persisted to the database and survive container restarts. +::: + +--- + +## Health Check + +```bash +GET /health +``` + +```bash +curl http://localhost:8080/health +``` + +```json +{"status":"ok"} +``` + +| `status` | Meaning | HTTP | +|---|---|---| +| `ok` | All chains connected and ready | `200` | +| `starting` | At least one chain still connecting | `200` | +| `degraded` | At least one chain permanently failed | `503` | + +--- + +## Add a Registry + +```bash +POST /registry +Content-Type: application/json +``` + +```bash +curl -X POST http://localhost:8080/registry \ + -H 'Content-Type: application/json' \ + -d '{ + "chainKey": "ethereum-sepolia", + "address": "0xYourTokenRegistryAddress", + "fromBlock": 6000000 + }' +``` + +### Request Body + +| Field | Required | Description | +|---|---|---| +| `chainKey` | **Yes** | Must match a `chainKey` in your running `config.json` | +| `address` | **Yes** | EVM address of the Token Registry contract | +| `fromBlock` | No | Block to replay history from (default: `0`) — set to your registry's deployment block | + +### Responses + +| HTTP | Meaning | +|---|---| +| `200` | Registry added and syncing — historical events are replaying in the background | +| `400` | Missing or invalid fields | +| `422` | Address is not a deployed TrustVC registry on that chain | +| `503` | Database not configured | + +:::tip Set `fromBlock` accurately +If you omit `fromBlock` or pass `0`, the container scans from the chain genesis — this can take a long time. 
Always pass the block number when your registry was deployed. +::: + +--- + +## List Registries + +```bash +GET /registries +``` + +```bash +curl http://localhost:8080/registries +``` + +```json +[ + { + "chainKey": "ethereum-sepolia", + "address": "0xe6b5ce7e3691a0927b2806ce6638b35237dffac4", + "fromBlock": 6000000, + "addedAt": "2024-01-15T10:00:00.000Z" + } +] +``` + +--- + +## Remove a Registry + +```bash +DELETE /registry/:chainKey/:address +``` + +```bash +curl -X DELETE \ + http://localhost:8080/registry/ethereum-sepolia/0xe6b5ce7e3691a0927b2806ce6638b35237dffac4 +``` + +| HTTP | Meaning | +|---|---| +| `200` | Registry removed — no further events will be delivered for this registry | +| `404` | Registry not found | +| `503` | Database not configured | + +--- + +## Workflow Example + +A common pattern is to start the container with an empty `registryAddresses: []` in `config.json`, then add registries as they are deployed. + +**Step 1 — Start the container** + +```bash +docker run -d \ + -v $(pwd)/config.json:/app/config.json:ro \ + --env-file .env \ + -p 8080:8080 \ + trustvc/trustvc-chain-events:latest +``` + +**Step 2 — Deploy your Token Registry** + +Deploy your Token Registry contract and note the contract address and deployment block number. + +**Step 3 — Register it** + +```bash +curl -X POST http://localhost:8080/registry \ + -H 'Content-Type: application/json' \ + -d '{ + "chainKey": "ethereum-sepolia", + "address": "0xYourRegistryAddress", + "fromBlock": 6000000 + }' +``` + +Events start flowing immediately once the registry is added. 
+ +**Step 4 — Confirm it is active** + +```bash +curl http://localhost:8080/registries +``` diff --git a/docs/how-tos/chain-events/scaling.md b/docs/how-tos/chain-events/scaling.md new file mode 100644 index 0000000..7c93135 --- /dev/null +++ b/docs/how-tos/chain-events/scaling.md @@ -0,0 +1,176 @@ +--- +id: scaling +title: Scaling & High Availability +sidebar_label: Scaling & HA +sidebar_position: 8 +--- + +# Scaling & High Availability + +--- + +## Single Instance (Default) + +By default, each chain runs in its own child process (`workerProcesses: true` in config). This means one chain crashing or losing its RPC connection does not affect the others. For most deployments a single container is sufficient. + +```json +{ + "server": { + "workerProcesses": true + } +} +``` + +--- + +## Horizontal Scaling + +:::warning A database is required for scaling and HA + +Running more than one replica **without a database will cause duplicate events** — every replica polls the chain independently, so your webhook endpoint receives multiple copies of the same event. + +The database solves this in two ways: + +- **One replica owns each chain at a time** — replicas compete for a lease stored in the database. Only the winner polls; the others wait on standby. +- **Progress survives restarts** — the last processed block is persisted, so a restarting replica resumes exactly where it left off instead of replaying from scratch. + +**Set `DB_HOST` before running more than one container.** + +::: + +To run multiple instances of the container in parallel — for redundancy or higher throughput — you must connect a database. The container uses a **distributed lease** mechanism (one active worker per chain at a time) to prevent duplicate event delivery when multiple instances are running. 
+ +```bash +# Required for horizontal scaling +DB_HOST=your-postgres-or-mysql-host +DB_LEASE_TTL_MS=30000 # how long a lease is held before another instance can take over +``` + +If the active instance crashes or loses its lease, another instance picks it up within `DB_LEASE_TTL_MS` milliseconds. + +--- + +## Docker Compose — 2 Replicas + +```yaml +services: + trustvc-events: + image: trustvc/trustvc-chain-events:latest + deploy: + replicas: 2 + ports: + - "8080-8081:8080" + volumes: + - ./config.json:/app/config.json:ro + env_file: + - .env + restart: unless-stopped + + postgres: + image: postgres:16-alpine + environment: + POSTGRES_DB: trustvcevents + POSTGRES_USER: trustvc + POSTGRES_PASSWORD: secret + volumes: + - pgdata:/var/lib/postgresql/data + restart: unless-stopped + +volumes: + pgdata: +``` + +**`.env`** + +```bash +SIGNING_PRIVATE_KEY="your-base64-seed" + +DB_HOST=postgres +DB_PORT=5432 +DB_NAME=trustvcevents +DB_USER=trustvc +DB_PASSWORD=secret +DB_LEASE_TTL_MS=30000 +``` + +--- + +## AWS ECS — Fargate + +For production deployments on AWS, run the container as a Fargate service. The task definition below mirrors the Docker run command. 
+ +**Task definition (excerpt)** + +```json +{ + "family": "trustvc-chain-events", + "containerDefinitions": [ + { + "name": "trustvc-chain-events", + "image": "trustvc/trustvc-chain-events:latest", + "portMappings": [ + { "containerPort": 8080, "protocol": "tcp" } + ], + "mountPoints": [ + { + "sourceVolume": "config", + "containerPath": "/app/config.json", + "readOnly": true + } + ], + "environment": [ + { "name": "DB_HOST", "value": "your-rds-endpoint" }, + { "name": "DB_NAME", "value": "trustvcevents" }, + { "name": "DB_USER", "value": "trustvc" }, + { "name": "DB_LEASE_TTL_MS", "value": "30000" } + ], + "secrets": [ + { + "name": "SIGNING_PRIVATE_KEY", + "valueFrom": "arn:aws:secretsmanager:region:account:secret:trustvc/signing-key" + }, + { + "name": "DB_PASSWORD", + "valueFrom": "arn:aws:secretsmanager:region:account:secret:trustvc/db-password" + } + ], + "logConfiguration": { + "logDriver": "awslogs", + "options": { + "awslogs-group": "/ecs/trustvc-chain-events", + "awslogs-region": "ap-southeast-1", + "awslogs-stream-prefix": "ecs" + } + } + } + ] +} +``` + +:::tip `config.json` on ECS +Mount `config.json` from an S3 object using an init container, or bake a non-secret config into a custom image layer. Keep secrets in AWS Secrets Manager and inject them as environment variables as shown above. 
+::: + +--- + +## Health Check Integration + +All orchestrators (ECS, Kubernetes, Docker Compose) can use the `/health` endpoint to determine instance readiness: + +```json +{ + "healthCheck": { + "command": ["CMD-SHELL", "curl -f http://localhost:8080/health || exit 1"], + "interval": 30, + "timeout": 5, + "retries": 3, + "startPeriod": 15 + } +} +``` + +| `/health` response | Meaning | +|---|---| +| `{"status":"ok"}` | All chains connected | +| `{"status":"starting"}` | Container is still connecting to one or more chains | +| `{"status":"degraded"}` | At least one chain has permanently failed — restart the container | diff --git a/docs/how-tos/chain-events/webhook-payload.md b/docs/how-tos/chain-events/webhook-payload.md new file mode 100644 index 0000000..081dc39 --- /dev/null +++ b/docs/how-tos/chain-events/webhook-payload.md @@ -0,0 +1,175 @@ +--- +id: webhook-payload +title: Webhook Payload & Verification +sidebar_label: Webhook Payload +sidebar_position: 4 +--- + +# Webhook Payload & Verification + +Every event is delivered as an HTTP `POST` to your configured `webhook.url`. 
+ +--- + +## Request Format + +```text +POST /your-endpoint +Content-Type: application/json +X-TrustVC-Signature: ed25519=<base64url-signature> +``` + +The body follows the [CloudEvents 1.0](https://cloudevents.io/) specification: + +```json +{ + "specversion": "1.0", + "id": "550e8400-e29b-41d4-a716-446655440000", + "source": "urn:trustvc:11155111:0xe6b5ce7e3691a0927b2806ce6638b35237dffac4", + "type": "com.trustvc.etr.holder_transfer", + "datacontenttype": "application/json", + "time": "2024-01-15T10:31:00.000Z", + "subject": "1", + "data": { + "chainKey": "ethereum-sepolia", + "chainId": 11155111, + "registryAddress": "0xe6b5ce7e3691a0927b2806ce6638b35237dffac4", + "tokenId": "1", + "blockNumber": 6123456, + "transactionHash": "0xabcdef...", + "logIndex": 0, + "payload": { + "fromHolder": "0xSenderAddress", + "toHolder": "0xReceiverAddress" + } + } +} +``` + +### Top-Level Fields + +| Field | Description | +|---|---| +| `specversion` | Always `"1.0"` | +| `id` | UUID — globally unique event identifier | +| `source` | `urn:trustvc:<chainId>:<registryAddress>` | +| `type` | Event type — see [Event Types](#event-types) below | +| `time` | ISO-8601 block timestamp | +| `subject` | Token ID as a string | +| `data` | Event-specific payload — see below | + +### `data` Fields + +| Field | Description | +|---|---| +| `chainKey` | Chain identifier (e.g. `ethereum-sepolia`) | +| `chainId` | Numeric EIP-155 chain ID | +| `registryAddress` | Token Registry contract address (lowercase) | +| `tokenId` | ETR token ID as a string | +| `blockNumber` | Block the event was confirmed in | +| `transactionHash` | Transaction that emitted the event | +| `logIndex` | Log index within the transaction | +| `payload` | Event-specific data (addresses, amounts, etc.) | + +:::tip Idempotency +Use `data.transactionHash + data.logIndex` as your idempotency key — this combination uniquely identifies any on-chain event.
+::: + +--- + +## Event Types + +| `type` | Trigger | Key `payload` fields | +|---|---|---| +| `com.trustvc.etr.minted` | Token minted | `tokenId`, `owner` | +| `com.trustvc.etr.burned` | Token burned | `tokenId` | +| `com.trustvc.etr.surrendered` | Token surrendered to registry | `tokenId` | +| `com.trustvc.etr.restored` | Token restored from registry | `tokenId` | +| `com.trustvc.etr.registry_paused` | Registry paused | — | +| `com.trustvc.etr.registry_unpaused` | Registry unpaused | — | +| `com.trustvc.etr.escrow_created` | New TitleEscrow deployed | `escrowAddress` | +| `com.trustvc.etr.token_received` | Escrow took custody | `escrowAddress` | +| `com.trustvc.etr.nomination` | Beneficiary nominee set | `nominee` | +| `com.trustvc.etr.beneficiary_transfer` | Beneficiary transferred | `fromBeneficiary`, `toBeneficiary` | +| `com.trustvc.etr.holder_transfer` | Holder transferred | `fromHolder`, `toHolder` | +| `com.trustvc.etr.return_to_issuer` | Token returned to issuer | — | +| `com.trustvc.etr.shred` | Token permanently destroyed | — | +| `com.trustvc.etr.reject_transfer_beneficiary` | Beneficiary transfer rejected | — | +| `com.trustvc.etr.reject_transfer_holder` | Holder transfer rejected | — | +| `com.trustvc.etr.reject_transfer_owners` | Both roles rejected simultaneously | — | + +--- + +## Signature Verification + +Every request includes an `X-TrustVC-Signature` header: + +```text +X-TrustVC-Signature: ed25519=<base64url-signature> +``` + +The signature is computed over the **raw request body bytes** using the Ed25519 private key configured in `SIGNING_PRIVATE_KEY`. Your receiver verifies it with the corresponding public key. Because the receiver holds only the public key, an attacker who compromises the receiver still cannot forge signed payloads. + +:::important Verify on raw bytes +Verify the signature before parsing the JSON body. Always verify the signature against the raw, unparsed body bytes — not a re-serialized object.
+::: + +### Node.js / TypeScript + +```typescript +import crypto from 'node:crypto'; +import fs from 'node:fs'; + +const publicKey = crypto.createPublicKey(fs.readFileSync('public.pem')); + +function verifyTrustVCWebhook(rawBody: Buffer, signatureHeader: string): boolean { + const b64url = signatureHeader.replace('ed25519=', ''); + const signature = Buffer.from(b64url, 'base64url'); + return crypto.verify(null, rawBody, publicKey, signature); +} + +// Express example +app.post('/trustvc/events', express.raw({ type: 'application/json' }), (req, res) => { + if (!verifyTrustVCWebhook(req.body, req.headers['x-trustvc-signature'] as string)) { + return res.status(401).send('Invalid signature'); + } + const event = JSON.parse(req.body.toString()); + // handle event... + res.status(200).send('ok'); +}); +``` + +### Python + +```python +import base64 +from cryptography.hazmat.primitives.asymmetric.ed25519 import Ed25519PublicKey +from cryptography.hazmat.primitives.serialization import load_pem_public_key +from cryptography.exceptions import InvalidSignature + +with open('public.pem', 'rb') as f: + public_key = load_pem_public_key(f.read()) + +def verify_trustvc_webhook(raw_body: bytes, signature_header: str) -> bool: + b64url = signature_header.replace('ed25519=', '') + # base64url → standard base64 + padding = '=' * (-len(b64url) % 4) + signature = base64.b64decode(b64url.replace('-', '+').replace('_', '/') + padding) + try: + public_key.verify(signature, raw_body) + return True + except InvalidSignature: + return False +``` + +### Getting the Public Key + +If you generated the key with `openssl rand -base64 32` (a raw seed), derive the public key: + +```bash +# Convert raw seed to PEM private key, then extract public key +echo "YOUR_BASE64_SEED" | base64 -d > seed.bin +openssl pkey -inform DER -in <(printf '\x30\x2e\x02\x01\x00\x30\x05\x06\x03\x2b\x65\x70\x04\x22\x04\x20'; cat seed.bin) -pubout -out public.pem +``` + +If you generated with `openssl genpkey`, you already 
have `public.pem` from [Quick Start Step 2](./quick-start#step-2--generate-a-signing-key). diff --git a/sidebars.json b/sidebars.json index deb27ce..a0708c1 100644 --- a/sidebars.json +++ b/sidebars.json @@ -52,6 +52,20 @@ "how-tos/decentralized-renderer/template-advanced-features" ] }, + { + "label": "Chain Events (ETR Webhook)", + "type": "category", + "items": [ + "how-tos/chain-events/overview", + "how-tos/chain-events/quick-start", + "how-tos/chain-events/configuration", + "how-tos/chain-events/webhook-payload", + "how-tos/chain-events/registry-api", + "how-tos/chain-events/rate-limits", + "how-tos/chain-events/opentelemetry", + "how-tos/chain-events/scaling" + ] + }, "how-tos/implementing-qr-codes", { "label": "OpenAttestation (Legacy)", diff --git a/static/docs/chain-events/config.example.json b/static/docs/chain-events/config.example.json new file mode 100644 index 0000000..ec66ece --- /dev/null +++ b/static/docs/chain-events/config.example.json @@ -0,0 +1,117 @@ +{ + "chains": [ + { + "chainKey": "ethereum-sepolia", + "rpcUrl": "wss://eth-sepolia.g.alchemy.com/v2/YOUR_ALCHEMY_API_KEY", + "registryAddresses": [ + "0xYourTokenRegistryAddress" + ], + "replayFromBlock": 6000000, + "replayBatchSize": 10000, + "replayDelayMs": 500, + "confirmations": 1 + }, + { + "chainKey": "ethereum", + "rpcUrl": "wss://eth-mainnet.g.alchemy.com/v2/YOUR_ALCHEMY_API_KEY", + "registryAddresses": [], + "replayFromBlock": 0, + "replayBatchSize": 10000, + "replayDelayMs": 500, + "confirmations": 3 + }, + { + "chainKey": "polygon", + "rpcUrl": "wss://polygon-mainnet.g.alchemy.com/v2/YOUR_ALCHEMY_API_KEY", + "registryAddresses": [], + "replayFromBlock": 0, + "replayBatchSize": 5000, + "replayDelayMs": 500, + "confirmations": 2 + }, + { + "chainKey": "polygon-amoy", + "rpcUrl": "wss://polygon-amoy-bor-rpc.publicnode.com", + "registryAddresses": [], + "replayFromBlock": 0, + "replayBatchSize": 5000, + "replayDelayMs": 500, + "confirmations": 1 + }, + { + "chainKey": "xdc", + "rpcUrl": 
"wss://rpc.xinfin.network/ws", + "registryAddresses": [], + "replayFromBlock": 0, + "replayBatchSize": 5000, + "replayDelayMs": 0, + "confirmations": 1 + }, + { + "chainKey": "xdc-apothem", + "rpcUrl": "wss://rpc.apothem.network/ws", + "registryAddresses": [], + "replayFromBlock": 0, + "replayBatchSize": 5000, + "replayDelayMs": 0, + "confirmations": 1 + }, + { + "chainKey": "stability", + "rpcUrl": "https://rpc.stabilityprotocol.com/zgt/YOUR_STABILITY_API_KEY", + "registryAddresses": [], + "pollIntervalMs": 10000, + "replayFromBlock": 0, + "replayBatchSize": 5000, + "replayDelayMs": 5000, + "confirmations": 1 + }, + { + "chainKey": "stability-testnet", + "rpcUrl": "https://rpc.testnet.stabilityprotocol.com/zgt/YOUR_STABILITY_API_KEY", + "registryAddresses": [], + "pollIntervalMs": 10000, + "replayFromBlock": 0, + "replayBatchSize": 5000, + "replayDelayMs": 5000, + "confirmations": 1 + }, + { + "chainKey": "astron", + "rpcUrl": "https://rpc.astron.network", + "registryAddresses": [], + "pollIntervalMs": 10000, + "replayFromBlock": 0, + "replayBatchSize": 5000, + "replayDelayMs": 0, + "confirmations": 1 + }, + { + "chainKey": "astron-testnet", + "rpcUrl": "https://rpc.testnet.astron.network", + "registryAddresses": [], + "pollIntervalMs": 10000, + "replayFromBlock": 0, + "replayBatchSize": 5000, + "replayDelayMs": 0, + "confirmations": 1 + } + ], + "webhook": { + "url": "https://your-system.example.com/trustvc/events", + "timeoutMs": 10000, + "retryAttempts": 3, + "retryBackoffMs": 1000, + "headers": { + "Authorization": "Bearer ${WEBHOOK_SECRET}" + }, + "maxConcurrentDeliveries": 10, + "maxQueueSize": 10000 + }, + "server": { + "port": 8080, + "host": "0.0.0.0", + "workerProcesses": true, + "logLevel": "info" + } +} diff --git a/static/docs/chain-events/grafana-fleet-health.json b/static/docs/chain-events/grafana-fleet-health.json new file mode 100644 index 0000000..6731ddd --- /dev/null +++ b/static/docs/chain-events/grafana-fleet-health.json @@ -0,0 +1,320 @@ +{ 
+ "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "Prometheus / Mimir datasource connected to your OTel collector", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + } + ], + "__requires": [ + { "type": "grafana", "id": "grafana", "name": "Grafana", "version": "10.0.0" }, + { "type": "datasource", "id": "prometheus", "name": "Prometheus", "version": "1.0.0" }, + { "type": "panel", "id": "stat", "name": "Stat", "version": "" }, + { "type": "panel", "id": "timeseries", "name": "Time series", "version": "" }, + { "type": "panel", "id": "table", "name": "Table", "version": "" } + ], + "uid": "trustvc-fleet-health", + "title": "TrustVC — Fleet & Chain Health", + "description": "Fleet health, per-instance status, chain connection, state-change events, and escrow subscriptions", + "schemaVersion": 39, + "version": 1, + "editable": true, + "graphTooltip": 1, + "time": { "from": "now-1h", "to": "now" }, + "refresh": "15s", + "tags": ["trustvc", "fleet"], + "templating": { + "list": [ + { + "name": "instance", + "label": "Instance", + "type": "query", + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "query": "label_values(trustvc_instance_health, instance)", + "includeAll": true, + "allValue": ".*", + "multi": true, + "refresh": 2, + "sort": 1, + "current": {} + } + ] + }, + "panels": [ + { + "id": 100, + "type": "row", + "title": "Fleet Overview", + "collapsed": false, + "gridPos": { "x": 0, "y": 0, "w": 24, "h": 1 } + }, + { + "id": 1, + "type": "stat", + "title": "Active Instances", + "description": "Running process replicas currently emitting metrics", + "gridPos": { "x": 0, "y": 1, "w": 4, "h": 4 }, + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "background", "graphMode": "none", "justifyMode": "center", "orientation": "auto", "textMode": "auto" }, + "fieldConfig": { + "defaults": { 
+ "noValue": "0", + "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } + }, + "overrides": [] + }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "count(trustvc_instance_health{instance=~\"$instance\"})", "legendFormat": "", "refId": "A", "instant": true }] + }, + { + "id": 2, + "type": "stat", + "title": "Healthy Instances", + "description": "Instances where no chain has permanently failed", + "gridPos": { "x": 4, "y": 1, "w": 4, "h": 4 }, + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "background", "graphMode": "none", "justifyMode": "center" }, + "fieldConfig": { + "defaults": { + "noValue": "0", + "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } + }, + "overrides": [] + }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "sum(trustvc_instance_health{instance=~\"$instance\"} == 1) or vector(0)", "legendFormat": "", "refId": "A", "instant": true }] + }, + { + "id": 3, + "type": "stat", + "title": "Degraded Instances", + "description": "Instances with at least one permanently failed chain", + "gridPos": { "x": 8, "y": 1, "w": 4, "h": 4 }, + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "background", "graphMode": "none", "justifyMode": "center" }, + "fieldConfig": { + "defaults": { + "noValue": "0", + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "red", "value": 1 }] } + }, + "overrides": [] + }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "sum(trustvc_instance_health{instance=~\"$instance\"} == 0) or vector(0)", "legendFormat": "", "refId": "A", "instant": true }] + 
}, + { + "id": 4, + "type": "stat", + "title": "Total Active Chains", + "description": "Sum of chains running across all instances", + "gridPos": { "x": 12, "y": 1, "w": 4, "h": 4 }, + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "background", "graphMode": "none", "justifyMode": "center" }, + "fieldConfig": { + "defaults": { + "noValue": "0", + "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } + }, + "overrides": [] + }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "sum(trustvc_instance_active_chains{instance=~\"$instance\"}) or vector(0)", "legendFormat": "", "refId": "A", "instant": true }] + }, + { + "id": 5, + "type": "stat", + "title": "Total Active Escrows", + "description": "TitleEscrow subscriptions across all chains and instances", + "gridPos": { "x": 16, "y": 1, "w": 4, "h": 4 }, + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "value", "graphMode": "none", "justifyMode": "center" }, + "fieldConfig": { + "defaults": { + "noValue": "0", + "thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] } + }, + "overrides": [] + }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "sum(trustvc_instance_total_escrows{instance=~\"$instance\"}) or vector(0)", "legendFormat": "", "refId": "A", "instant": true }] + }, + { + "id": 6, + "type": "stat", + "title": "Active Worker Processes", + "description": "Forked child worker processes across all instances", + "gridPos": { "x": 20, "y": 1, "w": 4, "h": 4 }, + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "options": { "reduceOptions": { "calcs": ["lastNotNull"] }, "colorMode": "value", "graphMode": "none", "justifyMode": "center" }, + 
"fieldConfig": { + "defaults": { + "noValue": "0", + "thresholds": { "mode": "absolute", "steps": [{ "color": "blue", "value": null }] } + }, + "overrides": [] + }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "sum(trustvc_instance_active_workers{instance=~\"$instance\"}) or vector(0)", "legendFormat": "", "refId": "A", "instant": true }] + }, + { + "id": 101, + "type": "row", + "title": "Per-Instance Status", + "collapsed": false, + "gridPos": { "x": 0, "y": 5, "w": 24, "h": 1 } + }, + { + "id": 7, + "type": "table", + "title": "Instance Status", + "description": "One row per active process replica — health, uptime, chains, workers, escrows", + "gridPos": { "x": 0, "y": 6, "w": 16, "h": 7 }, + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "options": { "showHeader": true, "cellHeight": "sm", "footer": { "show": false }, "sortBy": [{ "desc": false, "displayName": "Instance" }] }, + "fieldConfig": { + "defaults": { "custom": { "align": "left", "displayMode": "auto" } }, + "overrides": [ + { + "matcher": { "id": "byName", "options": "Health" }, + "properties": [ + { "id": "mappings", "value": [{ "type": "value", "options": { "1": { "text": "ok", "color": "green" }, "0": { "text": "degraded", "color": "red" } } }] }, + { "id": "custom.displayMode", "value": "color-background" } + ] + }, + { "matcher": { "id": "byName", "options": "Uptime" }, "properties": [{ "id": "unit", "value": "s" }, { "id": "custom.width", "value": 110 }] } + ] + }, + "transformations": [ + { "id": "labelsToFields", "options": { "valueLabel": "instance" } }, + { "id": "merge", "options": {} }, + { "id": "organize", "options": { "renameByName": { "trustvc_instance_health": "Health", "Value #A": "Health", "trustvc_instance_uptime_seconds": "Uptime", "Value #B": "Uptime", "trustvc_instance_active_chains": "Chains", "Value #C": "Chains", "trustvc_instance_active_workers": "Workers", "Value #D": "Workers", 
"trustvc_instance_total_escrows": "Escrows", "Value #E": "Escrows", "instance": "Instance" }, "excludeByName": { "Time": true, "__name__": true } } } + ], + "targets": [ + { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "trustvc_instance_health{instance=~\"$instance\"}", "refId": "A", "instant": true, "format": "table" }, + { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "trustvc_instance_uptime_seconds{instance=~\"$instance\"}", "refId": "B", "instant": true, "format": "table" }, + { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "trustvc_instance_active_chains{instance=~\"$instance\"}", "refId": "C", "instant": true, "format": "table" }, + { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "trustvc_instance_active_workers{instance=~\"$instance\"}", "refId": "D", "instant": true, "format": "table" }, + { "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "trustvc_instance_total_escrows{instance=~\"$instance\"}", "refId": "E", "instant": true, "format": "table" } + ] + }, + { + "id": 8, + "type": "timeseries", + "title": "Instance Uptime", + "description": "Gaps indicate process restarts", + "gridPos": { "x": 16, "y": 6, "w": 8, "h": 7 }, + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "options": { "tooltip": { "mode": "multi", "sort": "none" }, "legend": { "displayMode": "list", "placement": "bottom" } }, + "fieldConfig": { "defaults": { "unit": "s", "custom": { "lineWidth": 2, "fillOpacity": 10 } }, "overrides": [] }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "trustvc_instance_uptime_seconds{instance=~\"$instance\"}", "legendFormat": "{{instance}}", "refId": "A" }] + }, + { + "id": 102, + "type": "row", + "title": "Chain Health", + "collapsed": false, + "gridPos": { "x": 0, "y": 13, "w": 24, "h": 1 } + }, + { + "id": 9, + "type": "timeseries", + "title": 
"Chain RPC Connection Status", + "description": "1 = connected, 0 = not connected. Drops indicate disconnections.", + "gridPos": { "x": 0, "y": 14, "w": 12, "h": 8 }, + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "options": { "tooltip": { "mode": "multi", "sort": "none" }, "legend": { "displayMode": "list", "placement": "bottom" } }, + "fieldConfig": { + "defaults": { + "min": 0, "max": 1, + "custom": { "lineWidth": 2, "fillOpacity": 15, "drawStyle": "line", "spanNulls": false }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "red", "value": null }, { "color": "green", "value": 1 }] } + }, + "overrides": [] + }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "trustvc_chain_connected{instance=~\"$instance\"}", "legendFormat": "{{chain}} ({{instance}})", "refId": "A" }] + }, + { + "id": 10, + "type": "timeseries", + "title": "Chain Reconnect Attempts", + "description": "Rising line = persistent connection trouble", + "gridPos": { "x": 12, "y": 14, "w": 12, "h": 8 }, + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "options": { "tooltip": { "mode": "multi", "sort": "none" }, "legend": { "displayMode": "list", "placement": "bottom" } }, + "fieldConfig": { + "defaults": { + "custom": { "lineWidth": 2, "fillOpacity": 10 }, + "thresholds": { "mode": "absolute", "steps": [{ "color": "green", "value": null }, { "color": "yellow", "value": 3 }, { "color": "red", "value": 10 }] } + }, + "overrides": [] + }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "trustvc_chain_reconnect_attempts{instance=~\"$instance\"}", "legendFormat": "{{chain}} ({{instance}})", "refId": "A" }] + }, + { + "id": 103, + "type": "row", + "title": "State-Change Events", + "collapsed": false, + "gridPos": { "x": 0, "y": 22, "w": 24, "h": 1 } + }, + { + "id": 11, + "type": "timeseries", + "title": "Chain State Transitions (rate)", + "description": "Spikes = 
flapping connection. Labels show from_status → to_status.", + "gridPos": { "x": 0, "y": 23, "w": 14, "h": 8 }, + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "options": { "tooltip": { "mode": "multi", "sort": "none" }, "legend": { "displayMode": "list", "placement": "bottom" } }, + "fieldConfig": { + "defaults": { + "unit": "ops", + "custom": { "lineWidth": 2, "fillOpacity": 10, "drawStyle": "bars", "barAlignment": 0 } + }, + "overrides": [] + }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "rate(trustvc_chain_state_changes_total{instance=~\"$instance\"}[5m])", "legendFormat": "{{chain}}: {{from_status}} → {{to_status}}", "refId": "A" }] + }, + { + "id": 12, + "type": "table", + "title": "State Transition Counts", + "description": "Total transitions since process start", + "gridPos": { "x": 14, "y": 23, "w": 10, "h": 8 }, + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "options": { "showHeader": true, "cellHeight": "sm", "footer": { "show": false } }, + "fieldConfig": { "defaults": { "custom": { "align": "left", "displayMode": "auto" } }, "overrides": [] }, + "transformations": [ + { "id": "organize", "options": { "renameByName": { "chain": "Chain", "from_status": "From", "to_status": "To", "Value": "Count" }, "excludeByName": { "Time": true, "__name__": true } } } + ], + "targets": [{ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "trustvc_chain_state_changes_total{instance=~\"$instance\"}", "refId": "A", "instant": true, "format": "table" }] + }, + { + "id": 104, + "type": "row", + "title": "Event Processing", + "collapsed": false, + "gridPos": { "x": 0, "y": 31, "w": 24, "h": 1 } + }, + { + "id": 13, + "type": "timeseries", + "title": "Active Escrows per Chain", + "description": "Grows as new tokens are minted", + "gridPos": { "x": 0, "y": 32, "w": 12, "h": 8 }, + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "options": 
{ "tooltip": { "mode": "multi", "sort": "none" }, "legend": { "displayMode": "list", "placement": "bottom" } }, + "fieldConfig": { "defaults": { "min": 0, "custom": { "lineWidth": 2, "fillOpacity": 15 } }, "overrides": [] }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "trustvc_chain_active_escrows{instance=~\"$instance\"}", "legendFormat": "{{chain}} ({{instance}})", "refId": "A" }] + }, + { + "id": 14, + "type": "timeseries", + "title": "Last Seen Block per Chain", + "description": "Flat line = listener stalled", + "gridPos": { "x": 12, "y": 32, "w": 12, "h": 8 }, + "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, + "options": { "tooltip": { "mode": "multi", "sort": "none" }, "legend": { "displayMode": "list", "placement": "bottom" } }, + "fieldConfig": { "defaults": { "custom": { "lineWidth": 2, "fillOpacity": 10 } }, "overrides": [] }, + "targets": [{ "datasource": { "type": "prometheus", "uid": "${DS_PROMETHEUS}" }, "expr": "trustvc_chain_last_seen_block{instance=~\"$instance\"}", "legendFormat": "{{chain}} ({{instance}})", "refId": "A" }] + } + ], + "annotations": { "list": [] }, + "links": [] +} diff --git a/static/docs/chain-events/grafana-webhook-events.json b/static/docs/chain-events/grafana-webhook-events.json new file mode 100644 index 0000000..6523921 --- /dev/null +++ b/static/docs/chain-events/grafana-webhook-events.json @@ -0,0 +1,1344 @@ +{ + "__inputs": [ + { + "name": "DS_PROMETHEUS", + "label": "Prometheus", + "description": "", + "type": "datasource", + "pluginId": "prometheus", + "pluginName": "Prometheus" + }, + { + "name": "DS_TEMPO", + "label": "Tempo", + "description": "", + "type": "datasource", + "pluginId": "tempo", + "pluginName": "Tempo" + } + ], + "__requires": [ + { + "type": "grafana", + "id": "grafana", + "name": "Grafana", + "version": "10.0.0" + }, + { + "type": "datasource", + "id": "prometheus", + "name": "Prometheus", + "version": "1.0.0" + }, + { + "type": 
"datasource", + "id": "tempo", + "name": "Tempo", + "version": "1.0.0" + }, + { + "type": "panel", + "id": "stat", + "name": "Stat", + "version": "" + }, + { + "type": "panel", + "id": "timeseries", + "name": "Time series", + "version": "" + }, + { + "type": "panel", + "id": "table", + "name": "Table", + "version": "" + }, + { + "type": "panel", + "id": "text", + "name": "Text", + "version": "" + } + ], + "annotations": { + "list": [] + }, + "editable": true, + "fiscalYearStartMonth": 0, + "graphTooltip": 1, + "id": null, + "links": [], + "panels": [ + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 0 + }, + "id": 100, + "title": "Overview", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + } + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 0, + "y": 1 + }, + "id": 1, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "textMode": "auto" + }, + "title": "Uptime", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "trustvc_instance_uptime_seconds", + "legendFormat": "uptime", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 0.5 + } + ] + }, + "unit": "short" + } + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 4, + "y": 1 + }, + "id": 2, + "options": { + "colorMode": "background", + "graphMode": "none", + 
"justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "textMode": "auto" + }, + "title": "Chains Connected", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(trustvc_chain_connected)", + "legendFormat": "connected", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + } + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 8, + "y": 1 + }, + "id": 3, + "options": { + "colorMode": "background", + "graphMode": "none", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "textMode": "auto" + }, + "title": "Active Escrows", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(trustvc_chain_active_escrows)", + "legendFormat": "escrows", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + } + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 12, + "y": 1 + }, + "id": 4, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "textMode": "auto" + }, + "title": "Total Delivered", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(trustvc_webhook_delivered_total[24h]))", + 
"legendFormat": "delivered", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 1 + }, + { + "color": "red", + "value": 5 + } + ] + }, + "unit": "short" + } + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 16, + "y": 1 + }, + "id": 5, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "textMode": "auto" + }, + "title": "Total Failed", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum(increase(trustvc_webhook_failed_total[24h]))", + "legendFormat": "failed", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "yellow", + "value": 50 + }, + { + "color": "red", + "value": 200 + } + ] + }, + "unit": "short" + } + }, + "gridPos": { + "h": 4, + "w": 4, + "x": 20, + "y": 1 + }, + "id": 6, + "options": { + "colorMode": "background", + "graphMode": "area", + "justifyMode": "center", + "orientation": "auto", + "reduceOptions": { + "calcs": [ + "lastNotNull" + ] + }, + "textMode": "auto" + }, + "title": "Queue Depth", + "type": "stat", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "trustvc_webhook_queue_depth", + "legendFormat": "queue", + "refId": "A" + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 5 + }, + "id": 102, + "title": "Webhook 
Delivery", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [ + { + "matcher": { + "id": "byRegexp", + "options": "/^failed/" + }, + "properties": [ + { + "id": "color", + "value": { + "fixedColor": "red", + "mode": "fixed" + } + } + ] + } + ] + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 6 + }, + "id": 10, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "sum" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "title": "Delivery Rate (per minute)", + "type": "timeseries", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum by (event_type) (rate(trustvc_webhook_delivered_total[1m]) * 60)", + "legendFormat": "delivered • {{event_type}}", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum by (event_type) (rate(trustvc_webhook_failed_total[1m]) * 60)", + "legendFormat": "failed • {{event_type}}", + "refId": "B" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + 
"fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "ms" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 12, + "y": 6 + }, + "id": 11, + "options": { + "legend": { + "calcs": [ + "mean", + "p95" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "title": "Delivery Duration (p50 / p95 / p99)", + "type": "timeseries", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "histogram_quantile(0.50, sum by (le) (rate(trustvc_webhook_delivery_duration_ms_bucket[5m])))", + "legendFormat": "p50", + "refId": "A" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "histogram_quantile(0.95, sum by (le) (rate(trustvc_webhook_delivery_duration_ms_bucket[5m])))", + "legendFormat": "p95", + "refId": "B" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "histogram_quantile(0.99, sum by (le) (rate(trustvc_webhook_delivery_duration_ms_bucket[5m])))", + "legendFormat": "p99", + "refId": "C" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + 
}, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 20, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "line+area", + "steps": [ + { + "color": "transparent", + "value": null + }, + { + "color": "red", + "value": 200 + } + ] + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + }, + { + "color": "red", + "value": 200 + } + ] + }, + "unit": "short" + } + }, + "gridPos": { + "h": 6, + "w": 24, + "x": 0, + "y": 14 + }, + "id": 12, + "options": { + "legend": { + "calcs": [ + "lastNotNull", + "max" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "title": "Queue Depth", + "type": "timeseries", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "trustvc_webhook_queue_depth", + "legendFormat": "pending events", + "refId": "A" + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 20 + }, + "id": 103, + "title": "Chain Status", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "thresholds" + }, + "mappings": [ + { + "options": { + "0": { + "color": "red", + "index": 0, + "text": "Disconnected" + }, + "1": { + "color": "green", + "index": 1, + "text": "Connected" + } + }, + "type": "value" + } + ], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "red", + "value": null + }, + 
{ + "color": "green", + "value": 1 + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 21 + }, + "id": 20, + "options": { + "cellHeight": "sm", + "footer": { + "countRows": false, + "fields": "", + "reducer": [ + "sum" + ], + "show": false + }, + "showHeader": true, + "sortBy": [ + { + "desc": false, + "displayName": "Chain" + } + ] + }, + "title": "Chain Status Table", + "transformations": [ + { + "id": "merge", + "options": {} + }, + { + "id": "organize", + "options": { + "excludeByName": { + "Time": true, + "__name__": true, + "job": true, + "instance": true + }, + "renameByName": { + "chain": "Chain", + "transport": "Transport", + "Value #connected": "Connected", + "Value #last_block": "Last Block", + "Value #escrows": "Active Escrows", + "Value #reconnects": "Reconnects" + } + } + } + ], + "type": "table", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "trustvc_chain_connected", + "legendFormat": "{{chain}}", + "refId": "connected", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "trustvc_chain_last_seen_block", + "legendFormat": "{{chain}}", + "refId": "last_block", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "trustvc_chain_active_escrows", + "legendFormat": "{{chain}}", + "refId": "escrows", + "instant": true + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "trustvc_chain_reconnect_attempts", + "legendFormat": "{{chain}}", + "refId": "reconnects", + "instant": true + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, 
+ "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "stepAfter", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 0, + "y": 29 + }, + "id": 21, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "title": "Active Escrows per Chain", + "type": "timeseries", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "trustvc_chain_active_escrows", + "legendFormat": "{{chain}}", + "refId": "A" + } + ] + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "none" + } + }, + "gridPos": { + "h": 8, + "w": 12, + "x": 
12, + "y": 29 + }, + "id": 22, + "options": { + "legend": { + "calcs": [ + "lastNotNull" + ], + "displayMode": "table", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "none" + } + }, + "title": "Latest Block per Chain", + "type": "timeseries", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "trustvc_chain_last_seen_block", + "legendFormat": "{{chain}}", + "refId": "A" + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 37 + }, + "id": 104, + "title": "Traces", + "type": "row" + }, + { + "datasource": { + "type": "tempo", + "uid": "${DS_TEMPO}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "bars", + "fillOpacity": 100, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 1, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "never", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "s" + } + }, + "gridPos": { + "h": 8, + "w": 24, + "x": 0, + "y": 38 + }, + "id": 30, + "options": { + "legend": { + "calcs": [], + "displayMode": "list", + "placement": "bottom", + "showLegend": true + }, + "tooltip": { + "mode": "single", + "sort": "none" + } + }, + "title": "Escrow Replay Duration (from Traces)", + "type": "timeseries", + "targets": [ + { + "datasource": { + "type": "tempo", + "uid": "${DS_TEMPO}" + }, + "filters": [ + { + "id": "service-name", + "operator": "=", + "scope": "resource", + "tag": "service.name", 
+ "value": "trustvc-webhook-events", + "valueType": "string" + }, + { + "id": "span-name", + "operator": "=", + "scope": "span", + "tag": "name", + "value": "escrow.historical-replay", + "valueType": "string" + } + ], + "limit": 20, + "queryType": "traceql", + "query": "{resource.service.name=\"trustvc-webhook-events\" && name=\"escrow.historical-replay\"}", + "refId": "A", + "tableType": "spans" + } + ] + }, + { + "collapsed": false, + "gridPos": { + "h": 1, + "w": 24, + "x": 0, + "y": 46 + }, + "id": 200, + "title": "Chain Events", + "type": "row" + }, + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "fieldConfig": { + "defaults": { + "color": { + "mode": "palette-classic" + }, + "custom": { + "axisCenteredZero": false, + "axisColorMode": "text", + "axisLabel": "", + "axisPlacement": "auto", + "barAlignment": 0, + "drawStyle": "line", + "fillOpacity": 10, + "gradientMode": "none", + "hideFrom": { + "legend": false, + "tooltip": false, + "viz": false + }, + "lineInterpolation": "linear", + "lineWidth": 2, + "pointSize": 5, + "scaleDistribution": { + "type": "linear" + }, + "showPoints": "auto", + "spanNulls": false, + "stacking": { + "group": "A", + "mode": "none" + }, + "thresholdsStyle": { + "mode": "off" + } + }, + "mappings": [], + "thresholds": { + "mode": "absolute", + "steps": [ + { + "color": "green", + "value": null + } + ] + }, + "unit": "short" + }, + "overrides": [] + }, + "gridPos": { + "h": 10, + "w": 24, + "x": 0, + "y": 47 + }, + "id": 201, + "options": { + "legend": { + "calcs": [ + "sum" + ], + "displayMode": "table", + "placement": "right", + "showLegend": true + }, + "tooltip": { + "mode": "multi", + "sort": "desc" + } + }, + "title": "On-chain Events Detected (by type, per minute)", + "type": "timeseries", + "targets": [ + { + "datasource": { + "type": "prometheus", + "uid": "${DS_PROMETHEUS}" + }, + "expr": "sum by (chain, event_type) (rate(trustvc_chain_events_received_total[5m]) * 60)", + "legendFormat": 
"{{chain}} / {{event_type}}", + "refId": "A" + } + ] + } + ], + "refresh": "30s", + "schemaVersion": 39, + "tags": [ + "trustvc", + "webhook", + "etr" + ], + "templating": { + "list": [] + }, + "time": { + "from": "now-1h", + "to": "now" + }, + "timepicker": {}, + "timezone": "browser", + "title": "TrustVC Webhook Events", + "uid": "trustvc-webhook-events", + "version": 1 +} diff --git a/static/docs/etr-listener/how-it-works.png b/static/docs/etr-listener/how-it-works.png new file mode 100644 index 0000000..149f826 Binary files /dev/null and b/static/docs/etr-listener/how-it-works.png differ