diff --git a/notebooks/09-network-overview.ipynb b/notebooks/09-network-overview.ipynb new file mode 100644 index 0000000..5955cf5 --- /dev/null +++ b/notebooks/09-network-overview.ipynb @@ -0,0 +1,269 @@ +{ + "cells": [ + { + "cell_type": "markdown", + "id": "0", + "metadata": {}, + "source": [ + "Network overview using Xatu sentry node observations on Ethereum mainnet." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "1", + "metadata": { + "tags": [ + "parameters" + ] + }, + "outputs": [], + "source": [ + "import pandas as pd\n", + "import plotly.express as px\n", + "\n", + "from loaders import load_parquet, display_sql\n", + "\n", + "target_date = None # Set via papermill, or auto-detect from manifest" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "2", + "metadata": { + "tags": [ + "sql-fold" + ] + }, + "outputs": [], + "source": [ + "display_sql(\"xatu_client_connectivity\", target_date)" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "3", + "metadata": {}, + "outputs": [], + "source": [ + "df = load_parquet(\"xatu_client_connectivity\", target_date)" + ] + }, + { + "cell_type": "markdown", + "id": "4", + "metadata": {}, + "source": [ + "## Total unique peers\n", + "\n", + "Number of unique peers observed by Xatu sentry nodes throughout the day." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "5", + "metadata": {}, + "outputs": [], + "source": [ + "df_unique = df.groupby(\"hour_bucket\")[\"peer_id\"].nunique().reset_index()\n", + "df_unique.columns = [\"hour_bucket\", \"unique_peers\"]\n", + "\n", + "fig = px.line(\n", + " df_unique,\n", + " x=\"hour_bucket\",\n", + " y=\"unique_peers\",\n", + ")\n", + "fig.update_layout(\n", + " xaxis_title=None,\n", + " yaxis_title=\"Unique peers\",\n", + " height=400,\n", + ")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "6", + "metadata": {}, + "source": [ + "## Client distribution\n", + "\n", + "Distribution of unique peers by client implementation over time. Each peer is assigned to a single client based on their most recently observed agent string." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "7", + "metadata": {}, + "outputs": [], + "source": [ + "# Filter out empty client names and deduplicate per hour/peer\n", + "df_clients = df[df[\"client_name\"].notna() & (df[\"client_name\"] != \"\")].copy()\n", + "df_clients = df_clients.sort_values([\"hour_bucket\", \"peer_id\", \"client_name\"], ascending=[True, True, False])\n", + "df_clients = df_clients.drop_duplicates(subset=[\"hour_bucket\", \"peer_id\"], keep=\"first\")\n", + "\n", + "df_client_counts = df_clients.groupby([\"hour_bucket\", \"client_name\"]).size().reset_index(name=\"peers\")\n", + "\n", + "fig = px.area(\n", + " df_client_counts,\n", + " x=\"hour_bucket\",\n", + " y=\"peers\",\n", + " color=\"client_name\",\n", + ")\n", + "fig.update_layout(\n", + " xaxis_title=None,\n", + " yaxis_title=\"Peers\",\n", + " legend_title=\"Client\",\n", + " height=500,\n", + ")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "8", + "metadata": {}, + "source": [ + "## Connections per Xatu node\n", + "\n", + "Number of unique peers connected to each Xatu sentry node over time." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "9", + "metadata": {}, + "outputs": [], + "source": [ + "df_xatu = df.groupby([\"hour_bucket\", \"local_name\"])[\"peer_id\"].nunique().reset_index()\n", + "df_xatu.columns = [\"hour_bucket\", \"local_name\", \"peers\"]\n", + "df_xatu[\"local_name\"] = df_xatu[\"local_name\"].str.replace(\"ethpandaops/mainnet/\", \"\", regex=False)\n", + "\n", + "fig = px.line(\n", + " df_xatu,\n", + " x=\"hour_bucket\",\n", + " y=\"peers\",\n", + " color=\"local_name\",\n", + ")\n", + "fig.update_layout(\n", + " xaxis_title=None,\n", + " yaxis_title=\"Connected peers\",\n", + " legend=dict(\n", + " title=\"Xatu node\",\n", + " orientation=\"h\",\n", + " yanchor=\"top\",\n", + " y=-0.2,\n", + " xanchor=\"center\",\n", + " x=0.5,\n", + " ),\n", + " height=500,\n", + ")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "10", + "metadata": {}, + "source": [ + "## Transport protocol distribution\n", + "\n", + "Distribution of connections by IP protocol (IPv4/IPv6) and transport protocol (TCP/QUIC) combinations." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "11", + "metadata": {}, + "outputs": [], + "source": [ + "# Group transports per peer/hour/protocol\n", + "df_transport = (\n", + " df.groupby([\"hour_bucket\", \"peer_id\", \"protocol\"])[\"transport_protocol\"]\n", + " .apply(lambda x: \" & \".join(sorted(x.dropna().unique())))\n", + " .reset_index()\n", + ")\n", + "df_transport[\"protocol_combos\"] = df_transport[\"protocol\"] + \" + (\" + df_transport[\"transport_protocol\"] + \")\"\n", + "\n", + "df_proto_counts = df_transport.groupby([\"hour_bucket\", \"protocol_combos\"]).size().reset_index(name=\"peers\")\n", + "\n", + "fig = px.line(\n", + " df_proto_counts,\n", + " x=\"hour_bucket\",\n", + " y=\"peers\",\n", + " color=\"protocol_combos\",\n", + ")\n", + "fig.update_layout(\n", + " xaxis_title=None,\n", + " yaxis_title=\"Connected peers\",\n", + " legend_title=\"Protocol\",\n", + " height=500,\n", + ")\n", + "fig.show()" + ] + }, + { + "cell_type": "markdown", + "id": "12", + "metadata": {}, + "source": [ + "## Port popularity\n", + "\n", + "Most commonly used ports by connected peers. Standard Ethereum P2P port is 30303." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "id": "13", + "metadata": {}, + "outputs": [], + "source": [ + "# Count unique peers per port\n", + "df_ports = df.drop_duplicates(subset=[\"peer_id\", \"port\"]).groupby(\"port\").size().reset_index(name=\"peers\")\n", + "df_ports = df_ports.sort_values(\"peers\", ascending=False).head(20)\n", + "df_ports[\"port\"] = df_ports[\"port\"].astype(str)\n", + "\n", + "fig = px.bar(\n", + " df_ports,\n", + " x=\"port\",\n", + " y=\"peers\",\n", + ")\n", + "fig.update_xaxes(type=\"category\")\n", + "fig.update_layout(\n", + " xaxis_title=\"Port\",\n", + " yaxis_title=\"Peers\",\n", + " height=400,\n", + ")\n", + "fig.show()" + ] + } + ], + "metadata": { + "kernelspec": { + "display_name": "Python 3 (ipykernel)", + "language": "python", + "name": "python3" + }, + "language_info": { + "codemirror_mode": { + "name": "ipython", + "version": 3 + }, + "file_extension": ".py", + "mimetype": "text/x-python", + "name": "python", + "nbconvert_exporter": "python", + "pygments_lexer": "ipython3", + "version": "3.12.0" + } + }, + "nbformat": 4, + "nbformat_minor": 5 +} diff --git a/pipeline.yaml b/pipeline.yaml index 99d1dc2..1ccac0b 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -100,6 +100,12 @@ queries: description: End-to-end block production timing from MEV bidding to column arrival output_file: block_production_timeline.parquet + xatu_client_connectivity: + module: queries.network_overview + function: fetch_xatu_client_connectivity + description: Peer connectivity data from Xatu sentry nodes + output_file: xatu_client_connectivity.parquet + # ============================================ # Notebook Registry # ============================================ @@ -222,6 +228,20 @@ notebooks: required: true order: 8 + - id: network-overview + title: Network overview + description: Network connectivity and client distribution from Xatu sentry nodes + icon: Network + source: notebooks/09-network-overview.ipynb + schedule: daily + queries: + - xatu_client_connectivity + parameters: + - name: target_date + type: date + required: true + order: 9 + # Schedule options: hourly, daily, weekly, manual # - hourly: Runs every hour, accumulating data throughout the day # - daily: Runs once per day at 1am UTC diff --git a/queries/network_overview.py b/queries/network_overview.py new file mode 100644 index 0000000..211315b --- /dev/null +++ b/queries/network_overview.py @@ -0,0 +1,44 @@ +""" +Fetch functions for network overview analysis. + +Analyzes overall network connectivity using Xatu sentry node observations. +""" + + +def _get_date_filter(target_date: str, column: str = "event_date_time") -> str: + """Generate SQL date filter for a specific date.""" + return f"{column} >= '{target_date}' AND {column} < '{target_date}'::date + INTERVAL 1 DAY" + + +def fetch_xatu_client_connectivity( + client, + target_date: str, + network: str = "mainnet", +) -> tuple: + """Fetch peer connectivity data from Xatu sentry nodes. + + Uses libp2p_connected_local to capture all peers connected to Xatu nodes, + including their client type, transport protocol, and geographic location. + + Returns (df, query). + """ + date_filter = _get_date_filter(target_date) + + query = f""" +SELECT + toStartOfInterval(event_date_time, INTERVAL 1 hour) AS hour_bucket, + remote_peer_id_unique_key AS peer_id, + remote_protocol AS protocol, + remote_transport_protocol AS transport_protocol, + remote_port AS port, + remote_agent_implementation AS client_name, + meta_client_name AS local_name, + remote_geo_country_code AS geo_country_code +FROM libp2p_connected_local +WHERE meta_network_name = '{network}' + AND {date_filter} +ORDER BY hour_bucket ASC +""" + + df = client.query_df(query) + return df, query diff --git a/site/src/components/Icon.astro b/site/src/components/Icon.astro index 5b8b505..e92d518 100644 --- a/site/src/components/Icon.astro +++ b/site/src/components/Icon.astro @@ -1,5 +1,5 @@ --- -import { Activity, AlertCircle, AlertTriangle, Calendar, ChevronLeft, ChevronRight, Eye, FileText, Gavel, Grid3x3, Layers, Link, List, PanelLeft, Timer, XCircle } from 'lucide-react'; +import { Activity, AlertCircle, AlertTriangle, Calendar, ChevronLeft, ChevronRight, Eye, FileText, Gavel, Grid3x3, Layers, Link, List, Network, PanelLeft, Timer, XCircle } from 'lucide-react'; interface Props { name: string; @@ -26,6 +26,7 @@ const icons: Record = { Layers, Link, List, + Network, PanelLeft, Timer, XCircle,