Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,5 @@ python -m http.server -d _site
df = load_parquet("my_data")
# Visualize...
```
```

4. **Add to site** in `_quarto.yml` navbar
8 changes: 3 additions & 5 deletions _quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,16 @@ website:
contents:
- text: Introduction
href: index.qmd
- section: '2025-12-09'
- section: '2025-12-14'
contents:
- text: Blob inclusion
href: notebooks/01-blob-inclusion.qmd
- text: Blob flow
href: notebooks/02-blob-flow.qmd
- text: Column propagation
href: notebooks/03-column-propagation.qmd
- section: Historical
contents:
- text: '2025-12-08'
href: 20251208/index.qmd
- text: Network overview
href: notebooks/04-network-overview.qmd
format:
html:
theme:
Expand Down
1 change: 1 addition & 0 deletions index.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ A collection of notebooks analyzing P2P dynamics in Ethereum networks. Currently
- [Blob inclusion](notebooks/01-blob-inclusion.qmd): Blob inclusion patterns per block and epoch.
- [Blob flow](notebooks/02-blob-flow.qmd): Flow diagrams tracing blob packing per entities, builders, and relays.
- [Column propagation](notebooks/03-column-propagation.qmd): Column propagation timing across 128 data columns subnets.
- [Network Overview](notebooks/04-network-overview.qmd): General view of the P2P network.

## Generation

Expand Down
176 changes: 176 additions & 0 deletions notebooks/04-network-overview.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
---
title: "Network Overview"
---

Analysis notebook computing an overall network overview from the Xatu sentry nodes on Ethereum mainnet.

```{python}
#| tags: [parameters]
target_date = None # Set via papermill, or auto-detect from manifest
network = None # Set via papermill, or auto-detect from manifest
```

```{python}
import polars as pl
import plotly.express as px
from loaders import load_parquet

raw_df = load_parquet("xatu_client_connectivity", target_date)
```

## Total unique peers in the network

```{python}
# Display the number of unique peers in the network
df = (
pl.from_pandas(raw_df)
.group_by("hour_bucket")
.agg(unique_peers=pl.col("peer_id").n_unique())
.sort("hour_bucket")
)

fig = px.line(
df,
x="hour_bucket",
y="unique_peers",
)

fig.update_layout(
title="Total number of unique peers",
xaxis_title="Date",
yaxis_title="Unique peers",
)
```

## Client distribution of the unique peers

```{python}
# get the number of unique peers
df = (
pl.from_pandas(raw_df)
.sort(["hour_bucket", "peer_id", "client_name"], descending=[False, False, True])
.unique(subset=["hour_bucket", "peer_id"], keep="first")
.filter(
pl.col("client_name").is_not_null() & (pl.col("client_name") != "")
)
.group_by(["hour_bucket","client_name"])
.agg(peers=pl.len())
.sort("hour_bucket", "peers")
)

fig = px.area(
df,
x="hour_bucket",
y="peers",
color="client_name",
)

fig.update_layout(
title="Total number of unique peers",
xaxis_title="Date",
yaxis_title="Peers",
width=1200,
height=800,
)
```

## Number of connections from each Xatu node

```{python}
# Plot the number of connections per each Xatu node
df = (
pl.from_pandas(raw_df)
.group_by(["hour_bucket", "local_name"])
.agg(peers=pl.col("peer_id").n_unique())
.sort("hour_bucket")
.with_columns(
pl.col("local_name").str.replace(f"ethpandaops/{network}/", "")
)
)

fig = px.line(
df,
x="hour_bucket",
y="peers",
color="local_name",
)

fig.update_layout(
title="Connections per Xatu nodes",
xaxis_title=None,
yaxis_title="Connected peers",
legend=dict(
title="Client Names",
orientation = "h",
yanchor="top",
y=-.25,
xanchor="center",
x=0.5,
# entrywidth=300,
),
width=1200,
height=800,
)
```

## Distribution of connections to peers on each IP protocol + Transport protocol combination

```{python}
df = (
pl.from_pandas(raw_df)
.group_by(["hour_bucket", "peer_id", "protocol"])
.agg(
all_transports=pl.col("transport_protocol").unique().sort().str.join(" & ")
)
.with_columns(
protocol_combos=pl.col("protocol") + " + (" + pl.col("all_transports") + ")"
)
.group_by(["hour_bucket", "protocol_combos"])
.agg(peers=pl.count("peer_id"))
.sort("hour_bucket")
)

fig = px.line(
df,
x="hour_bucket",
y="peers",
color="protocol_combos",
)

fig.update_layout(
title="Transport protocol distribution for Xatu nodes",
yaxis_title="Connected peers",
width=1200,
height=800,
)

```

## Popularity of ports

```{python}
df = (
pl.from_pandas(raw_df)
# this might double count peers that use different ports in the same day
.group_by(["peer_id", "port"])
.agg()
.group_by("port")
.agg(peers=pl.count("peer_id"))
.with_columns(port=pl.col("port").cast(pl.String))
.sort("peers", descending=True)
)

fig = px.bar(
df.head(20),
x="port",
y="peers",
)
fig.update_xaxes(type='category')
fig.update_layout(
title="Popularity of ports",
xaxis_title=None,
yaxis_title="Connected peers",
width=1200,
height=800,
)
```
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ dependencies = [
"jupytext>=1.18.1",
"jupyterlab>=4.5.0",
"pyyaml>=6.0.3",
"polars>=1.36.1",
]

[dependency-groups]
Expand Down
5 changes: 5 additions & 0 deletions queries/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
)
from queries.blob_flow import fetch_proposer_blobs
from queries.column_propagation import fetch_col_first_seen, NUM_COLUMNS
from queries.network_overview import (
fetch_xatu_client_connectivity,
)

__all__ = [
# Blob inclusion
Expand All @@ -24,4 +27,6 @@
# Column propagation
"fetch_col_first_seen",
"NUM_COLUMNS",
# Network overview
"fetch_xatu_client_connectivity",
]
49 changes: 49 additions & 0 deletions queries/network_overview.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""
Fetch functions for network overview analysis.

Each function executes SQL and writes directly to Parquet.
"""

from pathlib import Path


def _get_date_filter(target_date: str, column: str = "slot_start_date_time") -> str:
"""Generate SQL date filter for a specific date."""
return f"{column} BETWEEN '{target_date}' AND '{target_date}'::date + INTERVAL 1 DAY"


def fetch_xatu_client_connectivity(
client,
target_date: str,
output_path: Path,
network: str = "mainnet",
) -> int:
"""Fetch the unique number of peer_ids know using the gossipsub synthetic_heartbeat
data and write to Parquet.

Returns row count.
"""
date_filter = _get_date_filter(target_date, column="event_date_time")

query = f"""
SELECT
toStartOfInterval(event_date_time, INTERVAL 1 hour) AS hour_bucket,
remote_peer_id_unique_key as peer_id,
remote_protocol as protocol,
remote_transport_protocol as transport_protocol,
remote_port as port,
remote_agent_implementation as client_name,
meta_client_name as local_name,
remote_geo_country_code as geo_country_code
FROM libp2p_connected_local
WHERE
meta_network_name LIKE '{network}'
AND {date_filter}
ORDER BY hour_bucket ASC
"""

df = client.query_df(query)
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_parquet(output_path, index=False)
return len(df)

2 changes: 2 additions & 0 deletions scripts/fetch_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
fetch_slot_in_epoch,
fetch_proposer_blobs,
fetch_col_first_seen,
fetch_xatu_client_connectivity,
)

# List of (name, fetcher) tuples
Expand All @@ -39,6 +40,7 @@
("slot_in_epoch", fetch_slot_in_epoch),
("proposer_blobs", fetch_proposer_blobs),
("col_first_seen", fetch_col_first_seen),
("xatu_client_connectivity", fetch_xatu_client_connectivity),
]


Expand Down
1 change: 1 addition & 0 deletions scripts/prepare_publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
("01-blob-inclusion", "Blob inclusion"),
("02-blob-flow", "Blob flow"),
("03-column-propagation", "Column propagation"),
("04-network-overview", "Network overview"),
]

DATA_ROOT = Path("notebooks/data")
Expand Down
30 changes: 29 additions & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.