Skip to content
Draft
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
1 change: 0 additions & 1 deletion README.md
Original file line number Diff line number Diff line change
Expand Up @@ -181,6 +181,5 @@ python -m http.server -d _site
df = load_parquet("my_data")
# Visualize...
```
```

4. **Add to site** in `_quarto.yml` navbar
8 changes: 3 additions & 5 deletions _quarto.yml
Original file line number Diff line number Diff line change
Expand Up @@ -15,18 +15,16 @@ website:
contents:
- text: Introduction
href: index.qmd
- section: '2025-12-09'
- section: '2025-12-14'
contents:
- text: Blob inclusion
href: notebooks/01-blob-inclusion.qmd
- text: Blob flow
href: notebooks/02-blob-flow.qmd
- text: Column propagation
href: notebooks/03-column-propagation.qmd
- section: Historical
contents:
- text: '2025-12-08'
href: 20251208/index.qmd
- text: Network overview
href: notebooks/04-network-overview.qmd
format:
html:
theme:
Expand Down
1 change: 1 addition & 0 deletions index.qmd
Original file line number Diff line number Diff line change
Expand Up @@ -11,6 +11,7 @@ A collection of notebooks analyzing P2P dynamics in Ethereum networks. Currently
- [Blob inclusion](notebooks/01-blob-inclusion.qmd): Blob inclusion patterns per block and epoch.
- [Blob flow](notebooks/02-blob-flow.qmd): Flow diagrams tracing blob packing per entities, builders, and relays.
- [Column propagation](notebooks/03-column-propagation.qmd): Column propagation timing across 128 data columns subnets.
- [Network Overview](notebooks/04-network-overview.qmd): General view of the P2P network.

## Generation

Expand Down
176 changes: 176 additions & 0 deletions notebooks/04-network-overview.qmd
Original file line number Diff line number Diff line change
@@ -0,0 +1,176 @@
---
title: "Network Overview"
---

Analysis notebook computing an overall network overview from the Xatu sentry nodes on Ethereum mainnet.

```{python}
#| tags: [parameters]
target_date = None # Set via papermill, or auto-detect from manifest
network = None # Set via papermill, or auto-detect from manifest
```

```{python}
import polars as pl
import plotly.express as px
from loaders import load_parquet

raw_df = load_parquet("xatu_client_connectivity", target_date)
```

## Total unique peers in the network

```{python}
# Display the number of unique peers in the network
df = (
pl.from_pandas(raw_df)
.group_by("hour_bucket")
.agg(unique_peers=pl.col("peer_id").n_unique())
.sort("hour_bucket")
)

fig = px.line(
df,
x="hour_bucket",
y="unique_peers",
)

fig.update_layout(
title="Total number of unique peers",
xaxis_title="Date",
yaxis_title="Unique peers",
)
```

## Client distribution of the unique peers

```{python}
# get the number of unique peers
df = (
pl.from_pandas(raw_df)
.sort(["hour_bucket", "peer_id", "client_name"], descending=[False, False, True])
.unique(subset=["hour_bucket", "peer_id"], keep="first")
.filter(
pl.col("client_name").is_not_null() & (pl.col("client_name") != "")
)
.group_by(["hour_bucket","client_name"])
.agg(peers=pl.len())
.sort("hour_bucket", "peers")
)

fig = px.area(
df,
x="hour_bucket",
y="peers",
color="client_name",
)

fig.update_layout(
title="Total number of unique peers",
xaxis_title="Date",
yaxis_title="Peers",
width=1200,
height=800,
)
```

## Number of connections from each Xatu node

```{python}
# Plot the number of connections per each Xatu node
df = (
pl.from_pandas(raw_df)
.group_by(["hour_bucket", "local_name"])
.agg(peers=pl.col("peer_id").n_unique())
.sort("hour_bucket")
.with_columns(
pl.col("local_name").str.replace(f"ethpandaops/{network}/", "")
)
)

fig = px.line(
df,
x="hour_bucket",
y="peers",
color="local_name",
)

fig.update_layout(
title="Connections per Xatu nodes",
xaxis_title=None,
yaxis_title="Connected peers",
legend=dict(
title="Client Names",
orientation = "h",
yanchor="top",
y=-.25,
xanchor="center",
x=0.5,
# entrywidth=300,
),
width=1200,
height=800,
)
```

## Distribution of connections to peers on each IP protocol + Transport protocol combination

```{python}
df = (
pl.from_pandas(raw_df)
.group_by(["hour_bucket", "peer_id", "protocol"])
.agg(
all_transports=pl.col("transport_protocol").unique().sort().str.join(" & ")
)
.with_columns(
protocol_combos=pl.col("protocol") + " + (" + pl.col("all_transports") + ")"
)
.group_by(["hour_bucket", "protocol_combos"])
.agg(peers=pl.count("peer_id"))
.sort("hour_bucket")
)

fig = px.line(
df,
x="hour_bucket",
y="peers",
color="protocol_combos",
)

fig.update_layout(
title="Transport protocol distribution for Xatu nodes",
yaxis_title="Connected peers",
width=1200,
height=800,
)

```

## Popularity of ports

```{python}
df = (
pl.from_pandas(raw_df)
# this might double count peers that use different ports in the same day
.group_by(["peer_id", "port"])
.agg()
.group_by("port")
.agg(peers=pl.count("peer_id"))
.with_columns(port=pl.col("port").cast(pl.String))
.sort("peers", descending=True)
)

fig = px.bar(
df.head(20),
x="port",
y="peers",
)
fig.update_xaxes(type='category')
fig.update_layout(
title="Popularity of ports",
xaxis_title=None,
yaxis_title="Connected peers",
width=1200,
height=800,
)
```
1 change: 1 addition & 0 deletions pyproject.toml
Original file line number Diff line number Diff line change
Expand Up @@ -14,6 +14,7 @@ dependencies = [
"jupytext>=1.18.1",
"jupyterlab>=4.5.0",
"pyyaml>=6.0.3",
"polars>=1.36.1",
]

[dependency-groups]
Expand Down
5 changes: 5 additions & 0 deletions queries/__init__.py
Original file line number Diff line number Diff line change
Expand Up @@ -12,6 +12,9 @@
)
from queries.blob_flow import fetch_proposer_blobs
from queries.column_propagation import fetch_col_first_seen, NUM_COLUMNS
from queries.network_overview import (
fetch_xatu_client_connectivity,
)

__all__ = [
# Blob inclusion
Expand All @@ -24,4 +27,6 @@
# Column propagation
"fetch_col_first_seen",
"NUM_COLUMNS",
# Network overview
"fetch_xatu_client_connectivity",
]
49 changes: 49 additions & 0 deletions queries/network_overview.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,49 @@
"""
Fetch functions for network overview analysis.

Each function executes SQL and writes directly to Parquet.
"""

from pathlib import Path


def _get_date_filter(target_date: str, column: str = "slot_start_date_time") -> str:
"""Generate SQL date filter for a specific date."""
return f"{column} BETWEEN '{target_date}' AND '{target_date}'::date + INTERVAL 1 DAY"


def fetch_xatu_client_connectivity(
client,
target_date: str,
output_path: Path,
network: str = "mainnet",
) -> int:
"""Fetch the unique number of peer_ids know using the gossipsub synthetic_heartbeat
data and write to Parquet.

Returns row count.
"""
date_filter = _get_date_filter(target_date, column="event_date_time")

query = f"""
SELECT
toStartOfInterval(event_date_time, INTERVAL 1 hour) AS hour_bucket,
remote_peer_id_unique_key as peer_id,
remote_protocol as protocol,
remote_transport_protocol as transport_protocol,
remote_port as port,
remote_agent_implementation as client_name,
meta_client_name as local_name,
remote_geo_country_code as geo_country_code
FROM libp2p_connected_local
WHERE
meta_network_name LIKE '{network}'
AND {date_filter}
ORDER BY hour_bucket ASC
"""

df = client.query_df(query)
output_path.parent.mkdir(parents=True, exist_ok=True)
df.to_parquet(output_path, index=False)
return len(df)

2 changes: 2 additions & 0 deletions scripts/fetch_data.py
Original file line number Diff line number Diff line change
Expand Up @@ -29,6 +29,7 @@
fetch_slot_in_epoch,
fetch_proposer_blobs,
fetch_col_first_seen,
fetch_xatu_client_connectivity,
)

# List of (name, fetcher) tuples
Expand All @@ -39,6 +40,7 @@
("slot_in_epoch", fetch_slot_in_epoch),
("proposer_blobs", fetch_proposer_blobs),
("col_first_seen", fetch_col_first_seen),
("xatu_client_connectivity", fetch_xatu_client_connectivity),
]


Expand Down
1 change: 1 addition & 0 deletions scripts/prepare_publish.py
Original file line number Diff line number Diff line change
Expand Up @@ -20,6 +20,7 @@
("01-blob-inclusion", "Blob inclusion"),
("02-blob-flow", "Blob flow"),
("03-column-propagation", "Column propagation"),
("04-network-overview", "Network overview"),
]

DATA_ROOT = Path("notebooks/data")
Expand Down
30 changes: 29 additions & 1 deletion uv.lock

Some generated files are not rendered by default. Learn more about how customized files appear on GitHub.