diff --git a/README.md b/README.md index 8de06b3..9e133c0 100644 --- a/README.md +++ b/README.md @@ -181,6 +181,5 @@ python -m http.server -d _site df = load_parquet("my_data") # Visualize... ``` - ``` 4. **Add to site** in `_quarto.yml` navbar diff --git a/_quarto.yml b/_quarto.yml index bcf684a..0ca7b37 100644 --- a/_quarto.yml +++ b/_quarto.yml @@ -15,7 +15,7 @@ website: contents: - text: Introduction href: index.qmd - - section: '2025-12-09' + - section: '2025-12-14' contents: - text: Blob inclusion href: notebooks/01-blob-inclusion.qmd @@ -23,10 +23,8 @@ website: href: notebooks/02-blob-flow.qmd - text: Column propagation href: notebooks/03-column-propagation.qmd - - section: Historical - contents: - - text: '2025-12-08' - href: 20251208/index.qmd + - text: Network overview + href: notebooks/04-network-overview.qmd format: html: theme: diff --git a/index.qmd b/index.qmd index cb90a50..5d2c135 100644 --- a/index.qmd +++ b/index.qmd @@ -11,6 +11,7 @@ A collection of notebooks analyzing P2P dynamics in Ethereum networks. Currently - [Blob inclusion](notebooks/01-blob-inclusion.qmd): Blob inclusion patterns per block and epoch. - [Blob flow](notebooks/02-blob-flow.qmd): Flow diagrams tracing blob packing per entities, builders, and relays. - [Column propagation](notebooks/03-column-propagation.qmd): Column propagation timing across 128 data columns subnets. +- [Network Overview](notebooks/04-network-overview.qmd): General view of the p2p network. ## Generation diff --git a/notebooks/04-network-overview.qmd b/notebooks/04-network-overview.qmd new file mode 100644 index 0000000..d16627a --- /dev/null +++ b/notebooks/04-network-overview.qmd @@ -0,0 +1,176 @@ +--- +tittle: "Network Overview" +--- + +Analysis script to compute the overall network overview from the Xatu sentry nodes on Ethereum mainnet. + +```{python} +#| tags: [parameters] +target_date = None # Set via papermill, or auto-detect from manifest +network = None # Set via papermill, or auto-detect from manifest +``` + +```{python} +import polars as pl +import plotly.express as px +from loaders import load_parquet + +raw_df = load_parquet("xatu_client_connectivity", target_date) +``` + +## Total unique peers in the network + +```{python} +# Display the number of unique peers in the network +df = ( + pl.from_pandas(raw_df) + .group_by("hour_bucket") + .agg(unique_peers=pl.col("peer_id").n_unique()) + .sort("hour_bucket") +) + +fig = px.line( + df, + x="hour_bucket", + y="unique_peers", +) + +fig.update_layout( + title="Total number of unique peers", + xaxis_title="Date", + yaxis_title="Unique peers", +) +``` + +## Client distribution of the unique peers + +```{python} +# get the number of unique peers +df = ( + pl.from_pandas(raw_df) + .sort(["hour_bucket", "peer_id", "client_name"], descending=[False, False, True]) + .unique(subset=["hour_bucket", "peer_id"], keep="first") + .filter( + pl.col("client_name").is_not_null() & (pl.col("client_name") != "") + ) + .group_by(["hour_bucket","client_name"]) + .agg(peers=pl.len()) + .sort("hour_bucket", "peers") +) + +fig = px.area( + df, + x="hour_bucket", + y="peers", + color="client_name", +) + +fig.update_layout( + title="Total number of unique peers", + xaxis_title="Date", + yaxis_title="Peers", + width=1200, + height=800, +) +``` + +## Number of connections from each Xatu node + +```{python} +# Plot the number of connections per each Xatu node +df = ( + pl.from_pandas(raw_df) + .group_by(["hour_bucket", "local_name"]) + .agg(peers=pl.col("peer_id").n_unique()) + .sort("hour_bucket") + .with_columns( + pl.col("local_name").str.replace(f"ethpandaops/{network}/", "") + ) +) + +fig = px.line( + df, + x="hour_bucket", + y="peers", + color="local_name", +) + +fig.update_layout( + title="Connections per Xatu nodes", + xaxis_title=None, + yaxis_title="Connected peers", + legend=dict( + title="Client Names", + orientation = "h", + yanchor="top", + y=-.25, + xanchor="center", + x=0.5, + # entrywidth=300, + ), + width=1200, + height=800, +) +``` + +## Distribution of connections to peers on each IP protocol + Transport protocol combination + +```{python} +df = ( + pl.from_pandas(raw_df) + .group_by(["hour_bucket", "peer_id", "protocol"]) + .agg( + all_transports=pl.col("transport_protocol").unique().sort().str.join(" & ") + ) + .with_columns( + protocol_combos=pl.col("protocol") + " + (" + pl.col("all_transports") + ")" + ) + .group_by(["hour_bucket", "protocol_combos"]) + .agg(peers=pl.count("peer_id")) + .sort("hour_bucket") +) + +fig = px.line( + df, + x="hour_bucket", + y="peers", + color="protocol_combos", +) + +fig.update_layout( + title="Transport protocol distribution for Xatu nodes", + yaxis_title="Connected peers", + width=1200, + height=800, +) + +``` + +## Popularity of ports + +```{python} +df = ( + pl.from_pandas(raw_df) + # this might double count peers that use different ports in the same day + .group_by(["peer_id", "port"]) + .agg() + .group_by("port") + .agg(peers=pl.count("peer_id")) + .with_columns(port=pl.col("port").cast(pl.String)) + .sort("peers", descending=True) +) + +fig = px.bar( + df.head(20), + x="port", + y="peers", +) +fig.update_xaxes(type='category') +fig.update_layout( + title="Popularity of ports", + xaxis_title=None, + yaxis_title="Connected peers", + width=1200, + height=800, +) +``` \ No newline at end of file diff --git a/pyproject.toml b/pyproject.toml index a2c210a..44ddfdb 100644 --- a/pyproject.toml +++ b/pyproject.toml @@ -14,6 +14,7 @@ dependencies = [ "jupytext>=1.18.1", "jupyterlab>=4.5.0", "pyyaml>=6.0.3", + "polars>=1.36.1", ] [dependency-groups] diff --git a/queries/__init__.py b/queries/__init__.py index a767c6d..2b31343 100644 --- a/queries/__init__.py +++ b/queries/__init__.py @@ -12,6 +12,9 @@ ) from queries.blob_flow import fetch_proposer_blobs from queries.column_propagation import fetch_col_first_seen, NUM_COLUMNS +from queries.network_overview import ( + fetch_xatu_client_connectivity, +) __all__ = [ # Blob inclusion @@ -24,4 +27,6 @@ # Column propagation "fetch_col_first_seen", "NUM_COLUMNS", + # Network overview + "fetch_xatu_client_connectivity", ] diff --git a/queries/network_overview.py b/queries/network_overview.py new file mode 100644 index 0000000..ac84c09 --- /dev/null +++ b/queries/network_overview.py @@ -0,0 +1,49 @@ +""" +Fetch functions for network overview analysis. + +Each function executes SQL and writes directly to Parquet. +""" + +from pathlib import Path + + +def _get_date_filter(target_date: str, column: str = "slot_start_date_time") -> str: + """Generate SQL date filter for a specific date.""" + return f"{column} BETWEEN '{target_date}' AND '{target_date}'::date + INTERVAL 1 DAY" + + +def fetch_xatu_client_connectivity( + client, + target_date: str, + output_path: Path, + network: str = "mainnet", +) -> int: + """Fetch the unique number of peer_ids know using the gossipsub synthetic_heartbeat + data and write to Parquet. + + Returns row count. + """ + date_filter = _get_date_filter(target_date, column="event_date_time") + + query = f""" +SELECT + toStartOfInterval(event_date_time, INTERVAL 1 hour) AS hour_bucket, + remote_peer_id_unique_key as peer_id, + remote_protocol as protocol, + remote_transport_protocol as transport_protocol, + remote_port as port, + remote_agent_implementation as client_name, + meta_client_name as local_name, + remote_geo_country_code as geo_country_code +FROM libp2p_connected_local +WHERE + meta_network_name LIKE '{network}' + AND {date_filter} +ORDER BY hour_bucket ASC +""" + + df = client.query_df(query) + output_path.parent.mkdir(parents=True, exist_ok=True) + df.to_parquet(output_path, index=False) + return len(df) + diff --git a/scripts/fetch_data.py b/scripts/fetch_data.py index 06e0855..6f1e4f9 100644 --- a/scripts/fetch_data.py +++ b/scripts/fetch_data.py @@ -29,6 +29,7 @@ fetch_slot_in_epoch, fetch_proposer_blobs, fetch_col_first_seen, + fetch_xatu_client_connectivity, ) # List of (name, fetcher) tuples @@ -39,6 +40,7 @@ ("slot_in_epoch", fetch_slot_in_epoch), ("proposer_blobs", fetch_proposer_blobs), ("col_first_seen", fetch_col_first_seen), + ("xatu_client_connectivity", fetch_xatu_client_connectivity), ] diff --git a/scripts/prepare_publish.py b/scripts/prepare_publish.py index 63fa426..c441da0 100644 --- a/scripts/prepare_publish.py +++ b/scripts/prepare_publish.py @@ -20,6 +20,7 @@ ("01-blob-inclusion", "Blob inclusion"), ("02-blob-flow", "Blob flow"), ("03-column-propagation", "Column propagation"), + ("04-network-overview", "Network overview"), ] DATA_ROOT = Path("notebooks/data") diff --git a/uv.lock b/uv.lock index b60929a..c3da5d2 100644 --- a/uv.lock +++ b/uv.lock @@ -1,5 +1,5 @@ version = 1 -revision = 2 +revision = 3 requires-python = ">=3.13" resolution-markers = [ "python_full_version >= '3.14'", @@ -1175,6 +1175,7 @@ dependencies = [ { name = "numpy" }, { name = "pandas" }, { name = "plotly" }, + { name = "polars" }, { name = "pyarrow" }, { name = "python-dotenv" }, { name = "pyyaml" }, @@ -1197,6 +1198,7 @@ requires-dist = [ { name = "numpy", specifier = ">=1.26" }, { name = "pandas", specifier = ">=2.0" }, { name = "plotly", specifier = ">=5.0" }, + { name = "polars", specifier = ">=1.36.1" }, { name = "pyarrow", specifier = ">=22.0.0" }, { name = "python-dotenv", specifier = ">=1.0" }, { name = "pyyaml", specifier = ">=6.0.3" }, @@ -1244,6 +1246,32 @@ wheels = [ { url = "https://files.pythonhosted.org/packages/e7/c3/3031c931098de393393e1f93a38dc9ed6805d86bb801acc3cf2d5bd1e6b7/plotly-6.5.0-py3-none-any.whl", hash = "sha256:5ac851e100367735250206788a2b1325412aa4a4917a4fe3e6f0bc5aa6f3d90a", size = 9893174, upload-time = "2025-11-17T18:39:20.351Z" }, ] +[[package]] +name = "polars" +version = "1.36.1" +source = { registry = "https://pypi.org/simple" } +dependencies = [ + { name = "polars-runtime-32" }, +] +sdist = { url = "https://files.pythonhosted.org/packages/9f/dc/56f2a90c79a2cb13f9e956eab6385effe54216ae7a2068b3a6406bae4345/polars-1.36.1.tar.gz", hash = "sha256:12c7616a2305559144711ab73eaa18814f7aa898c522e7645014b68f1432d54c", size = 711993, upload-time = "2025-12-10T01:14:53.033Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/f6/c6/36a1b874036b49893ecae0ac44a2f63d1a76e6212631a5b2f50a86e0e8af/polars-1.36.1-py3-none-any.whl", hash = "sha256:853c1bbb237add6a5f6d133c15094a9b727d66dd6a4eb91dbb07cdb056b2b8ef", size = 802429, upload-time = "2025-12-10T01:13:53.838Z" }, +] + +[[package]] +name = "polars-runtime-32" +version = "1.36.1" +source = { registry = "https://pypi.org/simple" } +sdist = { url = "https://files.pythonhosted.org/packages/31/df/597c0ef5eb8d761a16d72327846599b57c5d40d7f9e74306fc154aba8c37/polars_runtime_32-1.36.1.tar.gz", hash = "sha256:201c2cfd80ceb5d5cd7b63085b5fd08d6ae6554f922bcb941035e39638528a09", size = 2788751, upload-time = "2025-12-10T01:14:54.172Z" } +wheels = [ + { url = "https://files.pythonhosted.org/packages/e1/ea/871129a2d296966c0925b078a9a93c6c5e7facb1c5eebfcd3d5811aeddc1/polars_runtime_32-1.36.1-cp39-abi3-macosx_10_12_x86_64.whl", hash = "sha256:327b621ca82594f277751f7e23d4b939ebd1be18d54b4cdf7a2f8406cecc18b2", size = 43494311, upload-time = "2025-12-10T01:13:56.096Z" }, + { url = "https://files.pythonhosted.org/packages/d8/76/0038210ad1e526ce5bb2933b13760d6b986b3045eccc1338e661bd656f77/polars_runtime_32-1.36.1-cp39-abi3-macosx_11_0_arm64.whl", hash = "sha256:ab0d1f23084afee2b97de8c37aa3e02ec3569749ae39571bd89e7a8b11ae9e83", size = 39300602, upload-time = "2025-12-10T01:13:59.366Z" }, + { url = "https://files.pythonhosted.org/packages/54/1e/2707bee75a780a953a77a2c59829ee90ef55708f02fc4add761c579bf76e/polars_runtime_32-1.36.1-cp39-abi3-manylinux_2_17_x86_64.manylinux2014_x86_64.whl", hash = "sha256:899b9ad2e47ceb31eb157f27a09dbc2047efbf4969a923a6b1ba7f0412c3e64c", size = 44511780, upload-time = "2025-12-10T01:14:02.285Z" }, + { url = "https://files.pythonhosted.org/packages/11/b2/3fede95feee441be64b4bcb32444679a8fbb7a453a10251583053f6efe52/polars_runtime_32-1.36.1-cp39-abi3-manylinux_2_24_aarch64.whl", hash = "sha256:d9d077bb9df711bc635a86540df48242bb91975b353e53ef261c6fae6cb0948f", size = 40688448, upload-time = "2025-12-10T01:14:05.131Z" }, + { url = "https://files.pythonhosted.org/packages/05/0f/e629713a72999939b7b4bfdbf030a32794db588b04fdf3dc977dd8ea6c53/polars_runtime_32-1.36.1-cp39-abi3-win_amd64.whl", hash = "sha256:cc17101f28c9a169ff8b5b8d4977a3683cd403621841623825525f440b564cf0", size = 44464898, upload-time = "2025-12-10T01:14:08.296Z" }, + { url = "https://files.pythonhosted.org/packages/d1/d8/a12e6aa14f63784cead437083319ec7cece0d5bb9a5bfe7678cc6578b52a/polars_runtime_32-1.36.1-cp39-abi3-win_arm64.whl", hash = "sha256:809e73857be71250141225ddd5d2b30c97e6340aeaa0d445f930e01bef6888dc", size = 39798896, upload-time = "2025-12-10T01:14:11.568Z" }, +] + [[package]] name = "prometheus-client" version = "0.23.1"