diff --git a/notebooks/04-mempool-visibility.ipynb b/notebooks/04-mempool-visibility.ipynb index 22acdd8..7d3dd99 100644 --- a/notebooks/04-mempool-visibility.ipynb +++ b/notebooks/04-mempool-visibility.ipynb @@ -4,7 +4,9 @@ "cell_type": "markdown", "metadata": {}, "source": [ - "Analysis of transaction visibility in the public mempool before block inclusion on Ethereum mainnet." + "Analysis of transaction visibility in the public mempool before block inclusion on Ethereum mainnet.\n", + "\n", + "**Methodology:** A transaction is counted as \"seen in mempool\" only if it was observed by our sentries *before* the slot start time of the block that included it. This corrects for transactions that appear in the mempool after block propagation." ] }, { @@ -16,7 +18,41 @@ ] }, "outputs": [], - "source": "import pandas as pd\nimport plotly.express as px\nimport plotly.graph_objects as go\n\nfrom loaders import load_parquet, display_sql\n\n# Transaction type labels\nTX_TYPE_LABELS = {\n 0: \"Legacy\",\n 1: \"Access list\",\n 2: \"EIP-1559\",\n 3: \"Blob\",\n 4: \"EIP-7702\",\n}\n\nTX_TYPE_COLORS = {\n 0: \"#636EFA\",\n 1: \"#EF553B\",\n 2: \"#00CC96\",\n 3: \"#AB63FA\",\n 4: \"#FFA15A\",\n}\n\ntarget_date = None # Set via papermill, or auto-detect from manifest" + "source": [ + "import pandas as pd\n", + "import numpy as np\n", + "import plotly.express as px\n", + "import plotly.graph_objects as go\n", + "from plotly.subplots import make_subplots\n", + "\n", + "from loaders import load_parquet, display_sql\n", + "\n", + "# Transaction type labels and colors\n", + "TX_TYPE_LABELS = {\n", + " 0: \"Legacy\",\n", + " 1: \"Access list\",\n", + " 2: \"EIP-1559\",\n", + " 3: \"Blob\",\n", + " 4: \"EIP-7702\",\n", + "}\n", + "\n", + "TX_TYPE_COLORS = {\n", + " 0: \"#636EFA\",\n", + " 1: \"#EF553B\",\n", + " 2: \"#00CC96\",\n", + " 3: \"#AB63FA\",\n", + " 4: \"#FFA15A\",\n", + "}\n", + "\n", + "# Histogram bucket labels (log2 seconds, up to 1 hour)\n", + "HIST_LABELS = [\n", + " \"<0.5s\", \"0.5-1s\", \"1-2s\", \"2-4s\", \"4-8s\", \"8-16s\",\n", + " \"16-32s\", \"32s-1m\", \"1-2m\", \"2-4m\", \"4-8m\", \"8-17m\",\n", + " \"17-34m\", \"34-60m\", \">=1h\"\n", + "]\n", + "\n", + "target_date = None # Set via papermill, or auto-detect from manifest" + ] }, { "cell_type": "code", @@ -27,14 +63,42 @@ ] }, "outputs": [], - "source": "display_sql(\"mempool_coverage\", target_date)" + "source": [ + "display_sql(\"mempool_availability\", target_date)" + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": "df = load_parquet(\"mempool_coverage\", target_date)\ndf[\"tx_type_label\"] = df[\"tx_type\"].map(TX_TYPE_LABELS)\ndf[\"coverage_pct\"] = df[\"seen_in_mempool\"] / df[\"total_txs\"] * 100\n\nprint(f\"Loaded {len(df):,} hour/type rows\")\nprint(f\"Hours: {df['hour'].nunique():,}\")\nprint(f\"Total transactions: {df['total_txs'].sum():,}\")" + "source": [ + "df = load_parquet(\"mempool_availability\", target_date)\n", + "df[\"tx_type_label\"] = df[\"tx_type\"].map(TX_TYPE_LABELS)\n", + "df[\"coverage_pct\"] = df[\"seen_before_slot\"] / df[\"total_txs\"] * 100\n", + "\n", + "# Calculate never seen (truly private)\n", + "df[\"never_seen\"] = df[\"total_txs\"] - df[\"seen_before_slot\"] - df[\"seen_after_slot\"]\n", + "\n", + "# Extract p50 age from percentiles array (index 0)\n", + "df[\"p50_age_ms\"] = df[\"age_percentiles_ms\"].apply(lambda x: x[0] if x is not None and len(x) > 0 else np.nan)\n", + "df[\"p50_age_s\"] = df[\"p50_age_ms\"] / 1000\n", + "\n", + "# Add hour 
column for time-series aggregation\n", + "df[\"hour\"] = df[\"slot_start_date_time\"].dt.floor(\"h\")\n", + "\n", + "total = df[\"total_txs\"].sum()\n", + "before = df[\"seen_before_slot\"].sum()\n", + "after = df[\"seen_after_slot\"].sum()\n", + "never = total - before - after\n", + "\n", + "print(f\"Loaded {len(df):,} slot/type rows\")\n", + "print(f\"Slots: {df['slot'].nunique():,}\")\n", + "print(f\"Total transactions: {total:,}\")\n", + "print(f\" Seen before slot: {before:,} ({100*before/total:.1f}%)\")\n", + "print(f\" Seen after slot: {after:,} ({100*after/total:.1f}%)\")\n", + "print(f\" Never seen: {never:,} ({100*never/total:.1f}%)\")" + ] }, { "cell_type": "markdown", @@ -42,7 +106,7 @@ "source": [ "## Coverage by transaction type\n", "\n", - "Summary of how many transactions were seen in the public mempool before block inclusion. Low coverage indicates private or MEV transactions that bypass the public mempool." + "Percentage of transactions seen in the public mempool *before* the slot they were included in. Low coverage indicates private or MEV transactions that bypass the public mempool or are submitted just-in-time." ] }, { @@ -54,14 +118,19 @@ "# Aggregate by type\n", "df_summary = df.groupby([\"tx_type\", \"tx_type_label\"]).agg({\n", " \"total_txs\": \"sum\",\n", - " \"seen_in_mempool\": \"sum\",\n", + " \"seen_before_slot\": \"sum\",\n", + " \"seen_after_slot\": \"sum\",\n", "}).reset_index()\n", - "df_summary[\"coverage_pct\"] = df_summary[\"seen_in_mempool\"] / df_summary[\"total_txs\"] * 100\n", + "df_summary[\"never_seen\"] = df_summary[\"total_txs\"] - df_summary[\"seen_before_slot\"] - df_summary[\"seen_after_slot\"]\n", + "df_summary[\"before_pct\"] = df_summary[\"seen_before_slot\"] / df_summary[\"total_txs\"] * 100\n", + "df_summary[\"after_pct\"] = df_summary[\"seen_after_slot\"] / df_summary[\"total_txs\"] * 100\n", + "df_summary[\"never_pct\"] = df_summary[\"never_seen\"] / df_summary[\"total_txs\"] * 100\n", "\n", "# Display summary table\n", - "summary_display = df_summary[[\"tx_type_label\", \"total_txs\", \"seen_in_mempool\", \"coverage_pct\"]].copy()\n", - "summary_display.columns = [\"Type\", \"Total\", \"Seen\", \"Coverage %\"]\n", - "summary_display[\"Coverage %\"] = summary_display[\"Coverage %\"].round(1)\n", + "summary_display = df_summary[[\"tx_type_label\", \"total_txs\", \"before_pct\", \"after_pct\", \"never_pct\"]].copy()\n", + "summary_display.columns = [\"Type\", \"Total\", \"Before slot %\", \"After slot %\", \"Never seen %\"]\n", + "for col in summary_display.columns[2:]:\n", + " summary_display[col] = summary_display[col].round(1)\n", "summary_display" ] }, @@ -71,20 +140,41 @@ "metadata": {}, "outputs": [], "source": [ - "# Coverage bar chart\n", - "fig = px.bar(\n", - " df_summary,\n", - " x=\"tx_type_label\",\n", - " y=\"coverage_pct\",\n", - " color=\"tx_type\",\n", - " color_discrete_map=TX_TYPE_COLORS,\n", - " labels={\"tx_type_label\": \"Transaction type\", \"coverage_pct\": \"Mempool visibility (%)\"},\n", - " text=\"coverage_pct\",\n", - ")\n", - "fig.update_traces(texttemplate=\"%{text:.1f}%\", textposition=\"outside\", showlegend=False)\n", + "# Coverage stacked bar chart showing before/after/never breakdown\n", + "fig = go.Figure()\n", + "\n", + "fig.add_trace(go.Bar(\n", + " x=df_summary[\"tx_type_label\"],\n", + " y=df_summary[\"before_pct\"],\n", + " name=\"Before slot (public)\",\n", + " marker_color=\"#27ae60\",\n", + " text=df_summary[\"before_pct\"].round(1),\n", + " textposition=\"inside\",\n", + "))\n", + 
"fig.add_trace(go.Bar(\n", + " x=df_summary[\"tx_type_label\"],\n", + " y=df_summary[\"after_pct\"],\n", + " name=\"After slot (propagated)\",\n", + " marker_color=\"#3498db\",\n", + " text=df_summary[\"after_pct\"].round(1),\n", + " textposition=\"inside\",\n", + "))\n", + "fig.add_trace(go.Bar(\n", + " x=df_summary[\"tx_type_label\"],\n", + " y=df_summary[\"never_pct\"],\n", + " name=\"Never seen (private)\",\n", + " marker_color=\"#95a5a6\",\n", + " text=df_summary[\"never_pct\"].round(1),\n", + " textposition=\"inside\",\n", + "))\n", + "\n", + "fig.update_traces(texttemplate=\"%{text:.1f}%\")\n", "fig.update_layout(\n", + " barmode=\"stack\",\n", " margin=dict(l=60, r=30, t=30, b=60),\n", - " yaxis=dict(range=[0, 105]),\n", + " xaxis=dict(title=\"Transaction type\"),\n", + " yaxis=dict(title=\"Percentage\", range=[0, 105]),\n", + " legend=dict(orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"left\", x=0),\n", " height=400,\n", ")\n", "fig.show(config={\"responsive\": True})" @@ -104,7 +194,31 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "# Data is already hourly from the query\nfig = px.line(\n df,\n x=\"hour\",\n y=\"coverage_pct\",\n color=\"tx_type_label\",\n color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},\n labels={\"hour\": \"Time\", \"coverage_pct\": \"Mempool visibility (%)\", \"tx_type_label\": \"Type\"},\n markers=True,\n)\nfig.update_layout(\n margin=dict(l=60, r=30, t=30, b=60),\n legend=dict(orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"left\", x=0),\n height=400,\n)\nfig.show(config={\"responsive\": True})" + "source": [ + "# Aggregate to hourly for time-series\n", + "df_hourly = df.groupby([\"hour\", \"tx_type\", \"tx_type_label\"]).agg({\n", + " \"total_txs\": \"sum\",\n", + " \"seen_before_slot\": \"sum\",\n", + " \"seen_after_slot\": \"sum\",\n", + "}).reset_index()\n", + "df_hourly[\"coverage_pct\"] = df_hourly[\"seen_before_slot\"] / df_hourly[\"total_txs\"] * 100\n", + "\n", + "fig = px.line(\n", + " df_hourly,\n", + " x=\"hour\",\n", + " y=\"coverage_pct\",\n", + " color=\"tx_type_label\",\n", + " color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},\n", + " labels={\"hour\": \"Time\", \"coverage_pct\": \"Seen before slot (%)\", \"tx_type_label\": \"Type\"},\n", + " markers=True,\n", + ")\n", + "fig.update_layout(\n", + " margin=dict(l=60, r=30, t=30, b=60),\n", + " legend=dict(orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"left\", x=0),\n", + " height=400,\n", + ")\n", + "fig.show(config={\"responsive\": True})" + ] }, { "cell_type": "markdown", @@ -120,19 +234,338 @@ "execution_count": null, "metadata": {}, "outputs": [], - "source": "# Aggregate across types by hour (already hourly data)\ndf_volume = df.groupby(\"hour\").agg({\n \"total_txs\": \"sum\",\n \"seen_in_mempool\": \"sum\",\n}).reset_index()\ndf_volume[\"private_txs\"] = df_volume[\"total_txs\"] - df_volume[\"seen_in_mempool\"]\n\nfig = go.Figure()\nfig.add_trace(go.Bar(\n x=df_volume[\"hour\"],\n y=df_volume[\"seen_in_mempool\"],\n name=\"Public (seen in mempool)\",\n marker_color=\"#3498db\",\n))\nfig.add_trace(go.Bar(\n x=df_volume[\"hour\"],\n y=df_volume[\"private_txs\"],\n name=\"Private (not seen)\",\n marker_color=\"#95a5a6\",\n))\nfig.update_layout(\n barmode=\"stack\",\n margin=dict(l=60, r=30, t=30, b=60),\n xaxis=dict(title=\"Time\"),\n yaxis=dict(title=\"Transaction count\"),\n legend=dict(orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"left\", x=0),\n 
height=400,\n)\nfig.show(config={\"responsive\": True})" + "source": [ + "# Aggregate across types by hour - 3-way breakdown\n", + "df_volume = df.groupby(\"hour\").agg({\n", + " \"total_txs\": \"sum\",\n", + " \"seen_before_slot\": \"sum\",\n", + " \"seen_after_slot\": \"sum\",\n", + "}).reset_index()\n", + "df_volume[\"never_seen\"] = df_volume[\"total_txs\"] - df_volume[\"seen_before_slot\"] - df_volume[\"seen_after_slot\"]\n", + "\n", + "fig = go.Figure()\n", + "fig.add_trace(go.Bar(\n", + " x=df_volume[\"hour\"],\n", + " y=df_volume[\"seen_before_slot\"],\n", + " name=\"Before slot (public)\",\n", + " marker_color=\"#27ae60\",\n", + "))\n", + "fig.add_trace(go.Bar(\n", + " x=df_volume[\"hour\"],\n", + " y=df_volume[\"seen_after_slot\"],\n", + " name=\"After slot (propagated)\",\n", + " marker_color=\"#3498db\",\n", + "))\n", + "fig.add_trace(go.Bar(\n", + " x=df_volume[\"hour\"],\n", + " y=df_volume[\"never_seen\"],\n", + " name=\"Never seen (private)\",\n", + " marker_color=\"#95a5a6\",\n", + "))\n", + "fig.update_layout(\n", + " barmode=\"stack\",\n", + " margin=dict(l=60, r=30, t=30, b=60),\n", + " xaxis=dict(title=\"Time\"),\n", + " yaxis=dict(title=\"Transaction count\"),\n", + " legend=dict(orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"left\", x=0),\n", + " height=400,\n", + ")\n", + "fig.show(config={\"responsive\": True})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Coverage heatmap\n", + "\n", + "Heatmap showing mempool visibility over time for each transaction type. Darker colors indicate higher coverage (more transactions seen in the public mempool)." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Pivot for heatmap using hourly aggregated data\n", + "df_pivot = df_hourly.pivot(index=\"tx_type_label\", columns=\"hour\", values=\"coverage_pct\").fillna(0)\n", + "\n", + "fig = go.Figure(\n", + " data=go.Heatmap(\n", + " z=df_pivot.values,\n", + " x=df_pivot.columns,\n", + " y=df_pivot.index,\n", + " colorscale=\"Greens\",\n", + " colorbar=dict(title=dict(text=\"Coverage %\", side=\"right\")),\n", + " )\n", + ")\n", + "fig.update_layout(\n", + " margin=dict(l=100, r=30, t=30, b=60),\n", + " xaxis=dict(title=\"Time\"),\n", + " yaxis=dict(title=\"Transaction type\"),\n", + " height=300,\n", + ")\n", + "fig.show(config={\"responsive\": True})" + ] }, { "cell_type": "markdown", "metadata": {}, - "source": "## Coverage heatmap\n\nHeatmap showing mempool visibility over time for each transaction type. Darker colors indicate higher coverage (more transactions seen in the public mempool)." + "source": [ + "## Mempool age distribution\n", + "\n", + "How long transactions waited in the mempool before being included in a block. The age is measured from first observation in our sentries to the slot start time. Only transactions seen *before* their inclusion slot are counted." 
+ ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Extract all percentiles for each type\n", + "def extract_percentiles(group):\n", + " # Collect all non-null percentile arrays, weighted by seen_before_slot count\n", + " pct_arrays = []\n", + " for _, row in group.iterrows():\n", + " if row['seen_before_slot'] > 0 and row['age_percentiles_ms'] is not None:\n", + " pcts = row['age_percentiles_ms']\n", + " if not any(np.isnan(pcts)):\n", + " pct_arrays.append(pcts)\n", + " \n", + " if not pct_arrays:\n", + " return pd.Series({'p50': np.nan, 'p75': np.nan, 'p80': np.nan, 'p85': np.nan, 'p90': np.nan, 'p95': np.nan, 'p99': np.nan})\n", + " \n", + " # Average percentiles across slots (simple mean for now)\n", + " avg_pcts = np.nanmean(pct_arrays, axis=0)\n", + " return pd.Series({\n", + " 'p50': avg_pcts[0] / 1000,\n", + " 'p75': avg_pcts[1] / 1000,\n", + " 'p80': avg_pcts[2] / 1000,\n", + " 'p85': avg_pcts[3] / 1000,\n", + " 'p90': avg_pcts[4] / 1000,\n", + " 'p95': avg_pcts[5] / 1000,\n", + " 'p99': avg_pcts[6] / 1000,\n", + " })\n", + "\n", + "df_age = df.groupby(['tx_type', 'tx_type_label']).apply(extract_percentiles, include_groups=False).reset_index()\n", + "\n", + "# Display age table\n", + "age_display = df_age[['tx_type_label', 'p50', 'p75', 'p90', 'p95', 'p99']].copy()\n", + "age_display.columns = ['Type', 'p50 (s)', 'p75 (s)', 'p90 (s)', 'p95 (s)', 'p99 (s)']\n", + "for col in age_display.columns[1:]:\n", + " age_display[col] = age_display[col].round(1)\n", + "age_display" + ] }, { "cell_type": "code", "execution_count": null, "metadata": {}, "outputs": [], - "source": "# Pivot for heatmap using hourly data directly\ndf_pivot = df.pivot(index=\"tx_type_label\", columns=\"hour\", values=\"coverage_pct\").fillna(0)\n\nfig = go.Figure(\n data=go.Heatmap(\n z=df_pivot.values,\n x=df_pivot.columns,\n y=df_pivot.index,\n colorscale=\"Greens\",\n colorbar=dict(title=dict(text=\"Coverage %\", side=\"right\")),\n )\n)\nfig.update_layout(\n margin=dict(l=100, r=30, t=30, b=60),\n xaxis=dict(title=\"Time\"),\n yaxis=dict(title=\"Transaction type\"),\n height=300,\n)\nfig.show(config={\"responsive\": True})" + "source": [ + "# Visualize age percentiles as line chart\n", + "df_age_long = df_age.melt(\n", + " id_vars=['tx_type', 'tx_type_label'],\n", + " value_vars=['p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99'],\n", + " var_name='percentile',\n", + " value_name='age_s'\n", + ")\n", + "# Convert percentile labels to numeric for x-axis\n", + "df_age_long['pct_num'] = df_age_long['percentile'].str.replace('p', '').astype(int)\n", + "\n", + "fig = px.line(\n", + " df_age_long,\n", + " x='pct_num',\n", + " y='age_s',\n", + " color='tx_type_label',\n", + " color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},\n", + " markers=True,\n", + " log_y=True,\n", + " labels={'pct_num': 'Percentile', 'age_s': 'Age (seconds)', 'tx_type_label': 'Type'},\n", + ")\n", + "fig.update_layout(\n", + " margin=dict(l=60, r=30, t=30, b=60),\n", + " xaxis=dict(tickvals=[50, 75, 80, 85, 90, 95, 99], ticktext=['p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99']),\n", + " legend=dict(orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"left\", x=0),\n", + " height=400,\n", + ")\n", + "fig.show(config={\"responsive\": True})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Aggregate histogram buckets across all slots per tx type\n", + "hist_cols = [f'age_hist_{i}' for 
i in range(15)]\n", + "df_hist = df.groupby(['tx_type', 'tx_type_label'])[hist_cols].sum().reset_index()\n", + "\n", + "# Melt to long format for plotting\n", + "df_hist_long = df_hist.melt(\n", + " id_vars=['tx_type', 'tx_type_label'],\n", + " value_vars=hist_cols,\n", + " var_name='bucket',\n", + " value_name='count'\n", + ")\n", + "df_hist_long['bucket_idx'] = df_hist_long['bucket'].str.extract(r'(\\d+)').astype(int)\n", + "df_hist_long['bucket_label'] = df_hist_long['bucket_idx'].map(dict(enumerate(HIST_LABELS)))\n", + "\n", + "# Sort by bucket index for proper ordering\n", + "df_hist_long = df_hist_long.sort_values(['tx_type', 'bucket_idx'])\n", + "\n", + "fig = px.bar(\n", + " df_hist_long,\n", + " x='bucket_label',\n", + " y='count',\n", + " color='tx_type_label',\n", + " color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},\n", + " facet_col='tx_type_label',\n", + " facet_col_wrap=2,\n", + " labels={'bucket_label': 'Age bucket', 'count': 'Count', 'tx_type_label': 'Type'},\n", + " category_orders={'bucket_label': HIST_LABELS},\n", + ")\n", + "fig.update_yaxes(matches=None, showticklabels=True)\n", + "fig.update_layout(\n", + " margin=dict(l=60, r=30, t=60, b=100),\n", + " showlegend=False,\n", + " height=600,\n", + ")\n", + "fig.update_xaxes(tickangle=45)\n", + "fig.for_each_annotation(lambda a: a.update(text=a.text.split(\"=\")[-1]))\n", + "fig.show(config={\"responsive\": True})" + ] + }, + { + "cell_type": "markdown", + "metadata": {}, + "source": [ + "## Propagation delay (seen after slot)\n", + "\n", + "For transactions first seen in the mempool *after* block inclusion, this measures how long after the slot start they appeared." + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Extract delay percentiles for transactions seen AFTER slot start\n", + "def extract_delay_percentiles(group):\n", + " pct_arrays = []\n", + " for _, row in group.iterrows():\n", + " if row['seen_after_slot'] > 0 and row['delay_percentiles_ms'] is not None:\n", + " pcts = row['delay_percentiles_ms']\n", + " if not any(np.isnan(pcts)):\n", + " pct_arrays.append(pcts)\n", + " \n", + " if not pct_arrays:\n", + " return pd.Series({'p50': np.nan, 'p75': np.nan, 'p80': np.nan, 'p85': np.nan, 'p90': np.nan, 'p95': np.nan, 'p99': np.nan})\n", + " \n", + " avg_pcts = np.nanmean(pct_arrays, axis=0)\n", + " return pd.Series({\n", + " 'p50': avg_pcts[0] / 1000,\n", + " 'p75': avg_pcts[1] / 1000,\n", + " 'p80': avg_pcts[2] / 1000,\n", + " 'p85': avg_pcts[3] / 1000,\n", + " 'p90': avg_pcts[4] / 1000,\n", + " 'p95': avg_pcts[5] / 1000,\n", + " 'p99': avg_pcts[6] / 1000,\n", + " })\n", + "\n", + "df_delay = df.groupby(['tx_type', 'tx_type_label']).apply(extract_delay_percentiles, include_groups=False).reset_index()\n", + "\n", + "# Display delay table\n", + "delay_display = df_delay[['tx_type_label', 'p50', 'p75', 'p90', 'p95', 'p99']].copy()\n", + "delay_display.columns = ['Type', 'p50 (s)', 'p75 (s)', 'p90 (s)', 'p95 (s)', 'p99 (s)']\n", + "for col in delay_display.columns[1:]:\n", + " delay_display[col] = delay_display[col].round(2)\n", + "delay_display" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Visualize delay percentiles as line chart\n", + "df_delay_long = df_delay.melt(\n", + " id_vars=['tx_type', 'tx_type_label'],\n", + " value_vars=['p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99'],\n", + " var_name='percentile',\n", + " value_name='delay_s'\n", + 
")\n", + "# Convert percentile labels to numeric for x-axis\n", + "df_delay_long['pct_num'] = df_delay_long['percentile'].str.replace('p', '').astype(int)\n", + "\n", + "fig = px.line(\n", + " df_delay_long,\n", + " x='pct_num',\n", + " y='delay_s',\n", + " color='tx_type_label',\n", + " color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},\n", + " markers=True,\n", + " log_y=True,\n", + " labels={'pct_num': 'Percentile', 'delay_s': 'Delay (seconds)', 'tx_type_label': 'Type'},\n", + ")\n", + "fig.update_layout(\n", + " margin=dict(l=60, r=30, t=30, b=60),\n", + " xaxis=dict(tickvals=[50, 75, 80, 85, 90, 95, 99], ticktext=['p50', 'p75', 'p80', 'p85', 'p90', 'p95', 'p99']),\n", + " legend=dict(orientation=\"h\", yanchor=\"bottom\", y=1.02, xanchor=\"left\", x=0),\n", + " height=400,\n", + ")\n", + "fig.show(config={\"responsive\": True})" + ] + }, + { + "cell_type": "code", + "execution_count": null, + "metadata": {}, + "outputs": [], + "source": [ + "# Aggregate delay histogram buckets across all slots per tx type\n", + "delay_hist_cols = [f'delay_hist_{i}' for i in range(15)]\n", + "df_delay_hist = df.groupby(['tx_type', 'tx_type_label'])[delay_hist_cols].sum().reset_index()\n", + "\n", + "# Melt to long format for plotting\n", + "df_delay_hist_long = df_delay_hist.melt(\n", + " id_vars=['tx_type', 'tx_type_label'],\n", + " value_vars=delay_hist_cols,\n", + " var_name='bucket',\n", + " value_name='count'\n", + ")\n", + "df_delay_hist_long['bucket_idx'] = df_delay_hist_long['bucket'].str.extract(r'(\\d+)').astype(int)\n", + "df_delay_hist_long['bucket_label'] = df_delay_hist_long['bucket_idx'].map(dict(enumerate(HIST_LABELS)))\n", + "\n", + "# Sort by bucket index for proper ordering\n", + "df_delay_hist_long = df_delay_hist_long.sort_values(['tx_type', 'bucket_idx'])\n", + "\n", + "fig = px.bar(\n", + " df_delay_hist_long,\n", + " x='bucket_label',\n", + " y='count',\n", + " color='tx_type_label',\n", + " color_discrete_map={v: TX_TYPE_COLORS[k] for k, v in TX_TYPE_LABELS.items()},\n", + " facet_col='tx_type_label',\n", + " facet_col_wrap=2,\n", + " labels={'bucket_label': 'Delay bucket', 'count': 'Count', 'tx_type_label': 'Type'},\n", + " category_orders={'bucket_label': HIST_LABELS},\n", + ")\n", + "fig.update_yaxes(matches=None, showticklabels=True)\n", + "fig.update_layout(\n", + " margin=dict(l=60, r=30, t=60, b=100),\n", + " showlegend=False,\n", + " height=600,\n", + ")\n", + "fig.update_xaxes(tickangle=45)\n", + "fig.for_each_annotation(lambda a: a.update(text=a.text.split(\"=\")[-1]))\n", + "fig.show(config={\"responsive\": True})" + ] }, { "cell_type": "markdown", @@ -199,4 +632,4 @@ }, "nbformat": 4, "nbformat_minor": 4 -} \ No newline at end of file +} diff --git a/pipeline.yaml b/pipeline.yaml index 07af0c6..99d1dc2 100644 --- a/pipeline.yaml +++ b/pipeline.yaml @@ -88,6 +88,12 @@ queries: description: Per-sentry mempool coverage rates output_file: sentry_coverage.parquet + mempool_availability: + module: queries.mempool_visibility + function: fetch_mempool_availability + description: Per-slot mempool availability with age percentiles + output_file: mempool_availability.parquet + block_production_timeline: module: queries.block_production_timeline function: fetch_block_production_timeline @@ -153,6 +159,7 @@ notebooks: - tx_per_slot - mempool_coverage - sentry_coverage + - mempool_availability parameters: - name: target_date type: date diff --git a/queries/mempool_visibility.py b/queries/mempool_visibility.py index 4b7435f..5ae013b 100644 --- 
a/queries/mempool_visibility.py
+++ b/queries/mempool_visibility.py
@@ -113,3 +113,117 @@ def fetch_sentry_coverage(
     df = client.query_df(query)
 
     return df, query
+
+
+def fetch_mempool_availability(
+    client,
+    target_date: str,
+    network: str = "mainnet",
+) -> tuple:
+    """Fetch per-slot mempool availability with age percentiles and histograms.
+
+    Categorizes transactions into:
+    - seen_before_slot: Available in mempool before inclusion (public)
+    - seen_after_slot: First appeared in mempool after block propagation
+    - neither: Truly private (never seen in mempool)
+
+    Returns per slot per tx type:
+    - age/delay percentiles (p50, p75, p80, p85, p90, p95, p99)
+    - age/delay histograms (log2 buckets in seconds)
+
+    Histogram buckets (log2 seconds, matching AGE_HIST_LABELS below):
+        0: <0.5s, 1: 0.5-1s, 2: 1-2s, 3: 2-4s, 4: 4-8s, 5: 8-16s, 6: 16-32s, 7: 32s-1m,
+        8: 1-2m, 9: 2-4m, 10: 4-8m, 11: 8-17m, 12: 17-34m, 13: 34-60m, 14: >=1h
+
+    Returns (df, query).
+    """
+    date_filter = _get_date_filter(target_date)
+
+    # Define reusable condition fragments
+    seen_before = """
+        m.first_event_time IS NOT NULL
+        AND m.first_event_time > '2020-01-01'
+        AND m.first_event_time < c.slot_start_date_time"""
+    seen_after = """
+        m.first_event_time IS NOT NULL
+        AND m.first_event_time > '2020-01-01'
+        AND m.first_event_time >= c.slot_start_date_time"""
+
+    # Age = time from first seen to slot start (for seen_before)
+    age_ms = "dateDiff('millisecond', m.first_event_time, c.slot_start_date_time)"
+    # Delay = time from slot start to first seen (for seen_after)
+    delay_ms = "dateDiff('millisecond', c.slot_start_date_time, m.first_event_time)"
+
+    # Log2 bucket boundaries in milliseconds (up to 1 hour)
+    # Buckets: <0.5s, 0.5-1s, 1-2s, 2-4s, 4-8s, 8-16s, 16-32s, 32-64s (32s-1m),
+    # 64-128s (1-2m), 128-256s (2-4m), 256-512s (4-8m), 512-1024s (8-17m),
+    # 1024-2048s (17-34m), 2048-3600s (34-60m), >=3600s (>=1h)
+    bounds_ms = [500, 1000, 2000, 4000, 8000, 16000, 32000, 64000, 128000, 256000, 512000, 1024000, 2048000, 3600000]
+
+    # Generate histogram countIf expressions
+    def hist_columns(value_expr: str, condition: str, prefix: str) -> str:
+        cols = []
+        # Bucket 0: < 0.5s
+        cols.append(f"countIf({value_expr} < {bounds_ms[0]} AND {condition}) AS {prefix}_0")
+        # Buckets 1-13: range buckets between consecutive bounds
+        for i in range(len(bounds_ms) - 1):
+            cols.append(
+                f"countIf({value_expr} >= {bounds_ms[i]} AND {value_expr} < {bounds_ms[i+1]} AND {condition}) AS {prefix}_{i+1}"
+            )
+        # Bucket 14: >= 3600s (1 hour)
+        cols.append(f"countIf({value_expr} >= {bounds_ms[-1]} AND {condition}) AS {prefix}_{len(bounds_ms)}")
+        return ",\n    ".join(cols)
+
+    age_hist = hist_columns(age_ms, seen_before, "age_hist")
+    delay_hist = hist_columns(delay_ms, seen_after, "delay_hist")
+
+    query = f"""
+WITH first_seen AS (
+    SELECT
+        hash,
+        min(event_date_time) AS first_event_time
+    FROM mempool_transaction
+    WHERE meta_network_name = '{network}'
+        AND event_date_time >= '{target_date}'::date - INTERVAL 1 DAY
+        AND event_date_time < '{target_date}'::date + INTERVAL 2 DAY
+    GROUP BY hash
+)
+SELECT
+    c.slot,
+    c.slot_start_date_time,
+    c.type AS tx_type,
+    count() AS total_txs,
+    -- Seen BEFORE slot start (public, available for inclusion)
+    countIf({seen_before}) AS seen_before_slot,
+    -- Seen AFTER slot start (appeared after block propagation)
+    countIf({seen_after}) AS seen_after_slot,
+    -- Age percentiles for transactions seen BEFORE (how long in mempool)
+    quantilesIf(0.50, 0.75, 0.80, 0.85, 0.90, 0.95, 0.99)(
+        {age_ms}, {seen_before}
+    ) AS age_percentiles_ms,
+    -- Delay percentiles for transactions seen AFTER 
(propagation delay) + quantilesIf(0.50, 0.75, 0.80, 0.85, 0.90, 0.95, 0.99)( + {delay_ms}, {seen_after} + ) AS delay_percentiles_ms, + -- Age histogram (log2 buckets in seconds) + {age_hist}, + -- Delay histogram (log2 buckets in seconds) + {delay_hist} +FROM canonical_beacon_block_execution_transaction c +GLOBAL LEFT JOIN first_seen m ON c.hash = m.hash +WHERE c.meta_network_name = '{network}' + AND {date_filter} +GROUP BY c.slot, c.slot_start_date_time, c.type +ORDER BY c.slot, c.type +""" + + df = client.query_df(query) + return df, query + + +# Histogram bucket labels for visualization +AGE_HIST_LABELS = [ + "<0.5s", "0.5-1s", "1-2s", "2-4s", "4-8s", "8-16s", + "16-32s", "32s-1m", "1-2m", "2-4m", "4-8m", "8-17m", + "17-34m", "34-60m", ">=1h" +]
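
For reference, the log2 bucketing that the `hist_columns` countIf expressions implement can be reproduced client-side. The sketch below (not part of the diff) reuses the same `bounds_ms` boundaries and `AGE_HIST_LABELS` from the query module; the `ages_ms` values are hypothetical example data.

```python
import numpy as np

# Same boundaries (ms) and labels as fetch_mempool_availability / AGE_HIST_LABELS.
BOUNDS_MS = [500, 1000, 2000, 4000, 8000, 16000, 32000, 64000,
             128000, 256000, 512000, 1024000, 2048000, 3600000]
LABELS = ["<0.5s", "0.5-1s", "1-2s", "2-4s", "4-8s", "8-16s",
          "16-32s", "32s-1m", "1-2m", "2-4m", "4-8m", "8-17m",
          "17-34m", "34-60m", ">=1h"]

ages_ms = np.array([120, 750, 9_500, 95_000, 4_000_000])  # hypothetical example ages

# side="right" puts a value equal to a boundary into the higher bucket,
# matching the query's `>= lower AND < upper` countIf conditions:
# index 0 -> <0.5s, index 14 -> >=1h.
bucket_idx = np.searchsorted(BOUNDS_MS, ages_ms, side="right")

for age, idx in zip(ages_ms, bucket_idx):
    print(f"{age:>9} ms -> bucket {idx:2d} ({LABELS[idx]})")
```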