agntcy · muscariello · Sep 29, 2025 · Sep 29, 2025 · Sep 29, 2025
@@ -0,0 +1,171 @@
+import re
+# Scrape all package URLs from the agntcy org packages page
+def get_all_package_urls(org):
+    """Collect container-package page URLs for a GitHub organization.
+
+    Walks ``https://github.com/orgs/<org>/packages?page=N`` starting at
+    page 1 and gathers every anchor whose href begins with
+    ``/orgs/<org>/packages/container/package/``. Paging stops at the first
+    page that contributes no package link that has not been seen already.
+
+    Args:
+        org: Organization login used to build the listing URLs.
+
+    Returns:
+        A ``(urls, parse_package_info)`` tuple. ``urls`` lists the absolute
+        package URLs in discovery order, each one once.
+        ``parse_package_info(url)`` returns ``(name, type)`` for such a URL,
+        or ``(url, "unknown")`` when the URL does not have the expected shape.
+
+    Raises:
+        requests.HTTPError: If a listing page answers with an error status.
+    """
+    # Imported once per call instead of once per page.
+    from bs4 import Tag
+
+    # Helper to extract package name and type from URL
+    def parse_package_info(url):
+        # Example: https://github.com/orgs/agntcy/packages/container/package/dir-apiserver
+        m = re.match(r"https://github.com/orgs/[^/]+/packages/(?P<type>[^/]+)/package/(?P<name>.+)", url)
+        if m:
+            return m.group("name"), m.group("type")
+        return url, "unknown"
+
+    href_prefix = f"/orgs/{org}/packages/container/package/"
+    urls = []
+    seen = set()
+    page = 1
+    while True:
+        # A timeout keeps a stalled connection from hanging the job forever.
+        resp = requests.get(f"https://github.com/orgs/{org}/packages?page={page}", timeout=30)
+        resp.raise_for_status()
+        soup = BeautifulSoup(resp.text, "html.parser")
+        found_new = False
+        for link in soup.find_all("a", href=True):
+            if not isinstance(link, Tag):
+                continue
+            href = link.get("href")
+            if not isinstance(href, str) or not href.startswith(href_prefix):
+                continue
+            full_url = f"https://github.com{href}"
+            if full_url in seen:
+                continue
+            seen.add(full_url)
+            urls.append(full_url)
+            found_new = True
+        # Stop when a page adds nothing new. Requiring *new* links (rather
+        # than any link) also guards against looping forever should a page
+        # number past the end repeat links that were already collected.
+        if not found_new:
+            break
+        page += 1
+    return urls, parse_package_info
+import os
+import requests
+import csv
+from bs4 import BeautifulSoup
+
+# Organization whose repositories and packages are reported on.
+ORG = "agntcy"
+GITHUB_API = "https://api.github.com"
+TOKEN = os.environ.get("GITHUB_TOKEN")
+# Only attach credentials when a token is configured; interpolating a missing
+# token would send the literal header value "Bearer None".
+HEADERS = {"Accept": "application/vnd.github+json"}
+if TOKEN:
+    HEADERS["Authorization"] = f"Bearer {TOKEN}"
+
+# The repository statistics CSV is written next to this script.
+CSV_PATH = os.path.join(os.path.dirname(__file__), "agntcy_org_stats.csv")
+
+# CSV columns: repository attributes copied from the API payload, followed by
+# the three statistics that main() computes per repository.
+fields = [
+    "name", "full_name", "description", "html_url", "created_at", "updated_at", "pushed_at",
+    "stargazers_count", "forks_count", "open_issues_count", "archived", "disabled",
+    "unique_views", "artifact_downloads", "package_downloads"
+]
+
+def get_all_repos(org):
+    """Return every repository payload listed for ``org``.
+
+    Requests ``/orgs/<org>/repos`` 100 entries at a time, page after page,
+    until a page comes back empty.
+
+    Raises:
+        requests.HTTPError: If any page answers with an error status.
+    """
+    collected = []
+    page_number = 0
+    while True:
+        page_number += 1
+        response = requests.get(
+            f"{GITHUB_API}/orgs/{org}/repos?per_page=100&page={page_number}",
+            headers=HEADERS,
+        )
+        response.raise_for_status()
+        batch = response.json()
+        if batch:
+            collected += batch
+            continue
+        # An empty page marks the end of the listing.
+        return collected
+
+def get_repo_views(owner, repo):
+    """Return the ``uniques`` value from the repository traffic-views endpoint.
+
+    Returns an empty string when the endpoint does not answer with HTTP 200,
+    and 0 when the payload has no ``uniques`` key.
+    """
+    response = requests.get(
+        f"{GITHUB_API}/repos/{owner}/{repo}/traffic/views", headers=HEADERS
+    )
+    if response.status_code != 200:
+        # Best effort: leave the CSV cell blank rather than failing the run.
+        return ""
+    return response.json().get("uniques", 0)
+
+def get_artifact_downloads(owner, repo):
+    """Sum the ``download_count`` values of a repository's Actions artifacts.
+
+    Only the single response returned for the artifacts URL is read. Returns
+    an empty string when the endpoint does not answer with HTTP 200.
+    """
+    response = requests.get(
+        f"{GITHUB_API}/repos/{owner}/{repo}/actions/artifacts", headers=HEADERS
+    )
+    if response.status_code != 200:
+        return ""
+    artifacts = response.json().get("artifacts", [])
+    # NOTE(review): entries without a "download_count" key contribute 0, so
+    # confirm the artifacts payload actually carries that field.
+    return sum(item.get("download_count", 0) for item in artifacts)
+
+# Scrape GitHub Packages download count for a given package URL
+def scrape_package_downloads(package_url):
+    """Placeholder that performs no scraping and always returns ``None``.
+
+    Individual package pages are not fetched; download counts are read from
+    the org packages list page by ``scrape_all_package_downloads`` instead.
+    """
+    result = None
+    return result
+
+# Convert an abbreviated count ("9.49k") or a grouped count ("1,234") to an int
+def _parse_download_count(text):
+    if "k" in text:
+        # round(), not int(): float("2.01") * 1000 evaluates to
+        # 2009.9999999999998, which int() would truncate to 2009.
+        return round(float(text.replace("k", "")) * 1000)
+    return int(text.replace(",", ""))
+
+
+# Scrape all package download counts from the org packages list page
+def scrape_all_package_downloads(org):
+    """Map package display names to download counts from the org listing.
+
+    Fetches ``https://github.com/orgs/<org>/packages`` (that single URL only)
+    and extracts ``name -> count`` pairs with a regular expression.
+
+    Args:
+        org: Organization login used to build the listing URL.
+
+    Returns:
+        Dict of package name to integer download count. A later match for
+        the same name overwrites an earlier one.
+
+    Raises:
+        requests.HTTPError: If the listing answers with an error status.
+        ValueError: If a matched count cannot be converted to a number.
+    """
+    url = f"https://github.com/orgs/{org}/packages"
+    # A timeout keeps a stalled connection from hanging the job forever.
+    resp = requests.get(url, timeout=30)
+    resp.raise_for_status()
+    html = resp.text
+    # Regex to match: [name](url) ... <number>k
+    # Example: [dir-apiserver](...) ... 9.49k
+    # NOTE(review): this pattern looks for Markdown link syntax, but it is
+    # applied to the raw response body -- confirm it matches what the page
+    # really returns, otherwise the result is always empty.
+    pattern = re.compile(r"\[(?P<name>[^\]]+)\]\(https://github.com/orgs/[^/]+/packages/container/package/(?P<id>[^)]+)\)[^\n]*?(?P<count>[\d\.]+k|[\d,]+)")
+    return {
+        match.group("name"): _parse_download_count(match.group("count"))
+        for match in pattern.finditer(html)
+    }
+
+def main():
+    """Collect org statistics and write the repository and package CSV files.
+
+    Writes ``agntcy_packages_stats.csv`` (one row per scraped package) and
+    ``CSV_PATH`` (one row per repository), both next to this script, then
+    prints a one-line summary for each file.
+    """
+    repos = get_all_repos(ORG)
+    # Scrape all package URLs and their download counts
+    package_urls, parse_package_info = get_all_package_urls(ORG)
+    # Scrape all package download counts from the org packages list page
+    package_counts = scrape_all_package_downloads(ORG)
+
+    # Parse every URL exactly once; the result feeds both CSV files.
+    parsed_packages = [parse_package_info(url) for url in package_urls]
+    package_names = [name for name, _ in parsed_packages]
+    package_info_list = [
+        {"name": name, "type": ptype, "download_count": package_counts.get(name, "")}
+        for name, ptype in parsed_packages
+    ]
+
+    # Write package stats to a separate CSV file
+    package_csv_path = os.path.join(os.path.dirname(__file__), "agntcy_packages_stats.csv")
+    # Explicit UTF-8 so the output does not depend on the platform default.
+    with open(package_csv_path, "w", newline="", encoding="utf-8") as pkgfile:
+        pkg_writer = csv.DictWriter(pkgfile, fieldnames=["name", "type", "download_count"])
+        pkg_writer.writeheader()
+        pkg_writer.writerows(package_info_list)
+
+    with open(CSV_PATH, "w", newline="", encoding="utf-8") as csvfile:
+        writer = csv.DictWriter(csvfile, fieldnames=fields)
+        writer.writeheader()
+        for repo in repos:
+            row = {key: repo.get(key, "") for key in fields}
+            owner = repo.get("owner", {}).get("login", ORG)
+            repo_name = repo.get("name", "")
+            row["unique_views"] = get_repo_views(owner, repo_name)
+            row["artifact_downloads"] = get_artifact_downloads(owner, repo_name)
+            # A package belongs to a repo when its *name* starts with the repo
+            # name. Testing the package name rather than the whole URL keeps
+            # the host, org and fixed path segments from producing matches,
+            # and an empty repo name matches nothing.
+            matching_names = [
+                name for name in package_names
+                if repo_name and name.startswith(repo_name)
+            ]
+            # Joining an empty list yields "", i.e. a blank cell.
+            row["package_downloads"] = ", ".join(
+                str(package_counts.get(name, "")) for name in matching_names
+            )
+            writer.writerow(row)
+    print(f"Wrote {len(repos)} repos to {CSV_PATH}")
+    print(f"Wrote {len(package_info_list)} packages to {package_csv_path}")
+
+if __name__ == "__main__":
+    main()
@@ -0,0 +1,72 @@
+
+import requests
+import csv
+import os
+
+# The PyPI statistics CSV is written next to this script.
+CSV_PATH = os.path.join(os.path.dirname(__file__), "agntcy_pypi_stats_full.csv")
+
+# PyPI project names to report on; each one is looked up individually below.
+agntcy_pkgs = [
+    "agntcy-app-sdk",
+    "metrics-computation-engine",
+    "ioa-observe-sdk",
+    "agntcy-dir",
+    "slim-mcp",
+    "slima2a",
+    "slimrpc",
+    "slim-bindings",
+    "mce-metrics-plugin",
+    "mce-ragas-adapter",
+    "mce-opik-adapter",
+    "mce-deepeval-adapter",
+    "agntcy-identity-sdk",
+    "agntcy-dir-sdk",
+    "ioa-metrics-computation-engine",
+    "agntcy-dir-client-sdk",
+    "agntcy-acp",
+    "agp-mcp",
+    "agp-bindings",
+    "agntcy-iomapper",
+    "agntcy-pypi-sample"
+]
+
+# One dict per package: latest version plus recent download counts. A lookup
+# that does not answer with HTTP 200 leaves its cells blank.
+rows = []
+for package_name in agntcy_pkgs:
+    # Latest released version, taken from the PyPI JSON metadata.
+    meta_response = requests.get(f"https://pypi.org/pypi/{package_name}/json")
+    if meta_response.status_code == 200:
+        latest_version = meta_response.json().get("info", {}).get("version", "")
+    else:
+        latest_version = ""
+
+    # Recent download counts from pypistats; a key missing from the payload counts as 0.
+    stats_response = requests.get(f"https://pypistats.org/api/packages/{package_name}/recent")
+    if stats_response.status_code == 200:
+        recent = stats_response.json().get("data", {})
+        day_count, week_count, month_count = (
+            recent.get(period, 0) for period in ("last_day", "last_week", "last_month")
+        )
+    else:
+        day_count = week_count = month_count = ""
+
+    rows.append(
+        dict(
+            name=package_name,
+            version=latest_version,
+            last_day_downloads=day_count,
+            last_week_downloads=week_count,
+            last_month_downloads=month_count,
+        )
+    )
+
+# Write the CSV report: header row first, then one row per package.
+csv_columns = ["name", "version", "last_day_downloads", "last_week_downloads", "last_month_downloads"]
+with open(CSV_PATH, "w", newline="") as csvfile:
+    writer = csv.DictWriter(csvfile, fieldnames=csv_columns)
+    writer.writeheader()
+    writer.writerows(rows)
+print(f"Wrote {len(rows)} agntcy-maintained PyPI packages to {CSV_PATH}")
+
+# Write markdown report
+MD_PATH = os.path.join(os.path.dirname(__file__), "agntcy_pypi_stats_report.md")
+md_lines = [
+    "# AGNTCY PyPI Package Download Stats\n\n",
+    "| Package | Version | Last Day | Last Week | Last Month |\n",
+    "|---------|---------|----------|-----------|------------|\n",
+]
+# One table row per package, in the same order as the CSV.
+md_lines.extend(
+    f"| {entry['name']} | {entry['version']} | {entry['last_day_downloads']} | {entry['last_week_downloads']} | {entry['last_month_downloads']} |\n"
+    for entry in rows
+)
+with open(MD_PATH, "w") as mdfile:
+    mdfile.writelines(md_lines)
+print(f"Wrote markdown report to {MD_PATH}")
@@ -0,0 +1,2 @@
+requests
+# collect_org_stats.py imports BeautifulSoup and Tag from bs4
+beautifulsoup4
+playwright
@@ -0,0 +1,46 @@
+import asyncio
+from playwright.async_api import async_playwright
+import csv
+
+# Listing of the organization's container packages (first page).
+ORG_URL = "https://github.com/orgs/agntcy/packages?type=container"
+
+async def scrape_ghcr_downloads():
+    """Scrape container names and download labels into a CSV file.
+
+    Opens the listing in headless Chromium, reads the page count from the
+    pagination element (one page when there is none), visits every page and
+    records the name link and the first muted-text span of each package row.
+    The download value is stored as the text shown on the page, unparsed.
+
+    The CSV is written next to this script so its location does not depend
+    on the directory the script is started from.
+    """
+    # Local import: the module itself only imports asyncio, playwright and csv.
+    import os
+
+    output_path = os.path.join(
+        os.path.dirname(os.path.abspath(__file__)), "agntcy_ghcr_downloads.csv"
+    )
+    containers = []
+    async with async_playwright() as p:
+        browser = await p.chromium.launch(headless=True)
+        try:
+            # Get total number of pages
+            page = await browser.new_page()
+            await page.goto(ORG_URL, timeout=60000)
+            await page.wait_for_selector("#org-packages", timeout=60000)
+            pagination = await page.query_selector(".pagination")
+            total_pages = 1
+            if pagination:
+                current = await pagination.query_selector("em.current")
+                if current:
+                    total_pages = int(await current.get_attribute("data-total-pages") or "1")
+            await page.close()
+            # Scrape all pages
+            for i in range(1, total_pages + 1):
+                page = await browser.new_page()
+                await page.goto(f"{ORG_URL}&page={i}", timeout=60000)
+                await page.wait_for_selector("#org-packages", timeout=60000)
+                # Scroll to the bottom and pause before reading the rows.
+                await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
+                await asyncio.sleep(2)
+                for row in await page.query_selector_all("#org-packages ul li.Box-row"):
+                    name_tag = await row.query_selector('a.Link--primary')
+                    downloads_tag = await row.query_selector('span.color-fg-muted')
+                    if name_tag and downloads_tag:
+                        # text_content() may yield None; treat that as empty text.
+                        name = (await name_tag.text_content() or "").strip()
+                        downloads = (await downloads_tag.text_content() or "").strip()
+                        containers.append({"name": name, "downloads": downloads})
+                await page.close()
+        finally:
+            # Close the browser even when navigation or a selector wait fails.
+            await browser.close()
+    with open(output_path, "w", newline="", encoding="utf-8") as f:
+        writer = csv.DictWriter(f, fieldnames=["name", "downloads"])
+        writer.writeheader()
+        writer.writerows(containers)
+    print(f"Wrote {len(containers)} container download stats to {output_path}")
+
+if __name__ == "__main__":
+    asyncio.run(scrape_ghcr_downloads())
@@ -7,8 +7,14 @@ on:
   push:
     tags:
       - 'v*.*.*'
+    paths:
+      - 'docs/**'
+      - 'mkdocs/**'
 
   pull_request:
+    paths:
+      - 'docs/**'
+      - 'mkdocs/**'
 
 concurrency:
   group: ${{ github.workflow }}-${{ github.ref }}

@@ -0,0 +1,61 @@
+name: Collect AGNTCY Org Repo Statistics
+
+on:
+  schedule:
+    - cron: '0 0 * * *' # daily at midnight UTC
+  workflow_dispatch:
+
+jobs:
+  collect-stats:
+    runs-on: ubuntu-latest
+    steps:
+      - name: Checkout repository
+        uses: actions/checkout@v4
+
+      - name: Set up Python
+        uses: actions/setup-python@v5
+        with:
+          python-version: '3.x'
+
+      # requirements.txt already lists playwright, so the package and the
+      # Chromium binary are installed once here for all later steps.
+      - name: Install dependencies
+        run: |
+          python -m pip install --upgrade pip
+          pip install -r .github/scripts/requirements.txt
+          playwright install chromium
+
+      - name: Collect org repo statistics
+        env:
+          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
+        run: |
+          python .github/scripts/collect_org_stats.py
+
+      # The org script writes two CSV files next to itself; upload both.
+      - name: Upload statistics CSV
+        uses: actions/upload-artifact@v4
+        with:
+          name: agntcy-org-repo-stats
+          path: |
+            .github/scripts/agntcy_org_stats.csv
+            .github/scripts/agntcy_packages_stats.csv
+
+      - name: Collect PyPI package statistics
+        run: |
+          python .github/scripts/collect_pypi_stats_full.py
+
+      # Uploaded right after collection so a failure in the GHCR step below
+      # does not discard the PyPI results. Includes the Markdown report.
+      - name: Upload PyPI statistics CSV
+        uses: actions/upload-artifact@v4
+        with:
+          name: agntcy-pypi-stats
+          path: |
+            .github/scripts/agntcy_pypi_stats_full.csv
+            .github/scripts/agntcy_pypi_stats_report.md
+
+      # Run from the scripts directory: the scraper opens its CSV by bare
+      # file name, and the upload step below expects it in .github/scripts.
+      - name: Collect GHCR container download stats
+        working-directory: .github/scripts
+        run: |
+          python scrape_ghcr_downloads_playwright.py
+
+      - name: Upload GHCR statistics CSV
+        uses: actions/upload-artifact@v4
+        with:
+          name: agntcy-ghcr-downloads
+          path: .github/scripts/agntcy_ghcr_downloads.csv