Skip to content
Merged
Show file tree
Hide file tree
Changes from all commits
Commits
File filter

Filter by extension

Filter by extension


Conversations
Failed to load comments.
Loading
Jump to
Jump to file
Failed to load files.
Loading
Diff view
Diff view
171 changes: 171 additions & 0 deletions .github/scripts/collect_org_stats.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,171 @@
import re
# Scrape all package URLs from the agntcy org packages page
def get_all_package_urls(org, timeout=30):
    """Scrape container-package URLs from an organization's GitHub packages pages.

    Requests ``https://github.com/orgs/<org>/packages?page=N`` for N = 1, 2, ...
    and collects every anchor whose ``href`` starts with
    ``/orgs/<org>/packages/container/package/``. Paging stops at the first
    page that contributes no URL that has not been seen already.

    Args:
        org: Organization login used to build the page URLs.
        timeout: Seconds to wait for each HTTP request before giving up.

    Returns:
        A ``(urls, parse_package_info)`` tuple: ``urls`` is the list of
        absolute package URLs in discovery order (without duplicates), and
        ``parse_package_info`` is a helper mapping such a URL to
        ``(name, type)``.

    Raises:
        requests.HTTPError: If any page request returns an error status.
    """
    # Imported once per call instead of once per scraped page.
    from bs4 import Tag

    def parse_package_info(url):
        """Return ``(name, type)`` for a package URL, or ``(url, "unknown")``."""
        # Example: https://github.com/orgs/agntcy/packages/container/package/dir-apiserver
        m = re.match(
            r"https://github.com/orgs/[^/]+/packages/(?P<type>[^/]+)/package/(?P<name>.+)",
            url,
        )
        if m:
            return m.group("name"), m.group("type")
        return url, "unknown"

    href_prefix = f"/orgs/{org}/packages/container/package/"
    urls = []
    seen = set()
    page = 1
    while True:
        resp = requests.get(
            f"https://github.com/orgs/{org}/packages?page={page}",
            timeout=timeout,
        )
        resp.raise_for_status()
        soup = BeautifulSoup(resp.text, "html.parser")

        found_new = False
        for link in soup.find_all("a", href=True):
            if not isinstance(link, Tag):
                continue
            href = link.get("href")
            if not (isinstance(href, str) and href.startswith(href_prefix)):
                continue
            package_url = f"https://github.com{href}"
            # A package may be linked several times on one page; record it
            # only once so callers do not emit duplicate rows.
            if package_url not in seen:
                seen.add(package_url)
                urls.append(package_url)
                found_new = True

        # Stop when a page adds nothing new. This also ends the loop if an
        # out-of-range page number repeats links that were already collected.
        if not found_new:
            break
        page += 1
    return urls, parse_package_info
import os
import requests
import csv
from bs4 import BeautifulSoup

# Organization whose repositories and packages are reported on.
ORG = "agntcy"
GITHUB_API = "https://api.github.com"
TOKEN = os.environ.get("GITHUB_TOKEN")

# Headers sent with every GitHub REST API request. The Authorization header is
# only added when a token is configured; formatting a missing token would
# otherwise send the literal string "Bearer None" as the credential.
HEADERS = {"Accept": "application/vnd.github+json"}
if TOKEN:
    HEADERS["Authorization"] = f"Bearer {TOKEN}"

# The per-repository CSV report is written next to this script.
CSV_PATH = os.path.join(os.path.dirname(__file__), "agntcy_org_stats.csv")

# Column order of the per-repository CSV. The first twelve names are looked up
# directly in each repository record; the last three are filled in by main().
fields = [
    "name", "full_name", "description", "html_url", "created_at", "updated_at", "pushed_at",
    "stargazers_count", "forks_count", "open_issues_count", "archived", "disabled",
    "unique_views", "artifact_downloads", "package_downloads",
]

def get_all_repos(org):
    """Return every repository record of ``org`` from the GitHub REST API.

    Pages through ``/orgs/<org>/repos`` 100 entries at a time and stops at the
    first page whose JSON body is empty. An error status on any page raises
    via ``raise_for_status()``.
    """
    collected = []
    page_no = 0
    while True:
        page_no += 1
        response = requests.get(
            f"{GITHUB_API}/orgs/{org}/repos?per_page=100&page={page_no}",
            headers=HEADERS,
        )
        response.raise_for_status()
        batch = response.json()
        if batch:
            collected.extend(batch)
            continue
        # An empty page marks the end of the listing.
        return collected

def get_repo_views(owner, repo):
    """Return the ``uniques`` value of a repository's traffic-views response.

    Yields ``0`` when the response has no ``uniques`` key, and an empty string
    when the request does not answer with HTTP 200.
    """
    response = requests.get(
        f"{GITHUB_API}/repos/{owner}/{repo}/traffic/views",
        headers=HEADERS,
    )
    if response.status_code != 200:
        # Best effort: leave the CSV cell blank instead of failing the run.
        return ""
    return response.json().get("uniques", 0)

def get_artifact_downloads(owner, repo):
    """Sum the ``download_count`` of the artifacts listed for a repository.

    Only the single response of ``/repos/<owner>/<repo>/actions/artifacts`` is
    read. Artifacts without a ``download_count`` key contribute ``0``. An
    empty string is returned when the request does not answer with HTTP 200.
    """
    # NOTE(review): confirm the artifacts endpoint actually reports a
    # ``download_count`` field; if it does not, this total is always 0.
    response = requests.get(
        f"{GITHUB_API}/repos/{owner}/{repo}/actions/artifacts",
        headers=HEADERS,
    )
    if response.status_code != 200:
        return ""
    listing = response.json().get("artifacts", [])
    return sum(entry.get("download_count", 0) for entry in listing)

# Scrape GitHub Packages download count for a given package URL
def scrape_package_downloads(package_url):
    """Placeholder for per-package scraping; always returns ``None``.

    Individual package pages are not scraped. All counts are read from the
    organization's packages list page at once, which is what
    ``scrape_all_package_downloads`` is meant to do in place of this function.
    """
    return None

# Scrape all package download counts from the org packages list page
def _parse_download_count(text):
    """Convert a count such as ``"9.49k"`` or ``"1,234"`` to an integer."""
    if text.endswith("k"):
        # round() instead of int(): the float product can land just below the
        # intended integer, and truncation would then be off by one.
        return round(float(text[:-1]) * 1000)
    return int(text.replace(",", ""))


def scrape_all_package_downloads(org):
    """Read package download counts from an organization's packages list page.

    Fetches ``https://github.com/orgs/<org>/packages`` and searches the body
    for ``[name](package-url) ... <count>`` entries, where ``<count>`` is
    either a ``k``-suffixed decimal or a comma-grouped integer.

    Args:
        org: Organization login used to build the page URL.

    Returns:
        A dict mapping each matched package name to its download count as an
        ``int``. It is empty when nothing on the page matches.

    Raises:
        requests.HTTPError: If the page request returns an error status.
    """
    resp = requests.get(f"https://github.com/orgs/{org}/packages", timeout=30)
    resp.raise_for_status()
    html = resp.text

    # NOTE(review): the pattern expects Markdown-style ``[name](url)`` links,
    # while the body fetched here is the raw page; confirm it can match at all.
    #
    # The count alternatives must start with a digit (or a decimal point for
    # the ``k`` form) so that a stray comma in the surrounding text is never
    # captured as an empty number.
    pattern = re.compile(
        r"\[(?P<name>[^\]]+)\]"
        r"\(https://github.com/orgs/[^/]+/packages/container/package/(?P<id>[^)]+)\)"
        r"[^\n]*?(?P<count>(?:\d+\.?\d*|\.\d+)k|\d[\d,]*)"
    )
    results = {}
    for match in pattern.finditer(html):
        results[match.group("name")] = _parse_download_count(match.group("count"))
    return results

def main():
    """Collect organization statistics and write them to two CSV files.

    Writes ``agntcy_packages_stats.csv`` (one row per scraped package URL) and
    the file at ``CSV_PATH`` (one row per repository, extended with unique
    views, artifact downloads and package downloads). Both files are written
    as UTF-8 so that non-ASCII repository descriptions do not depend on the
    platform's default encoding.
    """
    repos = get_all_repos(ORG)
    # Scrape all package URLs, then the download counts from the list page.
    package_urls, parse_package_info = get_all_package_urls(ORG)
    package_counts = scrape_all_package_downloads(ORG)

    # Parse every URL once; the result is reused for each repository below.
    parsed_packages = [(url, *parse_package_info(url)) for url in package_urls]
    package_info_list = [
        {"name": name, "type": ptype, "download_count": package_counts.get(name, "")}
        for _url, name, ptype in parsed_packages
    ]

    # Write package stats to a separate CSV file.
    package_csv_path = os.path.join(os.path.dirname(__file__), "agntcy_packages_stats.csv")
    with open(package_csv_path, "w", newline="", encoding="utf-8") as pkgfile:
        pkg_writer = csv.DictWriter(pkgfile, fieldnames=["name", "type", "download_count"])
        pkg_writer.writeheader()
        pkg_writer.writerows(package_info_list)

    with open(CSV_PATH, "w", newline="", encoding="utf-8") as csvfile:
        writer = csv.DictWriter(csvfile, fieldnames=fields)
        writer.writeheader()
        for repo in repos:
            row = {key: repo.get(key, "") for key in fields}
            # ``or {}`` also covers an explicit ``"owner": null`` in the record.
            owner = (repo.get("owner") or {}).get("login", ORG)
            repo_name = repo.get("name", "")
            row["unique_views"] = get_repo_views(owner, repo_name)
            row["artifact_downloads"] = get_artifact_downloads(owner, repo_name)
            # NOTE(review): packages are attributed to a repository by a plain
            # substring test on the URL, so repo "dir" also matches a package
            # named "dir-apiserver"; confirm this is the intended association.
            matching_names = [
                name for url, name, _ptype in parsed_packages if f"/{repo_name}" in url
            ]
            row["package_downloads"] = ", ".join(
                str(package_counts.get(name, "")) for name in matching_names
            )
            writer.writerow(row)

    print(f"Wrote {len(repos)} repos to {CSV_PATH}")
    print(f"Wrote {len(package_info_list)} packages to {package_csv_path}")


if __name__ == "__main__":
    main()
72 changes: 72 additions & 0 deletions .github/scripts/collect_pypi_stats_full.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,72 @@

import requests
import csv
import os

# The CSV report is written next to this script.
CSV_PATH = os.path.join(os.path.dirname(__file__), "agntcy_pypi_stats_full.csv")

# PyPI package names to report on.
agntcy_pkgs = [
    "agntcy-app-sdk",
    "metrics-computation-engine",
    "ioa-observe-sdk",
    "agntcy-dir",
    "slim-mcp",
    "slima2a",
    "slimrpc",
    "slim-bindings",
    "mce-metrics-plugin",
    "mce-ragas-adapter",
    "mce-opik-adapter",
    "mce-deepeval-adapter",
    "agntcy-identity-sdk",
    "agntcy-dir-sdk",
    "ioa-metrics-computation-engine",
    "agntcy-dir-client-sdk",
    "agntcy-acp",
    "agp-mcp",
    "agp-bindings",
    "agntcy-iomapper",
    "agntcy-pypi-sample"
]

# One row per package. Any value whose request does not answer with HTTP 200
# stays an empty string so the package still appears in the reports.
rows = []
for pkg in agntcy_pkgs:
    # Latest version, from the PyPI JSON metadata endpoint.
    r = requests.get(f"https://pypi.org/pypi/{pkg}/json")
    if r.status_code == 200:
        version = r.json().get("info", {}).get("version", "")
    else:
        version = ""

    # Recent download counts, from the pypistats API.
    s = requests.get(f"https://pypistats.org/api/packages/{pkg}/recent")
    if s.status_code == 200:
        stats = s.json().get("data", {})
        last_day = stats.get("last_day", 0)
        last_week = stats.get("last_week", 0)
        last_month = stats.get("last_month", 0)
    else:
        last_day = last_week = last_month = ""

    rows.append(
        {
            "name": pkg,
            "version": version,
            "last_day_downloads": last_day,
            "last_week_downloads": last_week,
            "last_month_downloads": last_month,
        }
    )

# CSV report: header followed by every collected row.
with open(CSV_PATH, "w", newline="") as csvfile:
    writer = csv.DictWriter(
        csvfile,
        fieldnames=[
            "name",
            "version",
            "last_day_downloads",
            "last_week_downloads",
            "last_month_downloads",
        ],
    )
    writer.writeheader()
    writer.writerows(rows)
print(f"Wrote {len(rows)} agntcy-maintained PyPI packages to {CSV_PATH}")

# Markdown report: title, table header, then one table line per package.
MD_PATH = os.path.join(os.path.dirname(__file__), "agntcy_pypi_stats_report.md")
md_lines = [
    "# AGNTCY PyPI Package Download Stats\n\n",
    "| Package | Version | Last Day | Last Week | Last Month |\n",
    "|---------|---------|----------|-----------|------------|\n",
]
md_lines.extend(
    f"| {row['name']} | {row['version']} | {row['last_day_downloads']} | {row['last_week_downloads']} | {row['last_month_downloads']} |\n"
    for row in rows
)
with open(MD_PATH, "w") as mdfile:
    mdfile.writelines(md_lines)
print(f"Wrote markdown report to {MD_PATH}")
2 changes: 2 additions & 0 deletions .github/scripts/requirements.txt
Original file line number Diff line number Diff line change
@@ -0,0 +1,2 @@
requests
# Provides the bs4 module imported by collect_org_stats.py.
beautifulsoup4
playwright
46 changes: 46 additions & 0 deletions .github/scripts/scrape_ghcr_downloads_playwright.py
Original file line number Diff line number Diff line change
@@ -0,0 +1,46 @@
import asyncio
from playwright.async_api import async_playwright
import csv

ORG_URL = "https://github.com/orgs/agntcy/packages?type=container"


async def scrape_ghcr_downloads():
    """Scrape container names and download counts into a CSV file.

    Opens the organization's container-packages listing in headless Chromium,
    reads the page count from the pagination element (1 when there is none),
    then visits each page and records the text of the name link and of the
    muted download span of every package row.

    The result is written to ``agntcy_ghcr_downloads.csv`` in this script's
    own directory, so the location does not depend on the directory the
    script is started from.
    """
    # Function-scope import: this module's top-level imports do not include os.
    import os

    containers = []
    async with async_playwright() as p:
        browser = await p.chromium.launch(headless=True)
        try:
            # Get total number of pages.
            page = await browser.new_page()
            await page.goto(ORG_URL, timeout=60000)
            await page.wait_for_selector("#org-packages", timeout=60000)
            total_pages = 1
            pagination = await page.query_selector(".pagination")
            if pagination:
                current = await pagination.query_selector("em.current")
                if current:
                    total_pages = int(await current.get_attribute("data-total-pages") or "1")
            await page.close()

            # Scrape all pages.
            for i in range(1, total_pages + 1):
                url = f"https://github.com/orgs/agntcy/packages?page={i}&type=container"
                page = await browser.new_page()
                await page.goto(url, timeout=60000)
                await page.wait_for_selector("#org-packages", timeout=60000)
                # Scroll to the bottom and pause briefly before reading rows.
                await page.evaluate("window.scrollTo(0, document.body.scrollHeight)")
                await asyncio.sleep(2)
                for row in await page.query_selector_all("#org-packages ul li.Box-row"):
                    name_tag = await row.query_selector('a.Link--primary')
                    downloads_tag = await row.query_selector('span.color-fg-muted')
                    if name_tag and downloads_tag:
                        # text_content() may yield None; treat that as empty.
                        name = ((await name_tag.text_content()) or "").strip()
                        downloads = ((await downloads_tag.text_content()) or "").strip()
                        containers.append({"name": name, "downloads": downloads})
                await page.close()
        finally:
            # Close the browser even when navigation or a selector wait fails.
            await browser.close()

    csv_path = os.path.join(
        os.path.dirname(os.path.abspath(__file__)), "agntcy_ghcr_downloads.csv"
    )
    with open(csv_path, "w", newline="", encoding="utf-8") as f:
        writer = csv.DictWriter(f, fieldnames=["name", "downloads"])
        writer.writeheader()
        writer.writerows(containers)
    print(f"Wrote {len(containers)} container download stats to {csv_path}")


if __name__ == "__main__":
    asyncio.run(scrape_ghcr_downloads())
6 changes: 6 additions & 0 deletions .github/workflows/cicd.yml
Original file line number Diff line number Diff line change
Expand Up @@ -7,8 +7,14 @@ on:
push:
tags:
- 'v*.*.*'
paths:
- 'docs/**'
- 'mkdocs/**'

pull_request:
paths:
- 'docs/**'
- 'mkdocs/**'

concurrency:
group: ${{ github.workflow }}-${{ github.ref }}
Expand Down
61 changes: 61 additions & 0 deletions .github/workflows/org-stats.yml
Original file line number Diff line number Diff line change
@@ -0,0 +1,61 @@
name: Collect AGNTCY Org Repo Statistics

on:
  schedule:
    - cron: '0 0 * * *' # daily at midnight UTC
  workflow_dispatch:

jobs:
  collect-stats:
    runs-on: ubuntu-latest
    steps:
      - name: Checkout repository
        uses: actions/checkout@v4

      - name: Set up Python
        uses: actions/setup-python@v5
        with:
          python-version: '3.x'

      # Installs everything the three scripts need, including Playwright and
      # its Chromium build, so no later step has to install it again.
      - name: Install dependencies
        run: |
          python -m pip install --upgrade pip
          pip install -r .github/scripts/requirements.txt
          playwright install chromium

      - name: Collect org repo statistics
        env:
          GITHUB_TOKEN: ${{ secrets.GITHUB_TOKEN }}
        run: |
          python .github/scripts/collect_org_stats.py

      - name: Upload statistics CSV
        uses: actions/upload-artifact@v4
        with:
          name: agntcy-org-repo-stats
          path: .github/scripts/agntcy_org_stats.csv

      - name: Collect PyPI package statistics
        run: |
          python .github/scripts/collect_pypi_stats_full.py

      - name: Collect GHCR container download stats
        run: |
          python .github/scripts/scrape_ghcr_downloads_playwright.py

      - name: Upload PyPI statistics CSV
        uses: actions/upload-artifact@v4
        with:
          name: agntcy-pypi-stats
          path: .github/scripts/agntcy_pypi_stats_full.csv

      - name: Upload GHCR statistics CSV
        uses: actions/upload-artifact@v4
        with:
          name: agntcy-ghcr-downloads
          path: .github/scripts/agntcy_ghcr_downloads.csv