Skip to content

Commit 67acf19

Browse files
committed
ci: Improve weekly download flow
1 parent 867e837 commit 67acf19

File tree

7 files changed

+373
-66
lines changed

7 files changed

+373
-66
lines changed

.github/workflows/test.yml

Lines changed: 15 additions & 0 deletions
Original file line number | Diff line number | Diff line change
@@ -27,3 +27,18 @@ jobs:
2727

2828
- name: Run tests
2929
run: uv run pytest tests
30+
31+
# Runs the test-suite of the weekly download script (dependencies/tests).
test-weekly-download:
  runs-on: ubuntu-latest
  # Skip the job when the head commit message starts with "bump:".
  # The expression is quoted because a leading "!" is YAML tag syntax.
  if: "!startsWith(github.event.head_commit.message, 'bump:')"
  steps:
    - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0

    - name: Install uv
      uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2

    # Installs both the "download" and "dev" dependency groups from the lockfile.
    - name: Install the dependencies
      run: uv sync --locked --group download --group dev --python 3.14

    - name: Run tests
      run: uv run pytest dependencies/tests

.github/workflows/weekly_download.yml

Lines changed: 35 additions & 31 deletions
Original file line number | Diff line number | Diff line change
@@ -2,42 +2,46 @@ name: Weekly download
22

33
on:
  schedule:
    - cron: "0 0 * * 1" # every Monday at 00:00 UTC
  workflow_dispatch:

jobs:
  # Downloads the PyPI and NPM package lists and pushes the result to main.
  download:
    runs-on: ubuntu-latest
    steps:
      # Create an app token; it is handed to the checkout step below.
      - uses: actions/create-github-app-token@67018539274d69449ef7c02e8e71183d1719ab42 # v2.1.4
        id: app-token
        with:
          app-id: ${{ vars.ELEMENTSINTERACTIVE_BOT_APP_ID }}
          private-key: ${{ secrets.ELEMENTSINTERACTIVE_BOT_PRIVATE_KEY }}
      - uses: actions/checkout@08c6903cd8c0fde910a37f88322edcfb5dd907a8 # v5.0.0
        with:
          fetch-depth: 0
          token: ${{ steps.app-token.outputs.token }}
          # NOTE(review): github.head_ref is only populated for pull_request
          # events, so for schedule/workflow_dispatch this is presumably empty
          # and checkout uses its default ref - confirm this is intended.
          ref: ${{ github.head_ref }}
      - name: Install uv
        uses: astral-sh/setup-uv@85856786d1ce8acfbcc2f13a5f3fbd6b938f9f41 # v7.1.2

      - name: Install the project
        run: uv sync --locked --only-group download --python 3.14

      # Each ecosystem is fetched in its own step; continue-on-error lets the
      # job go on to the next step even when one download fails.
      - name: Download Pypi packages
        continue-on-error: true
        run: |
          uv run --no-project dependencies/scripts/download_packages.py download pypi

      - name: Download NPM packages
        continue-on-error: true
        run: |
          uv run --no-project dependencies/scripts/download_packages.py download npm

      - name: Configure git
        run: |
          git config user.name "github-actions[bot]"
          git config user.email "github-actions[bot]@users.noreply.github.com"

      # `git commit` exits non-zero when nothing is staged, so this step (and
      # the job) fails if the downloads produced no changes at all.
      - name: Push changes to repo
        run: |
          git add .
          git commit -m "chore: Weekly update of trusted packages"
          git push origin HEAD:main

dependencies/scripts/__init__.py

Whitespace-only changes.

dependencies/scripts/download_packages.py

Lines changed: 63 additions & 34 deletions
Original file line number | Diff line number | Diff line change
@@ -19,22 +19,42 @@
1919
)
2020

2121

22+
class ServerError(Exception):
    """Raised when the remote server answers with an HTTP 5xx status."""
24+
25+
26+
class InvalidJSONError(Exception):
    """Raised when a response body is not valid JSON or lacks the expected structure."""
28+
29+
30+
# Directory name: the generated "<ecosystem>.json" files are written below this
# path (see _run)
DEPENDENCIES_DIR = "dependencies"

# Sources
# NOTE(review): presumably referenced by the Ecosystem definitions further down - confirm.
TOP_PYPI_SOURCE = "https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json"
TOP_NPM_SOURCE = "https://packages.ecosyste.ms/api/v1/registries/npmjs.org/packages"

# Retry constants, forwarded one-to-one to stamina.retry_context() in get_packages()
RETRY_ON = (httpx.TransportError, httpx.TimeoutException, ServerError)  # errors that trigger a retry
RETRY_ATTEMPTS = 10
RETRY_WAIT_JITTER = 1
RETRY_WAIT_EXP_BASE = 2
RETRY_WAIT_MAX = 8
# Timeout passed to httpx.Client for every request
TIMEOUT = 90
2644

2745

2846
def parse_npm(data: list[dict[str, Any]]) -> set[str]:
    """Extract the package names from an NPM package listing.

    Args:
        data: Decoded JSON payload, expected to be a list of objects that
            each carry a ``"name"`` key.

    Returns:
        The set of package names found in ``data``.

    Raises:
        InvalidJSONError: If ``data`` does not have the expected shape.
    """
    try:
        return {entry["name"] for entry in data}
    except (KeyError, TypeError) as e:
        # TypeError covers payloads of the wrong type altogether (for example
        # an error object instead of a list), not only a missing "name" key.
        raise InvalidJSONError from e
3051

3152

3253
def parse_pypi(data: dict[str, Any]) -> set[str]:
    """Extract the project names from a top-PyPI-packages document.

    Args:
        data: Decoded JSON payload, expected to be an object whose ``"rows"``
            entry is a list of objects that each carry a ``"project"`` key.

    Returns:
        The set of project names found in ``data``.

    Raises:
        InvalidJSONError: If ``data`` does not have the expected shape.
    """
    try:
        return {row["project"] for row in data["rows"]}
    except (KeyError, TypeError) as e:
        # TypeError covers payloads of the wrong type altogether (for example
        # a list instead of an object), not only a missing key.
        raise InvalidJSONError from e
3858

3959

4060
@dataclass(frozen=True)
@@ -61,6 +81,22 @@ class Ecosystem:
6181
ECOSYSTEMS = {"pypi": pypi_ecosystem, "npm": npm_ecosystem}
6282

6383

84+
def _run(ecosystem: str) -> None:
    """Download every page of an ecosystem's package list and save it to disk.

    The result is written to ``<DEPENDENCIES_DIR>/<ecosystem>.json``.

    Args:
        ecosystem: Key into ``ECOSYSTEMS``; the ``download`` command checks
            membership before calling this function.

    Raises:
        KeyError: If ``ecosystem`` is not a key of ``ECOSYSTEMS``.
    """
    selected_ecosystem = ECOSYSTEMS[ecosystem]
    all_packages: set[str] = set()

    # Ecosystems without pagination are fetched with a single request.
    n_pages = selected_ecosystem.pages or 1
    # Work on a copy so the shared ecosystem definition is never mutated.
    params = selected_ecosystem.params.copy()
    for page in range(1, n_pages + 1):
        if selected_ecosystem.pages:
            params["page"] = page

        all_packages.update(get_packages(selected_ecosystem.url, selected_ecosystem.parser, params))

    fpath = Path(DEPENDENCIES_DIR) / f"{ecosystem}.json"
    # A set has no stable iteration order; sorting keeps the saved list
    # reproducible between runs instead of reshuffling it every time.
    save_data_to_file(sorted(all_packages), fpath)
98+
99+
64100
# Root click command group of the script; the group body itself does nothing.
# (Documented with a comment rather than a docstring, because click would use
# a docstring as the group's help text.)
@click.group()
def entry_point() -> None:
    pass
@@ -78,39 +114,32 @@ def download(
78114
if ecosystem not in ECOSYSTEMS:
79115
raise click.BadParameter("Not a valid ecosystem")
80116

81-
selected_ecosystem = ECOSYSTEMS[ecosystem]
82-
all_packages: set[str] = set()
83-
84-
n_pages = selected_ecosystem.pages or 1
85-
params = selected_ecosystem.params.copy()
86-
for page in range(1, n_pages + 1):
87-
if selected_ecosystem.pages:
88-
params["page"] = page
89-
90-
all_packages.update(get_packages(selected_ecosystem.url, selected_ecosystem.parser, params))
117+
return _run(ecosystem)
91118

92-
fpath = Path(DEPENDENCIES_DIR) / f"{ecosystem}.json"
93-
save_data_to_file(list(all_packages), fpath)
94119

95-
96-
@stamina.retry(
97-
on=(httpx.TransportError, httpx.TimeoutException, ServerError),
98-
attempts=10,
99-
wait_jitter=1,
100-
wait_exp_base=2,
101-
wait_max=8,
102-
)
103120
def get_packages(
    base_url: str, parser: Callable[[dict[str, Any]], set[str]], params: dict[str, Any] | None = None
) -> set[str]:
    """Fetch one page of package data and return the parsed package names.

    The request is retried through ``stamina`` on the exception types listed
    in ``RETRY_ON`` (transport errors, timeouts and HTTP 5xx responses),
    using the module-level ``RETRY_*`` settings.

    Args:
        base_url: URL the GET request is sent to.
        parser: Callable that turns the decoded JSON payload into a set of
            package names.
        params: Optional query parameters for the request.

    Returns:
        The set of package names produced by ``parser``.

    Raises:
        ServerError: If the server answers with an HTTP 5xx status.
        httpx.HTTPStatusError: For any other error status (e.g. 4xx); these
            are not retried.
        InvalidJSONError: If the response body is not valid JSON.
    """
    for attempt in stamina.retry_context(
        on=RETRY_ON,
        attempts=RETRY_ATTEMPTS,
        wait_jitter=RETRY_WAIT_JITTER,
        wait_exp_base=RETRY_WAIT_EXP_BASE,
        wait_max=RETRY_WAIT_MAX,
    ):
        with attempt, httpx.Client(timeout=TIMEOUT) as client:
            response = client.get(str(base_url), params=params)
            try:
                response.raise_for_status()
            except httpx.HTTPStatusError as e:
                if e.response.is_server_error:
                    # Converted so that the retry policy (RETRY_ON) picks it up.
                    raise ServerError from e
                # Any other error status must not be swallowed: otherwise the
                # error body would be parsed as if it were package data.
                raise
            try:
                json_data = response.json()
            except json.JSONDecodeError as e:
                raise InvalidJSONError from e

            return parser(json_data)
114143

115144

116145
def save_data_to_file(all_packages: list[str], fpath: Path) -> None:

dependencies/tests/__init__.py

Whitespace-only changes.

0 commit comments

Comments
 (0)