1919)
2020
2121
class ServerError(Exception):
    """Signal that an HTTP response carried a 5xx (server error) status.

    Kept as a dedicated type so that server-side failures can be told apart
    from other HTTP errors, e.g. to decide whether a request is worth retrying.
    """
24+
25+
class InvalidJSONError(Exception):
    """Signal that received JSON is malformed or lacks the expected structure.

    Raised both when a response body cannot be decoded as JSON and when the
    decoded document does not contain the keys a parser needs.
    """
28+
29+
# Directory (relative to the working directory) where the package lists are written.
DEPENDENCIES_DIR = "dependencies"

# Upstream endpoints the package names are downloaded from.
TOP_PYPI_SOURCE = "https://hugovk.github.io/top-pypi-packages/top-pypi-packages.min.json"
TOP_NPM_SOURCE = "https://packages.ecosyste.ms/api/v1/registries/npmjs.org/packages"
36+
# Retry policy handed to stamina when downloading package lists.
# Only these exception types trigger another attempt; anything else propagates.
RETRY_ON = (httpx.TransportError, httpx.TimeoutException, ServerError)
RETRY_ATTEMPTS = 10  # passed as stamina's `attempts`
RETRY_WAIT_JITTER = 1  # passed as stamina's `wait_jitter` (seconds, per stamina docs)
RETRY_WAIT_EXP_BASE = 2  # passed as stamina's `wait_exp_base`
RETRY_WAIT_MAX = 8  # passed as stamina's `wait_max` (seconds, per stamina docs)

# Timeout given to `httpx.Client` for every request (seconds, per httpx docs).
TIMEOUT = 90
2644
2745
def parse_npm(data: list[dict[str, Any]]) -> set[str]:
    """Extract the package names from one decoded page of the npm listing.

    Args:
        data: Decoded JSON payload; expected to be a list of objects that each
            carry a ``"name"`` key.

    Returns:
        The set of ``"name"`` values found in ``data``.

    Raises:
        InvalidJSONError: If an entry has no ``"name"`` key, or the payload is
            not shaped as a list of objects at all.
    """
    try:
        return {x["name"] for x in data}
    except (KeyError, TypeError) as e:
        # TypeError covers payloads of the wrong shape (e.g. an error object
        # instead of a list), which would otherwise escape untranslated.
        raise InvalidJSONError("npm payload is not a list of objects with a 'name' key") from e
3051
3152
def parse_pypi(data: dict[str, Any]) -> set[str]:
    """Extract the project names from the decoded PyPI top-packages document.

    Args:
        data: Decoded JSON payload; expected to be an object whose ``"rows"``
            entry is a list of objects that each carry a ``"project"`` key.

    Returns:
        The set of ``"project"`` values found under ``data["rows"]``.

    Raises:
        InvalidJSONError: If ``"rows"`` or a row's ``"project"`` key is
            missing, or the payload is not shaped as described above.
    """
    try:
        return {row["project"] for row in data["rows"]}
    except (KeyError, TypeError) as e:
        # TypeError covers payloads of the wrong shape (e.g. a list at the top
        # level), which would otherwise escape untranslated.
        raise InvalidJSONError("PyPI payload has no 'rows' list of objects with a 'project' key") from e
3858
3959
4060@dataclass (frozen = True )
@@ -61,6 +81,22 @@ class Ecosystem:
# Registry of supported ecosystems, keyed by the name accepted on the command line.
ECOSYSTEMS = {"pypi": pypi_ecosystem, "npm": npm_ecosystem}
6282
6383
def _run(ecosystem: str) -> None:
    """Download every package name for ``ecosystem`` and write them to disk.

    Args:
        ecosystem: Key into ``ECOSYSTEMS`` selecting which source to download.

    Raises:
        KeyError: If ``ecosystem`` is not a key of ``ECOSYSTEMS``.

    The result is written to ``<DEPENDENCIES_DIR>/<ecosystem>.json``.
    """
    selected_ecosystem = ECOSYSTEMS[ecosystem]
    all_packages: set[str] = set()

    # Sources without pagination (falsy `pages`) are fetched with a single request.
    n_pages = selected_ecosystem.pages or 1
    # Work on a copy so the ecosystem's own params are never mutated.
    params = selected_ecosystem.params.copy()
    for page in range(1, n_pages + 1):
        if selected_ecosystem.pages:
            # Only paginated sources get a "page" query parameter.
            params["page"] = page

        all_packages.update(get_packages(selected_ecosystem.url, selected_ecosystem.parser, params))

    fpath = Path(DEPENDENCIES_DIR) / f"{ecosystem}.json"
    save_data_to_file(list(all_packages), fpath)
98+
99+
# Root click command group; it does nothing itself and only hosts subcommands.
@click.group()
def entry_point() -> None:
    pass
@@ -78,39 +114,32 @@ def download(
78114 if ecosystem not in ECOSYSTEMS :
79115 raise click .BadParameter ("Not a valid ecosystem" )
80116
81- selected_ecosystem = ECOSYSTEMS [ecosystem ]
82- all_packages : set [str ] = set ()
83-
84- n_pages = selected_ecosystem .pages or 1
85- params = selected_ecosystem .params .copy ()
86- for page in range (1 , n_pages + 1 ):
87- if selected_ecosystem .pages :
88- params ["page" ] = page
89-
90- all_packages .update (get_packages (selected_ecosystem .url , selected_ecosystem .parser , params ))
117+ return _run (ecosystem )
91118
92- fpath = Path (DEPENDENCIES_DIR ) / f"{ ecosystem } .json"
93- save_data_to_file (list (all_packages ), fpath )
94119
95-
def get_packages(
    base_url: str, parser: Callable[[Any], set[str]], params: dict[str, Any] | None = None
) -> set[str]:
    """Download one JSON document and return the package names it contains.

    The HTTP request and JSON decoding run inside a stamina retry context
    configured by the module-level ``RETRY_*`` constants, so failures listed
    in ``RETRY_ON`` are retried.

    Args:
        base_url: URL to request.
        parser: Callable turning the decoded JSON payload into a set of names.
        params: Optional query parameters sent with the request.

    Returns:
        The set of package names produced by ``parser``.

    Raises:
        ServerError: If the response status is 5xx (subject to retries).
        httpx.HTTPStatusError: If the response has any other error status.
        InvalidJSONError: If the body is not valid JSON, or ``parser`` rejects it.
    """
    for attempt in stamina.retry_context(
        on=RETRY_ON,
        attempts=RETRY_ATTEMPTS,
        wait_jitter=RETRY_WAIT_JITTER,
        wait_exp_base=RETRY_WAIT_EXP_BASE,
        wait_max=RETRY_WAIT_MAX,
    ):
        with attempt, httpx.Client(timeout=TIMEOUT) as client:
            response = client.get(str(base_url), params=params)
            try:
                response.raise_for_status()
            except httpx.HTTPStatusError as e:
                if e.response.is_server_error:
                    # Translate 5xx into the retryable ServerError type.
                    raise ServerError from e
                # Any other error status must not be swallowed: parsing an
                # error body as package data would hide the real failure.
                raise
            try:
                json_data = response.json()
            except json.JSONDecodeError as e:
                raise InvalidJSONError from e

    # Reached only after a successful attempt: per stamina's documentation the
    # retry context re-raises the last error once the attempts are exhausted.
    return parser(json_data)
114143
115144
116145def save_data_to_file (all_packages : list [str ], fpath : Path ) -> None :
0 commit comments