Skip to content

Commit f208168

Browse files
committed
shadowmire.py: update to taoky/shadowmire@1856a97
Signed-off-by: Shengqi Chen <[email protected]>
1 parent 69b7d8d commit f208168

File tree

1 file changed

+50
-5
lines changed

1 file changed

+50
-5
lines changed

shadowmire.py

+50-5
Original file line numberDiff line numberDiff line change
@@ -42,6 +42,8 @@
4242
IOWORKERS = int(os.environ.get("SHADOWMIRE_IOWORKERS", "2"))
4343
# A safety net -- to avoid upstream issues causing too many packages to be removed when determining the sync plan.
4444
MAX_DELETION = int(os.environ.get("SHADOWMIRE_MAX_DELETION", "50000"))
45+
# Sometimes PyPI is not consistent -- new packages cannot be fetched yet. This option tries to avoid permanently marking that kind of package as nonexistent.
46+
IGNORE_THRESHOLD = int(os.environ.get("SHADOWMIRE_IGNORE_THRESHOLD", "1024"))
4547

4648
# https://github.com/pypa/bandersnatch/blob/a05af547f8d1958217ef0dc0028890b1839e6116/src/bandersnatch_filter_plugins/prerelease_name.py#L18C1-L23C6
4749
PRERELEASE_PATTERNS = (
@@ -115,6 +117,13 @@ def remove(self, key: str) -> None:
115117
cur.execute("DELETE FROM local WHERE key = ?", (key,))
116118
self.conn.commit()
117119

120+
def remove_invalid(self) -> int:
    """Delete the rows of ``local`` whose value is -1.

    Runs the DELETE on a fresh cursor from ``self.conn``, commits the
    connection, and returns the row count the cursor reported for that
    statement.
    """
    cursor = self.conn.cursor()
    cursor.execute("DELETE FROM local WHERE value = -1")
    # Capture the count for the DELETE before committing.
    deleted = cursor.rowcount
    self.conn.commit()
    return deleted
126+
118127
def nuke(self, commit: bool = True) -> None:
119128
cur = self.conn.cursor()
120129
cur.execute("DELETE FROM local")
@@ -337,6 +346,9 @@ def list_packages_with_serial(self, do_normalize: bool = True) -> dict[str, int]
337346
del ret[key]
338347
return ret
339348

349+
def changelog_last_serial(self) -> int:
    """Return whatever ``self.xmlrpc_client.changelog_last_serial()`` yields.

    Thin pass-through to the XML-RPC client; the result is returned
    unchanged.
    """
    serial = self.xmlrpc_client.changelog_last_serial()
    return serial  # type: ignore
351+
340352
def get_package_metadata(self, package_name: str) -> dict:
341353
req = self.session.get(urljoin(self.host, f"pypi/{package_name}/json"))
342354
if req.status_code == 404:
@@ -828,15 +840,18 @@ def __init__(
828840
) -> None:
829841
self.pypi = PyPI()
830842
self.session = create_requests_session()
843+
self.last_serial: Optional[int] = None
844+
self.remote_packages: Optional[dict[str, int]] = None
831845
super().__init__(basedir, local_db, sync_packages)
832846

833847
def fetch_remote_versions(self) -> dict[str, int]:
834-
ret = self.pypi.list_packages_with_serial()
835-
logger.info("Remote has %s packages", len(ret))
848+
self.last_serial = self.pypi.changelog_last_serial()
849+
self.remote_packages = self.pypi.list_packages_with_serial()
850+
logger.info("Remote has %s packages", len(self.remote_packages))
836851
with overwrite(self.basedir / "remote.json") as f:
837-
json.dump(ret, f)
852+
json.dump(self.remote_packages, f)
838853
logger.info("File saved to remote.json.")
839-
return ret
854+
return self.remote_packages
840855

841856
def do_update(
842857
self,
@@ -852,9 +867,31 @@ def do_update(
852867
meta_original = deepcopy(meta)
853868
logger.debug("%s meta: %s", package_name, meta)
854869
except PackageNotFoundError:
870+
if (
871+
self.remote_packages is not None
872+
and package_name in self.remote_packages
873+
):
874+
recorded_serial = self.remote_packages[package_name]
875+
else:
876+
recorded_serial = None
877+
if (
878+
recorded_serial is not None
879+
and self.last_serial is not None
880+
and abs(recorded_serial - self.last_serial) < IGNORE_THRESHOLD
881+
):
882+
logger.warning(
883+
"%s missing from upstream (its serial %s, remote last serial %s), try next time...",
884+
package_name,
885+
recorded_serial,
886+
self.last_serial,
887+
)
888+
return None
889+
855890
logger.warning(
856-
"%s missing from upstream, remove and ignore in the future.",
891+
"%s missing from upstream (its serial %s, remote last serial %s), remove and ignore in the future.",
857892
package_name,
893+
recorded_serial,
894+
self.last_serial,
858895
)
859896
# try remove it locally, if it does not exist upstream
860897
self.do_remove(package_name, use_db=False)
@@ -1446,5 +1483,13 @@ def list_packages_with_serial(ctx: click.Context) -> None:
14461483
syncer.fetch_remote_versions()
14471484

14481485

1486+
@cli.command(help="Clear invalid package status in local database")
@click.pass_context
def clear_invalid_packages(ctx: click.Context) -> None:
    # CLI entry point: take the local database handle from the click context,
    # ask it to drop its invalid entries, and log how many were removed.
    db: LocalVersionKV = ctx.obj["local_db"]
    removed_count = db.remove_invalid()
    logger.info("Removed %s invalid status in local database", removed_count)
1492+
1493+
14491494
if __name__ == "__main__":
14501495
cli(obj={})

0 commit comments

Comments
 (0)