42
42
IOWORKERS = int (os .environ .get ("SHADOWMIRE_IOWORKERS" , "2" ))
43
43
# A safety net -- to avoid upstream issues causing too many packages to be removed when determining the sync plan.
44
44
MAX_DELETION = int (os .environ .get ("SHADOWMIRE_MAX_DELETION" , "50000" ))
45
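+ # Sometimes PyPI is not consistent -- newly listed packages cannot be fetched yet. This option tries to avoid permanently marking that kind of package as nonexistent: a missing package whose serial is within this distance of the remote's last serial is retried on the next run instead.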
46
+ IGNORE_THRESHOLD = int (os .environ .get ("SHADOWMIRE_IGNORE_THRESHOLD" , "1024" ))
45
47
46
48
# https://github.com/pypa/bandersnatch/blob/a05af547f8d1958217ef0dc0028890b1839e6116/src/bandersnatch_filter_plugins/prerelease_name.py#L18C1-L23C6
47
49
PRERELEASE_PATTERNS = (
@@ -115,6 +117,13 @@ def remove(self, key: str) -> None:
115
117
cur .execute ("DELETE FROM local WHERE key = ?" , (key ,))
116
118
self .conn .commit ()
117
119
120
def remove_invalid(self) -> int:
    """Delete every row of the ``local`` table whose value is ``-1``.

    The CLI command that calls this describes those rows as "invalid
    package status"; presumably ``-1`` is the marker stored for packages
    that were found missing upstream -- confirm against the writer side.

    Returns:
        The number of rows deleted, as reported by the cursor.
    """
    cur = self.conn.cursor()
    cur.execute("DELETE FROM local WHERE value = -1")
    # Read the row count before committing so it reflects this DELETE.
    rowcnt = cur.rowcount
    self.conn.commit()
    return rowcnt
126
+
118
127
def nuke (self , commit : bool = True ) -> None :
119
128
cur = self .conn .cursor ()
120
129
cur .execute ("DELETE FROM local" )
@@ -337,6 +346,9 @@ def list_packages_with_serial(self, do_normalize: bool = True) -> dict[str, int]
337
346
del ret [key ]
338
347
return ret
339
348
349
def changelog_last_serial(self) -> int:
    """Return the result of the XML-RPC ``changelog_last_serial`` call.

    This is a thin pass-through to ``self.xmlrpc_client``; the value is
    returned unchanged (the proxy call is untyped, hence the ignore).
    """
    return self.xmlrpc_client.changelog_last_serial()  # type: ignore
351
+
340
352
def get_package_metadata (self , package_name : str ) -> dict :
341
353
req = self .session .get (urljoin (self .host , f"pypi/{ package_name } /json" ))
342
354
if req .status_code == 404 :
@@ -828,15 +840,18 @@ def __init__(
828
840
) -> None :
829
841
self .pypi = PyPI ()
830
842
self .session = create_requests_session ()
843
+ self .last_serial : Optional [int ] = None
844
+ self .remote_packages : Optional [dict [str , int ]] = None
831
845
super ().__init__ (basedir , local_db , sync_packages )
832
846
833
847
def fetch_remote_versions(self) -> dict[str, int]:
    """Fetch the remote package/serial listing and cache it on the instance.

    Stores the remote's last changelog serial in ``self.last_serial`` and
    the package-to-serial mapping in ``self.remote_packages``, then writes
    the mapping to ``remote.json`` under ``self.basedir``.

    Returns:
        The package-to-serial mapping that was just fetched.
    """
    # The last serial is queried before the package list is fetched.
    self.last_serial = self.pypi.changelog_last_serial()
    packages = self.pypi.list_packages_with_serial()
    self.remote_packages = packages
    logger.info("Remote has %s packages", len(packages))
    with overwrite(self.basedir / "remote.json") as f:
        json.dump(packages, f)
    logger.info("File saved to remote.json.")
    return packages
840
855
841
856
def do_update (
842
857
self ,
@@ -852,9 +867,31 @@ def do_update(
852
867
meta_original = deepcopy (meta )
853
868
logger .debug ("%s meta: %s" , package_name , meta )
854
869
except PackageNotFoundError :
870
+ if (
871
+ self .remote_packages is not None
872
+ and package_name in self .remote_packages
873
+ ):
874
+ recorded_serial = self .remote_packages [package_name ]
875
+ else :
876
+ recorded_serial = None
877
+ if (
878
+ recorded_serial is not None
879
+ and self .last_serial is not None
880
+ and abs (recorded_serial - self .last_serial ) < IGNORE_THRESHOLD
881
+ ):
882
+ logger .warning (
883
+ "%s missing from upstream (its serial %s, remote last serial %s), try next time..." ,
884
+ package_name ,
885
+ recorded_serial ,
886
+ self .last_serial ,
887
+ )
888
+ return None
889
+
855
890
logger .warning (
856
- "%s missing from upstream, remove and ignore in the future." ,
891
+ "%s missing from upstream (its serial %s, remote last serial %s) , remove and ignore in the future." ,
857
892
package_name ,
893
+ recorded_serial ,
894
+ self .last_serial ,
858
895
)
859
896
# try remove it locally, if it does not exist upstream
860
897
self .do_remove (package_name , use_db = False )
@@ -1446,5 +1483,13 @@ def list_packages_with_serial(ctx: click.Context) -> None:
1446
1483
syncer .fetch_remote_versions ()
1447
1484
1448
1485
1486
@cli.command(help="Clear invalid package status in local database")
@click.pass_context
def clear_invalid_packages(ctx: click.Context) -> None:
    """Remove invalid-status rows from the local database and log the count.

    Reads the ``local_db`` entry from the click context object, calls its
    ``remove_invalid`` method and logs the number it returns.
    """
    local_db: LocalVersionKV = ctx.obj["local_db"]
    total = local_db.remove_invalid()
    logger.info("Removed %s invalid status in local database", total)
1492
+
1493
+
1449
1494
if __name__ == "__main__" :
1450
1495
cli (obj = {})
0 commit comments