diff --git a/instalooter/cli/__init__.py b/instalooter/cli/__init__.py index d7c6c92..25a9b6d 100644 --- a/instalooter/cli/__init__.py +++ b/instalooter/cli/__init__.py @@ -168,6 +168,7 @@ def main(argv=None, stream=None): destination=dest_fs, media_count=args['--num-to-dl'], timeframe=args['--time'], + cursor=args['--cursor'], new_only=args['--new'], pgpbar_cls=None if args['--quiet'] else TqdmProgressBar, dlpbar_cls=None if args['--quiet'] else TqdmProgressBar) diff --git a/instalooter/cli/constants.py b/instalooter/cli/constants.py index f627682..b9c594c 100644 --- a/instalooter/cli/constants.py +++ b/instalooter/cli/constants.py @@ -54,6 +54,7 @@ in the destination directory (faster). -t TIME, --time TIME The time limit within which to download pictures and video (see *Time*). + -c CURSOR, --cursor CURSOR Use saved cursor to resume looting Options - Metadata: -m, --add-metadata Add date and caption metadata to downloaded diff --git a/instalooter/looters.py b/instalooter/looters.py index 70d1a75..f9919df 100644 --- a/instalooter/looters.py +++ b/instalooter/looters.py @@ -320,7 +320,7 @@ def _medias(self, return TimedMediasIterator(pages_iterator, timeframe) return MediasIterator(pages_iterator) - def medias(self, timeframe=None): + def medias(self, timeframe=None, cursor=None): # type: (Optional[_Timeframe]) -> Iterator[Dict[Text, Any]] """Obtain an iterator over the Instagram medias. @@ -331,7 +331,7 @@ def medias(self, timeframe=None): MediasIterator: an iterator over the medias in every pages. 
""" - return self._medias(self.pages(), timeframe) + return self._medias(self.pages(cursor=cursor), timeframe) def get_post_info(self, code): # type: (str) -> dict @@ -409,6 +409,7 @@ def download(self, condition=None, # type: Optional[Callable[[dict], bool]] media_count=None, # type: Optional[int] timeframe=None, # type: Optional[_Timeframe] + cursor=None, # type: Optional[str] new_only=False, # type: bool pgpbar_cls=None, # type: Optional[Type[ProgressBar]] dlpbar_cls=None, # type: Optional[Type[ProgressBar]] @@ -432,6 +433,7 @@ def download(self, timeframe (tuple or None): a tuple of two `~datetime.datetime` objects to enforce a time frame (the first item must be more recent). Leave to `None` to ignore times. + cursor (str or None): a saved cursor from which to resume looting. new_only (bool): stop media discovery when already downloaded medias are encountered. pgpbar_cls (type or None): an optional `~.pbar.ProgressBar` @@ -450,7 +452,7 @@ def download(self, destination, close_destination = self._init_destfs(destination) # Create an iterator over the pages with an optional progress bar - pages_iterator = self.pages() # type: Iterable[Dict[Text, Any]] + pages_iterator = self.pages(cursor=cursor) # type: Iterable[Dict[Text, Any]] pages_iterator = pgpbar = self._init_pbar(pages_iterator, pgpbar_cls) # Create an iterator over the medias @@ -708,7 +710,7 @@ def __init__(self, username, **kwargs): self._username = username self._owner_id = None - def pages(self): + def pages(self, cursor=None): # type: () -> ProfileIterator """Obtain an iterator over Instagram post pages. 
@@ -723,10 +725,10 @@ def pages(self): """ if self._owner_id is None: - it = ProfileIterator.from_username(self._username, self.session) + it = ProfileIterator.from_username(self._username, self.session, cursor=cursor) self._owner_id = it.owner_id return it - return ProfileIterator(self._owner_id, self.session, self.rhx) + return ProfileIterator(self._owner_id, self.session, self.rhx, cursor=cursor) class HashtagLooter(InstaLooter): diff --git a/instalooter/pages.py b/instalooter/pages.py index ee20be0..f4d3176 100644 --- a/instalooter/pages.py +++ b/instalooter/pages.py @@ -41,10 +41,11 @@ class PageIterator(typing.Iterator[typing.Dict[typing.Text, typing.Any]]): _section_media = NotImplemented # type: Text _URL = NotImplemented # type: Text - def __init__(self, session, rhx): + def __init__(self, session, rhx, cursor=None): # type: (Session, Text) -> None self._finished = False - self._cursor = None # type: Optional[Text] + self._cursor = cursor # type: Optional[Text] + self._current_page = 0 self._data_it = iter(self._page_loader(session, rhx)) @@ -62,6 +63,7 @@ def _page_loader(self, session, rhx): try: # Prepare the query params = self._getparams(cursor) + json_params = json.dumps(params, separators=(',', ':')) magic = "{}:{}".format(rhx, json_params) session.headers['x-instagram-gis'] = hashlib.md5(magic.encode('utf-8')).hexdigest() @@ -173,7 +175,7 @@ def _user_data(cls, username, session): raise ValueError("user not found: '{}'".format(username)) @classmethod - def from_username(cls, username, session): + def from_username(cls, username, session, cursor=None): user_data = cls._user_data(username, session) if 'ProfilePage' not in user_data['entry_data']: raise ValueError("user not found: '{}'".format(username)) @@ -182,10 +184,10 @@ def from_username(cls, username, session): con_id = next((c.value for c in session.cookies if c.name == "ds_user_id"), None) if con_id != data['id']: raise RuntimeError("user '{}' is private".format(username)) - return 
cls(data['id'], session, user_data.get('rhx_gis', '')) + return cls(data['id'], session, user_data.get('rhx_gis', ''), cursor=cursor) - def __init__(self, owner_id, session, rhx): - super(ProfileIterator, self).__init__(session, rhx) + def __init__(self, owner_id, session, rhx, cursor=None): + super(ProfileIterator, self).__init__(session, rhx, cursor=cursor) self.owner_id = owner_id def _getparams(self, cursor):