Handle pages without `window._sharedData` and back off when rate limited.

* get_shared_data() returns None when the pattern is not found in the page.
* Callers parse the page once, check for None and raise a descriptive
  RuntimeError. The earlier `... if not None else None` loops never guarded
  anything (`not None` is always true) and re-parsed the same response.
* PageIterator waits 600 s on a "rate limited" answer instead of raising,
  and keeps the 10 s pause for any other KeyError.

NOTE(review): indentation and blank context lines were reconstructed from a
whitespace-collapsed copy of this patch; verify them against the target
files before applying.

diff --git a/instalooter/_utils.py b/instalooter/_utils.py
index 8a195df..b598058 100644
--- a/instalooter/_utils.py
+++ b/instalooter/_utils.py
@@ -43,7 +43,7 @@ def _get_info(cls, media):
         if timestamp is not None:
             dt = datetime.datetime.fromtimestamp(timestamp)
             info['datetime'] = ("{0.year}-{0.month:02d}-{0.day:02d} {0.hour:02d}"
-                "h{0.minute:02d}m{0.second:02d}s{0.microsecond}").format(dt)
+                                "h{0.minute:02d}m{0.second:02d}s{0.microsecond}").format(dt)
             info['date'] = datetime.date.fromtimestamp(timestamp)
 
         return dict(six.moves.filter(
@@ -91,4 +91,11 @@ def __set__(self, obj, value):
 
 def get_shared_data(html):
+    """Extract the ``window._sharedData`` JSON object from a page.
+
+    Returns the decoded object, or ``None`` when the pattern is not
+    found in *html*.
+    """
     match = re.search(r'window._sharedData = ({[^\n]*});', html)
+    if match is None:
+        return None
     return json.loads(match.group(1))
diff --git a/instalooter/looters.py b/instalooter/looters.py
index c12cbe6..6390bdc 100644
--- a/instalooter/looters.py
+++ b/instalooter/looters.py
@@ -143,7 +143,12 @@ def _login(cls, username, password, session=None):
             })
 
             with session.get(homepage) as res:
-                token = get_shared_data(res.text)['config']['csrf_token']
+                # The shared data may be missing from the page: re-parsing
+                # the same response cannot help, so fail with a clear error.
+                shared_data = get_shared_data(res.text)
+                if shared_data is None:
+                    raise RuntimeError("no shared data found in homepage")
+                token = shared_data['config']['csrf_token']
                 session.headers.update({'X-CSRFToken': token})
 
             time.sleep(5 * random.random())  # nosec
@@ -283,9 +288,14 @@ def __init__(self,
 
         # Get CSRFToken and RHX
         with self.session.get('https://www.instagram.com/') as res:
-            token = get_shared_data(res.text)['config']['csrf_token']
+            # Parse the page only once, and fail with a clear error when
+            # the shared data is missing from the response.
+            shared_data = get_shared_data(res.text)
+            if shared_data is None:
+                raise RuntimeError("no shared data found in homepage")
+            token = shared_data['config']['csrf_token']
             self.session.headers['X-CSRFToken'] = token
-            self.rhx = get_shared_data(res.text).get('rhx_gis', '')
+            self.rhx = shared_data.get('rhx_gis', '')
 
     @abc.abstractmethod
     def pages(self):
@@ -345,7 +355,11 @@ def get_post_info(self, code):
         """
         url = "https://www.instagram.com/p/{}/".format(code)
         with self.session.get(url) as res:
             data = get_shared_data(res.text)
+            # get_shared_data returns None when the page has no shared
+            # data: report it instead of failing on the lookup below.
+            if data is None:
+                raise RuntimeError("no shared data found for post {}".format(code))
             return data['entry_data']['PostPage'][0]['graphql']['shortcode_media']
 
     def download_pictures(self,
diff --git a/instalooter/pages.py b/instalooter/pages.py
index 0564171..6e07da5 100644
--- a/instalooter/pages.py
+++ b/instalooter/pages.py
@@ -34,7 +34,8 @@ class PageIterator(typing.Iterator[typing.Dict[typing.Text, typing.Any]]):
     """
 
     PAGE_SIZE = 50
-    INTERVAL = 2
+    # Seconds to sleep between two queries (raised to lower the query rate)
+    INTERVAL = 50
 
     _BASE_URL = "https://www.instagram.com/graphql/query/"
     _section_generic = NotImplemented    # type: Text
@@ -73,8 +74,11 @@ def _page_loader(self, session, rhx):
                 yield data['data']
             except KeyError as e:
                 if data.get('message') == 'rate limited':
-                    raise RuntimeError("Query rate exceeded (wait before next run)")
-                time.sleep(10)
+                    # Wait and carry on instead of aborting the whole run
+                    print("Query rate exceeded (wait before next run)")
+                    time.sleep(600)
+                else:
+                    time.sleep(10)
             # Sleep before next query
             time.sleep(self.INTERVAL)
 