From 63bf48d7b41e8aa9be1999d6dd43d913088aee6c Mon Sep 17 00:00:00 2001 From: james Date: Sun, 25 Sep 2022 14:31:59 +0800 Subject: [PATCH 1/9] Add optional timeout option to all requests --- itunes_app_scraper/scraper.py | 28 ++++++++++++++-------------- 1 file changed, 14 insertions(+), 14 deletions(-) diff --git a/itunes_app_scraper/scraper.py b/itunes_app_scraper/scraper.py index aa8b5bf..f3732c7 100644 --- a/itunes_app_scraper/scraper.py +++ b/itunes_app_scraper/scraper.py @@ -25,7 +25,7 @@ class AppStoreScraper: can be found at https://github.com/facundoolano/app-store-scraper. """ - def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl"): + def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl", timeout=None): """ Retrieve suggested app IDs for search query @@ -53,7 +53,7 @@ def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl"): } try: - result = requests.get(url, headers=headers).json() + result = requests.get(url, headers=headers, timeout=timeout).json() except ConnectionError as ce: raise AppStoreException("Cannot connect to store: {0}".format(str(ce))) except json.JSONDecodeError: @@ -61,7 +61,7 @@ def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl"): return [app["id"] for app in result["bubbles"][0]["results"][:amount]] - def get_app_ids_for_collection(self, collection="", category="", num=50, country="nl", lang=""): + def get_app_ids_for_collection(self, collection="", category="", num=50, country="nl", lang="", timeout=None): """ Retrieve app IDs in given App Store collection @@ -86,13 +86,13 @@ def get_app_ids_for_collection(self, collection="", category="", num=50, country url = "http://ax.itunes.apple.com/WebObjects/MZStoreServices.woa/ws/RSS/%s/%s/limit=%s/json?s=%s" % params try: - result = requests.get(url).json() + result = requests.get(url, timeout=timeout).json() except json.JSONDecodeError: raise AppStoreException("Could not parse app store 
response") return [entry["id"]["attributes"]["im:id"] for entry in result["feed"]["entry"]] - def get_app_ids_for_developer(self, developer_id, country="nl", lang=""): + def get_app_ids_for_developer(self, developer_id, country="nl", lang="", timeout=None): """ Retrieve App IDs linked to given developer @@ -106,7 +106,7 @@ def get_app_ids_for_developer(self, developer_id, country="nl", lang=""): url = "https://itunes.apple.com/lookup?id=%s&country=%s&entity=software" % (developer_id, country) try: - result = requests.get(url).json() + result = requests.get(url, timeout=timeout).json() except json.JSONDecodeError: raise AppStoreException("Could not parse app store response") @@ -116,7 +116,7 @@ def get_app_ids_for_developer(self, developer_id, country="nl", lang=""): # probably an invalid developer ID return [] - def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl"): + def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl", timeout=None): """ Retrieve list of App IDs of apps similar to given app @@ -139,7 +139,7 @@ def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl"): "Accept-Language": lang } - result = requests.get(url, headers=headers).text + result = requests.get(url, headers=headers, timeout=timeout).text if "customersAlsoBoughtApps" not in result: return [] @@ -154,7 +154,7 @@ def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl"): return ids - def get_app_details(self, app_id, country="nl", lang="", add_ratings=False, flatten=True, sleep=None, force=False): + def get_app_details(self, app_id, country="nl", lang="", add_ratings=False, flatten=True, sleep=None, force=False, timeout=None): """ Get app details for given app ID @@ -194,13 +194,13 @@ def get_app_details(self, app_id, country="nl", lang="", add_ratings=False, flat try: if sleep is not None: time.sleep(sleep) - result = requests.get(url).json() + result = requests.get(url, timeout=timeout).json() except Exception: try: # handle the 
retry here. # Take an extra sleep as back off and then retry the URL once. time.sleep(2) - result = requests.get(url).json() + result = requests.get(url, timeout=timeout).json() except Exception: raise AppStoreException("Could not parse app store response for ID %s" % app_id) @@ -268,7 +268,7 @@ def get_store_id_for_country(self, country): else: raise AppStoreException("Country code not found for {0}".format(country)) - def get_app_ratings(self, app_id, countries=None, sleep=1): + def get_app_ratings(self, app_id, countries=None, sleep=1, timeout=None): """ Get app ratings for given app ID @@ -299,13 +299,13 @@ def get_app_ratings(self, app_id, countries=None, sleep=1): try: if sleep is not None: time.sleep(sleep) - result = requests.get(url, headers=headers).text + result = requests.get(url, headers=headers, timeout=timeout).text except Exception: try: # handle the retry here. # Take an extra sleep as back off and then retry the URL once. time.sleep(2) - result = requests.get(url, headers=headers).text + result = requests.get(url, headers=headers, timeout=timeout).text except Exception: raise AppStoreException("Could not parse app store rating response for ID %s" % app_id) From 189b954a156e225fae1e830c1391769de8dbd154 Mon Sep 17 00:00:00 2001 From: james Date: Sun, 25 Sep 2022 14:38:13 +0800 Subject: [PATCH 2/9] Add description of timeout option --- itunes_app_scraper/scraper.py | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/itunes_app_scraper/scraper.py b/itunes_app_scraper/scraper.py index f3732c7..e71fc28 100644 --- a/itunes_app_scraper/scraper.py +++ b/itunes_app_scraper/scraper.py @@ -35,6 +35,7 @@ def get_app_ids_for_query(self, term, num=50, page=1, country="nl", lang="nl", t :param str country: Two-letter country code of store to search in, default 'nl' :param str lang: Language code to search with, default 'nl' + :param int timeout: Seconds to wait for response before stopping. 
:return list: List of App IDs returned for search query """ @@ -75,6 +76,7 @@ def get_app_ids_for_collection(self, collection="", category="", num=50, country :param str country: Two-letter country code for the store to search in. Defaults to 'nl'. :param str lang: Dummy argument for compatibility. Unused. + :param int timeout: Seconds to wait for response before stopping. :return: List of App IDs in collection. """ @@ -100,6 +102,7 @@ def get_app_ids_for_developer(self, developer_id, country="nl", lang="", timeout :param str country: Two-letter country code for the store to search in. Defaults to 'nl'. :param str lang: Dummy argument for compatibility. Unused. + :param int timeout: Seconds to wait for response before stopping. :return list: List of App IDs linked to developer """ @@ -128,6 +131,7 @@ def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl", timeout=N :param str country: Two-letter country code for the store to search in. Defaults to 'nl'. :param str lang: Language code to search with, default 'nl' + :param int timeout: Seconds to wait for response before stopping. :return list: List of similar app IDs """ @@ -173,6 +177,7 @@ def get_app_details(self, app_id, country="nl", lang="", add_ratings=False, flat short time. Defaults to None. :param bool force: by-passes the server side caching by adding a timestamp to the request (default is False) + :param int timeout: Seconds to wait for response before stopping. :return dict: App details, as returned by the app store. The result is not processed any further, unless `flatten` is True @@ -280,6 +285,7 @@ def get_app_ratings(self, app_id, countries=None, sleep=1, timeout=None): :param int sleep: Seconds to sleep before request to prevent being temporary blocked if there are many requests in a short time. Defaults to 1. + :param int timeout: Seconds to wait for response before stopping. :return dict: App ratings, as scraped from the app store. 
""" From 093a722569abf460a1c964607491ffd94229285d Mon Sep 17 00:00:00 2001 From: james Date: Fri, 14 Oct 2022 21:51:44 +0800 Subject: [PATCH 3/9] Since API already returns all apps for a developer, add function to return all results --- itunes_app_scraper/scraper.py | 28 ++++++++++++++++++++++++---- setup.py | 2 +- 2 files changed, 25 insertions(+), 5 deletions(-) diff --git a/itunes_app_scraper/scraper.py b/itunes_app_scraper/scraper.py index e71fc28..359c6ab 100644 --- a/itunes_app_scraper/scraper.py +++ b/itunes_app_scraper/scraper.py @@ -94,9 +94,9 @@ def get_app_ids_for_collection(self, collection="", category="", num=50, country return [entry["id"]["attributes"]["im:id"] for entry in result["feed"]["entry"]] - def get_app_ids_for_developer(self, developer_id, country="nl", lang="", timeout=None): + def get_apps_for_developer(self, developer_id, country="nl", lang="", timeout=None): """ - Retrieve App IDs linked to given developer + Retrieve Apps linked to given developer :param int developer_id: Developer ID :param str country: Two-letter country code for the store to search in. @@ -104,7 +104,7 @@ def get_app_ids_for_developer(self, developer_id, country="nl", lang="", timeout :param str lang: Dummy argument for compatibility. Unused. :param int timeout: Seconds to wait for response before stopping. 
- :return list: List of App IDs linked to developer + :return list[dict]: List of Apps linked to developer """ url = "https://itunes.apple.com/lookup?id=%s&country=%s&entity=software" % (developer_id, country) @@ -114,11 +114,31 @@ def get_app_ids_for_developer(self, developer_id, country="nl", lang="", timeout raise AppStoreException("Could not parse app store response") if "results" in result: - return [app["trackId"] for app in result["results"] if app["wrapperType"] == "software"] + return [app for app in result["results"] if app["wrapperType"] == "software"] else: # probably an invalid developer ID return [] + def get_app_ids_for_developer(self, developer_id, country="nl", lang="", timeout=None): + """ + Retrieve App IDs linked to given developer + + :param int developer_id: Developer ID + :param str country: Two-letter country code for the store to search in. + Defaults to 'nl'. + :param str lang: Dummy argument for compatibility. Unused. + :param int timeout: Seconds to wait for response before stopping. 
+ + :return list: List of App IDs linked to developer + """ + apps = self.get_apps_for_developer(developer_id, country=country, lang=lang, timeout=timeout) + if len(apps) > 0: + app_ids =[app["trackId"] for app in apps["results"] if app["wrapperType"] == "software"] + else: + return [] + return app_ids + + def get_similar_app_ids_for_app(self, app_id, country="nl", lang="nl", timeout=None): """ Retrieve list of App IDs of apps similar to given app diff --git a/setup.py b/setup.py index 5e8d7cc..5802e4f 100644 --- a/setup.py +++ b/setup.py @@ -5,7 +5,7 @@ setuptools.setup( name="itunes-app-scraper-dmi", - version="0.9.5", + version="0.9.6", author="Digital Methods Initiative", author_email="stijn.peeters@uva.nl", description="A lightweight iTunes App Store scraper", From e9f2bad341030273ba9dcc76581cd9e8358ce5b1 Mon Sep 17 00:00:00 2001 From: james Date: Fri, 13 Oct 2023 13:38:44 +0800 Subject: [PATCH 4/9] Update to new charts url --- itunes_app_scraper/scraper.py | 8 ++++++-- 1 file changed, 6 insertions(+), 2 deletions(-) diff --git a/itunes_app_scraper/scraper.py b/itunes_app_scraper/scraper.py index 359c6ab..5920671 100644 --- a/itunes_app_scraper/scraper.py +++ b/itunes_app_scraper/scraper.py @@ -84,8 +84,12 @@ def get_app_ids_for_collection(self, collection="", category="", num=50, country collection = AppStoreCollections.TOP_FREE_IOS country = self.get_store_id_for_country(country) - params = (collection, category, num, country) - url = "http://ax.itunes.apple.com/WebObjects/MZStoreServices.woa/ws/RSS/%s/%s/limit=%s/json?s=%s" % params + #params = (collection, category, num, country) + params = (country, category, collection, num) + #url = "http://ax.itunes.apple.com/WebObjects/MZStoreServices.woa/ws/RSS/%s/%s/limit=%s/json?s=%s" % params + url = "https://itunes.apple.com/WebObjects/MZStoreServices.woa/ws/charts?cc=%s&g=%s&name=%s&limit=%s" % params + + try: result = requests.get(url, timeout=timeout).json() From 7fb488c1a4bdcaf36e48c5fabf786ead802c2964 Mon 
Sep 17 00:00:00 2001 From: james Date: Fri, 13 Oct 2023 13:45:17 +0800 Subject: [PATCH 5/9] Revert to tabs for consistency with old library --- itunes_app_scraper/scraper.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/itunes_app_scraper/scraper.py b/itunes_app_scraper/scraper.py index 5920671..502021d 100644 --- a/itunes_app_scraper/scraper.py +++ b/itunes_app_scraper/scraper.py @@ -84,11 +84,8 @@ def get_app_ids_for_collection(self, collection="", category="", num=50, country collection = AppStoreCollections.TOP_FREE_IOS country = self.get_store_id_for_country(country) - #params = (collection, category, num, country) - params = (country, category, collection, num) - #url = "http://ax.itunes.apple.com/WebObjects/MZStoreServices.woa/ws/RSS/%s/%s/limit=%s/json?s=%s" % params - url = "https://itunes.apple.com/WebObjects/MZStoreServices.woa/ws/charts?cc=%s&g=%s&name=%s&limit=%s" % params - + params = (country, category, collection, num) + url = "https://itunes.apple.com/WebObjects/MZStoreServices.woa/ws/charts?cc=%s&g=%s&name=%s&limit=%s" % params try: From 730289633be7a6288c007341efe8bfc5c147b3ab Mon Sep 17 00:00:00 2001 From: james Date: Fri, 13 Oct 2023 16:06:16 +0800 Subject: [PATCH 6/9] Seems the collections need a new name to use charts API --- itunes_app_scraper/util.py | 21 ++++++++------------- 1 file changed, 8 insertions(+), 13 deletions(-) diff --git a/itunes_app_scraper/util.py b/itunes_app_scraper/util.py index c0fed9a..8c1f354 100644 --- a/itunes_app_scraper/util.py +++ b/itunes_app_scraper/util.py @@ -25,23 +25,18 @@ class AppStoreCollections: """ App store collection IDs - Borrowed from https://github.com/facundoolano/app-store-scraper. These are + Based on https://itunes.apple.com/WebObjects/MZStoreServices.woa/ws/genres?id=6000. These are the various collections displayed in the app store, usually on the front page. 
""" - TOP_MAC = 'topmacapps' - TOP_FREE_MAC = 'topfreemacapps' - TOP_GROSSING_MAC = 'topgrossingmacapps' + TOP_FREE_MAC = 'freeMacAppsV2' TOP_PAID_MAC = 'toppaidmacapps' - NEW_IOS = 'newapplications' - NEW_FREE_IOS = 'newfreeapplications' - NEW_PAID_IOS = 'newpaidapplications' - TOP_FREE_IOS = 'topfreeapplications' - TOP_FREE_IPAD = 'topfreeipadapplications' - TOP_GROSSING_IOS = 'topgrossingapplications' - TOP_GROSSING_IPAD = 'topgrossingipadapplications' - TOP_PAID_IOS = 'toppaidapplications' - TOP_PAID_IPAD = 'toppaidipadapplications' + TOP_FREE_IOS = 'freeAppsV2' + TOP_FREE_IPAD = 'freeIpadApplications' + TOP_GROSSING_IOS = 'appsByRevenue' + TOP_GROSSING_IPAD = 'ipadAppsByRevenue' + TOP_PAID_IOS = 'paidApplications' + TOP_PAID_IPAD = 'paidIpadApplications' class AppStoreCategories: """ From 12e4f439530904aa9a5da8d08418f3e7dbcdea13 Mon Sep 17 00:00:00 2001 From: james Date: Fri, 13 Oct 2023 16:09:22 +0800 Subject: [PATCH 7/9] Now API only returns IDs --- itunes_app_scraper/scraper.py | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/itunes_app_scraper/scraper.py b/itunes_app_scraper/scraper.py index 502021d..42a8164 100644 --- a/itunes_app_scraper/scraper.py +++ b/itunes_app_scraper/scraper.py @@ -93,7 +93,7 @@ def get_app_ids_for_collection(self, collection="", category="", num=50, country except json.JSONDecodeError: raise AppStoreException("Could not parse app store response") - return [entry["id"]["attributes"]["im:id"] for entry in result["feed"]["entry"]] + return result["resultIds"] def get_apps_for_developer(self, developer_id, country="nl", lang="", timeout=None): """ From 98d18cc918c0a0013388f4e18ace3fb53d446eb0 Mon Sep 17 00:00:00 2001 From: james Date: Fri, 13 Oct 2023 16:14:45 +0800 Subject: [PATCH 8/9] Requires two letter country code, upper or lower --- itunes_app_scraper/scraper.py | 3 ++- 1 file changed, 2 insertions(+), 1 deletion(-) diff --git a/itunes_app_scraper/scraper.py b/itunes_app_scraper/scraper.py index 42a8164..958dbb4 
100644 --- a/itunes_app_scraper/scraper.py +++ b/itunes_app_scraper/scraper.py @@ -83,12 +83,13 @@ def get_app_ids_for_collection(self, collection="", category="", num=50, country if not collection: collection = AppStoreCollections.TOP_FREE_IOS - country = self.get_store_id_for_country(country) + country = country.lower() params = (country, category, collection, num) url = "https://itunes.apple.com/WebObjects/MZStoreServices.woa/ws/charts?cc=%s&g=%s&name=%s&limit=%s" % params try: + print(url) result = requests.get(url, timeout=timeout).json() except json.JSONDecodeError: raise AppStoreException("Could not parse app store response") From 83bdab199bb04c7ecb8cc255dc340949307ddca2 Mon Sep 17 00:00:00 2001 From: james Date: Fri, 13 Oct 2023 16:15:00 +0800 Subject: [PATCH 9/9] Requires two letter country code, upper or lower --- itunes_app_scraper/scraper.py | 1 - 1 file changed, 1 deletion(-) diff --git a/itunes_app_scraper/scraper.py b/itunes_app_scraper/scraper.py index 958dbb4..111266b 100644 --- a/itunes_app_scraper/scraper.py +++ b/itunes_app_scraper/scraper.py @@ -89,7 +89,6 @@ def get_app_ids_for_collection(self, collection="", category="", num=50, country try: - print(url) result = requests.get(url, timeout=timeout).json() except json.JSONDecodeError: raise AppStoreException("Could not parse app store response")