From 657679140dad0369af1f97d43c5dbe811ad7e7bf Mon Sep 17 00:00:00 2001 From: Machine Learning by Manish Date: Sat, 2 Dec 2023 17:08:23 +0530 Subject: [PATCH 1/3] minor changes in __init__ of flipkart class --- flipkart.py | 13 +++++++------ 1 file changed, 7 insertions(+), 6 deletions(-) diff --git a/flipkart.py b/flipkart.py index 2593d54..2d9a9c5 100644 --- a/flipkart.py +++ b/flipkart.py @@ -14,12 +14,12 @@ class flipkart: def __init__(self, url): self.url = url - request_status_code = requests.get(url, headers=header).status_code - if request_status_code != 200: - sys.exit(f"Unable to get the page. Error code: {request_status_code}") - - html_text = requests.get(url, headers=header).text + response = requests.get(url, headers=header) + if response.status_code != 200: + sys.exit(f"Unable to get the page. Error code: {response.status_code}") + + html_text = response.text soup = BeautifulSoup(html_text, 'lxml') product_html_element = soup.find('span', class_='B_NuCI') @@ -50,8 +50,9 @@ def search_item(prod_name): url = "https://www.flipkart.com/search?q=" + prod_name request_status_code = requests.get(url, headers=header).status_code + if request_status_code != 200: - sys.exit(f"Unable to get the page. Error code: {request_status_code}") + sys.exit(f"- Unable to get the page. 
Error code: {request_status_code}") html_text = requests.get(url, headers=header).text From 630c82bfd65b3d15408ddb03955025c8fa287aa5 Mon Sep 17 00:00:00 2001 From: Machine Learning by Manish Date: Sat, 2 Dec 2023 18:34:40 +0530 Subject: [PATCH 2/3] Include price information for Amazon (which is missing in previous code) and optimize code for efficiency (made some changes in basic code) --- amazon.py | 30 ++++++++++++++++-------------- flipkart.py | 8 ++++---- 2 files changed, 20 insertions(+), 18 deletions(-) diff --git a/amazon.py b/amazon.py index dfe317e..cde4fdd 100644 --- a/amazon.py +++ b/amazon.py @@ -15,25 +15,23 @@ class amazon: def __init__(self, url): self.url = url - request_status_code = requests.get(url, headers=header).status_code - if request_status_code != 200: - sys.exit(f"Unable to get the page. Error code: {request_status_code}") + response = requests.get(url, headers=header) + if response.status_code != 200: + sys.exit(f"Unable to get the page. Error code: {response.status_code}") - html_text = requests.get(url, headers=header).text + html_text = response.content soup = BeautifulSoup(html_text, 'lxml') - - + product_html_element = soup.find('span', id='productTitle') + if self.__check_if_product_exists(product_html_element): self.name = product_html_element.text.strip() - + self.price = soup.find('span', class_="a-size-base a-color-price a-color-price").text + else: sys.exit("Unable to get the product. Please check the URL and try again.") - self.price = soup.find('span', class_='a-price-whole').text - - def __check_if_product_exists(self, soup): if soup is None: return False @@ -52,11 +50,15 @@ def search_item(prod_name): prod_name = prod_name.replace(" ", "+") url = "https://www.amazon.in/s?k=" + prod_name - request_status_code = requests.get(url, headers=header).status_code - if request_status_code != 200: - sys.exit(f"Unable to get the page. 
Error code: {request_status_code}") + response = requests.get(url, headers=header) + + for one in range(50): + if response.status_code != 200: + sys.exit(f"Unable to get the page. Error code: {response.status_code}") + else: + break - html_text = requests.get(url, headers=header).text + html_text = response.text soup = BeautifulSoup(html_text, 'lxml') diff --git a/flipkart.py b/flipkart.py index 2d9a9c5..3c9b47b 100644 --- a/flipkart.py +++ b/flipkart.py @@ -49,12 +49,12 @@ def search_item(prod_name): prod_name = prod_name.replace(" ", "+") url = "https://www.flipkart.com/search?q=" + prod_name - request_status_code = requests.get(url, headers=header).status_code + response = requests.get(url, headers=header) - if request_status_code != 200: - sys.exit(f"- Unable to get the page. Error code: {request_status_code}") + if response.status_code != 200: + sys.exit(f"- Unable to get the page. Error code: {response.status_code}") - html_text = requests.get(url, headers=header).text + html_text = response.text soup = BeautifulSoup(html_text, 'lxml') From 28898e0486975b7dc71fa426849e6563a2dafb6e Mon Sep 17 00:00:00 2001 From: Machine Learning by Manish Date: Sat, 2 Dec 2023 22:14:00 +0530 Subject: [PATCH 3/3] Useless loop removed, updated code --- amazon.py | 7 ++----- 1 file changed, 2 insertions(+), 5 deletions(-) diff --git a/amazon.py b/amazon.py index cde4fdd..a6a58ab 100644 --- a/amazon.py +++ b/amazon.py @@ -52,11 +52,8 @@ def search_item(prod_name): response = requests.get(url, headers=header) - for one in range(50): - if response.status_code != 200: - sys.exit(f"Unable to get the page. Error code: {response.status_code}") - else: - break + if response.status_code != 200: + sys.exit(f"Unable to get the page. Error code: {response.status_code}") html_text = response.text