Commit

Uploaded Files from local PC to GitHub
yasharya2901 authored Nov 26, 2023
1 parent bd4022a commit cf8d75b
Showing 4 changed files with 197 additions and 0 deletions.
73 changes: 73 additions & 0 deletions amazon_scrapper.py
@@ -0,0 +1,73 @@
from bs4 import BeautifulSoup
import requests
import sys

# Browser-like request headers so Amazon serves the regular HTML page.
header = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0',
          'Accept-Language': 'en-US,en;q=0.5',
          'Sec-Fetch-Dest': 'document',
          'Sec-Fetch-Mode': 'navigate',
          'Sec-Fetch-Site': 'same-origin',
          'Sec-Fetch-User': '?1',
          'Upgrade-Insecure-Requests': '1'
          }


class amazon:
    def __init__(self, url):
        self.url = url

        # Fetch the product page once and reuse the response for both the
        # status check and the HTML body.
        response = requests.get(url, headers=header)
        if response.status_code != 200:
            sys.exit(f"Unable to get the page. Error code: {response.status_code}")

        soup = BeautifulSoup(response.text, 'lxml')

        # The product title lives in <span id="productTitle">.
        product_html_element = soup.find('span', id='productTitle')
        if self.__check_if_product_exists(product_html_element):
            self.name = product_html_element.text.strip()
        else:
            sys.exit("Unable to get the product. Please check the URL and try again.")

        # The whole-rupee part of the price is in <span class="a-price-whole">.
        price_html_element = soup.find('span', class_='a-price-whole')
        if price_html_element is None:
            sys.exit("Unable to get the product price. Please check the URL and try again.")
        self.price = price_html_element.text

    def __check_if_product_exists(self, element):
        return element is not None

    def print_product_info(self):
        print("Amazon")
        print(f"Product Name: {self.name}")
        print(f"Product Price: Rs. {self.price}")
        print("-----------------------------------------------------------------------------------------")

    @staticmethod
    def search_item(prod_name):
        # Build a search URL from the product name and return the first result link.
        prod_name = prod_name.replace(" ", "+")
        url = "https://www.amazon.in/s?k=" + prod_name

        response = requests.get(url, headers=header)
        if response.status_code != 200:
            sys.exit(f"Unable to get the page. Error code: {response.status_code}")

        soup = BeautifulSoup(response.text, 'lxml')

        # First organic search result link on the results page.
        href_attr = soup.find('a', class_="a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal")
        if not href_attr:
            print('''We were unable to find the product on Amazon. Please paste the link of the product if you have any. Else type "exit"''')
            # Returns either a pasted URL or the literal string "exit";
            # the caller decides what to do with it.
            return input("> ")

        return "https://www.amazon.in" + href_attr['href']
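
A minimal usage sketch for the class above (the product URL and search term are placeholders for illustration only; the selectors in this file can break whenever Amazon changes its markup):

from amazon_scrapper import amazon

# Hypothetical product URL, shown only to illustrate the call pattern.
product = amazon("https://www.amazon.in/dp/XXXXXXXXXX")
product.print_product_info()

# Or search by name; returns a product link, a user-pasted link, or "exit".
link = amazon.search_item("example product name")
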
71 changes: 71 additions & 0 deletions flipkart_scrapper.py
@@ -0,0 +1,71 @@
from bs4 import BeautifulSoup
import requests
import sys

# Browser-like request headers so Flipkart serves the regular HTML page.
header = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0',
          'Accept-Language': 'en-US,en;q=0.5',
          'Sec-Fetch-Dest': 'document',
          'Sec-Fetch-Mode': 'navigate',
          'Sec-Fetch-Site': 'same-origin',
          'Sec-Fetch-User': '?1',
          'Upgrade-Insecure-Requests': '1'
          }


class flipkart:
    def __init__(self, url):
        self.url = url

        # Fetch the product page once and reuse the response for both the
        # status check and the HTML body.
        response = requests.get(url, headers=header)
        if response.status_code != 200:
            sys.exit(f"Unable to get the page. Error code: {response.status_code}")

        soup = BeautifulSoup(response.text, 'lxml')

        # The product title lives in <span class="B_NuCI">.
        product_html_element = soup.find('span', class_='B_NuCI')
        if self.__check_if_product_exists(product_html_element):
            self.name = product_html_element.text.strip()
        else:
            sys.exit("Unable to get the product. Please check the URL and try again.")

        # The price element carries one of these classes; drop the leading currency symbol.
        price_html_element = soup.find('div', class_=['_30jeq3', '_16Jk6d'])
        if price_html_element is None:
            sys.exit("Unable to get the product price. Please check the URL and try again.")
        self.price = price_html_element.text[1:]

    def __check_if_product_exists(self, element):
        return element is not None

    def print_product_info(self):
        print("Flipkart")
        print(f"Product Name: {self.name}")
        print(f"Product Price: Rs. {self.price}")
        print("-----------------------------------------------------------------------------------------")

    @staticmethod
    def search_item(prod_name):
        # Build a search URL from the product name and return the first result link.
        prod_name = prod_name.replace(" ", "+")
        url = "https://www.flipkart.com/search?q=" + prod_name

        response = requests.get(url, headers=header)
        if response.status_code != 200:
            sys.exit(f"Unable to get the page. Error code: {response.status_code}")

        soup = BeautifulSoup(response.text, 'lxml')

        # First search result link on the results page.
        href_attr = soup.find('a', class_="_1fQZEK")
        if not href_attr:
            print('''We were unable to find the product on Flipkart. Please paste the link of the product if you have any. Else type "exit"''')
            # Returns either a pasted URL or the literal string "exit";
            # the caller decides what to do with it.
            return input("> ")

        return "https://www.flipkart.com" + href_attr['href']

50 changes: 50 additions & 0 deletions main.py
@@ -0,0 +1,50 @@
from amazon_scrapper import amazon
from flipkart_scrapper import flipkart
import sys


def main():
    url = input("Enter the URL: ")

    if "amazon" in url:
        # Scrape the Amazon listing first, then look the product up on Flipkart.
        product_amazon = amazon(url)
        product_amazon.print_product_info()

        print("Would you like to search for the product automatically on Flipkart?")
        print("Press 'y' to continue. Press any other key to enter the link manually.")
        response = input("> ")

        if response == "y" or response == "Y":
            flipkart_link = flipkart.search_item(product_amazon.name)
            if flipkart_link == "exit":
                sys.exit("Exiting...")
        else:
            flipkart_link = input("Enter the Flipkart URL: ")

        product_flipkart = flipkart(flipkart_link)
        product_flipkart.print_product_info()

    elif "flipkart" in url:
        # Scrape the Flipkart listing first, then look the product up on Amazon.
        product_flipkart = flipkart(url)
        product_flipkart.print_product_info()

        print("Would you like to search for the product automatically on Amazon?")
        print("Press 'y' to continue. Press any other key to enter the link manually.")
        response = input("> ")

        if response == "y" or response == "Y":
            amazon_link = amazon.search_item(product_flipkart.name)
            if amazon_link == "exit":
                sys.exit("Exiting...")
        else:
            amazon_link = input("Enter the Amazon URL: ")

        product_amazon = amazon(amazon_link)
        product_amazon.print_product_info()

    else:
        print("Website Not Supported")

    return 0


if __name__ == '__main__':
    main()
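
main.py prints the two listings side by side but does not compare them numerically. A small hypothetical helper, not part of the files above, could convert the scraped price strings (e.g. "1,49,999") into numbers if a direct comparison is wanted:

def parse_price(price_text):
    # Drop thousands separators and any trailing dot, e.g. "1,49,999." -> 149999.0
    return float(price_text.replace(",", "").rstrip("."))

# e.g., once both products have been scraped:
# cheaper = "Amazon" if parse_price(product_amazon.price) <= parse_price(product_flipkart.price) else "Flipkart"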

3 changes: 3 additions & 0 deletions requirements.txt
@@ -0,0 +1,3 @@
lxml
bs4
requests
