diff --git a/amazon_scrapper.py b/amazon_scrapper.py
new file mode 100644
index 0000000..dfe317e
--- /dev/null
+++ b/amazon_scrapper.py
@@ -0,0 +1,64 @@
+from bs4 import BeautifulSoup
+import requests
+import sys
+
+# Browser-like request headers so the site serves the regular HTML page.
+header = {'User-Agent':'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0',
+          'Accept-Language':'en-US,en;q=0.5',
+          'Sec-Fetch-Dest':'document',
+          'Sec-Fetch-Mode':'navigate',
+          'Sec-Fetch-Site':'same-origin',
+          'Sec-Fetch-User':'?1',
+          'Upgrade-Insecure-Requests':'1'
+          }
+
+
+class amazon:
+    def __init__(self, url):
+        self.url = url
+        # Fetch the page once and reuse the response for the status check and the HTML.
+        response = requests.get(url, headers=header)
+        if response.status_code != 200:
+            sys.exit(f"Unable to get the page. Error code: {response.status_code}")
+
+        soup = BeautifulSoup(response.text, 'lxml')
+
+        product_html_element = soup.find('span', id='productTitle')
+        if self.__check_if_product_exists(product_html_element):
+            self.name = product_html_element.text.strip()
+        else:
+            sys.exit("Unable to get the product. Please check the URL and try again.")
+
+        price_html_element = soup.find('span', class_='a-price-whole')
+        if price_html_element is None:
+            sys.exit("Unable to get the product price. Please check the URL and try again.")
+        self.price = price_html_element.text
+
+    def __check_if_product_exists(self, element):
+        return element is not None
+
+    def print_product_info(self):
+        print("Amazon")
+        print(f"Product Name: {self.name}")
+        print(f"Product Price: Rs. {self.price}")
+        print("-----------------------------------------------------------------------------------------")
+
+    @staticmethod
+    def search_item(prod_name):
+        # Search for the product and return the link of the first result.
+        prod_name = prod_name.replace(" ", "+")
+        url = "https://www.amazon.in/s?k=" + prod_name
+
+        response = requests.get(url, headers=header)
+        if response.status_code != 200:
+            sys.exit(f"Unable to get the page. Error code: {response.status_code}")
+
+        soup = BeautifulSoup(response.text, 'lxml')
+
+        href_attr = soup.find('a', class_="a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal")
+        if not href_attr:
+            # No search result: fall back to a manually pasted link (or "exit").
+            print('''We were unable to find the product on Amazon. Please paste the link of the product if you have any. Else type "exit"''')
+            return input("> ")
+
+        return "https://www.amazon.in" + href_attr['href']
diff --git a/flipkart_scrapper.py b/flipkart_scrapper.py
new file mode 100644
index 0000000..2593d54
--- /dev/null
+++ b/flipkart_scrapper.py
@@ -0,0 +1,64 @@
+from bs4 import BeautifulSoup
+import requests
+import sys
+
+# Browser-like request headers so the site serves the regular HTML page.
+header = {'User-Agent':'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0',
+          'Accept-Language':'en-US,en;q=0.5',
+          'Sec-Fetch-Dest':'document',
+          'Sec-Fetch-Mode':'navigate',
+          'Sec-Fetch-Site':'same-origin',
+          'Sec-Fetch-User':'?1',
+          'Upgrade-Insecure-Requests':'1'
+          }
+
+
+class flipkart:
+    def __init__(self, url):
+        self.url = url
+        # Fetch the page once and reuse the response for the status check and the HTML.
+        response = requests.get(url, headers=header)
+        if response.status_code != 200:
+            sys.exit(f"Unable to get the page. Error code: {response.status_code}")
+
+        soup = BeautifulSoup(response.text, 'lxml')
+
+        product_html_element = soup.find('span', class_='B_NuCI')
+        if self.__check_if_product_exists(product_html_element):
+            self.name = product_html_element.text.strip()
+        else:
+            sys.exit("Unable to get the product. Please check the URL and try again.")
+
+        price_html_element = soup.find('div', class_=['_30jeq3', '_16Jk6d'])
+        if price_html_element is None:
+            sys.exit("Unable to get the product price. Please check the URL and try again.")
+        self.price = price_html_element.text[1:]  # drop the leading currency symbol
+
+    def __check_if_product_exists(self, element):
+        return element is not None
+
+    def print_product_info(self):
+        print("Flipkart")
+        print(f"Product Name: {self.name}")
+        print(f"Product Price: Rs. {self.price}")
+        print("-----------------------------------------------------------------------------------------")
+
+    @staticmethod
+    def search_item(prod_name):
+        # Search for the product and return the link of the first result.
+        prod_name = prod_name.replace(" ", "+")
+        url = "https://www.flipkart.com/search?q=" + prod_name
+
+        response = requests.get(url, headers=header)
+        if response.status_code != 200:
+            sys.exit(f"Unable to get the page. Error code: {response.status_code}")
+
+        soup = BeautifulSoup(response.text, 'lxml')
+
+        href_attr = soup.find('a', class_="_1fQZEK")
+        if not href_attr:
+            # No search result: fall back to a manually pasted link (or "exit").
+            print('''We were unable to find the product on Flipkart. Please paste the link of the product if you have any. Else type "exit"''')
+            return input("> ")
+
+        return "https://www.flipkart.com" + href_attr['href']
diff --git a/main.py b/main.py
new file mode 100644
index 0000000..d296b27
--- /dev/null
+++ b/main.py
@@ -0,0 +1,51 @@
+from amazon_scrapper import amazon
+from flipkart_scrapper import flipkart
+import sys
+
+
+def main():
+    url = input("Enter the URL: ")
+    if "amazon" in url:
+        product_amazon = amazon(url)
+        product_amazon.print_product_info()
+
+        print("Would you like to search for the product automatically on Flipkart?")
+        print("Press 'y' to continue. Press any other key to enter the link manually.")
+        response = input("> ")
+
+        if response == "y" or response == "Y":
+            flipkart_link = flipkart.search_item(product_amazon.name)
+            if flipkart_link == "exit":
+                sys.exit("Exiting...")
+        else:
+            flipkart_link = input("Enter the Flipkart URL: ")
+
+        product_flipkart = flipkart(flipkart_link)
+        product_flipkart.print_product_info()
+
+    elif "flipkart" in url:
+        product_flipkart = flipkart(url)
+        product_flipkart.print_product_info()
+
+        print("Would you like to search for the product automatically on Amazon?")
+        print("Press 'y' to continue. Press any other key to enter the link manually.")
+        response = input("> ")
+
+        if response == "y" or response == "Y":
+            amazon_link = amazon.search_item(product_flipkart.name)
+            if amazon_link == "exit":
+                sys.exit("Exiting...")
+        else:
+            amazon_link = input("Enter the Amazon URL: ")
+
+        product_amazon = amazon(amazon_link)
+        product_amazon.print_product_info()
+
+    else:
+        print("Website Not Supported")
+
+    return 0
+
+
+if __name__ == '__main__':
+    main()
diff --git a/requirements.txt b/requirements.txt
new file mode 100644
index 0000000..7175645
--- /dev/null
+++ b/requirements.txt
@@ -0,0 +1,3 @@
+lxml
+bs4
+requests
\ No newline at end of file