Commit

Uploaded Files from local PC to GitHub
yasharya2901 authored Nov 26, 2023
1 parent bd4022a commit cf8d75b
Showing 4 changed files with 197 additions and 0 deletions.
73 changes: 73 additions & 0 deletions amazon_scrapper.py
@@ -0,0 +1,73 @@
from bs4 import BeautifulSoup
import requests
import sys

# Browser-like request headers so Amazon serves the regular HTML page.
header = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0',
          'Accept-Language': 'en-US,en;q=0.5',
          'Sec-Fetch-Dest': 'document',
          'Sec-Fetch-Mode': 'navigate',
          'Sec-Fetch-Site': 'same-origin',
          'Sec-Fetch-User': '?1',
          'Upgrade-Insecure-Requests': '1'
          }


class amazon:
    def __init__(self, url):
        self.url = url

        # Fetch the product page once and reuse the response for both the
        # status check and the HTML body.
        response = requests.get(url, headers=header)
        if response.status_code != 200:
            sys.exit(f"Unable to get the page. Error code: {response.status_code}")

        soup = BeautifulSoup(response.text, 'lxml')

        # The product title lives in <span id="productTitle">.
        product_html_element = soup.find('span', id='productTitle')
        if self.__check_if_product_exists(product_html_element):
            self.name = product_html_element.text.strip()
        else:
            sys.exit("Unable to get the product. Please check the URL and try again.")

        # The whole-rupee part of the price is in <span class="a-price-whole">.
        price_html_element = soup.find('span', class_='a-price-whole')
        if price_html_element is None:
            sys.exit("Unable to get the product price. Please check the URL and try again.")
        self.price = price_html_element.text

    def __check_if_product_exists(self, element):
        return element is not None

    def print_product_info(self):
        print("Amazon")
        print(f"Product Name: {self.name}")
        print(f"Product Price: Rs. {self.price}")
        print("-----------------------------------------------------------------------------------------")

    @staticmethod
    def search_item(prod_name):
        # Build a search URL from the product name and return the first result link.
        prod_name = prod_name.replace(" ", "+")
        url = "https://www.amazon.in/s?k=" + prod_name

        response = requests.get(url, headers=header)
        if response.status_code != 200:
            sys.exit(f"Unable to get the page. Error code: {response.status_code}")

        soup = BeautifulSoup(response.text, 'lxml')

        # First organic search result link on the results page.
        href_attr = soup.find('a', class_="a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal")
        if not href_attr:
            print('''We were unable to find the product on Amazon. Please paste the link of the product if you have any. Else type "exit"''')
            # Returns either a pasted URL or the literal string "exit";
            # the caller decides what to do with it.
            return input("> ")

        return "https://www.amazon.in" + href_attr['href']
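
A minimal usage sketch for the class above (the product URL and search term are placeholders for illustration only; the selectors in this file can break whenever Amazon changes its markup):

from amazon_scrapper import amazon

# Hypothetical product URL, shown only to illustrate the call pattern.
product = amazon("https://www.amazon.in/dp/XXXXXXXXXX")
product.print_product_info()

# Or search by name; returns a product link, a user-pasted link, or "exit".
link = amazon.search_item("example product name")
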
71 changes: 71 additions & 0 deletions flipkart_scrapper.py
@@ -0,0 +1,71 @@
from bs4 import BeautifulSoup
import requests
import sys

# Browser-like request headers so Flipkart serves the regular HTML page.
header = {'User-Agent': 'Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0',
          'Accept-Language': 'en-US,en;q=0.5',
          'Sec-Fetch-Dest': 'document',
          'Sec-Fetch-Mode': 'navigate',
          'Sec-Fetch-Site': 'same-origin',
          'Sec-Fetch-User': '?1',
          'Upgrade-Insecure-Requests': '1'
          }


class flipkart:
    def __init__(self, url):
        self.url = url

        # Fetch the product page once and reuse the response for both the
        # status check and the HTML body.
        response = requests.get(url, headers=header)
        if response.status_code != 200:
            sys.exit(f"Unable to get the page. Error code: {response.status_code}")

        soup = BeautifulSoup(response.text, 'lxml')

        # The product title lives in <span class="B_NuCI">.
        product_html_element = soup.find('span', class_='B_NuCI')
        if self.__check_if_product_exists(product_html_element):
            self.name = product_html_element.text.strip()
        else:
            sys.exit("Unable to get the product. Please check the URL and try again.")

        # The price element carries one of these classes; drop the leading currency symbol.
        price_html_element = soup.find('div', class_=['_30jeq3', '_16Jk6d'])
        if price_html_element is None:
            sys.exit("Unable to get the product price. Please check the URL and try again.")
        self.price = price_html_element.text[1:]

    def __check_if_product_exists(self, element):
        return element is not None

    def print_product_info(self):
        print("Flipkart")
        print(f"Product Name: {self.name}")
        print(f"Product Price: Rs. {self.price}")
        print("-----------------------------------------------------------------------------------------")

    @staticmethod
    def search_item(prod_name):
        # Build a search URL from the product name and return the first result link.
        prod_name = prod_name.replace(" ", "+")
        url = "https://www.flipkart.com/search?q=" + prod_name

        response = requests.get(url, headers=header)
        if response.status_code != 200:
            sys.exit(f"Unable to get the page. Error code: {response.status_code}")

        soup = BeautifulSoup(response.text, 'lxml')

        # First search result link on the results page.
        href_attr = soup.find('a', class_="_1fQZEK")
        if not href_attr:
            print('''We were unable to find the product on Flipkart. Please paste the link of the product if you have any. Else type "exit"''')
            # Returns either a pasted URL or the literal string "exit";
            # the caller decides what to do with it.
            return input("> ")

        return "https://www.flipkart.com" + href_attr['href']

50 changes: 50 additions & 0 deletions main.py
@@ -0,0 +1,50 @@
from amazon_scrapper import amazon
from flipkart_scrapper import flipkart
import sys


def main():
    url = input("Enter the URL: ")

    if "amazon" in url:
        # Scrape the Amazon listing first, then look the product up on Flipkart.
        product_amazon = amazon(url)
        product_amazon.print_product_info()

        print("Would you like to search for the product automatically on Flipkart?")
        print("Press 'y' to continue. Press any other key to enter the link manually.")
        response = input("> ")

        if response == "y" or response == "Y":
            flipkart_link = flipkart.search_item(product_amazon.name)
            if flipkart_link == "exit":
                sys.exit("Exiting...")
        else:
            flipkart_link = input("Enter the Flipkart URL: ")

        product_flipkart = flipkart(flipkart_link)
        product_flipkart.print_product_info()

    elif "flipkart" in url:
        # Scrape the Flipkart listing first, then look the product up on Amazon.
        product_flipkart = flipkart(url)
        product_flipkart.print_product_info()

        print("Would you like to search for the product automatically on Amazon?")
        print("Press 'y' to continue. Press any other key to enter the link manually.")
        response = input("> ")

        if response == "y" or response == "Y":
            amazon_link = amazon.search_item(product_flipkart.name)
            if amazon_link == "exit":
                sys.exit("Exiting...")
        else:
            amazon_link = input("Enter the Amazon URL: ")

        product_amazon = amazon(amazon_link)
        product_amazon.print_product_info()

    else:
        print("Website Not Supported")

    return 0


if __name__ == '__main__':
    main()
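
main.py prints the two listings side by side but does not compare them numerically. A small hypothetical helper, not part of the files above, could convert the scraped price strings (e.g. "1,49,999") into numbers if a direct comparison is wanted:

def parse_price(price_text):
    # Drop thousands separators and any trailing dot, e.g. "1,49,999." -> 149999.0
    return float(price_text.replace(",", "").rstrip("."))

# e.g., once both products have been scraped:
# cheaper = "Amazon" if parse_price(product_amazon.price) <= parse_price(product_flipkart.price) else "Flipkart"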

3 changes: 3 additions & 0 deletions requirements.txt
@@ -0,0 +1,3 @@
lxml
bs4
requests
