-
Notifications
You must be signed in to change notification settings - Fork 3
Commit
This commit does not belong to any branch on this repository, and may belong to a fork outside of the repository.
Uploaded Files from local PC to GitHub
- Loading branch information
1 parent
bd4022a
commit cf8d75b
Showing
4 changed files
with
197 additions
and
0 deletions.
There are no files selected for viewing
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,73 @@ | ||
from bs4 import BeautifulSoup | ||
import requests | ||
import sys | ||
|
||
# Browser-like HTTP headers passed to every requests.get() call in this module
# (presumably so the site serves the regular page to a scripted client).
header = {
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0",
    "Accept-Language": "en-US,en;q=0.5",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
}
|
||
|
||
class amazon:
    """Scrape the title and price of one Amazon product page.

    The constructor downloads ``url`` and sets three attributes:

    * ``url``   - the address that was requested.
    * ``name``  - stripped text of the ``<span id="productTitle">`` element.
    * ``price`` - text of the first ``<span class="a-price-whole">`` element.

    Any failure (network error, non-200 status, missing title or price)
    terminates the program through ``sys.exit`` with a readable message.
    """

    # Seconds to wait for a response; without a timeout requests can block
    # indefinitely on an unresponsive server.
    _REQUEST_TIMEOUT = 15

    def __init__(self, url):
        """Fetch ``url`` and extract the product name and price from it."""
        self.url = url
        soup = self._fetch_soup(url)

        title_tag = soup.find('span', id='productTitle')
        if not self.__check_if_product_exists(title_tag):
            sys.exit("Unable to get the product. Please check the URL and try again.")
        self.name = title_tag.text.strip()

        # A page can carry a title but no price element; report that
        # explicitly instead of failing with AttributeError on ``None.text``.
        price_tag = soup.find('span', class_='a-price-whole')
        if price_tag is None:
            sys.exit("Unable to get the product price. Please check the URL and try again.")
        self.price = price_tag.text

    @staticmethod
    def _fetch_soup(url):
        """Download ``url`` once and return it parsed with the lxml parser.

        Exits the program if the request fails or the status is not 200.
        """
        try:
            response = requests.get(url, headers=header, timeout=amazon._REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Covers timeouts, connection errors and malformed URLs
            # (e.g. an empty link typed by the user).
            sys.exit(f"Unable to get the page. Error: {exc}")

        if response.status_code != 200:
            sys.exit(f"Unable to get the page. Error code: {response.status_code}")

        # Reuse the same response for status and body rather than
        # requesting the page a second time.
        return BeautifulSoup(response.text, 'lxml')

    def __check_if_product_exists(self, soup):
        """Return True when ``soup`` (the looked-up element) is not None."""
        return soup is not None

    def print_product_info(self):
        """Print the store name, product name and price, then a separator."""
        print("Amazon")
        print(f"Product Name: {self.name}")
        print(f"Product Price: Rs. {self.price}")
        print("-----------------------------------------------------------------------------------------")

    @staticmethod
    def search_item(prod_name):
        """Search amazon.in for ``prod_name`` and return the first result link.

        Returns the absolute URL built from the first matching result anchor.
        When no result anchor is found, the user is prompted and whatever
        they type is returned unchanged (the prompt suggests a product link
        or the word "exit"); callers are expected to check for "exit".
        """
        from urllib.parse import quote_plus

        # quote_plus turns spaces into '+' (as before) and also escapes
        # characters such as '&' or '#' that would otherwise break the query.
        url = "https://www.amazon.in/s?k=" + quote_plus(prod_name)
        soup = amazon._fetch_soup(url)

        result_anchor = soup.find('a', class_="a-link-normal s-underline-text s-underline-link-text s-link-style a-text-normal")
        if result_anchor is None:
            print('''We were unable to find the product on Amazon. Please paste the link of the product if you have any. Else type "exit"''')
            return input("> ")

        return "https://www.amazon.in" + result_anchor['href']
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,71 @@ | ||
from bs4 import BeautifulSoup | ||
import requests | ||
import sys | ||
|
||
# Browser-like HTTP headers passed to every requests.get() call in this module
# (presumably so the site serves the regular page to a scripted client).
header = {
    "User-Agent": "Mozilla/5.0 (X11; Ubuntu; Linux x86_64; rv:120.0) Gecko/20100101 Firefox/120.0",
    "Accept-Language": "en-US,en;q=0.5",
    "Sec-Fetch-Dest": "document",
    "Sec-Fetch-Mode": "navigate",
    "Sec-Fetch-Site": "same-origin",
    "Sec-Fetch-User": "?1",
    "Upgrade-Insecure-Requests": "1",
}
|
||
class flipkart:
    """Scrape the title and price of one Flipkart product page.

    The constructor downloads ``url`` and sets three attributes:

    * ``url``   - the address that was requested.
    * ``name``  - stripped text of the ``<span class="B_NuCI">`` element.
    * ``price`` - text of the first ``<div>`` carrying class ``_30jeq3`` or
      ``_16Jk6d``, with its first character removed.

    Any failure (network error, non-200 status, missing title or price)
    terminates the program through ``sys.exit`` with a readable message.

    NOTE(review): the CSS class names look auto-generated by the site and
    may change over time - confirm they are still current.
    """

    # Seconds to wait for a response; without a timeout requests can block
    # indefinitely on an unresponsive server.
    _REQUEST_TIMEOUT = 15

    def __init__(self, url):
        """Fetch ``url`` and extract the product name and price from it."""
        self.url = url
        soup = self._fetch_soup(url)

        title_tag = soup.find('span', class_='B_NuCI')
        if not self.__check_if_product_exists(title_tag):
            sys.exit("Unable to get the product. Please check the URL and try again.")
        self.name = title_tag.text.strip()

        # A page can carry a title but no price element; report that
        # explicitly instead of failing with AttributeError on ``None.text``.
        price_tag = soup.find('div', class_=['_30jeq3', '_16Jk6d'])
        if price_tag is None:
            sys.exit("Unable to get the product price. Please check the URL and try again.")
        # Drop the leading character (presumably the currency symbol, since
        # print_product_info adds its own "Rs." prefix).
        self.price = price_tag.text[1:]

    @staticmethod
    def _fetch_soup(url):
        """Download ``url`` once and return it parsed with the lxml parser.

        Exits the program if the request fails or the status is not 200.
        """
        try:
            response = requests.get(url, headers=header, timeout=flipkart._REQUEST_TIMEOUT)
        except requests.RequestException as exc:
            # Covers timeouts, connection errors and malformed URLs
            # (e.g. an empty link typed by the user).
            sys.exit(f"Unable to get the page. Error: {exc}")

        if response.status_code != 200:
            sys.exit(f"Unable to get the page. Error code: {response.status_code}")

        # Reuse the same response for status and body rather than
        # requesting the page a second time.
        return BeautifulSoup(response.text, 'lxml')

    def __check_if_product_exists(self, soup):
        """Return True when ``soup`` (the looked-up element) is not None."""
        return soup is not None

    def print_product_info(self):
        """Print the store name, product name and price, then a separator."""
        print("Flipkart")
        print(f"Product Name: {self.name}")
        print(f"Product Price: Rs. {self.price}")
        print("-----------------------------------------------------------------------------------------")

    @staticmethod
    def search_item(prod_name):
        """Search flipkart.com for ``prod_name`` and return the first result link.

        Returns the absolute URL built from the first matching result anchor.
        When no result anchor is found, the user is prompted and whatever
        they type is returned unchanged (the prompt suggests a product link
        or the word "exit"); callers are expected to check for "exit".
        """
        from urllib.parse import quote_plus

        # quote_plus turns spaces into '+' (as before) and also escapes
        # characters such as '&' or '#' that would otherwise break the query.
        url = "https://www.flipkart.com/search?q=" + quote_plus(prod_name)
        soup = flipkart._fetch_soup(url)

        result_anchor = soup.find('a', class_="_1fQZEK")
        if result_anchor is None:
            print('''We were unable to find the product on Flipkart. Please paste the link of the product if you have any. Else type "exit"''')
            return input("> ")

        return "https://www.flipkart.com" + result_anchor['href']
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,50 @@ | ||
from amazon_scrapper import * | ||
from flipkart_scrapper import * | ||
import sys | ||
|
||
def main():
    """Compare one product across Amazon and Flipkart interactively.

    Reads a product URL from the user, prints that product's details, then
    obtains the matching product on the other store - either through that
    store's ``search_item`` (when the user answers 'y' or 'Y') or from a URL
    the user types - and prints it as well. Returns 0.
    """
    url = input("Enter the URL: ")

    on_amazon = "amazon" in url
    if not on_amazon and "flipkart" not in url:
        print("Website Not Supported")
        return 0

    if on_amazon:
        first = amazon(url)
        first.print_product_info()

        print("Would you like to search for the product automatically on Flipkart?")
        print("Press 'y' to continue. Press any other key to enter the link manually.")
        if input("> ") in ("y", "Y"):
            other_link = flipkart.search_item(first.name)
            # search_item passes back whatever the user typed when nothing was found.
            if other_link == "exit":
                sys.exit("Exiting...")
        else:
            other_link = input("Enter the Flipkart URL: ")

        second = flipkart(other_link)
        second.print_product_info()
        return 0

    # Only the Flipkart case is left at this point.
    first = flipkart(url)
    first.print_product_info()

    print("Would you like to search for the product automatically on Amazon? Press 'y' to continue. Press any other key to enter the link manually.")
    if input("> ") in ("y", "Y"):
        other_link = amazon.search_item(first.name)
        # search_item passes back whatever the user typed when nothing was found.
        if other_link == "exit":
            sys.exit("Exiting...")
    else:
        other_link = input("Enter the Amazon URL: ")

    second = amazon(other_link)
    second.print_product_info()
    return 0
|
||
# Start the interactive comparison only when this file is run as a script,
# not when it is imported as a module.
if __name__ == "__main__":
    main()
|
This file contains bidirectional Unicode text that may be interpreted or compiled differently than what appears below. To review, open the file in an editor that reveals hidden Unicode characters.
Learn more about bidirectional Unicode characters
Original file line number | Diff line number | Diff line change |
---|---|---|
@@ -0,0 +1,3 @@ | ||
lxml | ||
beautifulsoup4 | ||
requests |