-
Notifications
You must be signed in to change notification settings - Fork 3
Expand file tree
/
Copy pathflipkart.py
More file actions
127 lines (97 loc) · 3.91 KB
/
flipkart.py
File metadata and controls
127 lines (97 loc) · 3.91 KB
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
109
110
111
112
113
114
115
116
117
118
119
120
121
122
123
124
125
126
127
import csv
import requests
from bs4 import BeautifulSoup
from DataWriter import DataWriter
class Flipkart(DataWriter):
    """Scrape Flipkart search results for a product.

    Collects (title, price, rating, review count, buy link) tuples from
    Flipkart's search pages and writes them to a CSV file via DataWriter.
    """

    def __init__(self, name):
        """Store the product name to search for.

        Fix: ``details`` is now an instance attribute. The original declared
        it at class level, so every Flipkart instance shared (and appended
        to) the same results list.
        """
        self.name = name
        self.details = []

    def get_url(self):
        """Return the search-results URL template for this product.

        The returned string still contains one ``{}`` placeholder for the
        page number; callers format it, e.g. ``self.get_url().format(1)``.
        """
        query = self.name.replace(' ', '%20')
        url = 'https://www.flipkart.com/search?q={}&otracker=search&otracker1=search&marketplace=FLIPKART&as-show=on&as=off'
        url = url.format(query)
        # Fix: the original appended '&page{}' (missing '='), which would be
        # an invalid query parameter even if it were ever formatted.
        url += '&page={}'
        return url

    def get_soup(self, url):
        """Download *url* and return it parsed as a BeautifulSoup document.

        Fix: the original ignored its ``url`` argument and re-fetched
        ``self.get_url()``, which left the parameter dead and made
        requesting any specific page impossible.
        """
        headers = {
            'User-Agent': 'Mozilla/5.0 (Windows NT 10.0; Win64; x64) AppleWebKit/537.36 (KHTML, like Gecko) Chrome/87.0.4280.141 Safari/537.36'
        }
        r = requests.get(url, headers=headers)
        return BeautifulSoup(r.content, 'html.parser')

    def get_details(self):
        """Scrape the first results page (grid layout) into ``self.details``.

        Falls back to :meth:`get_data` when the page uses the alternate
        card-style layout, detected by the grid title element being absent.
        """
        print("\n Fetching data on Flipkart.in ...")
        page = self.get_url().format(1)  # request page 1 explicitly
        soup = self.get_soup(page)
        items = soup.find_all('div', class_='_13oc-S')
        for item in items:
            # Title — its absence signals the other page layout.
            try:
                title = item.find('div', class_='_4rR01T').text
            except AttributeError:
                self.get_data(page)
                return
            # Price
            try:
                price = item.find('div', class_='_30jeq3 _1_WHN1').text
            except AttributeError:
                price = 'price not available'
            # Rating
            try:
                rating = item.find('div', class_='_3LWZlK').text
                rating += ' out of 5'
            except AttributeError:
                rating = "rating not available"
            # Number of ratings
            try:
                rating_number = item.find('span', class_='_2_R_DZ').text
            except AttributeError:
                rating_number = 'not available'
            # Buy link — fix: find() returns None when the anchor is
            # missing, and None['href'] raises TypeError, which the
            # original did not handle.
            buy_link = item.find('a', class_='_1fQZEK')
            link = ('https://www.flipkart.com' + buy_link['href']
                    if buy_link else 'link not available')
            self.details.append((title, price, rating, rating_number, link))

    def get_data(self, page):
        """Scrape the alternate (card-style) results layout into ``self.details``."""
        soup = self.get_soup(page)
        items = soup.find_all('div', class_='_4ddWXP')
        for item in items:
            # Title
            try:
                title = item.find('a', class_='s1Q9rs').text.strip()
            except AttributeError:
                title = 'Title not available'
            # Price
            try:
                price = item.find('div', class_='_30jeq3').text.strip()
            except AttributeError:
                price = 'price not available'
            # Rating
            try:
                rating = item.find('div', class_='_3LWZlK').text.strip()
            except AttributeError:
                rating = 'rating not available'
            # Reviews
            try:
                review = item.find('span', class_='_2_R_DZ').text.strip()
            except AttributeError:
                review = 'reviews not available'
            # Buy link — fix: the original searched the whole soup, so every
            # product row received the first page-level link instead of its
            # own; search within this item, and guard against None.
            anchor = item.find('a', class_='_2rpwqI')
            link = ('https://www.flipkart.com' + anchor['href']
                    if anchor else 'link not available')
            self.details.append((title, price, rating, review, link))

    def store_data(self):
        """Write the collected rows to ``Flipkart.csv`` via DataWriter."""
        rowtitle = ["Product Name", "Price", "Rating", "No. of reviews", "Buy link"]
        w = DataWriter("Flipkart.csv", rowtitle)
        w.writer(self.details)