1- from tkinter import Tk , Entry , Label , Button , HORIZONTAL
1+ import csv
2+ import threading
3+ import urllib .request
4+ from tkinter import HORIZONTAL , Button , Entry , Label , Tk
25from tkinter .ttk import Progressbar
3- from bs4 import BeautifulSoup
46
5- import urllib .request
6- import threading
7- import csv
7+ from bs4 import BeautifulSoup
88
99
1010class ScrapperLogic :
@@ -21,80 +21,81 @@ def inner_html(element):
2121
@staticmethod
def get_name(body):
    """Return the display name of a listing.

    Looks up the ``<span class="jcn">`` element inside *body* and returns
    the ``string`` of that span's ``a`` attribute (its anchor).

    Args:
        body: Parsed listing element exposing a BeautifulSoup-style
            ``find(name, attrs)`` method.

    Raises:
        AttributeError: If ``find`` returns ``None`` or the span has no
            anchor, because the attribute chain is followed unguarded.
    """
    return body.find("span", {"class": "jcn"}).a.string
2525
@staticmethod
def which_digit(html):
    """Translate an icon CSS class name into the character it represents.

    Args:
        html: Icon class name such as ``"icon-ji"``.

    Returns:
        An ``int`` for digit icons, a one-character ``str`` for the
        ``+``, ``(``, ``)`` and ``-`` icons, or ``""`` when the class name
        is not in the table.
    """
    mapping_dict = {
        "icon-ji": 9,
        "icon-dc": "+",
        "icon-fe": "(",
        "icon-hg": ")",
        "icon-ba": "-",
        "icon-lk": 8,
        "icon-nm": 7,
        "icon-po": 6,
        "icon-rq": 5,
        "icon-ts": 4,
        "icon-vu": 3,
        "icon-wx": 2,
        "icon-yz": 1,
        "icon-acb": 0,
    }
    return mapping_dict.get(html, "")
4445
def get_phone_number(self, body):
    """Extract a phone number from a listing element.

    Two sources are tried in order:

    1. The second child of ``<p class="contact-info">``: every descendant
       that has a ``class`` attribute contributes one character, decoded
       from its second class name via ``which_digit``.
    2. Any ``<a id="whatsapptriggeer">`` found after parsing the value of
       the listing's ``data-href`` attribute: the last ten characters of
       its ``href`` replace whatever step 1 produced.

    Args:
        body: Parsed listing element supporting ``find`` and item access.

    Returns:
        The phone number as a string, or ``"No Number!"`` when the contact
        paragraph is missing or has fewer than two children and no
        WhatsApp link is found.

    Raises:
        KeyError: Presumably when ``body`` has no ``data-href`` attribute;
            that lookup is unguarded — confirm against the caller.
    """
    phone_no = "No Number!"
    try:
        contact_children = body.find("p", {"class": "contact-info"})
        for position, item in enumerate(contact_children, start=1):
            if position != 2:
                continue
            phone_no = ""
            try:
                for element in item.find_all(class_=True):
                    # The second class name of each icon element encodes
                    # one character of the number.
                    phone_no += str(self.which_digit(element["class"][1]))
            except (AttributeError, IndexError, KeyError, TypeError):
                # Best effort: keep whatever was decoded before the
                # malformed element.
                pass
            break
    except (AttributeError, TypeError):
        # No contact paragraph (find returned None): keep the placeholder.
        pass

    soup = BeautifulSoup(body["data-href"], "html.parser")
    for anchor in soup.find_all("a", {"id": "whatsapptriggeer"}):
        phone_no = str(anchor["href"][-10:])

    return phone_no
6970
@staticmethod
def get_rating(body):
    """Compute a listing's star rating from its ``star_m`` span.

    For each child of ``<span class="star_m">`` the first class name is
    taken, its first character dropped, the remainder parsed as a float
    and divided by 10; the results are summed.

    Args:
        body: Parsed listing element exposing ``find(name, attrs)``.

    Returns:
        The summed rating as a float, or ``0.0`` when the span is absent.
    """
    rating = 0.0
    stars = body.find("span", {"class": "star_m"})
    if stars is not None:
        for star in stars:
            rating += float(star["class"][0][1:]) / 10

    return rating
7980
@staticmethod
def get_rating_count(body):
    """Return the number of ratings shown for a listing, as a digit string.

    Reads the ``string`` of ``<span class="rt_count">`` and keeps only its
    digit characters, in order.

    Args:
        body: Parsed listing element exposing ``find(name, attrs)``.

    Returns:
        A string made of the digits found (``""`` if there are none).

    Raises:
        AttributeError: If ``find`` returns ``None``; the lookup is
            unguarded.
    """
    text = body.find("span", {"class": "rt_count"}).string

    # Keep only the digits; surrounding words and punctuation are dropped.
    return "".join(ch for ch in text if ch.isdigit())
88+
@staticmethod
def get_address(body):
    """Return the listing's address with surrounding whitespace removed.

    Reads the ``text`` of ``<span class="mrehover">`` inside *body*.

    Args:
        body: Parsed listing element exposing ``find(name, attrs)``.

    Raises:
        AttributeError: If ``find`` returns ``None``; the lookup is
            unguarded.
    """
    return body.find("span", {"class": "mrehover"}).text.strip()
9192
9293 @staticmethod
9394 def get_location (body ):
94- text = body .find ('a' , {' class' : ' rsmap' })
95+ text = body .find ("a" , {" class" : " rsmap" })
9596 if not text :
9697 return
97- text_list = text [' onclick' ].split ("," )
98+ text_list = text [" onclick" ].split ("," )
9899
99100 latitude = text_list [3 ].strip ().replace ("'" , "" )
100101 longitude = text_list [4 ].strip ().replace ("'" , "" )
@@ -107,44 +108,48 @@ def start_scrapping_logic(self):
107108
108109 total_url = "https://www.justdial.com/{0}/{1}" .format (self .location , self .query )
109110
110- fields = ['Name' , 'Phone' , 'Rating' , 'Rating Count' , 'Address' , 'Location' ]
111- out_file = open ('{0}.csv' .format (self .file_name ), 'w' )
112- csvwriter = csv .DictWriter (out_file , delimiter = ',' , fieldnames = fields )
113- csvwriter .writerow ({
114- 'Name' : 'Name' , #Shows the name
115- 'Phone' : 'Phone' ,#shows the phone
116- 'Rating' : 'Rating' ,#shows the ratings
117- 'Rating Count' : 'Rating Count' ,#Shows the stars for ex: 4 stars
118- 'Address' : 'Address' ,#Shows the address of the place
119- 'Location' : 'Location' #shows the location
120- })
111+ fields = ["Name" , "Phone" , "Rating" , "Rating Count" , "Address" , "Location" ]
112+ out_file = open ("{0}.csv" .format (self .file_name ), "w" )
113+ csvwriter = csv .DictWriter (out_file , delimiter = "," , fieldnames = fields )
114+ csvwriter .writerow (
115+ {
116+ "Name" : "Name" , # Shows the name
117+ "Phone" : "Phone" , # shows the phone
118+ "Rating" : "Rating" , # shows the ratings
119+ "Rating Count" : "Rating Count" , # Shows the stars for ex: 4 stars
120+ "Address" : "Address" , # Shows the address of the place
121+ "Location" : "Location" , # shows the location
122+ }
123+ )
121124
122125 progress_value = 0
123126 while True :
124127 # Check if reached end of result
125128 if page_number > 50 :
126129 progress_value = 100
127- self .progressbar [' value' ] = progress_value
130+ self .progressbar [" value" ] = progress_value
128131 break
129132
130133 if progress_value != 0 :
131134 progress_value += 1
132- self .label_progress [' text' ] = "{0}{1}" .format (progress_value , '%' )
133- self .progressbar [' value' ] = progress_value
135+ self .label_progress [" text" ] = "{0}{1}" .format (progress_value , "%" )
136+ self .progressbar [" value" ] = progress_value
134137
135138 url = total_url + "/page-%s" % page_number
136139 print ("{0} {1}, {2}" .format ("Scrapping page number: " , page_number , url ))
137- req = urllib .request .Request (url , headers = {'User-Agent' : "Mozilla/5.0 (Windows NT 6.1; Win64; x64)" })
140+ req = urllib .request .Request (
141+ url , headers = {"User-Agent" : "Mozilla/5.0 (Windows NT 6.1; Win64; x64)" }
142+ )
138143 page = urllib .request .urlopen (req )
139144
140145 soup = BeautifulSoup (page .read (), "html.parser" )
141- services = soup .find_all ('li' , {' class' : ' cntanr' })
146+ services = soup .find_all ("li" , {" class" : " cntanr" })
142147
143148 # Iterate through the 10 results in the page
144149
145150 progress_value += 1
146- self .label_progress [' text' ] = "{0}{1}" .format (progress_value , '%' )
147- self .progressbar [' value' ] = progress_value
151+ self .label_progress [" text" ] = "{0}{1}" .format (progress_value , "%" )
152+ self .progressbar [" value" ] = progress_value
148153
149154 for service_html in services :
150155 try :
@@ -158,18 +163,18 @@ def start_scrapping_logic(self):
158163 address = self .get_address (service_html )
159164 location = self .get_location (service_html )
160165 if name is not None :
161- dict_service [' Name' ] = name
166+ dict_service [" Name" ] = name
162167 if phone is not None :
163- print (' getting phone number' )
164- dict_service [' Phone' ] = phone
168+ print (" getting phone number" )
169+ dict_service [" Phone" ] = phone
165170 if rating is not None :
166- dict_service [' Rating' ] = rating
171+ dict_service [" Rating" ] = rating
167172 if count is not None :
168- dict_service [' Rating Count' ] = count
173+ dict_service [" Rating Count" ] = count
169174 if address is not None :
170- dict_service [' Address' ] = address
175+ dict_service [" Address" ] = address
171176 if location is not None :
172- dict_service [' Address' ] = location
177+ dict_service [" Address" ] = location
173178
174179 # Write row to CSV
175180 csvwriter .writerow (dict_service )
@@ -207,42 +212,50 @@ def start_scrapping(self):
207212 query = self .entry_query .get ()
208213 location = self .entry_location .get ()
209214 file_name = self .entry_file_name .get ()
210- scrapper = ScrapperLogic (query , location , file_name , self .progress , self .label_progress )
215+ scrapper = ScrapperLogic (
216+ query , location , file_name , self .progress , self .label_progress
217+ )
211218 t1 = threading .Thread (target = scrapper .start_scrapping_logic , args = [])
212219 t1 .start ()
213220
def start(self):
    """Create the scraper form's widgets and place them on the grid.

    Layout inside ``self.master``:

    * rows 0-2: a label in column 0 and a 23-character entry in column 1
      for the query, the location and the output file name;
    * row 3: the progress percentage label and the "Start" button, which
      invokes ``self.start_scrapping``;
    * row 4: a determinate horizontal progress bar spanning both columns.

    Every widget is stored on ``self`` under the attribute names used
    below.
    """
    self.label_query = Label(self.master, text="Query")
    self.label_query.grid(row=0, column=0)

    self.entry_query = Entry(self.master, width=23)
    self.entry_query.grid(row=0, column=1)

    self.label_location = Label(self.master, text="Location")
    self.label_location.grid(row=1, column=0)

    self.entry_location = Entry(self.master, width=23)
    self.entry_location.grid(row=1, column=1)

    self.label_file_name = Label(self.master, text="File Name")
    self.label_file_name.grid(row=2, column=0)

    self.entry_file_name = Entry(self.master, width=23)
    self.entry_file_name.grid(row=2, column=1)

    self.label_progress = Label(self.master, text="0%")
    self.label_progress.grid(row=3, column=0)

    self.button_start = Button(
        self.master, text="Start", command=self.start_scrapping
    )
    self.button_start.grid(row=3, column=1)

    # Progress bar across the bottom of the form.
    self.progress = Progressbar(
        self.master, orient=HORIZONTAL, length=350, mode="determinate"
    )
    self.progress.grid(row=4, columnspan=2)
242252
if __name__ == "__main__":
    # Script entry point: open a 350x130 window at screen offset (600, 100),
    # build the scraper form in it and hand control to the Tk event loop.
    root = Tk()
    root.geometry("350x130+600+100")
    root.title("Just Dial Scrapper - Cool")
    JDScrapperGUI(root).start()
    root.mainloop()
0 commit comments