forked from prankshaw/Domain-Explorer
-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathopengenus_task.py
44 lines (20 loc) · 1.39 KB
/
opengenus_task.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
import urllib.request #Importing necessary libraries
from collections import Counter
from urllib.parse import urljoin, urlparse

from bs4 import BeautifulSoup
def fetch_page(url):
    """Download *url* once and return ``(final_url, body_bytes)``.

    The response is closed as soon as the body has been read. ``final_url``
    is the URL reported by the response, which differs from *url* when a
    redirect was followed; relative links must be resolved against it.
    """
    with urllib.request.urlopen(url) as response:
        return response.url, response.read()


def extract_links(html):
    """Return the ``href`` value of every ``<a>`` tag in *html* that has one.

    Anchors without an ``href`` attribute (e.g. ``<a name="top">``) are not
    links and are skipped, so the result never contains ``None``.
    """
    soup = BeautifulSoup(html, 'html.parser')
    return [anchor['href'] for anchor in soup.find_all('a', href=True)]


def link_domain(href, base_url):
    """Return ``'scheme://netloc/'`` for *href*, or ``None`` if it has no host.

    *href* is first resolved against *base_url*, so relative links such as
    ``/about`` or ``#top`` are attributed to the page's own domain instead of
    an empty one. ``None`` is returned for a missing href, for a malformed
    URL, and for schemes that carry no network location (``mailto:``,
    ``javascript:``, ...).
    """
    if href is None:
        return None
    try:
        parsed = urlparse(urljoin(base_url, href))
    except ValueError:
        # urllib rejects some malformed URLs (e.g. an unbalanced '[' in the
        # host); one bad link should not abort the whole report.
        return None
    if not parsed.netloc:
        return None
    return '{uri.scheme}://{uri.netloc}/'.format(uri=parsed)


def count_domains(links, base_url):
    """Count how many of *links* point at each domain.

    Returns a ``Counter`` mapping ``'scheme://netloc/'`` to its number of
    occurrences, in order of first appearance. Links for which
    ``link_domain`` yields ``None`` are left out.
    """
    domains = (link_domain(href, base_url) for href in links)
    return Counter(domain for domain in domains if domain is not None)


def main():
    """Prompt for a URL, then print its size, its links and per-domain counts."""
    # Surrounding whitespace from the prompt would make urlopen fail.
    url = input("Enter the Url to be Explored : ").strip()

    # One request serves both the size report and the parser, so the two
    # always describe the same document.
    final_url, body = fetch_page(url)
    print("Total Size of web page is: ", len(body), "bytes")

    links = extract_links(body)
    print("The links present are:")
    print(links)
    print("Total number of links in web page are : ", len(links), "Links")

    for domain, total in count_domains(links, final_url).items():
        print("count of domain name ", domain, "=", total)


if __name__ == "__main__":
    main()