-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathscrappy.py
39 lines (38 loc) · 1.48 KB
/
scrappy.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
import requests
from bs4 import BeautifulSoup
from termcolor import colored
from requests.exceptions import ConnectionError
import time
import replit
# Interactive page scraper: asks for a website, fetches it, prints the page
# title and, if the user agrees, prints the parsed HTML of the whole page.

_URL_SCHEMES = ("http://", "https://")
# Matches the 15 seconds promised to the user in the progress message below.
_REQUEST_TIMEOUT_SECONDS = 15
# Compared against the lower-cased, stripped answer, so every entry is lower-case.
_AFFIRMATIVE_ANSWERS = frozenset({
    "yes",
    "y",
    "yea",
    "yeah",
    "sure",
    "o yea",
    "i thought that's what i was doing bro",
})


def _with_scheme(address):
    """Return *address* stripped, with ``http://`` prepended if it has no scheme.

    An address that already starts with ``http://`` or ``https://`` (in any
    letter case) is returned unchanged apart from the surrounding whitespace.
    """
    address = address.strip()
    if address.lower().startswith(_URL_SCHEMES):
        return address
    return "http://" + address


def _is_affirmative(answer):
    """Return True when *answer* is one of the accepted ways of saying yes."""
    return answer.strip().lower() in _AFFIRMATIVE_ANSWERS


replit.clear()
print(colored("Welcome to the", "magenta"), colored("Subordinate Intelligence Encrypted", "cyan"), colored("web crawler.", "magenta"))
time.sleep(2)
print()
time.sleep(4)
print()
website = _with_scheme(input(colored("Please enter your website (do NOT include http:// or https://) >", "yellow")))
print()
print(colored("We're scraping %s. If this takes longer than 15 seconds, the website failed to connect." % (website,), "magenta"))
time.sleep(2)
try:
    # Without an explicit timeout the request could wait on the server forever.
    page = requests.get(website, timeout=_REQUEST_TIMEOUT_SECONDS)
except ConnectionError:
    print()
    print(colored("The website you entered does not exist.", "red"))
except requests.RequestException as error:
    # Covers the remaining request failures (malformed URL, read timeout, ...)
    # so the user gets a message instead of a traceback.
    print()
    print(colored("The website could not be scraped: %s" % (error,), "red"))
else:
    soup = BeautifulSoup(page.content, "html.parser")
    title = soup.find('title')
    print()
    if title is None:
        # find() returns None when the page has no <title> element.
        print(colored("%s does not have a title." % (website,), "cyan"))
    else:
        print(colored("The title of %s is %s." % (website, title.text), "cyan"))
    time.sleep(4)
    print()
    setContinue = input(colored("Would you like to scrape the rest of this page?", "yellow"))
    if _is_affirmative(setContinue):
        print()
        print(colored("Creating your scraped page...", "magenta"))
        time.sleep(5)
        print()
        replit.clear()
        print(colored(soup, "cyan"))