Commit c274b98

Finished Script
I guess when I updated all my Python packages, my Selenium got past the Cloudflare check. The delay of 10 seconds is a little steep; I'll try to find a different way. I tried the following, but it didn't work, as there are about 5 seconds during which Cloudflare transfers us to the actual site, so "driver.find_element_by_id('head')" won't work:

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

delay = 10  # seconds
try:
    WebDriverWait(driver, delay).until(
        EC.presence_of_element_located(driver.find_element_by_id('head')))
    print "Page is ready!"
except TimeoutException:
    print "Loading took too much time!"

The point is that we need to wait for some elements of the page to load. This should be possible; we don't even need to wait for all the AJAX requests to complete. The adverts and all take so much time. A workaround I found is to stop the loading in the Firefox browser once the page seems to have loaded; I encountered cases where the page seemed to be loading forever!

Other than that: the blogspot links shouldn't be taken, and options for choosing the anime and the download quality should be added.

Signed-off-by: Aditya A Prasad <[email protected]>
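A likely reason the snippet above fails even before the Cloudflare redirect becomes an issue: EC.presence_of_element_located expects a (By, value) locator tuple, not an already-resolved element, so the inline driver.find_element_by_id('head') raises NoSuchElementException immediately instead of letting the wait poll for the element. A minimal sketch of the intended explicit wait, assuming the target page really exposes an element with id "head":

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

delay = 10  # upper bound in seconds; the wait returns as soon as the element shows up
try:
    # pass a locator tuple so WebDriverWait itself polls for the element
    WebDriverWait(driver, delay).until(
        EC.presence_of_element_located((By.ID, 'head')))
    print "Page is ready!"
except TimeoutException:
    print "Loading took too much time!"

For the pages that seem to load forever, driver.set_page_load_timeout(delay) plus catching the same TimeoutException is the scripted equivalent of hitting the browser's stop button.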
1 parent a9cff8e commit c274b98

File tree

1 file changed: +82 -24 lines changed

KissAnimeDownloader.py (+82 -24)
@@ -1,41 +1,99 @@
 # KISSANIME - http://kissanime.com/ ANIME DOWNLOADER
-import urllib, urllib2, httplib
-httplib.HTTPConnection.debuglevel = 1
+
 from bs4 import BeautifulSoup
 from selenium import webdriver
+from selenium.webdriver.common.keys import Keys
+import time
 
 BASE_URL = "http://kissanime.com/Anime/"
-# EDIT THIS AND ADD YOUR REQUIRED ANIME NAME
+DELAY = 10  # seconds; change it depending on your internet connectivity
+episodeURLs = []
+downloadURLs = []
+
+# ------------------------------- EDIT THIS AND ADD YOUR REQUIRED ANIME NAME
 AnimeName = "Nodame-Cantabile"
+# -------------------------------
+
 URL = BASE_URL + AnimeName
 
+print "Opening Firefox browser"
+driver = webdriver.Firefox()
 
-episodeURLs = []
-downloadURLs = []
+print "Navigating to login page"
+driver.get("http://kissanime.com/Login")
 
-def getDownloadURLs(url):
-    print url
-    driver = webdriver.Firefox()
-    driver.get(url)
-    # because they block scrapers, we use "magic browser"! lol
-    req = urllib2.Request(URL, headers={'User-Agent': "Magic Browser"})
+print "DELAY start"
+time.sleep(DELAY)  # crude wait for the Cloudflare redirect to finish
+print "DELAY end"
+
+print "Logging in"
+user = driver.find_element_by_name("username")
+passwd = driver.find_element_by_name("password")
+user.send_keys("<your username>")
+passwd.send_keys("<your password>")
+passwd.send_keys(Keys.RETURN)
 
-    con = urllib2.urlopen(req)
+print "DELAY start"
+time.sleep(DELAY)
+print "DELAY end"
 
-    soup = BeautifulSoup(con)
+print "Navigating to anime episode page"
+driver.get(URL)
 
-    # gets all the tables
-    tables = soup.findAll('td')
+print "DELAY start"
+time.sleep(DELAY)
+print "DELAY end"
 
-    # we go through the tables
-    for table in tables:
+html = driver.page_source
+soup = BeautifulSoup(html)
+epListTable = soup.find("table", {"class": "listing"})
+
+print "\nObtaining episode URLs ...\n"
+
+for row in epListTable.findAll('tr'):
+    # each row's link is enclosed in a <td> tag
     try:
-        # whenever we get an 'a' tag we extract the 'href' attribute
-        episodeURLs.append(table.findAll('a')[0].get('href'))
-        # In every alternate line no 'a' exists; trying to access the
-        # first element ([0]) of an empty list raises IndexError
+        episodeURLs.append("http://kissanime.com" + row.findAll('a')[0].get('href'))
     except IndexError:
-        pass
+        pass  # header/spacer rows contain no <a> tag
+
+print "These are the episode URLs:"
+print episodeURLs
+
+for url in episodeURLs:
+    print "\nNavigating to get the video for URL => " + url
+    driver.get(url)
+
+    print "DELAY start"
+    time.sleep(DELAY)
+    print "DELAY end"
+
+    temp = []
+
+    html = driver.page_source
+    soup = BeautifulSoup(html)
+    for div in soup.findAll('div', {"id": "divDownload"}):
+        links = div.findAll('a')
+        for link in links:
+            # (episode number, quality label, download href)
+            dummy = (url[url.find('?') - 2:url.find('?')], link.text.strip(), link.attrs['href'])
+            temp.append(dummy)
+            print "\n\nTemp for " + link.text.strip()
+            print temp
+
+    downloadURLs.append(temp)
+
+for link in downloadURLs:
+    print link
+    print "\n"
+
+print "Copy-paste the above links to a text file, then use the import-from-text-file option in IDM to download all"
 
-for episodeURL in episodeURLs:
-    downloadURLs = getDownloadURLs("http:/kissanime.com/Anime" + episodeURL)
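The commit message flags skipping the blogspot links and adding a quality option as open items, and the script still ends with a manual copy-paste into IDM. A minimal sketch of how both could hang off the downloadURLs tuples collected above; the PREFERRED_QUALITY value and the links.txt filename are hypothetical, and the quality match assumes the scraped link text carries a label like "720p":

PREFERRED_QUALITY = "720p"  # hypothetical; matched against the link text scraped above

with open("links.txt", "w") as f:
    for episode in downloadURLs:
        for ep_num, quality, href in episode:
            if "blogspot" in href:
                continue  # skip the blogspot mirrors, per the commit message
            if PREFERRED_QUALITY in quality:
                f.write(href + "\n")  # one URL per line for IDM's import-from-text-file option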
