Commit c274b98

Finished Script
I guess when I updated all my Python packages, my Selenium got past the Cloudflare check. The delay of 10 seconds is a little steep; I'll try to find a different way. I tried the following, but it didn't work, as there are about 5 seconds during which Cloudflare transfers us to the actual site, so "driver.find_element_by_id('head')" won't work:

from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

delay = 10  # seconds
try:
    WebDriverWait(driver, delay).until(
        EC.presence_of_element_located(driver.find_element_by_id('head')))
    print "Page is ready!"
except TimeoutException:
    print "Loading took too much time!"

The point is that we need to wait for some elements of the page to load. This should be possible; we don't even need to wait for all the AJAX requests to complete. The adverts and all take so much time. A workaround I found is to stop the loading in the Firefox browser once the page seems to have loaded; I encountered cases where the page seemed to be loading forever!

Other than that: the blogspot links shouldn't be taken, and options for choosing the anime and the download quality should be added.

Signed-off-by: Aditya A Prasad <[email protected]>
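A likely reason the snippet above fails even before the Cloudflare redirect becomes an issue: EC.presence_of_element_located expects a (By, value) locator tuple, not an already-resolved element, so the inline driver.find_element_by_id('head') raises NoSuchElementException immediately instead of letting the wait poll for the element. A minimal sketch of the intended explicit wait, assuming the target page really exposes an element with id "head":

from selenium.webdriver.common.by import By
from selenium.webdriver.support.ui import WebDriverWait
from selenium.webdriver.support import expected_conditions as EC
from selenium.common.exceptions import TimeoutException

delay = 10  # upper bound in seconds; the wait returns as soon as the element shows up
try:
    # pass a locator tuple so WebDriverWait itself polls for the element
    WebDriverWait(driver, delay).until(
        EC.presence_of_element_located((By.ID, 'head')))
    print "Page is ready!"
except TimeoutException:
    print "Loading took too much time!"

For the pages that seem to load forever, driver.set_page_load_timeout(delay) plus catching the same TimeoutException is the scripted equivalent of hitting the browser's stop button.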
1 parent a9cff8e commit c274b98

File tree

1 file changed: +82 -24 lines changed

KissAnimeDownloader.py (+82 -24)
@@ -1,41 +1,99 @@
 # KISSANIME - http://kissanime.com/ ANIME DOWNLOADER
-import urllib, urllib2, httplib
-httplib.HTTPConnection.debuglevel = 1
+
 from bs4 import BeautifulSoup
 from selenium import webdriver
+from selenium.webdriver.common.keys import Keys
+import time
 
 BASE_URL = "http://kissanime.com/Anime/"
-# EDIT THIS AND ADD YOUR REQUIRED ANIME NAME
+DELAY = 10  # seconds; change it depending on your internet connectivity
+episodeURLs = []
+downloadURLs = []
+
+# ------------------------------- EDIT THIS AND ADD YOUR REQUIRED ANIME NAME
 AnimeName = "Nodame-Cantabile"
+# -------------------------------
+
 URL = BASE_URL + AnimeName
 
+print "Opening Firefox browser"
+driver = webdriver.Firefox()
 
-episodeURLs = []
-downloadURLs = []
+print "Navigating to login page"
+driver.get("http://kissanime.com/Login")
 
-def getDownloadURLs(url):
-    print url
-    driver = webdriver.Firefox()
-    driver.get(url)
-    # because they block scrapers, we use "magic browser"! lol
-    req = urllib2.Request(URL, headers={'User-Agent': "Magic Browser"})
+print "DELAY start"
+time.sleep(DELAY)  # crude wait for the Cloudflare redirect to finish
+print "DELAY end"
+
+print "Logging in"
+user = driver.find_element_by_name("username")
+passwd = driver.find_element_by_name("password")
+user.send_keys("<your username>")
+passwd.send_keys("<your password>")
+passwd.send_keys(Keys.RETURN)
 
-    con = urllib2.urlopen(req)
+print "DELAY start"
+time.sleep(DELAY)
+print "DELAY end"
 
-    soup = BeautifulSoup(con)
+print "Navigating to anime episode page"
+driver.get(URL)
 
-    # gets all the tables
-    tables = soup.findAll('td')
+print "DELAY start"
+time.sleep(DELAY)
+print "DELAY end"
 
-    # we go through the tables
-    for table in tables:
+html = driver.page_source
+soup = BeautifulSoup(html)
+epListTable = soup.find("table", {"class": "listing"})
+
+print "\nObtaining episode URLs ...\n"
+
+for row in epListTable.findAll('tr'):
+    # each row's link is enclosed in a <td> tag
     try:
-        # whenever we get an 'a' tag we extract the 'href' attribute
-        episodeURLs.append(table.findAll('a')[0].get('href'))
-        # In every alternate line no 'a' exists; trying to access the
-        # first element ([0]) of an empty list raises IndexError
+        episodeURLs.append("http://kissanime.com" + row.findAll('a')[0].get('href'))
     except IndexError:
-        pass
+        pass  # header/spacer rows contain no <a> tag
+
+print "These are the episode URLs:"
+print episodeURLs
+
+for url in episodeURLs:
+    print "\nNavigating to get the video for URL => " + url
+    driver.get(url)
+
+    print "DELAY start"
+    time.sleep(DELAY)
+    print "DELAY end"
+
+    temp = []
+
+    html = driver.page_source
+    soup = BeautifulSoup(html)
+    for div in soup.findAll('div', {"id": "divDownload"}):
+        links = div.findAll('a')
+        for link in links:
+            # (episode number, quality label, download href)
+            dummy = (url[url.find('?') - 2:url.find('?')], link.text.strip(), link.attrs['href'])
+            temp.append(dummy)
+            print "\n\nTemp for " + link.text.strip()
+            print temp
+
+    downloadURLs.append(temp)
+
+for link in downloadURLs:
+    print link
+    print "\n"
+
+print "Copy-paste the above links to a text file, then use the import-from-text-file option in IDM to download all"
 
-for episodeURL in episodeURLs:
-    downloadURLs = getDownloadURLs("http:/kissanime.com/Anime" + episodeURL)
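The commit message flags skipping the blogspot links and adding a quality option as open items, and the script still ends with a manual copy-paste into IDM. A minimal sketch of how both could hang off the downloadURLs tuples collected above; the PREFERRED_QUALITY value and the links.txt filename are hypothetical, and the quality match assumes the scraped link text carries a label like "720p":

PREFERRED_QUALITY = "720p"  # hypothetical; matched against the link text scraped above

with open("links.txt", "w") as f:
    for episode in downloadURLs:
        for ep_num, quality, href in episode:
            if "blogspot" in href:
                continue  # skip the blogspot mirrors, per the commit message
            if PREFERRED_QUALITY in quality:
                f.write(href + "\n")  # one URL per line for IDM's import-from-text-file option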
