-
Notifications
You must be signed in to change notification settings - Fork 0
/
Copy pathManga Downloader.py
109 lines (78 loc) · 3.48 KB
/
Manga Downloader.py
1
2
3
4
5
6
7
8
9
10
11
12
13
14
15
16
17
18
19
20
21
22
23
24
25
26
27
28
29
30
31
32
33
34
35
36
37
38
39
40
41
42
43
44
45
46
47
48
49
50
51
52
53
54
55
56
57
58
59
60
61
62
63
64
65
66
67
68
69
70
71
72
73
74
75
76
77
78
79
80
81
82
83
84
85
86
87
88
89
90
91
92
93
94
95
96
97
98
99
100
101
102
103
104
105
106
107
108
import urllib2
import urllib
from bs4 import BeautifulSoup
import os
import requests
import socket
from time import sleep
def download(url, filename, path1):
# open the page url and get the html stuff into the page variable
page=urllib2.urlopen(url)
#soupyfy it and store in soup
soup=BeautifulSoup(page)
# get the required images (a list with tag matching "img") with
# ALL its attributes by looking for the tag "img"
img=soup.findAll("img")
# in the case of mutiple images matching the img tag condition then we are
# taking each element of the list and getting its Source(src) url
for imglink in img:
# final url of the .jpg file
finalurl=imglink.get("src")
try:
# using finalurl we get the whole image into image
image=urllib.urlopen(finalurl)
if not(image.headers.maintype=="image"):
print "not a image"
return "done"
except requests.exceptions.ConnectionError:
import sys
# prints `type(e), e` where `e` is the last exception
print sys.exc_info()[:2]
return "try"
# read the file into buf
buf=image.read()
# simply catenate the strings to get the file path
# path1 is the path of the file's location
filepath="%s%s" % (path1,filename)
# print the message to show we have done it
print filename+" stored to "+filepath
# open a file handle to write with
download=file(filepath,"wb")
# WRITE file to location
download.write(buf)
#close both handles
download.close()
image.close()
return "done"
# base url of the site to scrape; the slice positions used below depend
# on this exact string layout
base = "http://www.mangareader.net/388-26158-1/red-river/chapter-1.html"
# chapter-number offset baked into the url (it sits at base[34:36] for
# this particular manga; the value differs per title)
ch_no = 58
# destination directory for the downloaded chapters (default download path)
path = 'C:\\Users\\Aditya\\Desktop\\Work\\WallPapers\\Manga\\'
# iterate over 96 consecutive chapters after ch_no
for offset in range(1, 97):
    # splice the chapter id into the url: keep the first 34 characters
    # of base, append the chapter number, and leave room for the page
    # number that gets inserted in the inner loop
    chapter_url = base[0:34] + str(ch_no + offset) + "-"
    # each chapter gets its own numbered folder under `path`
    chapter_dir = path + str(offset + 1) + "\\"
    # create the per-chapter folder the first time we need it
    if not os.path.exists(chapter_dir):
        os.makedirs(chapter_dir)
    # visit up to 42 pages of the chapter
    for page_no in range(1, 43):
        page_url = chapter_url + str(page_no) + base[38:]
        image_name = str(page_no) + ".jpg"
        # a HEAD request reveals when we have run past the last page
        if requests.head(page_url).status_code == 404:
            break
        # keep retrying until download() reports success; the short
        # sleep works around the 'Max retries exceeded with url' error,
        # which really means the target machine actively refused the
        # connection
        status = "try"
        while status == "try":
            sleep(.05)
            # call the function to start downloading!
            status = download(page_url, image_name, chapter_dir)