Commit | Line | Data |
---|---|---|
59e32d8d | 1 | import urllib, re |
f3ad0817 FT |
2 | import BeautifulSoup |
3 | import lib, htcache | |
4 | soup = BeautifulSoup.BeautifulSoup | |
5 | ||
class imgstream(lib.imgstream):
    """A readable stream over a single remote page image fetched over HTTP."""

    def __init__(self, url):
        self.bk = urllib.urlopen(url)
        success = False
        try:
            code = self.bk.getcode()
            if code != 200:
                raise IOError("Server error: " + str(code))
            headers = self.bk.info()
            self.ctype = headers["Content-Type"]
            self.clen = int(headers["Content-Length"])
            success = True
        finally:
            # Don't leak the connection if header validation blew up.
            if not success:
                self.bk.close()

    def fileno(self):
        return self.bk.fileno()

    def close(self):
        self.bk.close()

    def read(self, sz=None):
        """Read sz bytes, or the whole remaining body when sz is None."""
        return self.bk.read() if sz is None else self.bk.read(sz)
31 | ||
class page(lib.page):
    """One viewable page of a chapter; resolves its image URL lazily."""

    def __init__(self, chapter, stack, n, url):
        self.stack = stack
        self.chapter = chapter
        self.volume = chapter.volume
        self.manga = self.volume.manga
        self.n = n
        self.id = str(n)
        self.name = u"Page %s" % n
        self.url = url
        # Cached image URL; filled in on first call to iurl().
        self.ciurl = None

    def iurl(self):
        """Return (and cache) the URL of this page's image."""
        if self.ciurl is None:
            doc = soup(htcache.fetch(self.url))
            viewer = doc.find("div", id="viewer")
            self.ciurl = viewer.find("img", id="image")["src"]
        return self.ciurl

    def open(self):
        """Open an imgstream over this page's image."""
        return imgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mangafox.page %r.%r.%r.%r>" % (self.manga.name, self.volume.name, self.chapter.name, self.name)
58 | ||
class chapter(lib.pagelist):
    """A chapter: a lazily-parsed, cached list of its pages."""

    def __init__(self, volume, stack, id, name, url):
        self.stack = stack
        self.volume = volume
        self.manga = volume.manga
        self.id = id
        self.name = name
        self.url = url
        # Cached page list; filled in on first call to pages().
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    def pages(self):
        """Fetch the chapter's first page and parse out the page count."""
        if self.cpag is not None:
            return self.cpag
        doc = soup(htcache.fetch(self.url + "1.html"))
        counter = doc.find("form", id="top_bar").find("div", attrs={"class": "l"})
        if len(counter.contents) != 3:
            raise Exception("parse error: weird page list for %r" % self)
        label = counter.contents[2].strip()
        if label[:3] != u"of ":
            raise Exception("parse error: weird page list for %r" % self)
        # The counter reads "N of M"; everything after "of " is the page count.
        self.cpag = [page(self, self.stack + [(self, i)], i + 1, self.url + ("%i.html" % (i + 1)))
                     for i in xrange(int(label[3:]))]
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mangafox.chapter %r.%r.%r>" % (self.manga.name, self.volume.name, self.name)
92 | ||
class volume(lib.pagelist):
    """A volume: a plain container of chapters, filled in by manga.vols()."""

    def __init__(self, manga, stack, id, name):
        self.manga = manga
        self.stack = stack
        self.id = id
        self.name = name
        # Chapters are appended externally while the manga page is parsed.
        self.ch = []

    def __getitem__(self, i):
        return self.ch[i]

    def __len__(self):
        return len(self.ch)

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mangafox.volume %r.%r>" % (self.manga.name, self.name)
112 | ||
def nextel(el):
    """Return the next sibling of el that is a real tag, skipping text nodes."""
    sib = el.nextSibling
    while not isinstance(sib, BeautifulSoup.Tag):
        sib = sib.nextSibling
    return sib
118 | ||
class manga(lib.manga):
    """A manga title; parses its summary page into volumes and chapters."""

    # Chapter URLs that don't end in /1.html must look like .../vNN/cNNN/
    cure = re.compile(r"/v\d+/c[\d.]+/$")

    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        # Cached volume list; filled in on first call to vols().
        self.cvol = None
        self.stack = []

    def __getitem__(self, i):
        return self.vols()[i]

    def __len__(self):
        return len(self.vols())

    def vols(self):
        """Parse and cache the volume/chapter hierarchy from the manga page."""
        if self.cvol is not None:
            return self.cvol
        doc = soup(htcache.fetch(self.url))
        slides = doc.find("div", id="chapters").findAll("div", attrs={"class": "slide"})
        vols = []
        # The site lists newest first; reverse to get reading order.
        for i, slide in enumerate(reversed(slides)):
            vname = slide.find("h3", attrs={"class": "volume"}).contents[0].strip()
            vol = volume(self, [(self, i)], vname.encode("utf8"), vname)
            chlist = nextel(slide)
            if chlist.name != u"ul" or chlist["class"] != u"chlist":
                raise Exception("parse error: weird volume list for %r" % self)
            for o, item in enumerate(reversed(chlist.findAll("li"))):
                hdr = item.div.h3 or item.div.h4
                chname = hdr.a.string
                # The id is the bare chapter name, before any subtitle is added.
                chid = chname.encode("utf8")
                for span in item("span"):
                    try:
                        # Spans with the "title" class carry the chapter subtitle.
                        if u" title " in (u" " + span["class"] + u" "):
                            chname += " " + span.string
                    except KeyError:
                        pass
                curl = hdr.a["href"].encode("us-ascii")
                if curl.endswith("/1.html"):
                    curl = curl[:-6]
                elif self.cure.search(curl) is None:
                    raise Exception("parse error: unexpected chapter URL for %r: %s" % (self, curl))
                vol.ch.append(chapter(vol, vol.stack + [(vol, o)], chid, chname, curl))
            vols.append(vol)
        self.cvol = vols
        return self.cvol

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mangafox.manga %r>" % self.name
175 | ||
def libalphacmp(a, b):
    """Case-insensitive three-way comparison of manga names.

    Returns -1, 0 or 1 as a sorts before, equal to, or after b when both
    are uppercased. Implemented without the Python-2-only cmp() builtin,
    so the helper behaves identically under Python 3 as well.
    """
    a, b = a.upper(), b.upper()
    return (a > b) - (a < b)
178 | ||
class library(lib.library):
    """The mangafox site: prefix search via the alphabetical directory."""

    def __init__(self):
        self.base = "http://mangafox.me/"

    def alphapage(self, pno):
        """Return the manga listed on page pno of the alphabetical directory."""
        doc = soup(htcache.fetch(self.base + ("directory/%i.htm?az" % pno)))
        entries = doc.find("div", id="mangalist").find("ul", attrs={"class": "list"}).findAll("li")
        ubase = self.base + "manga/"
        ret = []
        for entry in entries:
            link = entry.find("div", attrs={"class": "manga_text"}).find("a", attrs={"class": "title"})
            name = link.string
            url = link["href"].encode("us-ascii")
            # Every manga URL must look like BASE/manga/<id>/ exactly.
            if url[:len(ubase)] != ubase or url.find('/', len(ubase)) != (len(url) - 1):
                raise Exception("parse error: unexpected manga URL for %r: %s" % (name, url))
            ret.append(manga(self, url[len(ubase):-1], name, url))
        return ret

    def alphapages(self):
        """Return how many pages the alphabetical directory has."""
        doc = soup(htcache.fetch(self.base + "directory/?az"))
        nav = doc.find("div", id="mangalist").find("div", id="nav").find("ul").findAll("li")
        # The last nav item is "next"; the one before it holds the page count.
        return int(nav[-2].find("a").string)

    def byname(self, prefix):
        """Yield every manga whose name starts with prefix, case-insensitively."""
        if not isinstance(prefix, unicode):
            prefix = prefix.decode("utf8")
        # Binary-search the directory for a page whose name range spans prefix.
        lo = 1
        hi = self.alphapages()
        while True:
            if lo > hi:
                return
            mid = lo + ((hi + 1 - lo) // 2)
            cur = self.alphapage(mid)
            if libalphacmp(cur[0].name, prefix) > 0:
                hi = mid - 1
            elif libalphacmp(cur[-1].name, prefix) < 0:
                lo = mid + 1
            else:
                pno = mid
                break
        # Skip the entries on this page that sort strictly before prefix.
        i = 0
        while i < len(cur) and libalphacmp(cur[i].name, prefix) < 0:
            i += 1
        # Yield matches, spilling onto subsequent pages until the prefix stops matching.
        while True:
            while i < len(cur):
                m = cur[i]
                if m.name[:len(prefix)].upper() != prefix.upper():
                    return
                yield m
                i += 1
            pno += 1
            cur = self.alphapage(pno)
            i = 0

    def byid(self, id):
        """Fetch a manga by its URL id; raises KeyError when it doesn't exist."""
        url = self.base + ("manga/%s/" % id)
        doc = soup(htcache.fetch(url))
        if doc.find("div", id="title") is None:
            # Assume we got the search page
            raise KeyError(id)
        name = doc.find("div", id="series_info").find("div", attrs={"class": "cover"}).img["alt"]
        return manga(self, id, name, url)

    def __iter__(self):
        raise NotImplementedError("mangafox iterator")