# MangaFox backend: models the site as manga -> volume -> chapter -> page
# objects on top of the package's lib base classes, fetching HTML through
# htcache.
import urllib.request, urllib.parse, re
import bs4, json
from . import lib, htcache
soup = bs4.BeautifulSoup
soupify = lambda cont: soup(cont)
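# Note: bs4 guesses a parser when none is given, and recent versions warn
# about that; if desired, one could be named explicitly, e.g.:
#     soupify = lambda cont: soup(cont, "html.parser")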

class page(lib.page):
    # A single viewer page of a chapter; the actual image URL is resolved
    # lazily and cached in self.ciurl.
    def __init__(self, chapter, stack, n, url):
        self.stack = stack
        self.chapter = chapter
        self.volume = self.chapter.volume
        self.manga = self.volume.manga
        self.n = n
        self.id = str(n)
        self.name = "Page %s" % n
        self.url = url
        self.ciurl = None

    def iurl(self):
        # Fetch the viewer page and pull the image source out of it.
        if self.ciurl is None:
            page = soupify(htcache.fetch(self.url))
            self.ciurl = page.find("div", id="viewer").find("img", id="image")["src"]
        return self.ciurl

    def open(self):
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mangafox.page %r.%r.%r.%r>" % (self.manga.name, self.volume.name, self.chapter.name, self.name)

class chapter(lib.pagelist):
    # A chapter of a volume; its pages are discovered by reading the page
    # counter ("of N") on the chapter's first viewer page.
    def __init__(self, volume, stack, id, name, url):
        self.stack = stack
        self.volume = volume
        self.manga = volume.manga
        self.id = id
        self.name = name
        self.url = url
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    def pages(self):
        if self.cpag is None:
            pg = soupify(htcache.fetch(self.url + "1.html"))
            # The top bar holds an "of N" counter; N is the page count.
            l = pg.find("form", id="top_bar").find("div", attrs={"class": "l"})
            if len(l.contents) != 3:
                raise Exception("parse error: weird page list for %r" % self)
            m = l.contents[2].strip()
            if m[:3] != "of ":
                raise Exception("parse error: weird page list for %r" % self)
            self.cpag = [page(self, self.stack + [(self, n)], n + 1, self.url + ("%i.html" % (n + 1))) for n in range(int(m[3:]))]
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mangafox.chapter %r.%r.%r>" % (self.manga.name, self.volume.name, self.name)

class volume(lib.pagelist):
    def __init__(self, manga, stack, id, name):
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.ch = []

    def __getitem__(self, i):
        return self.ch[i]

    def __len__(self):
        return len(self.ch)

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mangafox.volume %r.%r>" % (self.manga.name, self.name)

def nextel(el):
    while True:
        el = el.nextSibling
        if isinstance(el, bs4.Tag):
            return el
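# Illustrative example (not part of the original module): nextel skips the
# whitespace NavigableStrings that BeautifulSoup keeps between sibling tags:
#
#     doc = bs4.BeautifulSoup("<h3>V01</h3>\n<ul class='chlist'></ul>", "html.parser")
#     nextel(doc.h3)   # -> the <ul class="chlist"> Tag, not the "\n" text node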

class manga(lib.manga):
    # Matches chapter URLs ending in /c<number>/ (digits and dots allowed).
    cure = re.compile(r"/c[\d.]+/$")

    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        self.cvol = None
        self.stack = []

    def __getitem__(self, i):
        return self.vols()[i]

    def __len__(self):
        return len(self.vols())

    def vols(self):
        if self.cvol is None:
            page = soupify(htcache.fetch(self.url))
            # Volumes (and the chapters within them) are listed newest-first,
            # so iterate in reverse to get chronological order.
            vls = page.find("div", id="chapters").findAll("div", attrs={"class": "slide"})
            cvol = []
            for i, vn in enumerate(reversed(vls)):
                name = vn.find("h3", attrs={"class": "volume"}).contents[0].strip()
                vol = volume(self, [(self, i)], name, name)
                cls = nextel(vn)
                if cls.name != "ul" or "chlist" not in cls["class"]:
                    raise Exception("parse error: weird volume list for %r" % self)
                for o, ch in enumerate(reversed(cls.findAll("li"))):
                    n = ch.div.h3 or ch.div.h4
                    chid = name = n.a.string
                    for span in ch("span"):
                        try:
                            if "title" in span["class"]:
                                name += " " + span.string
                        except KeyError:
                            pass
                    url = n.a["href"]
                    # Normalize chapter links: strip a trailing "1.html" but
                    # keep the slash; bare .../c<number>/ links pass as-is.
                    if url[-7:] == "/1.html":
                        url = url[:-6]
                    elif self.cure.search(url) is not None:
                        pass
                    else:
                        raise Exception("parse error: unexpected chapter URL for %r: %s" % (self, url))
                    vol.ch.append(chapter(vol, vol.stack + [(vol, o)], chid, name, url))
                cvol.append(vol)
            self.cvol = cvol
        return self.cvol

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<mangafox.manga %r>" % self.name

def libalphacmp(a, b):
    if a.upper() < b.upper():
        return -1
    elif a.upper() > b.upper():
        return 1
    return 0

class library(lib.library):
    # Entry point for the MangaFox backend: manga can be found through the
    # alphabetical directory (byname), the AJAX search endpoint (search), or
    # directly by id (byid).
    def __init__(self):
        self.base = "http://mangafox.me/"

    def alphapage(self, pno):
        # Fetch one page of the A-Z directory and turn each listed title into
        # a manga object, checking that its URL has the form <base>manga/<id>/.
        page = soupify(htcache.fetch(self.base + ("directory/%i.htm?az" % pno)))
        ls = page.find("div", id="mangalist").find("ul", attrs={"class": "list"}).findAll("li")
        ret = []
        ubase = self.base + "manga/"
        for m in ls:
            t = m.find("div", attrs={"class": "manga_text"}).find("a", attrs={"class": "title"})
            name = t.string
            url = t["href"]
            if url[:len(ubase)] != ubase or url.find('/', len(ubase)) != (len(url) - 1):
                raise Exception("parse error: unexpected manga URL for %r: %s" % (name, url))
            ret.append(manga(self, url[len(ubase):-1], name, url))
        return ret

    def alphapages(self):
        # Number of pages in the A-Z directory, read from the pagination bar.
        page = soupify(htcache.fetch(self.base + "directory/?az"))
        ls = page.find("div", id="mangalist").find("div", id="nav").find("ul").findAll("li")
        return int(ls[-2].find("a").string)

    def byname(self, prefix):
        # Binary-search the alphabetically sorted directory pages for one that
        # could contain the prefix, then scan forward from there, yielding
        # every manga whose name starts with it (case-insensitively).
        l = 1
        r = self.alphapages()
        while True:
            if l > r:
                return
            c = l + ((r + 1 - l) // 2)
            ls = self.alphapage(c)
            if libalphacmp(ls[0].name, prefix) > 0:
                r = c - 1
            elif libalphacmp(ls[-1].name, prefix) < 0:
                l = c + 1
            else:
                pno = c
                break
        # Skip entries on this page that sort before the prefix...
        i = 0
        while i < len(ls):
            m = ls[i]
            if libalphacmp(m.name, prefix) >= 0:
                break
            i += 1
        # ...then yield matches, moving on to subsequent pages as needed.
        while True:
            while i < len(ls):
                m = ls[i]
                if not m.name[:len(prefix)].upper() == prefix.upper():
                    return
                yield m
                i += 1
            pno += 1
            ls = self.alphapage(pno)
            i = 0

    def search(self, expr):
        # Query the site's AJAX search endpoint, which returns a JSON list of
        # [num, name, id, genres, author] rows.
        req = urllib.request.Request(self.base + ("ajax/search.php?term=%s" % urllib.parse.quote(expr)),
                                     headers={"User-Agent": "automanga/1"})
        with urllib.request.urlopen(req) as resp:
            rc = json.loads(resp.read().decode("utf-8"))
        return [manga(self, id, name, self.base + ("manga/%s/" % id)) for num, name, id, genres, author in rc]

    def byid(self, id):
        url = self.base + ("manga/%s/" % id)
        page = soupify(htcache.fetch(url))
        if page.find("div", id="title") is None:
            # Assume we got the search page
            raise KeyError(id)
        name = page.find("div", id="series_info").find("div", attrs={"class": "cover"}).img["alt"]
        return manga(self, id, name, url)

    def __iter__(self):
        raise NotImplementedError("mangafox iterator")
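
# Usage sketch (illustrative only; assumes this module sits in a package next
# to lib.py and htcache.py, as the relative imports require, and that
# lib.stdimgstream yields a file-like object):
#
#     from somepackage import mangafox        # "somepackage" is a placeholder
#     mflib = mangafox.library()
#     for m in mflib.byname("ber"):           # prefix search over the A-Z directory
#         print(m.name)
#     m = mflib.byid("some_manga_id")         # hypothetical id
#     pg = m[0][0][0]                         # first volume -> chapter -> page
#     data = pg.open().read()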