Commit | Line | Data |
---|---|---|
c39028a4 | 1 | import urllib.request, urllib.parse, http.cookiejar, re, bs4, os, time |
81be6921 FT |
2 | from . import profile, lib, htcache |
# Short alias for the BeautifulSoup constructor.
soup = bs4.BeautifulSoup


def soupify(cont):
    """Parse *cont* into a BeautifulSoup tree using the "html.parser" parser."""
    return soup(cont, "html.parser")
5 | ||
class pageerror(Exception):
    """Error raised when a fetched page does not have the expected content.

    The object passed as *page* is kept on the exception as ``page`` so
    that the handler can inspect it.
    """

    def __init__(self, message, page):
        Exception.__init__(self, message)
        self.page = page
08e259d7 | 10 | |
c39028a4 FT |
def iterlast(itr, default=None):
    """Return the last item produced by *itr*.

    If *itr* produces nothing, *default* is returned instead.  The result
    variable is always initialised, so an empty iterator combined with the
    default of ``None`` returns ``None`` rather than failing on an unbound
    local.
    """
    ret = default
    for ret in itr:
        pass
    return ret
19 | ||
def find1(el, *args, **kwargs):
    """Like ``el.find(*args, **kwargs)``, but never return None.

    Raises pageerror when nothing is found; the error carries the last
    item of ``el.parents`` (or *el* itself when there are no parents).
    """
    found = el.find(*args, **kwargs)
    if found is not None:
        return found
    raise pageerror("could not find expected element", iterlast(el.parents, el))
25 | ||
08e259d7 FT |
def byclass(el, name, cl):
    """Return the first *name* tag found by ``el.findAll`` whose "class"
    attribute contains *cl*, or None when there is no such tag."""
    for cand in el.findAll(name):
        if isinstance(cand, bs4.Tag) and cl in cand.get("class", []):
            return cand
    return None
33 | ||
def nextel(el):
    """Return the closest following sibling of *el* that is a bs4 Tag.

    Siblings that are not tags are skipped.
    NOTE(review): there is no end-of-siblings handling; presumably this
    raises AttributeError once ``nextSibling`` becomes None -- confirm.
    """
    sib = el.nextSibling
    while not isinstance(sib, bs4.Tag):
        sib = sib.nextSibling
    return sib
39 | ||
def fetchreader(lib, readerid, page):
    """Fetch one reader page and return it parsed by soupify().

    The request goes to ``lib.base + "areader?..."`` through
    ``lib.sess.fetch`` with the query parameters ``id`` (*readerid*),
    ``p`` (*page* as a string) and ``supress_webtoon``, plus a fixed
    Referer header.  Note that the *lib* parameter shadows the imported
    ``lib`` module inside this function.
    """
    query = urllib.parse.urlencode({"id": readerid,
                                    "p": str(page),
                                    "supress_webtoon": "t"})
    body = lib.sess.fetch(lib.base + "areader?" + query,
                          headers={"Referer": "http://bato.to/reader"})
    return soupify(body)
46 | ||
class page(lib.page):
    """One page of a chapter.

    The image URL is looked up on first use by iurl() and cached in
    ``ciurl``.
    """

    def __init__(self, chapter, stack, readerid, n):
        self.stack = stack
        self.lib = chapter.lib
        self.chapter = chapter
        self.n = n
        self.id = str(n)
        self.name = "Page %s" % n
        self.readerid = readerid
        # Cached image URL; None until iurl() has fetched it.
        self.ciurl = None

    def iurl(self):
        """Return the ``src`` of the ``img#comic_page`` element of this
        page's reader page, fetching it only on the first call."""
        if self.ciurl is None:
            pg = fetchreader(self.lib, self.readerid, self.n)
            img = find1(pg, "img", id="comic_page")
            self.ciurl = img["src"]
        return self.ciurl

    def open(self):
        """Return ``lib.stdimgstream`` for this page's image URL."""
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    # This used to be spelled ``__repr``, which Python name-mangles into a
    # private method, so it was never picked up as the object's repr.
    def __repr__(self):
        return "<batoto.page %r.%r.%r.%r>" % (self.chapter.manga.name, self.chapter.group.name, self.chapter.name, self.name)
08e259d7 FT |
73 | |
class chapter(lib.pagelist):
    """A chapter belonging to a group.

    The list of pages is fetched on first access through pages() and
    cached in ``cpag``.
    """

    def __init__(self, group, stack, id, name, readerid):
        self.stack = stack
        self.group = group
        self.manga = group.manga
        self.lib = self.manga.lib
        self.id = id
        self.name = name
        self.readerid = readerid
        # Cached list of page objects; None until pages() has run.
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    # Extracts the page number from the text of a page_select option.
    pnre = re.compile(r"page (\d+)")

    def pages(self):
        """Return the list of page objects of this chapter.

        On the first call, reader page 1 is fetched and one page object is
        created per ``option`` of the ``select#page_select`` element.

        Raises pageerror if an option's text does not match ``pnre``.
        """
        if self.cpag is None:
            pg = fetchreader(self.lib, self.readerid, 1)
            cpag = []
            for opt in find1(pg, "select", id="page_select").findAll("option"):
                # Check the match explicitly instead of crashing on None,
                # in the same way chapter URLs are checked in manga.ch().
                m = self.pnre.match(opt.string or "")
                if m is None:
                    raise pageerror("Got weird page option: %r" % opt.string, pg)
                n = int(m.group(1))
                cpag.append(page(self, self.stack + [(self, len(cpag))], self.readerid, n))
            self.cpag = cpag
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.chapter %r.%r.%r>" % (self.manga.name, self.group.name, self.name)
107 | ||
class group(lib.pagelist):
    """A named list of chapters of one manga.

    The chapters live in the ``ch`` list, which starts out empty and is
    indexed by ``__getitem__`` / counted by ``__len__``.
    """

    def __init__(self, manga, stack, id, name):
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.ch = []

    def __getitem__(self, i):
        return self.ch[i]

    def __len__(self):
        return len(self.ch)

    def __str__(self):
        return self.name

    def __repr__(self):
        # The closing ">" was missing from this format string.
        return "<batoto.group %r.%r>" % (self.manga.name, self.name)
08e259d7 FT |
127 | |
class manga(lib.manga):
    """A manga, identified by *id* and located at *url*.

    The list of groups (ch()) and the alternative names (altnames()) are
    fetched on first use and cached in ``cch`` and ``cnames``.
    """

    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.sess = lib.sess
        self.id = id
        self.name = name
        self.url = url
        self.stack = []
        # Lazily filled caches for ch() and altnames().
        self.cch = None
        self.cnames = None

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    @staticmethod
    def vfylogin(page):
        """Return True if *page* has no ``div#register_notice`` and does
        contain a ``table`` with class ``chapters_list``."""
        if page.find("div", id="register_notice"):
            return False
        return bool(byclass(page, "table", "chapters_list"))

    # Extracts the reader id from a chapter link.
    cure = re.compile(r"/reader#([a-z0-9]+)")

    def ch(self):
        """Return the list of group objects of this manga.

        The manga page is fetched through ``sess.lfetch`` with vfylogin as
        the check.  Rows of the chapter table carrying both the ``row``
        class and the ``lang_<lib.lang>`` class are collected, put in
        reverse table order and bundled per group name; groups are ordered
        by the position of their first chapter in that reversed list.

        Raises pageerror if a chapter link does not match ``cure``.
        """
        if self.cch is not None:
            return self.cch
        page = self.sess.lfetch(self.url, self.vfylogin)
        table = byclass(page, "table", "chapters_list")
        if table.tbody is not None:
            table = table.tbody
        langcls = "lang_" + self.lib.lang
        found = []
        for row in table.childGenerator():
            if not (isinstance(row, bs4.Tag) and row.name == "tr"):
                continue
            classes = row.get("class", [])
            if "row" not in classes or langcls not in classes:
                continue
            url = row.td.a["href"]
            m = self.cure.search(url)
            if m is None:
                raise pageerror("Got weird chapter URL: %r" % url, page)
            found.append((m.group(1),
                          row.td.a.text,
                          nextel(nextel(row.td)).text.strip()))
        found.reverse()
        # group name -> [index of its first chapter, [(readerid, name), ...]]
        bygroup = {}
        for idx, (readerid, name, gname) in enumerate(found):
            bygroup.setdefault(gname, [idx, []])[1].append((readerid, name))
        ordered = sorted(bygroup.items(), key=lambda item: item[1][0])
        result = []
        for gidx, (gname, (_, chapters)) in enumerate(ordered):
            grp = group(self, [(self, gidx)], gname, gname)
            for cidx, (readerid, name) in enumerate(chapters):
                grp.ch.append(chapter(grp, grp.stack + [(grp, cidx)], readerid, name, readerid))
            result.append(grp)
        self.cch = result
        return self.cch

    def _scanaltnames(self, page):
        # Return the list of alternative names found in *page*, or None if
        # no "Alt Names:" row exists in any ipb_table.
        for tbl in page.findAll("table", attrs={"class": "ipb_table"}):
            if tbl.tbody is not None:
                tbl = tbl.tbody
            for tr in tbl.findAll("tr"):
                if "Alt Names:" not in tr.td.text:
                    continue
                nls = nextel(tr.td)
                if nls.name != "td" or nls.span is None:
                    raise pageerror("Weird altnames table in " + self.id, page)
                return [nm.text.strip() for nm in nls.findAll("span")]
        return None

    def altnames(self):
        """Return the list of alternative names of this manga.

        The names are the stripped texts of the ``span`` elements in the
        cell following the "Alt Names:" cell.  Raises pageerror if that
        row cannot be found or does not look as expected.
        """
        if self.cnames is None:
            page = soupify(self.sess.fetch(self.url))
            names = self._scanaltnames(page)
            if names is None:
                raise pageerror("Could not find altnames for " + self.id, page)
            self.cnames = names
        return self.cnames

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.manga %r>" % self.name
212 | ||
81be6921 FT |
class credentials(object):
    """A username/password pair."""

    def __init__(self, username, password):
        self.username = username
        self.password = password

    @classmethod
    def fromfile(cls, path):
        """Build credentials from the profile file at *path*.

        Recognised entries are ``username``, ``password`` and ``pass64``
        (a base64-encoded, UTF-8 password).  Raises ValueError when either
        the username or the password is missing.
        """
        username = None
        password = None
        with open(path) as fp:
            for words in profile.splitlines(fp):
                key = words[0]
                if key == "username":
                    username = words[1]
                elif key == "password":
                    password = words[1]
                elif key == "pass64":
                    import binascii
                    password = binascii.a2b_base64(words[1]).decode("utf8")
        if username is None or password is None:
            raise ValueError("Incomplete profile: " + path)
        return cls(username, password)

    @classmethod
    def default(cls):
        """Return the credentials stored in the "batoto" file under
        ``profile.confdir``, or None if that file does not exist."""
        path = os.path.join(profile.confdir, "batoto")
        return cls.fromfile(path) if os.path.exists(path) else None
240 | ||
class session(object):
    """A cookie-carrying HTTP session that can log in to the site.

    ``web`` is a urllib opener bound to the cookie jar ``jar``;
    ``lastlogin`` is the time of the last successful login (0 initially).
    """

    def __init__(self, base, credentials):
        self.base = base
        self.creds = credentials
        self.jar = http.cookiejar.CookieJar()
        self.web = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.jar))
        self.lastlogin = 0

    # Extracts the user name from the text of the user link.
    rlre = re.compile(r"Welcome, (.*) ")

    def dologin(self, pre=None):
        """Log in with the stored credentials.

        *pre* is an already parsed page to inspect; if it is None, the
        base page is fetched.  If the page shows that the expected user is
        already logged in, nothing is done.  If another user is logged in,
        that user is signed out first.

        Raises Exception if the last successful login was less than 60
        seconds ago, and pageerror if the logout link or the login form
        cannot be found or the login is not confirmed by the site.
        """
        now = time.time()
        if now - self.lastlogin < 60:
            raise Exception("Too soon since last login attempt")
        if pre is None:
            with self.web.open(self.base) as hs:
                page = soupify(hs.read())
        else:
            page = pre

        cur = page.find("a", id="user_link")
        if cur:
            m = self.rlre.search(cur.text)
            if m and m.group(1) == self.creds.username:
                # Already logged in as the right user.
                return
            # Logged in as somebody else: sign out and reload the base page.
            outurl = None
            nav = page.find("div", id="user_navigation")
            if nav:
                for li in nav.findAll("li"):
                    if li.a and "Sign Out" in li.a.string:
                        outurl = li.a["href"]
            if not outurl:
                raise pageerror("Could not find logout URL", page)
            with self.web.open(outurl) as hs:
                hs.read()
            with self.web.open(self.base) as hs:
                page = soupify(hs.read())
        # Not logged in (any more): fall through to the login form.

        form = page.find("form", id="login")
        if not form:
            if pre:
                # The supplied page had no form; retry from the base page.
                return self.dologin()
            raise pageerror("Could not find login form", page)
        values = {}
        for el in form.findAll("input", type="hidden"):
            values[el["name"]] = el["value"]
        values["ips_username"] = self.creds.username
        values["ips_password"] = self.creds.password
        values["rememberMe"] = "1"
        values["anonymous"] = "1"
        req = urllib.request.Request(form["action"], urllib.parse.urlencode(values).encode("ascii"))
        req.add_header("User-Agent", self.useragent)
        with self.web.open(req) as hs:
            page = soupify(hs.read())
        for resp in page.findAll("p", attrs={"class": "message"}):
            if resp.strong and "You are now signed in" in resp.strong.string:
                break
        else:
            raise pageerror("Could not log in", page)
        self.lastlogin = now

    def open(self, url):
        """Open *url* (a URL or Request object) with the session's opener."""
        return self.web.open(url)

    useragent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/25.0.1364.160 Safari/537.22"

    def fetch(self, url, headers=None):
        """Return the response body of *url*.

        The request carries the session's User-Agent plus any extra
        *headers* given as a dict.
        """
        req = urllib.request.Request(url)
        req.add_header("User-Agent", self.useragent)
        if headers is not None:
            for k, v in headers.items():
                req.add_header(k, v)
        with self.open(req) as hs:
            return hs.read()

    def lfetch(self, url, ck):
        """Fetch and parse *url*, logging in first if necessary.

        *ck* is called with the parsed page and must return true when the
        page is acceptable.  If it is not, dologin() is run and the page
        is fetched once more; pageerror is raised if *ck* still rejects it.
        """
        page = soupify(self.fetch(url))
        if not ck(page):
            self.dologin(pre=page)
            page = soupify(self.fetch(url))
            if not ck(page):
                raise pageerror("Could not verify login status despite having logged in", page)
        return page
322 | ||
class library(lib.library):
    """Entry point for looking up manga on the site at ``base``."""

    def __init__(self, *, creds=None):
        if creds is None:
            creds = credentials.default()
        self.base = "http://bato.to/"
        self.sess = session(self.base, creds)
        self.lang = "English"

    def byid(self, id):
        """Return the manga with the given *id*.

        Raises KeyError if the fetched page has no
        ``h1.ipsType_pagetitle`` element.
        """
        url = self.base + "comic/_/comics/" + id
        page = soupify(self.sess.fetch(url))
        title = page.find("h1", attrs={"class": "ipsType_pagetitle"})
        if title is None:
            raise KeyError(id)
        return manga(self, id, title.string.strip(), url)

    def _search(self, pars):
        # Generator over the manga found by a search with the query
        # parameters *pars*.  Result pages are requested one by one ("p"
        # parameter) for as long as a page contains a "show_more_row" row.
        # Note that these requests do not go through self.sess.
        p = 1
        hasmore = True
        while hasmore:
            _pars = dict(pars)
            _pars["p"] = str(p)
            req = urllib.request.Request(self.base + "search?" + urllib.parse.urlencode(_pars))
            req.add_header("User-Agent", session.useragent)
            with urllib.request.urlopen(req) as resp:
                page = soupify(resp.read())
            rls = page.find("div", id="comic_search_results").table
            if rls.tbody is not None:
                rls = rls.tbody
            hasmore = False
            for child in rls.findAll("tr"):
                if child.th is not None:
                    continue
                rowid = child.get("id", "")
                if rowid[:11] == "comic_rowo_":
                    continue
                if rowid == "show_more_row":
                    hasmore = True
                    continue
                link = child.td.strong.a
                url = link["href"]
                m = self.rure.search(url)
                if m is None:
                    raise Exception("Got weird manga URL: %r" % url)
                yield manga(self, m.group(1), link.text.strip(), url)
            p += 1

    # Extracts the manga id from a search result link.
    rure = re.compile(r"/comic/_/([^/]*)$")

    def search(self, expr):
        """Return a generator of manga from a search with ``name`` set to
        *expr* and ``name_cond`` set to ``"c"``."""
        return self._search({"name": expr, "name_cond": "c"})

    def byname(self, prefix):
        """Yield the manga whose name, or one of whose alternative names,
        starts with *prefix* (compared case-insensitively).

        A manga matched through an alternative name is yielded as a new
        manga object named after the first matching alternative name.
        Results matching neither way are skipped.
        """
        plen = len(prefix)
        want = prefix.lower()
        for res in self._search({"name": prefix, "name_cond": "s"}):
            if res.name[:plen].lower() == want:
                yield res
                continue
            for aname in res.altnames():
                if aname[:plen].lower() == want:
                    yield manga(self, res.id, aname, res.url)
                    break