Fixed some Mangafox update issues.
[automanga.git] / manga / batoto.py
1 import urllib.request, urllib.parse, http.cookiejar, re, bs4, os, time
2 from . import profile, lib, htcache
soup = bs4.BeautifulSoup
def soupify(cont):
    """Parse *cont* with BeautifulSoup using the stdlib html.parser backend."""
    # A def (rather than the original lambda assignment) is the PEP 8
    # idiom and gives the helper a proper name in tracebacks.
    return soup(cont, "html.parser")
5
class pageerror(Exception):
    """Raised when a fetched page lacks the expected structure.

    The offending parsed page is kept on the ``page`` attribute so that
    callers can inspect or dump it for debugging.
    """
    def __init__(self, message, page):
        self.page = page
        super().__init__(message)
10
def iterlast(itr, default=None):
    """Exhaust iterator *itr* and return the last element it yielded.

    Returns *default* when the iterator yields nothing at all.

    BUGFIX: the original only seeded the fallback when *default* was not
    None, so an empty iterator combined with the default of None raised
    UnboundLocalError instead of returning None.  Seeding unconditionally
    fixes that while preserving behavior for all other inputs.
    """
    ret = default
    try:
        while True:
            ret = next(itr)
    except StopIteration:
        return ret
19
def find1(el, *args, **kwargs):
    """Like ``el.find(...)`` but raise pageerror instead of returning None.

    The error carries the document root (the last of *el*'s parents, or
    *el* itself when it has none) so the whole page can be inspected.
    """
    found = el.find(*args, **kwargs)
    if found is not None:
        return found
    raise pageerror("could not find expected element", iterlast(el.parents, el))
25
def byclass(el, name, cl):
    """Return the first ``name`` descendant of *el* carrying CSS class *cl*.

    Returns None when no such tag exists.
    """
    for cand in el.findAll(name):
        if not isinstance(cand, bs4.Tag):
            continue
        if cl in cand.get("class", []):
            return cand
    return None
33
def nextel(el):
    """Return the next sibling of *el* that is a tag, skipping text nodes.

    Like the original, this assumes a following tag exists; running off
    the end of the sibling list raises AttributeError.
    """
    cur = el.nextSibling
    while not isinstance(cur, bs4.Tag):
        cur = cur.nextSibling
    return cur
39
def fetchreader(lib, readerid, page):
    """Fetch one page of the Batoto AJAX reader and return it as parsed soup.

    The Referer header is required by the site; "supress_webtoon" (sic,
    the site's own spelling) forces paged mode for webtoon chapters.
    """
    query = urllib.parse.urlencode({"id": readerid,
                                    "p": str(page),
                                    "supress_webtoon": "t"})
    cont = lib.sess.fetch(lib.base + "areader?" + query,
                          headers={"Referer": "http://bato.to/reader"})
    return soupify(cont)
46
class page(lib.page):
    """A single image page of a chapter; the image URL is resolved lazily."""
    def __init__(self, chapter, stack, readerid, n):
        self.stack = stack
        self.lib = chapter.lib
        self.chapter = chapter
        self.n = n
        self.id = str(n)
        self.name = "Page %s" % n
        self.readerid = readerid
        # Cached image URL, filled in by the first iurl() call.
        self.ciurl = None

    def iurl(self):
        """Return this page's image URL, fetching the reader page once and caching."""
        if self.ciurl is None:
            page = fetchreader(self.lib, self.readerid, self.n)
            img = find1(page, "img", id="comic_page")
            self.ciurl = img["src"]
        return self.ciurl

    def open(self):
        """Open an image stream for this page."""
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    # BUGFIX: this was misspelled "__repr" (missing trailing underscores),
    # so repr() never used it and fell back to the default object repr.
    def __repr__(self):
        return "<batoto.page %r.%r.%r.%r>" % (self.chapter.manga.name, self.chapter.group.name, self.chapter.name, self.name)
73
class chapter(lib.pagelist):
    """One chapter of a manga; its page list is fetched on demand and cached."""

    # Matches the "page N" entries of the reader's page selector.
    pnre = re.compile(r"page (\d+)")

    def __init__(self, group, stack, id, name, readerid):
        self.stack = stack
        self.group = group
        self.manga = group.manga
        self.lib = self.manga.lib
        self.id = id
        self.name = name
        self.readerid = readerid
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    def pages(self):
        """Return the cached page list, building it from the reader page selector."""
        if self.cpag is None:
            rdr = fetchreader(self.lib, self.readerid, 1)
            pags = []
            for opt in find1(rdr, "select", id="page_select").findAll("option"):
                pn = int(self.pnre.match(opt.string).group(1))
                pags.append(page(self, self.stack + [(self, len(pags))], self.readerid, pn))
            self.cpag = pags
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.chapter %r.%r.%r>" % (self.manga.name, self.group.name, self.name)
107
class group(lib.pagelist):
    """A release group's list of chapters for one manga."""
    def __init__(self, manga, stack, id, name):
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        # Chapters are appended by manga.ch() after construction.
        self.ch = []

    def __getitem__(self, i):
        return self.ch[i]

    def __len__(self):
        return len(self.ch)

    def __str__(self):
        return self.name

    def __repr__(self):
        # BUGFIX: the format string was missing its closing ">",
        # unlike the repr of every sibling class in this module.
        return "<batoto.group %r.%r>" % (self.manga.name, self.name)
127
class manga(lib.manga):
    # A single manga on Batoto; its chapter groups and alternate names
    # are fetched lazily over the shared session and cached.
    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.sess = lib.sess
        self.id = id
        self.name = name
        self.url = url
        self.cch = None     # cached chapter groups, built by ch()
        self.stack = []
        self.cnames = None  # cached alternate names, built by altnames()

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    @staticmethod
    def vfylogin(page):
        # Login check passed to session.lfetch(): the page must not show
        # the registration notice and must contain the chapters table.
        if page.find("div", id="register_notice"):
            return False
        if not byclass(page, "table", "chapters_list"):
            return False
        return True

    # Extracts the reader ID from a chapter URL's "#..." fragment.
    cure = re.compile(r"/reader#([a-z0-9]+)")
    def ch(self):
        # Build (and cache) the chapter groups for this manga, keeping
        # only rows in the library's configured language.  Chapters are
        # grouped by release-group name; groups are ordered by first
        # appearance after reversing to oldest-first order.
        if self.cch is None:
            page = self.sess.lfetch(self.url, self.vfylogin)
            cls = byclass(page, "table", "chapters_list")
            if cls.tbody is not None:
                cls = cls.tbody
            scl = "lang_" + self.lib.lang
            cch = []
            for ch in cls.childGenerator():
                if isinstance(ch, bs4.Tag) and ch.name == "tr":
                    cll = ch.get("class", [])
                    if "row" in cll and scl in cll:
                        url = ch.td.a["href"]
                        m = self.cure.search(url)
                        if m is None: raise pageerror("Got weird chapter URL: %r" % url, page)
                        readerid = m.group(1)
                        name = ch.td.a.text
                        # The release-group name sits two cells to the right.
                        gname = nextel(nextel(ch.td)).text.strip()
                        cch.append((readerid, name, gname))
            cch.reverse()  # site lists newest first; we want oldest first
            groups = {}
            for n, (readerid, name, gname) in enumerate(cch):
                # Record each group's first index so group order is stable.
                groups.setdefault(gname, [n, []])[1].append((readerid, name))
            groups = sorted(groups.items(), key=lambda o: o[1][0])
            rgrp = []
            for n, (gname, (_, gch)) in enumerate(groups):
                ngrp = group(self, [(self, n)], gname, gname)
                for m, (readerid, name) in enumerate(gch):
                    ngrp.ch.append(chapter(ngrp, ngrp.stack + [(ngrp, m)], readerid, name, readerid))
                rgrp.append(ngrp)
            self.cch = rgrp
        return self.cch

    def altnames(self):
        # Scrape (and cache) the "Alt Names:" row from the info tables.
        # Raises pageerror when the row is malformed or missing entirely.
        if self.cnames is None:
            page = soupify(self.sess.fetch(self.url))
            cnames = None
            for tbl in page.findAll("table", attrs={"class": "ipb_table"}):
                if tbl.tbody is not None: tbl = tbl.tbody
                for tr in tbl.findAll("tr"):
                    if "Alt Names:" in tr.td.text:
                        nls = nextel(tr.td)
                        if nls.name != "td" or nls.span is None:
                            raise pageerror("Weird altnames table in " + self.id, page)
                        cnames = [nm.text.strip() for nm in nls.findAll("span")]
                        break
                if cnames is not None:
                    break
            if cnames is None:
                raise pageerror("Could not find altnames for " + self.id, page)
            self.cnames = cnames
        return self.cnames

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<batoto.manga %r>" % self.name
212
class credentials(object):
    """A Batoto username/password pair, optionally loaded from a profile file."""
    def __init__(self, username, password):
        self.username = username
        self.password = password

    @classmethod
    def fromfile(cls, path):
        """Build credentials from a profile file.

        Recognizes "username", "password" and base64-encoded "pass64"
        entries; raises ValueError when either half is missing.
        """
        username = None
        password = None
        with open(path) as fp:
            for words in profile.splitlines(fp):
                key = words[0]
                if key == "username":
                    username = words[1]
                elif key == "password":
                    password = words[1]
                elif key == "pass64":
                    import binascii
                    password = binascii.a2b_base64(words[1]).decode("utf8")
        if username is None or password is None:
            raise ValueError("Incomplete profile: " + path)
        return cls(username, password)

    @classmethod
    def default(cls):
        """Load credentials from the default profile path, or None if absent."""
        path = os.path.join(profile.confdir, "batoto")
        if not os.path.exists(path):
            return None
        return cls.fromfile(path)
240
class session(object):
    """An HTTP session against Batoto that keeps cookies and logs in on demand."""

    # Present a desktop-browser user agent on every request.
    useragent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/25.0.1364.160 Safari/537.22"
    # Extracts the displayed username from the "user_link" anchor text.
    rlre = re.compile(r"Welcome, (.*) ")

    def __init__(self, base, credentials):
        self.base = base
        self.creds = credentials
        self.jar = http.cookiejar.CookieJar()
        self.web = urllib.request.build_opener(urllib.request.HTTPCookieProcessor(self.jar))
        self.lastlogin = 0  # time of last login attempt, for rate-limiting

    def dologin(self, pre=None):
        """Log in as self.creds, signing out first if another user is active.

        *pre* may be an already-fetched, soupified front page, saving a
        refetch.  Raises pageerror when the login flow cannot be
        completed, and a plain Exception when called again within one
        minute of the previous attempt.
        """
        now = time.time()
        if now - self.lastlogin < 60:
            raise Exception("Too soon since last login attempt")
        if pre is None:
            with self.web.open(self.base) as hs:
                page = soupify(hs.read())
        else:
            page = pre

        cur = page.find("a", id="user_link")
        if cur:
            m = self.rlre.search(cur.text)
            if not m or m.group(1) != self.creds.username:
                # Signed in as somebody else: find and follow the sign-out link.
                outurl = None
                nav = page.find("div", id="user_navigation")
                if nav:
                    for li in nav.findAll("li"):
                        if li.a and "Sign Out" in li.a.string:
                            outurl = li.a["href"]
                if not outurl:
                    raise pageerror("Could not find logout URL", page)
                # BUGFIX: this used the misspelled attribute "self.wep",
                # which raised AttributeError whenever a logout was needed.
                with self.web.open(outurl) as hs:
                    hs.read()
                with self.web.open(self.base) as hs:
                    page = soupify(hs.read())
            else:
                return  # already logged in as the right user

        form = page.find("form", id="login")
        if not form and pre:
            # The caller-supplied page had no login form; refetch and retry once.
            return self.dologin()
        values = {}
        for el in form.findAll("input", type="hidden"):
            values[el["name"]] = el["value"]
        values["ips_username"] = self.creds.username
        values["ips_password"] = self.creds.password
        values["rememberMe"] = "1"
        values["anonymous"] = "1"
        req = urllib.request.Request(form["action"], urllib.parse.urlencode(values).encode("ascii"))
        req.add_header("User-Agent", self.useragent)
        with self.web.open(req) as hs:
            page = soupify(hs.read())
        for resp in page.findAll("p", attrs={"class": "message"}):
            if resp.strong and "You are now signed in" in resp.strong.string:
                break
        else:
            raise pageerror("Could not log in", page)
        self.lastlogin = now

    def open(self, url):
        """Open *url* through the cookie-carrying opener."""
        return self.web.open(url)

    def fetch(self, url, headers=None):
        """Fetch *url* and return the raw response body.

        Always sends our user-agent; *headers* is an optional mapping of
        extra request headers.
        """
        req = urllib.request.Request(url)
        req.add_header("User-Agent", self.useragent)
        if headers is not None:
            for k, v in headers.items():
                req.add_header(k, v)
        with self.open(req) as hs:
            return hs.read()

    def lfetch(self, url, ck):
        """Fetch *url* as soup, logging in and retrying once if ck(page) is false.

        Raises pageerror when the page still fails the check after login.
        """
        page = soupify(self.fetch(url))
        if not ck(page):
            self.dologin(pre=page)
            page = soupify(self.fetch(url))
            if not ck(page):
                raise pageerror("Could not verify login status despite having logged in", page)
        return page
323
class library(lib.library):
    # Top-level entry point for the Batoto backend: holds the logged-in
    # session and the language filter used when listing chapters.
    def __init__(self, *, creds=None):
        if creds is None:
            creds = credentials.default()
        self.base = "http://bato.to/"
        self.sess = session(self.base, creds)
        self.lang = "English"

    def byid(self, id):
        # Fetch a manga by its site ID; raises KeyError when the page
        # does not look like a comic page (no title header found).
        url = self.base + "comic/_/comics/" + id
        page = soupify(self.sess.fetch(url))
        title = page.find("h1", attrs={"class": "ipsType_pagetitle"})
        if title is None:
            raise KeyError(id)
        return manga(self, id, title.string.strip(), url)

    def _search(self, pars):
        # Walk the site's paginated search results, yielding manga
        # objects until no "show more" row remains.
        p = 1
        while True:
            _pars = dict(pars)
            _pars["p"] = str(p)
            # NOTE(review): search bypasses self.sess (no cookies / login)
            # — presumably search works anonymously; confirm.
            req = urllib.request.Request(self.base + "search?" + urllib.parse.urlencode(_pars))
            req.add_header("User-Agent", session.useragent)
            resp = urllib.request.urlopen(req)
            try:
                page = soupify(resp.read())
            finally:
                resp.close()
            rls = page.find("div", id="comic_search_results").table
            if rls.tbody is not None:
                rls = rls.tbody
            hasmore = False
            for child in rls.findAll("tr"):
                if child.th is not None: continue  # header row
                if child.get("id", "")[:11] == "comic_rowo_": continue  # auxiliary detail row
                if child.get("id") == "show_more_row":
                    hasmore = True
                    continue
                link = child.td.strong.a
                url = link["href"]
                m = self.rure.search(url)
                if m is None: raise Exception("Got weird manga URL: %r" % url)
                id = m.group(1)
                name = link.text.strip()
                yield manga(self, id, name, url)
            p += 1
            if not hasmore:
                break

    # Extracts the manga ID (last path component) from a comic URL.
    rure = re.compile(r"/comic/_/([^/]*)$")
    def search(self, expr):
        # Substring search ("name_cond": "c" = contains).
        return self._search({"name": expr, "name_cond": "c"})

    def byname(self, prefix):
        # Prefix search ("name_cond": "s" = starts-with); results whose
        # primary name does not match are retried against their alt-names,
        # yielding a manga renamed to the matching alt-name.
        for res in self._search({"name": prefix, "name_cond": "s"}):
            if res.name[:len(prefix)].lower() == prefix.lower():
                yield res
            else:
                for aname in res.altnames():
                    if aname[:len(prefix)].lower() == prefix.lower():
                        yield manga(self, res.id, aname, res.url)
                        break
                else:
                    # No name matched: drop the result silently (the
                    # debug prints below are intentionally disabled).
                    if False:
                        print("eliding " + res.name)
                        print(res.altnames())