Commit | Line | Data |
---|---|---|
e7cc7606 FT |
1 | import bs4 |
2 | from . import lib, htcache | |
3 | from urllib.parse import urljoin | |
soup = bs4.BeautifulSoup

def soupify(cont):
    """Parse fetched HTML content into a BeautifulSoup tree.

    Replaces the previous ``soupify = lambda ...`` binding (PEP 8 E731:
    don't assign a lambda to a name — use a def) with identical behavior.
    """
    return soup(cont)
50f7a215 FT |
6 | |
class page(lib.page):
    """A single page of a chapter; the image URL is resolved lazily."""

    def __init__(self, chapter, stack, n, url):
        self.stack = stack
        self.chapter = chapter
        self.manga = chapter.manga
        self.n = n
        self.id = str(n)
        # BUG FIX: the original used ``unicode(n)``, which does not exist in
        # Python 3 (this file imports urllib.parse, so it targets Python 3
        # and would raise NameError here). str() is the correct equivalent.
        self.name = "Page " + str(n)
        self.url = url
        # Cached image URL; populated on first call to iurl().
        self.ciurl = None

    def iurl(self):
        """Return the page's image URL, fetching and caching it on first use.

        Raises Exception when no <img id="picture"> can be found in any
        table row of the fetched page.
        """
        if self.ciurl is None:
            page = soupify(htcache.fetch(self.url))
            for tr in page.findAll("tr"):
                img = tr.find("img", id="picture")
                if img is not None:
                    # Resolve a possibly relative src against the page URL.
                    self.ciurl = urljoin(self.url, img["src"])
            if self.ciurl is None:
                raise Exception("parse error: could not find image url for %r" % self)
        return self.ciurl

    def open(self):
        """Open a standard image stream for this page's image."""
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.page %r.%r.%r>" % (self.manga.name, self.chapter.name, self.name)
37 | ||
class chapter(lib.pagelist):
    """One chapter of a manga; its page list is fetched lazily and cached."""

    def __init__(self, manga, stack, id, name, url):
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.url = url
        # Cached list of page objects; built on first pages() call.
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    def pages(self):
        """Fetch, cache and return this chapter's pages.

        Expects the chapter URL to point at page 1 ("<base>/1"); the page
        numbers are read from the pager's <select name="page"> options.
        Raises Exception when the URL does not have the expected shape.
        """
        if self.cpag is not None:
            return self.cpag
        if self.url[-2:] != "/1":
            raise Exception("parse error: unexpected first page url for %r" % self)
        # Strip the trailing "1" so page numbers can be joined onto the base.
        base = self.url[:-1]
        tree = soupify(htcache.fetch(self.url))
        pager = tree.find("div", attrs={"class": "pager"})
        selector = pager.find("select", attrs={"name": "page"})
        ret = []
        for idx, opt in enumerate(selector.findAll("option")):
            num = int(opt["value"])
            ret.append(page(self, self.stack + [(self, idx)], num, urljoin(base, str(num))))
        self.cpag = ret
        return ret

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.chapter %r.%r>" % (self.manga.name, self.name)
72 | ||
class manga(lib.manga):
    """One manga on the site; its chapter list is fetched lazily and cached."""

    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        # Cached list of chapter objects; built on first ch() call.
        self.cch = None
        self.stack = []

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    def ch(self):
        """Fetch, cache and return this manga's chapters.

        Raises Exception when no "Chapter List" post can be located.
        """
        if self.cch is None:
            page = soupify(htcache.fetch(self.url))
            cls = None
            for div in page.findAll("div", attrs={"class": "post"}):
                # ROBUSTNESS FIX: bs4's .string is None when the <h3> holds
                # nested markup; ``"..." in None`` raises TypeError, so the
                # None case must be guarded explicitly.
                if div.h3 is not None and div.h3.string is not None and "Chapter List" in div.h3.string:
                    cls = div
                    break
            if cls is None:
                raise Exception("parse error: no chapter list found for %r" % self)
            cch = []
            for tr in cls.table.findAll("tr"):
                # The link lives in the second column of each table row.
                lcol = tr.findAll("td")[1]
                if lcol.a is None: continue
                link = lcol.a
                url = link["href"]
                name = link["title"]
                # The site exposes no stable chapter id, so the name is used.
                cid = name
                cch.append(chapter(self, [(self, len(cch))], cid, name, url))
            self.cch = cch
        return self.cch

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.manga %r>" % self.name
115 | ||
class library(lib.library):
    """Entry point for browsing and searching raw.senmanga.com."""

    def __init__(self):
        self.base = "http://raw.senmanga.com/"

    def byid(self, id):
        """Return the manga with the given site id.

        Raises KeyError when no matching title link is found on the page.
        """
        url = urljoin(self.base, id + "/")
        page = soupify(htcache.fetch(url))
        name = None
        for div in page.findAll("div", id="post"):
            if div.h1 is not None and div.h1.a is not None:
                curl = div.h1.a["href"]
                # Only accept links of the form ".../<id>/".
                if curl[-1] != '/' or curl.rfind('/', 0, -1) < 0: continue
                if curl[curl.rindex('/', 0, -1) + 1:-1] != id: continue
                name = div.h1.a.string
        if name is None:
            raise KeyError(id)
        return manga(self, id, name, url)

    def __iter__(self):
        """Yield every manga listed on the site's index page."""
        page = soupify(htcache.fetch(self.base + "Manga/"))
        for part in page.find("div", attrs={"class": "post"}).findAll("table"):
            for row in part.findAll("tr"):
                link = row.findAll("td")[1].a
                if link is None:
                    continue
                url = link["href"]
                name = link.string
                # Expect site-relative urls of the form "/<id>/".
                if len(url) < 3 or url[:1] != '/' or url[-1:] != '/':
                    continue
                id = url[1:-1]
                yield manga(self, id, name, urljoin(self.base, url))

    def byname(self, prefix):
        """Yield mangas whose name starts with prefix (case-insensitive)."""
        # BUG FIX: the original tested ``isinstance(prefix, unicode)``;
        # ``unicode`` is Python 2 only and raises NameError on Python 3
        # (which this file targets — it imports urllib.parse). Accept str
        # directly and decode bytes input for backward compatibility.
        if isinstance(prefix, bytes):
            prefix = prefix.decode("utf8")
        prefix = prefix.lower()
        for manga in self:
            if manga.name.lower()[:len(prefix)] == prefix:
                yield manga

    def search(self, expr):
        """Yield mangas whose name contains expr (case-insensitive)."""
        # BUG FIX: same Python 2 ``unicode`` check as in byname().
        if isinstance(expr, bytes):
            expr = expr.decode("utf8")
        expr = expr.lower()
        for manga in self:
            if expr in manga.name.lower():
                yield manga