# Manga scraper backend for http://raw.senmanga.com/.  HTML is fetched
# through the local htcache module and parsed with BeautifulSoup 3.
import BeautifulSoup, urlparse
import lib, htcache

soup = BeautifulSoup.BeautifulSoup
# Parse a document, converting HTML entities into unicode characters.
soupify = lambda cont: soup(cont, convertEntities=soup.HTML_ENTITIES)

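# Illustrative only (not in the original module): with entity conversion
# enabled, BeautifulSoup 3 maps character entities to unicode, so
# soupify("<b>&eacute;</b>").b.string should yield u"\xe9".
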
class page(lib.page):
    def __init__(self, chapter, stack, n, url):
        self.stack = stack
        self.chapter = chapter
        self.manga = chapter.manga
        self.n = n
        self.id = str(n)
        self.name = u"Page " + unicode(n)
        self.url = url
        self.ciurl = None

    def iurl(self):
        # Resolve the page's image URL, caching the result.  The viewer
        # marks the image with id="picture" inside one of the table rows.
        if self.ciurl is None:
            page = soupify(htcache.fetch(self.url))
            for tr in page.findAll("tr"):
                img = tr.find("img", id="picture")
                if img is not None:
                    self.ciurl = urlparse.urljoin(self.url, img["src"].encode("us-ascii"))
            if self.ciurl is None:
                raise Exception("parse error: could not find image url for %r" % self)
        return self.ciurl

    def open(self):
        return lib.stdimgstream(self.iurl())

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.page %r.%r.%r>" % (self.manga.name, self.chapter.name, self.name)

class chapter(lib.pagelist):
    def __init__(self, manga, stack, id, name, url):
        self.stack = stack
        self.manga = manga
        self.id = id
        self.name = name
        self.url = url
        self.cpag = None

    def __getitem__(self, i):
        return self.pages()[i]

    def __len__(self):
        return len(self.pages())

    def pages(self):
        # Enumerate the chapter's pages, caching the result.  The chapter
        # URL is expected to point at its first page (ending in "/1");
        # sibling pages share the same base and are listed in the pager's
        # <select name="page"> options.
        if self.cpag is None:
            if self.url[-2:] != "/1":
                raise Exception("parse error: unexpected first page url for %r" % self)
            base = self.url[:-1]
            pg = soupify(htcache.fetch(self.url))
            pag = []
            for opt in pg.find("div", attrs={"class": "pager"}).find("select", attrs={"name": "page"}).findAll("option"):
                n = int(opt["value"])
                url = urlparse.urljoin(base, str(n))
                pag.append(page(self, self.stack + [(self, len(pag))], n, url))
            self.cpag = pag
        return self.cpag

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.chapter %r.%r>" % (self.manga.name, self.name)

class manga(lib.manga):
    def __init__(self, lib, id, name, url):
        self.lib = lib
        self.id = id
        self.name = name
        self.url = url
        self.cch = None
        self.stack = []

    def __getitem__(self, i):
        return self.ch()[i]

    def __len__(self):
        return len(self.ch())

    def ch(self):
        # Parse the chapter list, caching the result.  The list lives in
        # the "post" div headed "Chapter List"; each table row's second
        # cell links a chapter, whose title attribute supplies both the
        # chapter's name and (UTF-8 encoded) its id.
        if self.cch is None:
            page = soupify(htcache.fetch(self.url))
            cls = None
            for div in page.findAll("div", attrs={"class": "post"}):
                if div.h3 is not None and u"Chapter List" in div.h3.string:
                    cls = div
                    break
            if cls is None:
                raise Exception("parse error: no chapter list found for %r" % self)
            cch = []
            for tr in cls.table.findAll("tr"):
                lcol = tr.findAll("td")[1]
                if lcol.a is None: continue
                link = lcol.a
                url = link["href"].encode("us-ascii")
                name = link["title"]
                cid = name.encode("utf-8")
                cch.append(chapter(self, [(self, len(cch))], cid, name, url))
            self.cch = cch
        return self.cch

    def __str__(self):
        return self.name

    def __repr__(self):
        return "<rawsen.manga %r>" % self.name

class library(lib.library):
    def __init__(self):
        self.base = "http://raw.senmanga.com/"

    def byid(self, id):
        url = urlparse.urljoin(self.base, id + "/")
        page = soupify(htcache.fetch(url))
        # Find the heading link whose URL's final path component matches
        # the requested id; its text is the manga's name.
        name = None
        for div in page.findAll("div", attrs={"class": "post"}):
            if div.h2 is not None and div.h2.a is not None:
                curl = div.h2.a["href"].encode("us-ascii")
                if curl[-1] != '/' or curl.rfind('/', 0, -1) < 0: continue
                if curl[curl.rindex('/', 0, -1) + 1:-1] != id: continue
                name = div.h2.a.string
        if name is None:
            raise KeyError(id)
        return manga(self, id, name, url)

    def __iter__(self):
        # Walk the site's directory listing, yielding every manga whose
        # link has the expected "/<id>/" form.
        page = soupify(htcache.fetch(self.base + "Manga/"))
        for part in page.find("div", attrs={"class": "post"}).findAll("table"):
            for row in part.findAll("tr"):
                link = row.findAll("td")[1].a
                if link is None:
                    continue
                url = link["href"].encode("us-ascii")
                name = link.string
                if len(url) < 3 or url[:1] != '/' or url[-1:] != '/':
                    continue
                id = url[1:-1]
                yield manga(self, id, name, urlparse.urljoin(self.base, url))

    def byname(self, prefix):
        # Yield each manga whose name begins with prefix, case-insensitively.
        if not isinstance(prefix, unicode):
            prefix = prefix.decode("utf8")
        prefix = prefix.lower()
        for manga in self:
            if manga.name.lower()[:len(prefix)] == prefix:
                yield manga

    def search(self, expr):
        # Yield each manga whose name contains expr, case-insensitively.
        if not isinstance(expr, unicode):
            expr = expr.decode("utf8")
        expr = expr.lower()
        for manga in self:
            if expr in manga.name.lower():
                yield manga
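
# A minimal usage sketch, not part of the original module.  It assumes
# this file is importable alongside the local lib and htcache modules and
# that raw.senmanga.com is reachable; the search term is only an example.
if __name__ == "__main__":
    lbr = library()
    for mng in lbr.search("one piece"):  # hypothetical search term
        print repr(mng)
        if len(mng) > 0:                 # fetches and caches the chapter list
            pg = mng[0][0]               # first page of the first chapter
            print pg.iurl()              # resolved image URL for that page
        break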