From: Fredrik Tolf Date: Sun, 4 Sep 2016 13:48:21 +0000 (+0200) Subject: Merge branch 'master' of git.dolda2000.com:/srv/git/r/automanga X-Git-Url: http://www.dolda2000.com/gitweb/?a=commitdiff_plain;h=7b75f5a132d06c1a1d1688a559d36cf737bcac3a;hp=49f0c16f7fe5c160fa64793dd03ada5386bf7c4f;p=automanga.git Merge branch 'master' of git.dolda2000.com:/srv/git/r/automanga --- diff --git a/manga/batoto.py b/manga/batoto.py index 92c1032..42edfac 100644 --- a/manga/batoto.py +++ b/manga/batoto.py @@ -289,6 +289,7 @@ class session(object): values["rememberMe"] = "1" values["anonymous"] = "1" req = urllib.request.Request(form["action"], urllib.parse.urlencode(values).encode("ascii")) + req.add_header("User-Agent", self.useragent) with self.web.open(req) as hs: page = soupify(hs.read()) for resp in page.findAll("p", attrs={"class": "message"}): @@ -301,8 +302,10 @@ class session(object): def open(self, url): return self.web.open(url) + useragent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/25.0.1364.160 Safari/537.22" def fetch(self, url, headers=None): req = urllib.request.Request(url) + req.add_header("User-Agent", self.useragent) if headers is not None: for k, v in headers.items(): req.add_header(k, v) @@ -339,7 +342,9 @@ class library(lib.library): while True: _pars = dict(pars) _pars["p"] = str(p) - resp = urllib.request.urlopen(self.base + "search?" + urllib.parse.urlencode(_pars)) + req = urllib.request.Request(self.base + "search?" + urllib.parse.urlencode(_pars)) + req.add_header("User-Agent", session.useragent) + resp = urllib.request.urlopen(req) try: page = soupify(resp.read()) finally: diff --git a/manga/htcache.py b/manga/htcache.py index 2aa594e..a53aa45 100644 --- a/manga/htcache.py +++ b/manga/htcache.py @@ -2,6 +2,9 @@ import os, hashlib, urllib.request, time from . import profile pj = os.path.join +class notfound(Exception): + pass + class cache(object): def __init__(self, dir): self.dir = dir @@ -11,9 +14,18 @@ class cache(object): n.update(url.encode("ascii")) return n.hexdigest() - def miss(self, url): + def open(self, url): req = urllib.request.Request(url, headers={"User-Agent": "automanga/1"}) - with urllib.request.urlopen(req) as s: + return urllib.request.urlopen(req) + + def miss(self, url): + try: + s = self.open(url) + except urllib.error.HTTPError as exc: + if exc.code == 404: + raise notfound(url) + raise + with s: if s.headers.get("content-encoding") == "gzip": import gzip, io return gzip.GzipFile(fileobj=io.BytesIO(s.read()), mode="r").read()