Merge branch 'master' of git.dolda2000.com:/srv/git/r/automanga
author Fredrik Tolf <fredrik@dolda2000.com>
Sun, 4 Sep 2016 13:48:21 +0000 (15:48 +0200)
committer Fredrik Tolf <fredrik@dolda2000.com>
Sun, 4 Sep 2016 13:48:21 +0000 (15:48 +0200)
manga/batoto.py
manga/htcache.py

index 92c1032..42edfac 100644 (file)
@@ -289,6 +289,7 @@ class session(object):
         values["rememberMe"] = "1"
         values["anonymous"] = "1"
         req = urllib.request.Request(form["action"], urllib.parse.urlencode(values).encode("ascii"))
+        req.add_header("User-Agent", self.useragent)
         with self.web.open(req) as hs:
             page = soupify(hs.read())
         for resp in page.findAll("p", attrs={"class": "message"}):
@@ -301,8 +302,10 @@ class session(object):
     def open(self, url):
         return self.web.open(url)
 
+    useragent = "Mozilla/5.0 (X11; Linux x86_64) AppleWebKit/537.22 (KHTML, like Gecko) Chrome/25.0.1364.160 Safari/537.22"
     def fetch(self, url, headers=None):
         req = urllib.request.Request(url)
+        req.add_header("User-Agent", self.useragent)
         if headers is not None:
             for k, v in headers.items():
                 req.add_header(k, v)
@@ -339,7 +342,9 @@ class library(lib.library):
         while True:
             _pars = dict(pars)
             _pars["p"] = str(p)
-            resp = urllib.request.urlopen(self.base + "search?" + urllib.parse.urlencode(_pars))
+            req = urllib.request.Request(self.base + "search?" + urllib.parse.urlencode(_pars))
+            req.add_header("User-Agent", session.useragent)
+            resp = urllib.request.urlopen(req)
             try:
                 page = soupify(resp.read())
             finally:
index 2aa594e..a53aa45 100644 (file)
@@ -2,6 +2,9 @@ import os, hashlib, urllib.request, time
 from . import profile
 pj = os.path.join
 
+class notfound(Exception):
+    pass
+
 class cache(object):
     def __init__(self, dir):
         self.dir = dir
@@ -11,9 +14,18 @@ class cache(object):
         n.update(url.encode("ascii"))
         return n.hexdigest()
 
-    def miss(self, url):
+    def open(self, url):
         req = urllib.request.Request(url, headers={"User-Agent": "automanga/1"})
-        with urllib.request.urlopen(req) as s:
+        return urllib.request.urlopen(req)
+
+    def miss(self, url):
+        try:
+            s = self.open(url)
+        except urllib.error.HTTPError as exc:
+            if exc.code == 404:
+                raise notfound(url)
+            raise
+        with s:
             if s.headers.get("content-encoding") == "gzip":
                 import gzip, io
                 return gzip.GzipFile(fileobj=io.BytesIO(s.read()), mode="r").read()