python: Rewrote SSI handler as a reusable module, and in Python3.
author Fredrik Tolf <fredrik@dolda2000.com>
Sat, 14 Jul 2012 19:59:39 +0000 (21:59 +0200)
committer Fredrik Tolf <fredrik@dolda2000.com>
Sat, 14 Jul 2012 19:59:39 +0000 (21:59 +0200)
python/serve-ssi [deleted file]
python/setup.py
python3/ashd/ssi.py [new file with mode: 0644]
python3/serve-ssi [new file with mode: 0755]
python3/setup.py

diff --git a/python/serve-ssi b/python/serve-ssi
deleted file mode 100755 (executable)
index abdb701..0000000
+++ /dev/null
@@ -1,172 +0,0 @@
-#!/usr/bin/python
-
-# This program is quite incomplete. I might complete it with more
-# features as I need them. It will probably never be entirely
-# compliant with Apache's version due to architectural differences.
-
-import sys, os, time
-
-def htmlquote(text):
-    ret = ""
-    for c in text:
-        if c == '&':
-            ret += "&amp;"
-        elif c == '<':
-            ret += "&lt;"
-        elif c == '>':
-            ret += "&gt;"
-        elif c == '"':
-            ret += "&quot;"
-        else:
-            ret += c
-    return ret
-
-def simpleerror(out, code, title, msg):
-    html = """<?xml version="1.0" encoding="US-ASCII"?>
-<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
-<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US" xml:lang="en-US">
-<head>
-<title>%s</title>
-</head>
-<body>
-<h1>%s</h1>
-<p>%s</p>
-</body>
-</html>
-""" % (title, title, htmlquote(msg))
-    out.write("HTTP/1.1 %d %s\n" % (code, title))
-    out.write("Content-Type: text/html\n")
-    out.write("Content-Length: %d\n" % len(html))
-    out.write("\n")
-    out.write(html)
-
-ssivars = {}
-
-def parsecmd(line, p):
-    try:
-        while line[p].isspace(): p += 1
-        cmd = ""
-        while not line[p].isspace():
-            cmd += line[p]
-            p += 1
-        pars = {}
-        while True:
-            while line[p].isspace(): p += 1
-            if line[p:p + 3] == "-->":
-                return cmd, pars, p + 3
-            key = ""
-            while line[p].isalnum():
-                key += line[p]
-                p += 1
-            if key == "":
-                return None, {}, p
-            while line[p].isspace(): p += 1
-            if line[p] != '=':
-                continue
-            p += 1
-            while line[p].isspace(): p += 1
-            q = line[p]
-            if q != '"' and q != "'" and q != '`':
-                continue
-            val = ""
-            p += 1
-            while line[p] != q:
-                val += line[p]
-                p += 1
-            p += 1
-            pars[key] = val
-    except IndexError:
-        return None, {}, len(line)
-
-class ssifile(object):
-    def __init__(self, s, url, path):
-        self.s = s
-        self.url = url
-        self.path = path
-
-    def close(self):
-        self.s.close();
-
-    def initvars(self, vars):
-        now = time.time()
-        vars["DOCUMENT_NAME"] = os.path.basename(self.path)
-        vars["DATE_GMT"] = time.asctime(time.gmtime(now))
-        vars["DATE_LOCAL"] = time.asctime(time.localtime(now))
-        vars["LAST_MODIFIED"] = time.asctime(time.localtime(os.stat(self.path).st_mtime))
-
-    def includefile(self, path):
-        path = os.path.join(os.path.dirname(self.path), path)
-        try:
-            f = ssifile(open(path), url, path)
-        except Exception:
-            sys.stderr.write("serve-ssi: included file not found: %s\n" % path)
-            return
-        try:
-            f.process()
-        finally:
-            f.close
-
-    def docmd(self, cmd, pars):
-        if cmd == "include":
-            if "file" in pars:
-                self.includefile(pars["file"])
-            elif "virtual" in pars:
-                # XXX: For now, just include the file as-is. Change
-                # when necessary.
-                self.includefile(pars["virtual"])
-        elif cmd == "echo":
-            enc = htmlquote
-            if "encoding" in pars:
-                if pars["encoding"] == "entity":
-                    enc = htmlquote
-            if "var" in pars:
-                if pars["var"] in ssivars:
-                    sys.stdout.write(enc(ssivars[pars["var"]]))
-        else:
-            sys.stderr.write("serve-ssi: unknown SSI command: %s\n" % cmd)
-
-    def process(self):
-        for line in self.s:
-            p = 0
-            while True:
-                p2 = line.find("<!--#", p)
-                if p2 < 0:
-                    sys.stdout.write(line[p:])
-                    break
-                sys.stdout.write(line[p:p2])
-                cmd, pars, p = parsecmd(line, p2 + 5)
-                if cmd is not None:
-                    self.docmd(cmd, pars)
-
-if len(sys.argv) < 4:
-    sys.stderr.write("usage: serve-ssi METHOD URL REST\n")
-    sys.exit(1)
-method, url, rest = sys.argv[1:]
-path = os.getenv("REQ_X_ASH_FILE")
-if path is None:
-    sys.stderr.write("serve-ssi: must be called with the X-Ash-File header\n")
-    sys.exit(1)
-if rest != "":
-    simpleerror(sys.stdout, 404, "Not Found", "The resource specified by the URL does not exist.")
-    sys.exit(0)
-
-try:
-    try:
-        f = ssifile(open(path), url, path)
-    except Exception:
-        simpleerror(sys.stdout, 500, "Server Error", "The server could not access its data.")
-        sys.exit(1)
-    try:
-        sys.stdout.write("HTTP/1.1 200 OK\n")
-        sys.stdout.write("Content-Type: text/html\n")
-        sys.stdout.write("\n")
-        f.initvars(ssivars)
-        f.process()
-    finally:
-        f.close()
-except IOError:
-    # This is for catching EPIPE, when the client has closed the
-    # connection. This shouldn't *really* be necessary since the
-    # process should terminate with SIGPIPE, but apparently Python
-    # ignores that.
-    sys.exit(1)
index 2409e19..ed6f192 100755 (executable)
@@ -13,5 +13,5 @@ setup(name = "ashd-py",
       url = "http://www.dolda2000.com/~fredrik/ashd/",
       ext_modules = [htlib],
       packages = ["ashd"],
-      scripts = ["ashd-wsgi", "scgi-wsgi", "serve-ssi", "htredir"],
+      scripts = ["ashd-wsgi", "scgi-wsgi", "htredir"],
       license = "GPL-3")
diff --git a/python3/ashd/ssi.py b/python3/ashd/ssi.py
new file mode 100644 (file)
index 0000000..ff59e6e
--- /dev/null
@@ -0,0 +1,182 @@
+"""Module for handling server-side-include formatted files
+
+This module is quite incomplete. I might complete it with more
+features as I need them. It will probably never be entirely compliant
+with Apache's version due to architectural differences.
+"""
+
+import sys, os, io, time, logging, functools
+from . import wsgiutil
+
+log = logging.getLogger("ssi")
+
+def parsecmd(text, p):
+    """Parse one SSI directive from `text', starting at index `p'.
+
+    `p' should point just past the opening `<!--#'. Returns a tuple
+    (cmd, pars, next), where `cmd' is the directive name, `pars' is a
+    dict of its key="value" parameters and `next' is the index just
+    past the closing `-->'. If the directive is malformed, `cmd' is
+    None and `next' is where parsing stopped; if the text ends before
+    the directive does, `next' is len(text).
+    """
+    try:
+        while text[p].isspace(): p += 1
+        cmd = ""
+        while not text[p].isspace():
+            cmd += text[p]
+            p += 1
+        pars = {}
+        while True:
+            while text[p].isspace(): p += 1
+            if text[p:p + 3] == "-->":
+                return cmd, pars, p + 3
+            key = ""
+            while text[p].isalnum():
+                key += text[p]
+                p += 1
+            if key == "":
+                return None, {}, p
+            while text[p].isspace(): p += 1
+            if text[p] != '=':
+                continue
+            p += 1
+            while text[p].isspace(): p += 1
+            q = text[p]
+            if q != '"' and q != "'" and q != '`':
+                continue
+            val = ""
+            p += 1
+            while text[p] != q:
+                val += text[p]
+                p += 1
+            p += 1
+            pars[key] = val
+    except IndexError:
+        # Ran off the end of the text in the middle of a directive.
+        return None, {}, len(text)
+
+class context(object):
+    """State for one rendering pass: output stream and SSI variables.
+
+    `out' is a text stream with a write() method, and `root' is the
+    ssifile being served, whose path and mtime seed the variables.
+    """
+    def __init__(self, out, root):
+        self.out = out
+        now = time.time()
+        self.vars = {
+            "DOCUMENT_NAME": os.path.basename(root.path),
+            "DATE_GMT": time.asctime(time.gmtime(now)),
+            "DATE_LOCAL": time.asctime(time.localtime(now)),
+            "LAST_MODIFIED": time.asctime(time.localtime(root.mtime)),
+        }
+
+class ssifile(object):
+    """An SSI file, parsed once into a list of callable parts.
+
+    Each part takes a context and writes its output to ctx.out;
+    process() calls them in order.
+    """
+    def __init__(self, path):
+        self.path = path
+        with open(path) as fp:
+            # Take the mtime from the descriptor actually being read, so
+            # that the cached timestamp matches the parsed contents.
+            self.mtime = os.fstat(fp.fileno()).st_mtime
+            self.parts = self.parse(fp.read())
+
+    def text(self, text, ctx):
+        """Write a literal chunk of the file to the output."""
+        ctx.out.write(text)
+
+    def echo(self, var, enc, ctx):
+        """Write variable `var', passed through `enc', if it is set."""
+        if var in ctx.vars:
+            ctx.out.write(enc(ctx.vars[var]))
+
+    def include(self, path, ctx):
+        """Process the file at `path', relative to this file's directory."""
+        try:
+            nest = getfile(os.path.join(os.path.dirname(self.path), path))
+        except Exception:
+            log.warning("%s: could not find included file %s", self.path, path)
+            return
+        nest.process(ctx)
+
+    def process(self, ctx):
+        """Render the file into ctx.out."""
+        for part in self.parts:
+            part(ctx)
+
+    def resolvecmd(self, cmd, pars):
+        """Turn a parsed directive into a callable part.
+
+        Returns None, after logging a warning, for directives that are
+        unknown or lack a required parameter.
+        """
+        if cmd == "include":
+            # XXX: For now, `virtual' just includes the file as-is.
+            # Change when necessary.
+            for key in ("file", "virtual"):
+                if key in pars:
+                    return functools.partial(self.include, pars[key])
+            log.warning("%s: invalid `include' directive", self.path)
+            return None
+        elif cmd == "echo":
+            if "var" not in pars:
+                log.warning("%s: invalid `echo' directive", self.path)
+                return None
+            # Only entity encoding is implemented, so every `encoding'
+            # value currently selects the same encoder.
+            enc = wsgiutil.htmlquote
+            return functools.partial(self.echo, pars["var"], enc)
+        else:
+            log.warning("%s: unknown SSI command `%s'", self.path, cmd)
+            return None
+
+    def parse(self, text):
+        """Split `text' into literal-text and directive parts."""
+        ret = []
+        p = 0
+        while True:
+            p2 = text.find("<!--#", p)
+            if p2 < 0:
+                ret.append(functools.partial(self.text, text[p:]))
+                return ret
+            ret.append(functools.partial(self.text, text[p:p2]))
+            cmd, pars, p = parsecmd(text, p2 + 5)
+            if cmd is not None:
+                cmd = self.resolvecmd(cmd, pars)
+                if cmd is not None:
+                    ret.append(cmd)
+
+filecache = {}
+
+def getfile(path):
+    """Return the parsed ssifile for `path', using a cache.
+
+    The file is (re)parsed if it is not cached or if its mtime
+    differs from that of the cached copy.
+    """
+    path = os.path.normpath(path)
+    cf = filecache.get(path)
+    if cf is None or os.stat(path).st_mtime != cf.mtime:
+        cf = filecache[path] = ssifile(path)
+    return cf
+
+def wsgi(env, startreq):
+    """WSGI application serving env["SCRIPT_FILENAME"] as an SSI file."""
+    try:
+        if env["PATH_INFO"] != "":
+            return wsgiutil.simpleerror(env, startreq, 404, "Not Found", "The resource specified by the URL does not exist.")
+        root = getfile(env["SCRIPT_FILENAME"])
+        buf = io.StringIO()
+        root.process(context(buf, root))
+    except Exception:
+        # Log the traceback; otherwise the cause of the 500 is lost.
+        log.exception("error while handling SSI")
+        return wsgiutil.simpleerror(env, startreq, 500, "Internal Error", "The server encountered an unexpected error while handling SSI.")
+    ret = buf.getvalue().encode("utf8")
+    startreq("200 OK", [("Content-Type", "text/html; charset=UTF-8"), ("Content-Length", str(len(ret)))])
+    return [ret]
diff --git a/python3/serve-ssi b/python3/serve-ssi
new file mode 100755 (executable)
index 0000000..dc99f77
--- /dev/null
@@ -0,0 +1,71 @@
+#!/usr/bin/python3
+
+import sys, os, io, logging
+import ashd.ssi, ashd.wsgiutil
+
+def simpleerror(out, code, title, msg):
+    """Write an HTTP error response with a small XHTML page to `out'.
+
+    `msg' is HTML-quoted; `title' is used verbatim both in the status
+    line and in the page.
+    """
+    html = """<?xml version="1.0" encoding="US-ASCII"?>
+<!DOCTYPE html PUBLIC "-//W3C//DTD XHTML 1.0 Strict//EN" "http://www.w3.org/TR/xhtml1/DTD/xhtml1-strict.dtd">
+<html xmlns="http://www.w3.org/1999/xhtml" lang="en-US" xml:lang="en-US">
+<head>
+<title>%s</title>
+</head>
+<body>
+<h1>%s</h1>
+<p>%s</p>
+</body>
+</html>
+""" % (title, title, ashd.wsgiutil.htmlquote(msg))
+    out.write("HTTP/1.1 %d %s\n" % (code, title))
+    out.write("Content-Type: text/html\n")
+    out.write("Content-Length: %d\n" % len(html))
+    out.write("\n")
+    out.write(html)
+
+if len(sys.argv) < 4:
+    sys.stderr.write("usage: serve-ssi METHOD URL REST\n")
+    sys.exit(1)
+# Slice explicitly so that surplus arguments do not break the unpacking.
+method, url, rest = sys.argv[1:4]
+path = os.getenv("REQ_X_ASH_FILE")
+if path is None:
+    simpleerror(sys.stdout, 500, "Server Error", "The server is erroneously configured.")
+    sys.stderr.write("serve-ssi: must be called with the X-Ash-File header\n")
+    sys.exit(1)
+if rest != "":
+    simpleerror(sys.stdout, 404, "Not Found", "The resource specified by the URL does not exist.")
+    sys.exit(0)
+
+class encwrap(io.TextIOWrapper):
+    """Text wrapper whose close() is a no-op, leaving the wrapped stream open."""
+    def close(self):
+        pass
+
+logging.basicConfig(format="serve-ssi: %(message)s")
+try:
+    try:
+        f = ashd.ssi.getfile(path)
+    except Exception as e:
+        sys.stderr.write("serve-ssi: %s\n" % e)
+        simpleerror(sys.stdout, 500, "Server Error", "The server could not access its data.")
+        sys.exit(1)
+    sys.stdout.write("HTTP/1.1 200 OK\n")
+    sys.stdout.write("Content-Type: text/html; charset=UTF-8\n")
+    sys.stdout.write("\n")
+    # Flush the headers before writing the body through a separate
+    # wrapper around the same underlying buffer.
+    sys.stdout.flush()
+    wrap = encwrap(sys.stdout.buffer, encoding="utf8")
+    f.process(ashd.ssi.context(wrap, f))
+    wrap.flush()
+except IOError:
+    # This is for catching EPIPE, when the client has closed the
+    # connection. This shouldn't *really* be necessary since the
+    # process should terminate with SIGPIPE, but apparently Python
+    # ignores that.
+    sys.exit(1)
index 049f93a..79c30bb 100755 (executable)
@@ -13,5 +13,5 @@ setup(name = "ashd-py3",
       url = "http://www.dolda2000.com/~fredrik/ashd/",
       ext_modules = [htlib],
       packages = ["ashd"],
-      scripts = ["ashd-wsgi3", "scgi-wsgi3"],
+      scripts = ["ashd-wsgi3", "scgi-wsgi3", "serve-ssi"],
       license = "GPL-3")