From fc2aa563ea22418e23abe89285c7a87c1ee90c5b Mon Sep 17 00:00:00 2001 From: Fredrik Tolf Date: Thu, 14 Apr 2011 12:18:46 +0200 Subject: [PATCH 01/16] python: Documented the ashd.wsgidir module. --- python/ashd/wsgidir.py | 75 ++++++++++++++++++++++++++++++++++++++++++++++++++ 1 file changed, 75 insertions(+) diff --git a/python/ashd/wsgidir.py b/python/ashd/wsgidir.py index 83d96a0..6080d91 100644 --- a/python/ashd/wsgidir.py +++ b/python/ashd/wsgidir.py @@ -1,7 +1,61 @@ +"""WSGI handler for serving chained WSGI modules from physical files + +The WSGI handler in this module examines the SCRIPT_FILENAME variable +of the requests it handles -- that is, the physical file corresponding +to the request, as determined by the webserver -- determining what to +do with the request based on the extension of that file. + +By default, it handles files named `.wsgi' by compiling them into +Python modules and using them, in turn, as chained WSGI handlers, but +handlers for other extensions can be installed as well. + +When handling `.wsgi' files, the compiled modules are cached and +reused until the file is modified, in which case the previous module +is discarded and the new file contents are loaded into a new module in +its place. When chaining such modules, an object named `wmain' is +first looked for and called with no arguments if found. The object it +returns is then used as the WSGI application object for that module, +which is reused until the module is reloaded. If `wmain' is not found, +an object named `application' is looked for instead. If found, it is +used directly as the WSGI application object. + +This module itself contains both an `application' and a `wmain' +object. If this module is used by ashd-wsgi(1) or scgi-wsgi(1) so that +its wmain function is called, arguments can be specified to it to +install handlers for other file extensions. 
Such arguments take the +form `.EXT=MODULE.HANDLER', where EXT is the file extension to be +handled, and the MODULE.HANDLER string is treated by splitting it +along its last constituent dot. The part left of the dot is the name +of a module which is imported, and the part right of the dot is the +name of an object in that module, which should be a callable of three +arguments. When files of the given extension are handled, that +callable is called with the file's absolute path, the WSGI environment +and the WSGI `start_response' function, in that order. For example, +the argument `.fpy=my.module.foohandler' can be given to pass requests +for `.fpy' files to the function `foohandler' in the module +`my.module' (which must, of course, be importable). When writing such +handler functions, you will probably want to use the getmod() function +in this module. +""" + import os, threading, types import wsgiutil +__all__ = ["application", "wmain", "getmod", "cachedmod"] + class cachedmod(object): + """Cache entry for modules loaded by getmod() + + Instances of this class are returned by the getmod() + function. They contain three data attributes: + * mod - The loaded module + * lock - A threading.Lock object, which can be used for + manipulating this instance in a thread-safe manner + * mtime - The time the file was last modified + + Additional data attributes can be arbitrarily added for recording + any meta-data about the module. + """ def __init__(self, mod, mtime): self.lock = threading.Lock() self.mod = mod @@ -21,6 +75,18 @@ def mangle(path): return ret def getmod(path): + """Load the given file as a module, caching it appropriately + + The given file is loaded and compiled into a Python module. The + compiled module is cached and returned upon subsequent requests + for the same file, unless the file has changed (as determined by + its mtime), in which case the cached module is discarded and the + new file contents are reloaded in its place. 
+ + The return value is an instance of the cachedmod class, which can + be used for locking purposes and for storing arbitrary meta-data + about the module. See its documentation for details. + """ sb = os.stat(path) cachelock.acquire() try: @@ -73,6 +139,10 @@ def addext(ext, handler): exts[ext] = getattr(mod, hname) def application(env, startreq): + """WSGI handler function + + Handles WSGI requests as per the module documentation. + """ if not "SCRIPT_FILENAME" in env: return wsgiutil.simpleerror(env, startreq, 500, "Internal Error", "The server is erroneously configured.") path = env["SCRIPT_FILENAME"] @@ -86,6 +156,11 @@ def application(env, startreq): return(exts[ext](path, env, startreq)) def wmain(*argv): + """Main function for ashd(7)-compatible WSGI handlers + + Returns the `application' function. If any arguments are given, + they are parsed according to the module documentation. + """ for arg in argv: if arg[0] == '.': p = arg.index('=') -- 2.11.0 From e66efe5f71c2bb6182055dffc9beefd0373ff3ad Mon Sep 17 00:00:00 2001 From: Fredrik Tolf Date: Thu, 14 Apr 2011 12:20:47 +0200 Subject: [PATCH 02/16] python: Added __all__ to ashd.proto and ashd.util. --- python/ashd/proto.py | 2 ++ python/ashd/util.py | 2 ++ 2 files changed, 4 insertions(+) diff --git a/python/ashd/proto.py b/python/ashd/proto.py index c1ae1b0..4a48304 100644 --- a/python/ashd/proto.py +++ b/python/ashd/proto.py @@ -10,6 +10,8 @@ ashd.util module provides an easier-to-use interface. import os, socket import htlib +__all__ = ["req", "recvreq", "sendreq"] + class protoerr(Exception): pass diff --git a/python/ashd/util.py b/python/ashd/util.py index 2ac7dc0..0ff3878 100644 --- a/python/ashd/util.py +++ b/python/ashd/util.py @@ -7,6 +7,8 @@ handlers, wrapping the low-level ashd.proto module. 
import os, socket import proto +__all__ = ["stdfork", "pchild", "respond", "serveloop"] + def stdfork(argv, chinit = None): """Fork a persistent handler process using the `argv' argument list, as per the standard ashd(7) calling convention. For an -- 2.11.0 From 075a379e5dfba9584cc5f3b473ae343628fc160d Mon Sep 17 00:00:00 2001 From: Fredrik Tolf Date: Thu, 14 Apr 2011 12:52:22 +0200 Subject: [PATCH 03/16] python: Fixed scgi-wsgi typo in the -T option. --- python/scgi-wsgi | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/python/scgi-wsgi b/python/scgi-wsgi index 006f375..5ffcf6e 100755 --- a/python/scgi-wsgi +++ b/python/scgi-wsgi @@ -20,7 +20,7 @@ for o, a in opts: sk = socket.socket(socket.AF_INET, socket.SOCK_STREAM) p = a.rfind(":") if p < 0: - bindhost = "hostname" + bindhost = "localhost" bindport = int(a) else: bindhost = a[:p] -- 2.11.0 From 338bee9d3d0ad8f236a17d9d1b8e7433e1d17c21 Mon Sep 17 00:00:00 2001 From: Fredrik Tolf Date: Thu, 14 Apr 2011 13:00:15 +0200 Subject: [PATCH 04/16] python: Added (unused) documentation for ashd-wsgi and scgi-wsgi. Unfortunately, I know not yet how to make distutils compile and install them. 
--- python/doc/.gitignore | 3 ++ python/doc/ashd-wsgi.doc | 107 +++++++++++++++++++++++++++++++++++++++++++++++ python/doc/scgi-wsgi.doc | 63 ++++++++++++++++++++++++++++ 3 files changed, 173 insertions(+) create mode 100644 python/doc/.gitignore create mode 100644 python/doc/ashd-wsgi.doc create mode 100644 python/doc/scgi-wsgi.doc diff --git a/python/doc/.gitignore b/python/doc/.gitignore new file mode 100644 index 0000000..494c1f8 --- /dev/null +++ b/python/doc/.gitignore @@ -0,0 +1,3 @@ +/*.1 +/*.html +/*.css diff --git a/python/doc/ashd-wsgi.doc b/python/doc/ashd-wsgi.doc new file mode 100644 index 0000000..25531ff --- /dev/null +++ b/python/doc/ashd-wsgi.doc @@ -0,0 +1,107 @@ +ashd-wsgi(1) +============ + +NAME +---- +ashd-wsgi - WSGI adapter for ashd(7) + +SYNOPSIS +-------- +*ashd-wsgi* [*-hA*] [*-p* 'MODPATH'] 'HANDLER-MODULE' ['ARGS'...] + +DESCRIPTION +----------- + +The *ashd-wsgi* handler translates *ashd*(7) requests to WSGI +requests, and passes them to a specified Python handler module. The +precise Python convention for doing so is described in the PROTOCOL +section, below. + +*ashd-wsgi* is a persistent handler, as defined in *ashd*(7). It uses +multithreaded dispatching in a single Python interpreter, which means +that WSGI applications that use it need to be thread-safe, but that +they can also share all Python data structures and global variables +between requests. + +The Python module that *ashd-wsgi* comes with also contains a standard +handler module, `ashd.wsgidir`, which serves individual WSGI +applications directly from the files in which they reside and as such +makes this program useful as a *dirplex*(1) handler. Please see its +Python documentation for further details. + +*ashd-wsgi* requires the `ashd.proto` and `ashd.util` modules, which +are only available for CPython. If you want to use some other Python +implementation instead, you may want to use the *scgi-wsgi*(1) program +instead, along with *callscgi*(1). 
+ +OPTIONS +------- + +*-h*:: + + Print a brief help message to standard output and exit. + +*-A*:: + + Use the convention used by Apache's mod_wsgi module to find + the WSGI application object. See the PROTOCOL section, below, + for details. + +*-p* 'MODPATH':: + + Prepend 'MODPATH' to Python's `sys.path`; can be given multiple + times. Note that the working directory of *ashd-wsgi* is not + on Python's module path by default, so if you want to use a + module in that directory, you will need to specify "`-p .`". + +PROTOCOL +-------- + +When starting, *ashd-wsgi* will attempt to import the module named by +'HANDLER-MODULE', look for an object named `wmain` in that module, +call that object passing the 'ARGS' (as Python strings) as positional +parameters, and use the returned object as the WSGI application +object. If the *-A* option was specified, it will look for an object +named `application` instead of `wmain`, and use that object directly +as the WSGI application object. + +When calling the WSGI application, a new thread is started for each +request, in which the WSGI application object is called. All requests +run in the same interpreter, so it is guaranteed that data structures +and global variables can be shared between requests. + +The WSGI environment is the standard CGI environment, including the +`SCRIPT_FILENAME` variable whenever the `X-Ash-File` header was +included in the request. + +EXAMPLES +-------- + +The following *dirplex*(1) configuration can be used for serving WSGI +modules directly from the filesystem. + +-------- +child wsgidir + exec ashd-wsgi ashd.wsgidir +match + filename *.wsgi + handler wsgidir +-------- + +Since *ashd-wsgi* is a persistent handler, it can be used directly as +a root handler for *htparser*(1). 
For instance, if the directory +`/srv/www/foo` contains a `wsgi.py` file, which declares a standard +WSGI `application` object, it can be served with the following +command: + +-------- +htparser plain:port=8080 -- ashd-wsgi -Ap /srv/www/foo wsgi +-------- + +AUTHOR +------ +Fredrik Tolf + +SEE ALSO +-------- +*scgi-wsgi*(1), *ashd*(7), diff --git a/python/doc/scgi-wsgi.doc b/python/doc/scgi-wsgi.doc new file mode 100644 index 0000000..1aab621 --- /dev/null +++ b/python/doc/scgi-wsgi.doc @@ -0,0 +1,63 @@ +scgi-wsgi(1) +============ + +NAME +---- +scgi-wsgi - WSGI adapter for SCGI + +SYNOPSIS +-------- +*scgi-wsgi* [*-hA*] [*-p* 'MODPATH'] [*-T* \[HOST:]'PORT'] 'HANDLER-MODULE' ['ARGS'...] + +DESCRIPTION +----------- + +The *scgi-wsgi* program translates SCGI requests to WSGI requests, and +passes them to a specified Python module. It is mainly written to +emulate the behavior of *ashd-wsgi*(1), but over SCGI instead of the +native *ashd*(7) protocol, so please see its documentation for details +of Python interoperation. Unlike *ashd-wsgi* which requires CPython, +however, *scgi-wsgi* is written in pure Python using only the standard +library, and so should be usable by any Python implementation. If +using it under *ashd*(7), please see the documentation for +*callscgi*(1) as well. + +Following *callscgi*(1) conventions, *scgi-wsgi* will, by default, +accept connections on a socket passed on its standard input (a +behavior which is, obviously, not available on all Python +implementations). Use the *-T* option to listen to a TCP address +instead. + +OPTIONS +------- + +*-h*:: + + Print a brief help message to standard output and exit. + +*-A*:: + + Use the convention used by Apache's mod_wsgi module to find + the WSGI application object. See the PROTOCOL section of + *ashd-wsgi*(1) for details. + +*-p* 'MODPATH':: + + Prepend 'MODPATH' to Python's `sys.path`; can be given multiple + times. 
+ +*-T* \[HOST:]'PORT':: + + Instead of using a listening socket passed on standard input + to accept SCGI connections, bind a TCP socket to the 'HOST' + address listening for connections on 'PORT' instead. If 'HOST' + is not given, `localhost` is used by default. + +AUTHOR +------ +Fredrik Tolf + +SEE ALSO +-------- +*ashd-wsgi*(1), *callscgi*(1), , + -- 2.11.0 From 6586b2b37418f8cb54c5b82bec64bfb7af72725c Mon Sep 17 00:00:00 2001 From: Fredrik Tolf Date: Thu, 14 Apr 2011 15:57:57 +0200 Subject: [PATCH 05/16] sendfile: Only allow GET and HEAD requests. --- src/sendfile.c | 9 ++++++++- 1 file changed, 8 insertions(+), 1 deletion(-) diff --git a/src/sendfile.c b/src/sendfile.c index e6f6786..81fb67a 100644 --- a/src/sendfile.c +++ b/src/sendfile.c @@ -271,13 +271,20 @@ int main(int argc, char **argv) simpleerror(1, 500, "Internal Error", "The server could not access its own data."); exit(1); } + if(!strcasecmp(argv[optind], "get")) { + ishead = 0; + } else if(!strcasecmp(argv[optind], "head")) { + ishead = 1; + } else { + simpleerror(1, 405, "Method not allowed", "The requested method is not defined for this resource."); + return(0); + } if(contype == NULL) contype = getmimetype(file, &sb); contype = ckctype(contype); checkcache(file, &sb); - ishead = !strcasecmp(argv[optind], "head"); if((hdr = getenv("REQ_RANGE")) != NULL) sendrange(fd, &sb, contype, hdr, ishead); else -- 2.11.0 From 3e11d7ede84e52974e41542ed60899ff340088d7 Mon Sep 17 00:00:00 2001 From: Fredrik Tolf Date: Thu, 21 Apr 2011 04:46:21 +0200 Subject: [PATCH 06/16] python: Added a request limiter to ashd-wsgi. 
--- python/ashd-wsgi | 34 ++++++++++++++++++++++++++++++---- python/doc/ashd-wsgi.doc | 10 +++++++++- 2 files changed, 39 insertions(+), 5 deletions(-) diff --git a/python/ashd-wsgi b/python/ashd-wsgi index 79aad4f..e43dbc0 100755 --- a/python/ashd-wsgi +++ b/python/ashd-wsgi @@ -1,13 +1,14 @@ #!/usr/bin/python -import sys, os, getopt, threading +import sys, os, getopt, threading, time import ashd.proto, ashd.util def usage(out): - out.write("usage: ashd-wsgi [-hA] [-p MODPATH] HANDLER-MODULE [ARGS...]\n") + out.write("usage: ashd-wsgi [-hA] [-p MODPATH] [-l REQLIMIT] HANDLER-MODULE [ARGS...]\n") +reqlimit = 0 modwsgi_compat = False -opts, args = getopt.getopt(sys.argv[1:], "+hAp:") +opts, args = getopt.getopt(sys.argv[1:], "+hAp:l:") for o, a in opts: if o == "-h": usage(sys.stdout) @@ -16,6 +17,8 @@ for o, a in opts: sys.path.insert(0, a) elif o == "-A": modwsgi_compat = True + elif o == "-l": + reqlimit = int(a) if len(args) < 1: usage(sys.stderr) sys.exit(1) @@ -170,14 +173,37 @@ def dowsgi(req): if hasattr(respiter, "close"): respiter.close() +flightlock = threading.Condition() +inflight = 0 + class reqthread(threading.Thread): def __init__(self, req): super(reqthread, self).__init__(name = "Request handler") self.req = req.dup() def run(self): + global inflight try: - dowsgi(self.req) + flightlock.acquire() + try: + if reqlimit != 0: + start = time.time() + while inflight >= reqlimit: + flightlock.wait(10) + if time.time() - start > 10: + os.abort() + inflight += 1 + finally: + flightlock.release() + try: + dowsgi(self.req) + finally: + flightlock.acquire() + try: + inflight -= 1 + flightlock.notify() + finally: + flightlock.release() finally: self.req.close() diff --git a/python/doc/ashd-wsgi.doc b/python/doc/ashd-wsgi.doc index 25531ff..b73f90d 100644 --- a/python/doc/ashd-wsgi.doc +++ b/python/doc/ashd-wsgi.doc @@ -7,7 +7,7 @@ ashd-wsgi - WSGI adapter for ashd(7) SYNOPSIS -------- -*ashd-wsgi* [*-hA*] [*-p* 'MODPATH'] 'HANDLER-MODULE' ['ARGS'...] 
+*ashd-wsgi* [*-hA*] [*-p* 'MODPATH'] [*-l* 'LIMIT'] 'HANDLER-MODULE' ['ARGS'...] DESCRIPTION ----------- @@ -54,6 +54,14 @@ OPTIONS on Python's module path by default, so if you want to use a module in that directory, you will need to specify "`-p .`". +*-l* 'LIMIT':: + + Allow at most 'LIMIT' requests to run concurrently. If a new + request is made when 'LIMIT' requests are executing, the new + request will wait up to ten seconds for one of them to + complete; if none does, *ashd-wsgi* will assume that the + process is irrecoverably stuck and call *abort*(3). + PROTOCOL -------- -- 2.11.0 From 254eb937b8c3298a191863e88e39f9a5035fabf9 Mon Sep 17 00:00:00 2001 From: Fredrik Tolf Date: Wed, 27 Apr 2011 05:29:02 +0200 Subject: [PATCH 07/16] call[fs]cgi: Ensure less delay in closing stdin when shutting down. --- src/callfcgi.c | 1 + src/callscgi.c | 1 + 2 files changed, 2 insertions(+) diff --git a/src/callfcgi.c b/src/callfcgi.c index 27c9c5e..9af5929 100644 --- a/src/callfcgi.c +++ b/src/callfcgi.c @@ -806,6 +806,7 @@ static void sigign(int sig) static void sigexit(int sig) { + shutdown(0, SHUT_RDWR); exit(0); } diff --git a/src/callscgi.c b/src/callscgi.c index 07064f3..062baa1 100644 --- a/src/callscgi.c +++ b/src/callscgi.c @@ -595,6 +595,7 @@ static void sigign(int sig) static void sigexit(int sig) { + shutdown(0, SHUT_RDWR); exit(0); } -- 2.11.0 From 9f974c1ff83d52fe3ba11040d46dfb2540bb0630 Mon Sep 17 00:00:00 2001 From: Fredrik Tolf Date: Fri, 29 Apr 2011 03:10:05 +0200 Subject: [PATCH 08/16] doc: Fixed a couple of errors. --- doc/dirplex.doc | 16 ++++++++-------- 1 file changed, 8 insertions(+), 8 deletions(-) diff --git a/doc/dirplex.doc b/doc/dirplex.doc index d41c1ad..cc794ba 100644 --- a/doc/dirplex.doc +++ b/doc/dirplex.doc @@ -82,11 +82,11 @@ CONFIGURATION Configuration in *dirplex* comes from several sources. When *dirplex* starts, unless the *-N* option is given, it tries to find a global configuration file named `dirplex.rc`. 
It looks in all directories -named by the *PATH* environment variable, appended with `../etc`. For -example, then, if *PATH* is `/usr/local/bin:/bin:/usr/bin`, the -directories `/usr/local/etc`, `/etc` and `/usr/etc` are searched for -`dirplex.rc`, in that order. Only the first file found is used, should -there exist several. +named by the *PATH* environment variable, appended with +`../etc/ashd`. For example, then, if *PATH* is +`/usr/local/bin:/bin:/usr/bin`, the directories `/usr/local/etc/ashd`, +`/etc/ashd` and `/usr/etc/ashd` are searched for `dirplex.rc`, in that +order. Only the first file found is used, should there exist several. If the *-c* option is given to *dirplex*, it too specifies a configuration file to load. If the name given contains any slashes, it @@ -158,9 +158,9 @@ The follow configuration directives are recognized: ['ARGS'...], specifying the program to execute and the arguments to pass it. In addition to the specified arguments, the HTTP method, raw URL and the rest string will be appended - added as described in *ashd*(7). If given in a `.htrc` file, - the program will be started in the same directory as the - `.htrc` file itself. + as described in *ashd*(7). If given in a `.htrc` file, the + program will be started in the same directory as the `.htrc` + file itself. *match* [*directory*]:: -- 2.11.0 From 0370bd82128b41fdbd5fbcbcac35104c0ede60fa Mon Sep 17 00:00:00 2001 From: Fredrik Tolf Date: Fri, 29 Apr 2011 03:20:45 +0200 Subject: [PATCH 09/16] htparser: chdir as part of chrooting. 
--- src/htparser.c | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/src/htparser.c b/src/htparser.c index ceae319..5179a25 100644 --- a/src/htparser.c +++ b/src/htparser.c @@ -499,7 +499,7 @@ int main(int argc, char **argv) if(usesyslog) opensyslog(); if(root) { - if(chroot(root)) { + if(chdir(root) || chroot(root)) { flog(LOG_ERR, "could not chroot to %s: %s", root, strerror(errno)); exit(1); } -- 2.11.0 From aa7e44069384016c57c417904bb45f6fa554981a Mon Sep 17 00:00:00 2001 From: Fredrik Tolf Date: Fri, 29 Apr 2011 05:39:35 +0200 Subject: [PATCH 10/16] doc: Made mention of the include directive. --- doc/dirplex.doc | 10 ++++++++++ doc/patplex.doc | 6 +++--- 2 files changed, 13 insertions(+), 3 deletions(-) diff --git a/doc/dirplex.doc b/doc/dirplex.doc index cc794ba..7d1e41f 100644 --- a/doc/dirplex.doc +++ b/doc/dirplex.doc @@ -124,6 +124,16 @@ treated as comments and ignored. The follow configuration directives are recognized: +*include* ['FILENAME'...]:: + + Read the the named files and act as if their contents stood in + place of the *include* stanza. A 'FILENAME' may be a glob + pattern, in which case all matching files are used, sorted by + their filenames. If a 'FILENAME' is a relative path, it is + treated relative to the directory containing the file from + which the *include* stanza was read, even if the inclusion has + been nested. Inclusions may be nested to any level. + *index-file* ['FILENAME'...]:: The given 'FILENAMEs' are used for finding index files (see diff --git a/doc/patplex.doc b/doc/patplex.doc index dbe8e96..2d9acbd 100644 --- a/doc/patplex.doc +++ b/doc/patplex.doc @@ -46,9 +46,9 @@ Should the global and the given configuration files conflict, the directives from the given file take precedence. The configuration files follow the same general format as for -*dirplex*(1), though the recognized stanzas differ. The *child* and -*fchild* stanzas are also shared with *dirplex*(1), so see its manpage -for a description thereof. 
+*dirplex*(1), though the recognized stanzas differ. The *child*, +*fchild* and *include* stanzas are also shared with *dirplex*(1), so +see its manpage for a description thereof. *patplex* recognizes the *match* stanza, which takes no arguments, but must contain at least one follow-up line to specify match rules. All -- 2.11.0 From 90b0ba0f9d93e454cc08a566b718abdcbfd0d9f6 Mon Sep 17 00:00:00 2001 From: Fredrik Tolf Date: Sat, 30 Apr 2011 08:45:29 +0200 Subject: [PATCH 11/16] Imposed some limits on request parts. --- lib/req.c | 4 ++++ src/htparser.c | 6 ++++++ 2 files changed, 10 insertions(+) diff --git a/lib/req.c b/lib/req.c index a3e7273..da8e3f0 100644 --- a/lib/req.c +++ b/lib/req.c @@ -106,12 +106,16 @@ int parseheaders(struct hthead *head, FILE *in) { int c, state; struct charbuf name, val; + size_t tsz; bufinit(name); bufinit(val); state = 0; + tsz = 0; while(1) { c = fgetc(in); + if(++tsz >= 65536) + goto fail; again: if(state == 0) { if(c == '\r') { diff --git a/src/htparser.c b/src/htparser.c index 5179a25..1ed9175 100644 --- a/src/htparser.c +++ b/src/htparser.c @@ -77,6 +77,8 @@ static struct hthead *parsereq(FILE *in) goto fail; } else { bufadd(method, c); + if(method.d >= 128) + goto fail; } } while(1) { @@ -87,6 +89,8 @@ static struct hthead *parsereq(FILE *in) goto fail; } else { bufadd(url, c); + if(url.d >= 65536) + goto fail; } } while(1) { @@ -98,6 +102,8 @@ static struct hthead *parsereq(FILE *in) goto fail; } else { bufadd(ver, c); + if(ver.d >= 128) + goto fail; } } bufadd(method, 0); -- 2.11.0 From e2b404ab58259bb538e2d785d6ebbe9e9263bbef Mon Sep 17 00:00:00 2001 From: Fredrik Tolf Date: Sat, 30 Apr 2011 08:53:14 +0200 Subject: [PATCH 12/16] htparser: Implemented resource limits for response headers as well. 
--- src/htparser.c | 6 ++++++ 1 file changed, 6 insertions(+) diff --git a/src/htparser.c b/src/htparser.c index 1ed9175..ba76d01 100644 --- a/src/htparser.c +++ b/src/htparser.c @@ -146,6 +146,8 @@ static struct hthead *parseresp(FILE *in) goto fail; } else { bufadd(ver, c); + if(ver.d >= 128) + goto fail; } } while(1) { @@ -156,6 +158,8 @@ static struct hthead *parseresp(FILE *in) goto fail; } else { code = (code * 10) + (c - '0'); + if(code >= 10000) + goto fail; } } while(1) { @@ -167,6 +171,8 @@ static struct hthead *parseresp(FILE *in) goto fail; } else { bufadd(msg, c); + if(msg.d >= 512) + goto fail; } } bufadd(msg, 0); -- 2.11.0 From 64a9096a7cd4acf3bbf4a7bf4a214f4b5cca7fdc Mon Sep 17 00:00:00 2001 From: Fredrik Tolf Date: Sat, 30 Apr 2011 09:20:14 +0200 Subject: [PATCH 13/16] htparser: Handle absolute-URI requests. --- src/htparser.c | 47 +++++++++++++++++++++++++++++++++++++++++------ 1 file changed, 41 insertions(+), 6 deletions(-) diff --git a/src/htparser.c b/src/htparser.c index ba76d01..bc8f7fd 100644 --- a/src/htparser.c +++ b/src/htparser.c @@ -240,12 +240,50 @@ static int hasheader(struct hthead *head, char *name, char *val) return(!strcasecmp(hd, val)); } +static int canonreq(struct hthead *req) +{ + char *p, *p2, *r; + int n; + + if(req->url[0] == '/') { + replrest(req, req->url + 1); + if((p = strchr(req->rest, '?')) != NULL) + *p = 0; + return(1); + } + if((p = strstr(req->url, "://")) != NULL) { + n = p - req->url; + if(((n == 4) && !strncasecmp(req->url, "http", 4)) || + ((n == 5) && !strncasecmp(req->url, "https", 5))) { + if(getheader(req, "host")) + return(0); + p += 3; + if((p2 = strchr(p, '/')) == NULL) { + headappheader(req, "Host", p); + free(req->url); + req->url = sstrdup("/"); + } else { + r = sstrdup(p2); + *(p2++) = 0; + headappheader(req, "Host", p); + free(req->url); + req->url = r; + } + replrest(req, req->url + 1); + if((p = strchr(req->rest, '?')) != NULL) + *p = 0; + return(1); + } + } + return(0); +} + void serve(FILE 
*in, struct conn *conn) { int pfds[2]; FILE *out; struct hthead *req, *resp; - char *hd, *p; + char *hd; off_t dlen; out = NULL; @@ -253,11 +291,8 @@ void serve(FILE *in, struct conn *conn) while(1) { if((req = parsereq(in)) == NULL) break; - replrest(req, req->url); - if(req->rest[0] == '/') - replrest(req, req->rest + 1); - if((p = strchr(req->rest, '?')) != NULL) - *p = 0; + if(!canonreq(req)) + break; if((conn->initreq != NULL) && conn->initreq(conn, req)) break; -- 2.11.0 From 01bb89b0e602cfcc259f90388eb8e06834b60c48 Mon Sep 17 00:00:00 2001 From: Fredrik Tolf Date: Sun, 1 May 2011 20:57:23 +0200 Subject: [PATCH 14/16] doc: Fixed markup error. --- doc/htparser.doc | 2 +- 1 file changed, 1 insertion(+), 1 deletion(-) diff --git a/doc/htparser.doc b/doc/htparser.doc index 757f8cd..320e549 100644 --- a/doc/htparser.doc +++ b/doc/htparser.doc @@ -37,7 +37,7 @@ PORT SPECIFICATION Currently, the available 'HANDLERs' are *plain* and *ssl*, for handling plain TCP connections and SSL/TLS-protected connections, respectively. For details regarding the arguments that each handler -accept, simply run *htparser* with 'HANDLER':*help*. For example, the +accept, simply run *htparser* with 'HANDLER'*:help*. For example, the command "`htparser ssl:help`" will display help for the *ssl* handler to standard output and then exit. -- 2.11.0 From 16c2bec346ae486bd09d0b18ab276b4c89005cad Mon Sep 17 00:00:00 2001 From: Fredrik Tolf Date: Sun, 1 May 2011 22:04:18 +0200 Subject: [PATCH 15/16] doc: Fixed up various parts of the documentation a bit. --- doc/ashd.doc | 6 +++--- doc/callcgi.doc | 2 +- doc/dirplex.doc | 50 ++++++++++++++++++++++++++++++-------------------- doc/htparser.doc | 19 +++++++++++++------ 4 files changed, 47 insertions(+), 30 deletions(-) diff --git a/doc/ashd.doc b/doc/ashd.doc index eb081b7..f2790ba 100644 --- a/doc/ashd.doc +++ b/doc/ashd.doc @@ -15,9 +15,9 @@ technically. 
If you want a brief overview, please see the homepage at The basic premise of ashd is that of standard Unix philosophy; it consists of a number of different programs, each specialized to one precise task, passing HTTP requests around to each other in a manner -akin to standard Unix pipelines. This document describes the protocols -and conventions used between such programs that allows them to -interoperate. +akin to standard Unix pipelines. This document describes the set of +protocols and conventions used between such programs that allows them +to interoperate. REQUESTS -------- diff --git a/doc/callcgi.doc b/doc/callcgi.doc index 4564279..2a61e10 100644 --- a/doc/callcgi.doc +++ b/doc/callcgi.doc @@ -47,4 +47,4 @@ Fredrik Tolf SEE ALSO -------- -*dirplex*(1), *ashd*(7), RFC 3875 +*dirplex*(1), *ashd*(7), *callscgi*(1), *callfcgi*(1), RFC 3875 diff --git a/doc/dirplex.doc b/doc/dirplex.doc index 7d1e41f..362a5d7 100644 --- a/doc/dirplex.doc +++ b/doc/dirplex.doc @@ -126,7 +126,7 @@ The follow configuration directives are recognized: *include* ['FILENAME'...]:: - Read the the named files and act as if their contents stood in + Read the named files and act as if their contents stood in place of the *include* stanza. A 'FILENAME' may be a glob pattern, in which case all matching files are used, sorted by their filenames. If a 'FILENAME' is a relative path, it is @@ -163,7 +163,7 @@ The follow configuration directives are recognized: *fchild* 'NAME':: Declares a named, transient request handler (see *ashd*(7) for - a more detailed description of persistent handlers). It must + a more detailed description of transient handlers). It must contain exactly one follow-up line, *exec* 'PROGRAM' ['ARGS'...], specifying the program to execute and the arguments to pass it. 
In addition to the specified arguments, @@ -187,7 +187,8 @@ The follow configuration directives are recognized: remaining rest string, to the specified 'HANDLER', which must by a named request handler specified either in the same `.htrc` file or elsewhere. The *capture* directive accepts no - follow-up lines. + follow-up lines. Note that the `X-Ash-File` header is not + added to requests passed via *capture* directives. MATCHING -------- @@ -228,7 +229,7 @@ The following rules are recognized: *default*:: Matches if and only if no *match* stanza without a *default* - rule has matched. + rule matches (in any configuration file). *local*:: @@ -259,26 +260,26 @@ following actions are recognized: 404 RESPONSES ------------- -Any of the following cases will result in a 404 response being sent to -the client. +A HTTP 404 response is sent to the client if - * Failure of the mapping procedure to find a matching physical file. - * Presence of a path element during mapping that begins with a dot. - * A path element which, after URL unescaping, contains slashes. - * The mapping procedure finding a file which is neither a directory - nor a regular file. - * Presence of a non-final but empty path element during mapping. - * A physical file having been found which is not being matched by any + * The mapping procedure fails to find a matching physical file; + * A path element is encountered during mapping which, after URL + unescaping, either begins with a dot or contains slashes; + * The mapping procedure finds a file which is neither a directory nor + a regular file; + * An empty, non-final path element is encountered during mapping; or + * The mapping procedure results in a file which is not matched by any *match* stanza. -*dirplex* will send a built-in 404 response by default, but any +By default, *dirplex* will send a built-in 404 response, but any `.htrc` file or global configuration may define a request handler named `.notfound` to customize the behavior. 
Note that, unlike successful requests, such a handler will not be passed the `X-Ash-File` header. The built-in `.notfound` handler can also be used in *match* or -*capture* stanzas. +*capture* stanzas (for example, to restrict access to certain files or +directories). EXAMPLES -------- @@ -296,8 +297,16 @@ scripts can be used with the following configuration, using the *callcgi*(1) program. -------- +# To use plain CGI, which uses more resources per handled request, +# but less static resources: fchild php exec callcgi -p php-cgi + +# To use FastCGI, which keeps PHP running at all times, but uses less +# resources per handled request: +child php + exec callfcgi multifscgi 5 php-cgi + match filename *.php handler php @@ -312,11 +321,12 @@ match directory fork htls -------- -If you want an entire directory to be dedicated to some external SCGI -script engine, you can use the *callscgi*(1) program to serve it as -follows. Note that *callscgi*, and therefore the script engine itself, -is started in the directory itself, so that arbitrary code modules or -data files can be put directly in that directory and easily found. +The following configuration can be placed in a `.htrc` file in order +to dedicate the directory containing that file to some external SCGI +script engine. Note that *callscgi*, and therefore the script engine +itself, is started in the directory itself, so that arbitrary code +modules or data files can be put directly in that directory and easily +found. -------- child foo diff --git a/doc/htparser.doc b/doc/htparser.doc index 320e549..698e559 100644 --- a/doc/htparser.doc +++ b/doc/htparser.doc @@ -54,9 +54,9 @@ OPTIONS *-S*:: Log messages to *syslog*(3) instead of standard error. Also - sets the environment ASHD_USESYSLOG environment variable in - the root handler process, which indicates to the standard ashd - programs to do the same thing. 
+ sets the ASHD_USESYSLOG environment variable in the root + handler process, which indicates to the standard ashd programs + to do the same thing. *-f*:: @@ -85,8 +85,8 @@ EXAMPLES `htparser plain -- dirplex /srv/www`:: - This simple invocation will simply listen for HTTP requests on - port 80 and use *dirplex*(1) to serve files from the /srv/www + This simple invocation will listen for HTTP requests on port + 80 and use *dirplex*(1) to serve files from the /srv/www directory. `htparser plain:port=8080 -- dirplex /srv/www`:: @@ -97,7 +97,7 @@ EXAMPLES `htparser plain ssl:cert=/etc/ssl/private/web.pem -- dirplex /srv/www`:: The same as above, but will listen on port 443 for SSL - connections as well. The file `/etc/ssl/privte/web.pem` needs + connections as well. The file `/etc/ssl/private/web.pem` needs to contain both the server certificate and its private key. `htparser plain -- sudo -u www-user dirplex /srv/www`:: @@ -116,6 +116,13 @@ EXAMPLES file system, so that it can start other handler programs as needed. +`htparser -f plain -- errlogger -n ashd dirplex /srv/www`:: + + The same as the first example, but will daemonize and use the + *errlogger*(1) program to ensure that any errors or other + messages written by any handler program to its stderr are + recorded in the *syslog*(3). + X-ASH HEADERS ------------- -- 2.11.0 From 82e84769bb12dd531f44f83498f4ef2a146da63e Mon Sep 17 00:00:00 2001 From: Fredrik Tolf Date: Sun, 1 May 2011 22:08:21 +0200 Subject: [PATCH 16/16] Updated changelog. --- ChangeLog | 5 +++++ 1 file changed, 5 insertions(+) diff --git a/ChangeLog b/ChangeLog index 3b03efb..a8decb6 100644 --- a/ChangeLog +++ b/ChangeLog @@ -1,3 +1,8 @@ +Version 0.8: + + * Various bug fixes, protocol compliance fixes, tunings, + documentation improvements and other minor improvements. + Version 0.7: * Added some configuration examples. -- 2.11.0