htparser: Be more tolerant to broken clients.
[ashd.git] / python3 / ashd-wsgi3
CommitLineData
55fa3f63 1#!/usr/bin/python3
c270f222 2
14640dcc 3import sys, os, getopt, threading, logging, time, locale, collections
d5ee5cde
FT
4import ashd.proto, ashd.util, ashd.perf
5try:
6 import pdm.srv
7except:
8 pdm = None
c270f222
FT
9
def usage(out):
    """Write the one-line command synopsis to the file-like object *out*."""
    out.write("usage: ashd-wsgi3 [-hAL] [-m PDM-SPEC] [-p MODPATH] [-l REQLIMIT] HANDLER-MODULE [ARGS...]\n")
c270f222 12
# Command-line configuration.
# reqlimit: value of -l; reqthread.run() treats 0 as "no limit".
reqlimit = 0
# modwsgi_compat: set by -A; selects the handler module's `application'
# object instead of calling its `wmain' function.
modwsgi_compat = False
# setlog: cleared by -L to skip logging.basicConfig().
setlog = True
try:
    opts, args = getopt.getopt(sys.argv[1:], "+hALp:l:m:")
except getopt.GetoptError as exc:
    # Report malformed options with the usage text instead of a traceback.
    sys.stderr.write("ashd-wsgi3: %s\n" % exc)
    usage(sys.stderr)
    sys.exit(1)
for o, a in opts:
    if o == "-h":
        usage(sys.stdout)
        sys.exit(0)
    elif o == "-p":
        # Extra module search path, searched before the default ones.
        sys.path.insert(0, a)
    elif o == "-L":
        setlog = False
    elif o == "-A":
        modwsgi_compat = True
    elif o == "-l":
        reqlimit = int(a)
    elif o == "-m":
        # pdm is None when `import pdm.srv' failed at the top of the file;
        # in that case the option is silently ignored.
        if pdm is not None:
            pdm.srv.listen(a)
if len(args) < 1:
    usage(sys.stderr)
    sys.exit(1)
if setlog:
    logging.basicConfig(format="ashd-wsgi3(%(name)s): %(levelname)s: %(message)s")
log = logging.getLogger("ashd-wsgi3")
c270f222
FT
38
# Import the handler module named by the first positional argument and
# resolve the WSGI callable stored in `handler'.
try:
    # A non-empty fromlist makes __import__ return the named (sub)module
    # itself rather than the top-level package.
    handlermod = __import__(args[0], fromlist = ["dummy"])
except ImportError as exc:
    sys.stderr.write("ashd-wsgi3: handler %s not found: %s\n" % (args[0], exc.args[0]))
    sys.exit(1)
if not modwsgi_compat:
    # Native mode: the module exposes wmain(*ARGS), which is called with the
    # remaining command-line arguments and returns the WSGI callable.
    if not hasattr(handlermod, "wmain"):
        sys.stderr.write("ashd-wsgi3: handler %s has no `wmain' function\n" % args[0])
        sys.exit(1)
    handler = handlermod.wmain(*args[1:])
else:
    # -A mode: the module exposes the WSGI callable directly as
    # `application'.
    if not hasattr(handlermod, "application"):
        sys.stderr.write("ashd-wsgi3: handler %s has no `application' object\n" % args[0])
        sys.exit(1)
    handler = handlermod.application
54
81a0ca30
FT
class closed(IOError):
    """Raised when writing the response fails because the client went away."""

    def __init__(self):
        super().__init__("The client has closed the connection.")
81a0ca30 58
70d942a7
FT
# Working directory captured once at start-up; relative paths are resolved
# against it even if the process changes directory later.
cwd = os.getcwd()
def absolutify(path):
    """Return *path* unchanged if it starts with '/', else joined onto cwd.

    An empty *path* is treated as relative (the result is cwd with a
    trailing separator) instead of raising IndexError.
    """
    if not path.startswith('/'):
        return os.path.join(cwd, path)
    return path
64
def unquoteurl(url):
    """Decode %XX escapes in the bytes-like *url* and return a bytearray.

    Bytes other than '%' are copied through unchanged.

    Raises ValueError if a '%' is not followed by two characters
    ("Incomplete URL escape character") or if either of them is not a
    hexadecimal digit ("Illegal URL escape character").
    """
    hexdigits = b"0123456789abcdefABCDEF"
    percent = ord(b'%')
    buf = bytearray()
    i = 0
    end = len(url)
    while i < end:
        c = url[i]
        i += 1
        if c != percent:
            buf.append(c)
            continue
        if end < i + 2:
            raise ValueError("Incomplete URL escape character")
        # Validate explicitly: int() alone would also accept signs,
        # whitespace and underscores.
        if url[i] not in hexdigits or url[i + 1] not in hexdigits:
            raise ValueError("Illegal URL escape character")
        buf.append(int(url[i:i + 2], 16))
        i += 2
    return buf
81a0ca30 97
c270f222
FT
def dowsgi(req):
    """Serve one request through the WSGI callable `handler'.

    Builds the WSGI/CGI environment from *req* (its headers, ver, method,
    url and rest attributes), calls handler(env, startreq), and writes the
    status line, headers and body chunks to req.sk.  A write failure
    (IOError) is converted to `closed' and ends the response quietly.  The
    whole exchange runs inside an ashd.perf.request context, which is told
    about the response if one was started.
    """
    env = {}
    env["wsgi.version"] = 1, 0
    # Header names and values arrive as bytes; expose them as latin-1 text
    # under the CGI-style HTTP_* keys.
    for key, val in req.headers:
        env["HTTP_" + key.upper().replace(b"-", b"_").decode("latin-1")] = val.decode("latin-1")
    env["SERVER_SOFTWARE"] = "ashd-wsgi/1"
    env["GATEWAY_INTERFACE"] = "CGI/1.1"
    env["SERVER_PROTOCOL"] = req.ver.decode("latin-1")
    env["REQUEST_METHOD"] = req.method.decode("latin-1")
    try:
        rawpi = unquoteurl(req.rest)
    except ValueError:
        # Malformed escapes: fall back to the undecoded rest string.
        rawpi = req.rest
    # Prefer UTF-8 for the URL parts; if any of them is not valid UTF-8,
    # decode all three as latin-1 instead and record which encoding was used.
    try:
        name, rest, pi = [v.decode("utf-8") for v in (req.url, req.rest, rawpi)]
        env["wsgi.uri_encoding"] = "utf-8"
    except UnicodeError:
        name, rest, pi = [v.decode("latin-1") for v in (req.url, req.rest, rawpi)]
        env["wsgi.uri_encoding"] = "latin-1"
    env["REQUEST_URI"] = name
    p = name.find('?')
    if p >= 0:
        env["QUERY_STRING"] = name[p + 1:]
        name = name[:p]
    else:
        env["QUERY_STRING"] = ""
    if rest and name.endswith(rest):
        # This is the same hack used in call*cgi.
        name = name[:-len(rest)]
    if name == "/":
        # This seems to be normal CGI behavior, but see callcgi.c for
        # details.
        pi = "/" + pi
        name = ""
    env["SCRIPT_NAME"] = name
    env["PATH_INFO"] = pi
    for src, tgt in [("HTTP_HOST", "SERVER_NAME"), ("HTTP_X_ASH_SERVER_PORT", "SERVER_PORT"),
                     ("HTTP_X_ASH_ADDRESS", "REMOTE_ADDR"), ("HTTP_CONTENT_TYPE", "CONTENT_TYPE"),
                     ("HTTP_CONTENT_LENGTH", "CONTENT_LENGTH"), ("HTTP_X_ASH_PROTOCOL", "wsgi.url_scheme")]:
        if src in env:
            env[tgt] = env[src]
    for key in ["HTTP_CONTENT_TYPE", "HTTP_CONTENT_LENGTH"]:
        # The CGI specification does not strictly require this, but
        # many actual programs and libraries seem to.
        env.pop(key, None)
    if "X-Ash-Protocol" in req and req["X-Ash-Protocol"] == b"https":
        env["HTTPS"] = "on"
    if "X-Ash-File" in req:
        env["SCRIPT_FILENAME"] = absolutify(req["X-Ash-File"].decode(locale.getpreferredencoding()))
    env["wsgi.input"] = req.sk
    env["wsgi.errors"] = sys.stderr
    env["wsgi.multithread"] = True
    env["wsgi.multiprocess"] = False
    env["wsgi.run_once"] = False

    # resp holds [status, headers] once startreq has been called; respsent
    # becomes non-empty once the header block has been written.  Lists are
    # used so the nested functions can mutate them in place.
    resp = []
    respsent = []

    def recode(thing):
        """Return *thing* as bytes: bytes/bytearray pass through, anything
        else is converted with str() and encoded as latin-1."""
        # collections.ByteString is gone from the `collections' namespace on
        # Python >= 3.10; bytes and bytearray are the types it covered.
        if isinstance(thing, (bytes, bytearray)):
            return thing
        else:
            return str(thing).encode("latin-1")

    def flushreq():
        """Write the status line and headers to req.sk, once."""
        if not respsent:
            if not resp:
                raise Exception("Trying to write data before starting response.")
            status, headers = resp
            respsent[:] = [True]
            buf = bytearray()
            # NOTE(review): lines end in a bare LF, exactly as the original
            # wrote them; presumably the receiving side of req.sk accepts
            # that -- confirm before changing.
            buf += b"HTTP/1.1 " + recode(status) + b"\n"
            for nm, val in headers:
                buf += recode(nm) + b": " + recode(val) + b"\n"
            buf += b"\n"
            try:
                req.sk.write(buf)
            except IOError:
                raise closed()

    def write(data):
        """WSGI write callable: send one body chunk and flush it."""
        if not data:
            # Empty chunks must not trigger sending the headers.
            return
        flushreq()
        try:
            req.sk.write(data)
            req.sk.flush()
        except IOError:
            raise closed()

    def startreq(status, headers, exc_info = None):
        """WSGI start_response callable; returns `write'."""
        if resp:
            if exc_info:                # Interesting, this...
                try:
                    # Headers already on the wire cannot be replaced, so
                    # re-raise the application's exception.
                    if respsent:
                        raise exc_info[1]
                finally:
                    exc_info = None     # CPython GC bug?
            else:
                raise Exception("Can only start responding once.")
        resp[:] = status, headers
        return write

    with ashd.perf.request(env) as reqevent:
        try:
            respiter = handler(env, startreq)
            try:
                for data in respiter:
                    write(data)
                # A response with no (non-empty) body chunks still needs its
                # header block sent.
                if resp:
                    flushreq()
            finally:
                if hasattr(respiter, "close"):
                    respiter.close()
        except closed:
            pass
        if resp:
            reqevent.response(resp)
c270f222 213
3e11d7ed
FT
# Shared state for the -l request limit: `inflight' counts requests currently
# being handled and is only read or changed while holding `flightlock', which
# is also the condition that finishing requests notify.
flightlock = threading.Condition()
inflight = 0
216
c270f222
FT
class reqthread(threading.Thread):
    """Thread that handles a single request by calling dowsgi().

    The constructor stores req.dup(), and run() closes that duplicate when it
    finishes (presumably so the caller's own request object can be released
    independently -- confirm against ashd.proto).
    """

    def __init__(self, req):
        super().__init__(name = "Request handler")
        self.req = req.dup()

    def run(self):
        """Wait for a free slot if a request limit is set, then serve."""
        global inflight
        try:
            with flightlock:
                if reqlimit != 0:
                    # Monotonic clock: a wall-clock adjustment must not be
                    # able to trip the abort below.
                    start = time.monotonic()
                    while inflight >= reqlimit:
                        flightlock.wait(10)
                        # Still no slot after more than ten seconds: abort
                        # the whole process.
                        if time.monotonic() - start > 10:
                            os.abort()
                inflight += 1
            try:
                dowsgi(self.req)
            finally:
                with flightlock:
                    inflight -= 1
                    flightlock.notify()
        except BaseException:
            # Deliberate catch-all (the original used a bare except): log
            # whatever escaped the handler rather than letting the thread die
            # with an unreported error.
            log.error("exception occurred in handler thread", exc_info=True)
        finally:
            self.req.close()
            sys.stderr.flush()
c270f222
FT
244
def handle(req):
    """Start a new reqthread for *req* and return without waiting for it."""
    reqthread(req).start()
247
# Script entry point: hand `handle' to ashd's serve loop as the request
# callback.
ashd.util.serveloop(handle)