From: Fredrik Tolf Date: Sat, 23 Mar 2013 05:46:36 +0000 (+0100) Subject: patplex: Added URL unquoting functionality. X-Git-Tag: 0.12~1 X-Git-Url: http://www.dolda2000.com/gitweb/?p=ashd.git;a=commitdiff_plain;h=4dc7f716a7b4be064ecf86f6e1444b7b39dfdfe7 patplex: Added URL unquoting functionality. --- diff --git a/doc/patplex.doc b/doc/patplex.doc index be3da48..86d6ace 100644 --- a/doc/patplex.doc +++ b/doc/patplex.doc @@ -67,14 +67,15 @@ rules are recognized: matched case-independently. If the *match* stanza as a whole matches and contains no *restpat* line (as described below), the rest string of the request is replaced by the remainder of - the rest string after the portion that was matched by 'REGEX'. + the rest string after the portion that was matched by + 'REGEX'. See also URL UNQUOTING, below. *url* 'REGEX' 'FLAGS':: 'REGEX' must be an extended regular expression. The rule is considered to match if 'REGEX' matches the raw URL of the request. If 'FLAGS' contain the character `i`, 'REGEX' is - matched case-independently. + matched case-independently. See also URL UNQUOTING, below. *method* 'REGEX' 'FLAGS':: @@ -155,6 +156,20 @@ optional lines: If no *match* stanza matches, a 404 response is returned to the client. +URL UNQUOTING +------------- + +If the 'FLAGS' of a *point* or *url* rule contain the character `q`, +then the rule's pattern will be matched against a copy of the input +string where URL percent-escapes have been decoded so that, for +example, the regular expression `^~` will match an input string that +begins with either `~`, `%7E` or `%7e`. + +Even if such percent-escapes were decoded, however, the original +version of the string will be used for any *restpat* expansion, +regardless of whether the escapes were unquoted inside or outside +the matched part of the string. 
+ SIGNALS ------- diff --git a/lib/utils.c b/lib/utils.c index 33e55c7..459afed 100644 --- a/lib/utils.c +++ b/lib/utils.c @@ -307,6 +307,14 @@ char *base64decode(char *data, size_t *datalen) return(buf.b); } +int hexdigit(char c) +{ + if((c >= '0') && (c <= '9')) return(c - '0'); + else if((c >= 'a') && (c <= 'f')) return(c - 'a' + 10); + else if((c >= 'A') && (c <= 'F')) return(c - 'A' + 10); + return(-1); +} + static int btheight(struct btree *tree) { if(tree == NULL) diff --git a/lib/utils.h b/lib/utils.h index 2066918..080864b 100644 --- a/lib/utils.h +++ b/lib/utils.h @@ -81,6 +81,7 @@ void bprintf(struct charbuf *buf, char *format, ...); void replstr(char **p, char *n); char *base64encode(char *data, size_t datalen); char *base64decode(char *data, size_t *datalen); +int hexdigit(char c); int bbtreedel(struct btree **tree, void *item, int (*cmp)(void *, void *)); void freebtree(struct btree **tree, void (*ffunc)(void *)); int bbtreeput(struct btree **tree, void *item, int (*cmp)(void *, void *)); diff --git a/src/patplex.c b/src/patplex.c index a0818d2..23fcf4e 100644 --- a/src/patplex.c +++ b/src/patplex.c @@ -44,6 +44,7 @@ #define PAT_DEFAULT 5 #define PATFL_MSS 1 +#define PATFL_UNQ 2 struct config { struct child *children; @@ -207,6 +208,8 @@ static struct pattern *parsepattern(struct cfstate *s) if(s->argc >= 3) { if(strchr(s->argv[2], 's')) rule->fl |= PATFL_MSS; + if(strchr(s->argv[2], 'q')) + rule->fl |= PATFL_UNQ; } } else if(!strcmp(s->argv[0], "header")) { if(s->argc < 3) { @@ -360,12 +363,40 @@ static void exprestpat(struct hthead *req, struct pattern *pat, char **mstr) buffree(buf); } +static void qoffsets(char *buf, int *obuf, char *pstr, int unquote) +{ + int i, o, d1, d2; + + if(unquote) { + i = o = 0; + while(pstr[i]) { + obuf[o] = i; + if((pstr[i] == '%') && ((d1 = hexdigit(pstr[i + 1])) >= 0) && ((d2 = hexdigit(pstr[i + 2])) >= 0)) { + buf[o] = (d1 << 4) | d2; + i += 3; + } else { + buf[o] = pstr[i]; + i++; + } + o++; + } + buf[o] = 0; + } 
else { + for(i = 0; pstr[i]; i++) { + buf[i] = pstr[i]; + obuf[i] = i; + } + buf[i] = 0; + } +} + static struct pattern *findmatch(struct config *cf, struct hthead *req, int trydefault) { int i, o; struct pattern *pat; struct rule *rule; - int rmo, matched; + int rmo; + regex_t *rx; char *pstr; char **mstr; regmatch_t gr[10]; @@ -374,46 +405,52 @@ static struct pattern *findmatch(struct config *cf, struct hthead *req, int tryd for(pat = cf->patterns; pat != NULL; pat = pat->next) { rmo = -1; for(i = 0; (rule = pat->rules[i]) != NULL; i++) { - matched = 0; + rx = NULL; if(rule->type == PAT_REST) { - if((matched = !regexec(rule->pattern, pstr = req->rest, 10, gr, 0))) - rmo = gr[0].rm_eo; - else - break; + rx = rule->pattern; + pstr = req->rest; } else if(rule->type == PAT_URL) { - if(!(matched = !regexec(rule->pattern, pstr = req->url, 10, gr, 0))) - break; + rx = rule->pattern; + pstr = req->url; } else if(rule->type == PAT_METHOD) { - if(!(matched = !regexec(rule->pattern, pstr = req->method, 10, gr, 0))) - break; + rx = rule->pattern; + pstr = req->method; } else if(rule->type == PAT_HEADER) { + rx = rule->pattern; if(!(pstr = getheader(req, rule->header))) break; - if(!(matched = !regexec(rule->pattern, pstr, 10, gr, 0))) + } + if(rx != NULL) { + char pbuf[strlen(pstr) + 1]; + int obuf[strlen(pstr) + 1]; + qoffsets(pbuf, obuf, pstr, !!(rule->fl & PATFL_UNQ)); + if(regexec(rx, pbuf, 10, gr, 0)) break; + else if(rule->type == PAT_REST) + rmo = obuf[gr[0].rm_eo]; + if(rule->fl & PATFL_MSS) { + if(mstr) { + flog(LOG_WARNING, "two pattern rules marked with `s' flag found (for handler %s)", pat->childnm); + freeca(mstr); + } + for(o = 0; o < 10; o++) { + if(gr[o].rm_so < 0) + break; + } + mstr = szmalloc((o + 1) * sizeof(*mstr)); + for(o = 0; o < 10; o++) { + if(gr[o].rm_so < 0) + break; + mstr[o] = smalloc(obuf[gr[o].rm_eo] - obuf[gr[o].rm_so] + 1); + memcpy(mstr[o], pstr + obuf[gr[o].rm_so], obuf[gr[o].rm_eo] - obuf[gr[o].rm_so]); + mstr[o][obuf[gr[o].rm_eo] - 
obuf[gr[o].rm_so]] = 0; + } + } } else if(rule->type == PAT_ALL) { } else if(rule->type == PAT_DEFAULT) { if(!trydefault) break; } - if(matched && (rule->fl & PATFL_MSS)) { - if(mstr) { - flog(LOG_WARNING, "two pattern rules marked with `s' flag found (for handler %s)", pat->childnm); - freeca(mstr); - } - for(o = 0; o < 10; o++) { - if(gr[o].rm_so < 0) - break; - } - mstr = szmalloc((o + 1) * sizeof(*mstr)); - for(o = 0; o < 10; o++) { - if(gr[o].rm_so < 0) - break; - mstr[o] = smalloc(gr[o].rm_eo - gr[o].rm_so + 1); - memcpy(mstr[o], pstr + gr[o].rm_so, gr[o].rm_eo - gr[o].rm_so); - mstr[o][gr[o].rm_eo - gr[o].rm_so] = 0; - } - } } if(!rule) { if(pat->restpat) {