Bug 332173 - Problems with regexp parsing of '~' in nsIZipReader.findEntries (and other nsWildCard uses). Patch by Nelson Bolyard <nelson@bolyard.me>, r=jwalden, a=ss

git-svn-id: svn://10.0.0.236/trunk@257851 18797224-902f-48f8-a5cc-f745e15eee43
This commit is contained in:
jwalden%mit.edu 2009-07-29 21:12:47 +00:00
parent ea17f6ca1a
commit 15334bed7b
3 changed files with 323 additions and 215 deletions

View File

@ -18,7 +18,7 @@
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* Portions created by the Initial Developer are Copyright (C) 1998-2009
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
@ -27,6 +27,7 @@
* Samir Gehani <sgehani@netscape.com>
* Mitch Stoltz <mstoltz@netscape.com>
* Jeff Walden <jwalden+code@mit.edu>
* Nelson Bolyard <nelson@bolyard.me>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@ -143,37 +144,35 @@ interface nsIZipReader : nsISupports
* Returns a string enumerator containing the matching entry names.
*
* @param aPattern
* A regular expression used to find matching entries in the zip file.
* A globbing pattern used to find matching names in the zip file.
* Set this parameter to null to get all entries; otherwise, use the
* following syntax:
*
* o * matches anything
* o ? matches one character
* o $ matches the end of the string
* o [abc] matches one occurrence of a, b, or c. The only character that
* must be escaped inside the brackets is ]. ^ and - must never
* appear in the first and second positions within the brackets,
* respectively. (In the former case, the behavior specified for
* '[^az]' will happen.)
* o [a-z] matches any character between a and z. The characters a and z
* must either both be letters or both be numbers, with the
* character represented by 'a' having a lower ASCII value than
* the character represented by 'z'.
* o [^az] matches any character except a or z. If ] is to appear inside
* the brackets as a character to not match, it must be escaped.
* o pat~pat2 returns matches to the pattern 'pat' which do not also match
* the pattern 'pat2'. This may be used to perform filtering
* upon the results of one pattern to remove all matches which
* also match another pattern. For example, because '*'
* matches any string and '*z*' matches any string containing a
* 'z', '*~*z*' will match all strings except those containing
* a 'z'. Note that a pattern may not use '~' multiple times,
* so a string such as '*~*z*~*y*' is not a valid pattern.
* o [abc] matches one occurrence of a, b, or c.
* o [^az] matches any character except a or z. Between brackets,
* the only characters that must be escaped are \ and ].
* o [a-z] matches any character between a and z, inclusive.
* The a and z characters must be alphanumeric ASCII characters.
* If one is upper case and one is lower case, then the ASCII
* non-alphanumeric characters between Z and a will be in range.
* o [^a-z] matches any character except those between a and z, inclusive.
* These forms cannot be combined, e.g. [a-gp-z] does not work.
* o yes~no returns matches to the pattern 'yes' that do not also match
* the pattern 'no'. This may be used to filter the results
* of one pattern to remove all matches of a second pattern.
* Only the outer-most pattern may use this, and at most once.
* For example: *~abc will match any string except abc .
* o (foo|bar) will match either the pattern foo or the pattern bar.
* Neither of the patterns foo or bar may use the 'pat~pat2'
* syntax described immediately above.
* o \ will escape a special character. Escaping is required for all
* special characters unless otherwise specified.
* At least one pipe and two inner patterns are required.
* More are allowed. These inner patterns may NOT use the
* 'yes~no' syntax described immediately above, and may not
* contain patterns of this same (foo|bar) form.
* o \ will escape a special character. To treat special characters as
* ordinary matching characters, escaping is required for all
* special characters, unless otherwise specified above.
* o All other characters match case-sensitively.
*
* An aPattern not conforming to this syntax has undefined behavior.

View File

@ -1,4 +1,3 @@
/* -*- Mode: C; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 2 -*- */
/* ***** BEGIN LICENSE BLOCK *****
* Version: MPL 1.1/GPL 2.0/LGPL 2.1
*
@ -16,10 +15,13 @@
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* Portions created by the Initial Developer are Copyright (C) 1998-2009
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Rob McCool (original author)
* Ken Key <key+mozilla@ksquared.net>
* Nelson Bolyard <nelson@bolyard.me>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@ -36,7 +38,6 @@
* ***** END LICENSE BLOCK ***** */
/* *
*
*
* nsWildCard.cpp: shell-like wildcard match routines
*
@ -44,7 +45,7 @@
* a description of the syntax supported by the routines in this file.
*
* Rob McCool
*
*
*/
#include "nsWildCard.h"
@ -54,82 +55,85 @@
/* ----------------------------- _valid_subexp ------------------------------ */
static int
_valid_subexp(char *expr, char stop)
static int
_valid_subexp(const char *expr, char stop1, char stop2)
{
register int x,y,t;
int nsc,np,tld;
register int x;
int nsc = 0; /* Number of special characters */
int np; /* Number of pipe characters in union */
int tld = 0; /* Number of tilde characters */
x=0;nsc=0;tld=0;
while(expr[x] && (expr[x] != stop)) {
for (x = 0; expr[x] && (expr[x] != stop1) && (expr[x] != stop2); ++x) {
switch(expr[x]) {
case '~':
if(tld) return INVALID_SXP;
else ++tld;
case '*':
case '?':
case '^':
case '$':
case '~':
if(tld) /* at most one exclusion */
return INVALID_SXP;
if (stop1) /* no exclusions within unions */
return INVALID_SXP;
if (!expr[x+1]) /* exclusion cannot be last character */
return INVALID_SXP;
if (!x) /* exclusion cannot be first character */
return INVALID_SXP;
++tld;
/* fall through */
case '*':
case '?':
case '$':
++nsc;
break;
case '[':
case '[':
++nsc;
if((!expr[++x]) || (expr[x] == ']'))
return INVALID_SXP;
for(;expr[x] && (expr[x] != ']');++x)
if(expr[x] == '\\')
if(!expr[++x])
return INVALID_SXP;
for(; expr[x] && (expr[x] != ']'); ++x) {
if(expr[x] == '\\' && !expr[++x])
return INVALID_SXP;
}
if(!expr[x])
return INVALID_SXP;
break;
case '(':
++nsc;np = 0;
while(1) {
if(expr[++x] == ')')
return INVALID_SXP;
for(y=x;(expr[y]) && (expr[y] != '|') && (expr[y] != ')');++y)
if(expr[y] == '\\')
if(!expr[++y])
return INVALID_SXP;
if(!expr[y])
return INVALID_SXP;
if(expr[y] == '|')
++np;
t = _valid_subexp(&expr[x],expr[y]);
if(t == INVALID_SXP)
case '(':
++nsc;
if (stop1) /* no nested unions */
return INVALID_SXP;
np = -1;
do {
int t = _valid_subexp(&expr[++x], ')', '|');
if(t == 0 || t == INVALID_SXP)
return INVALID_SXP;
x+=t;
if(expr[x] == ')') {
if(!np)
return INVALID_SXP;
break;
}
}
if(!expr[x])
return INVALID_SXP;
++np;
} while (expr[x] == '|' );
if(np < 1) /* must be at least one pipe */
return INVALID_SXP;
break;
case ')':
case ']':
case ')':
case ']':
case '|':
return INVALID_SXP;
case '\\':
case '\\':
++nsc;
if(!expr[++x])
return INVALID_SXP;
default:
break;
default:
break;
}
++x;
}
if((!stop) && (!nsc))
if((!stop1) && (!nsc)) /* must be at least one special character */
return NON_SXP;
return ((expr[x] == stop) ? x : INVALID_SXP);
return ((expr[x] == stop1 || expr[x] == stop2) ? x : INVALID_SXP);
}
int
NS_WildCardValid(char *expr)
int
NS_WildCardValid(const char *expr)
{
int x;
x = _valid_subexp(expr, '\0');
x = _valid_subexp(expr, '\0', '\0');
return (x < 0 ? x : VALID_SXP);
}
@ -141,160 +145,263 @@ NS_WildCardValid(char *expr)
#define NOMATCH 1
#define ABORTED -1
static int _shexp_match(char *str, char *expr, PRBool case_insensitive);
static int
_shexp_match(const char *str, const char *expr, PRBool case_insensitive,
unsigned int level);
static int
_handle_union(char *str, char *expr, PRBool case_insensitive)
/* Count characters until we reach a NUL character or either of the
* two delimiter characters, stop1 or stop2. If we encounter a bracketed
* expression, look only for NUL or ']' inside it. Do not look for stop1
* or stop2 inside it. Return ABORTED if bracketed expression is unterminated.
* Handle all escaping.
* Return index in input string of first stop found, or ABORTED if not found.
* If "dest" is non-NULL, copy counted characters to it and NUL terminate.
*/
static int
_scan_and_copy(const char *expr, char stop1, char stop2, char *dest)
{
char *e2 = (char *) PR_Malloc(sizeof(char)*strlen(expr));
register int t,p2,p1 = 1;
int cp;
register int sx; /* source index */
register char cc;
while(1) {
for(cp=1;expr[cp] != ')';cp++)
if(expr[cp] == '\\')
++cp;
for(p2 = 0;(expr[p1] != '|') && (p1 != cp);p1++,p2++) {
if(expr[p1] == '\\')
e2[p2++] = expr[p1++];
e2[p2] = expr[p1];
for (sx = 0; (cc = expr[sx]) && cc != stop1 && cc != stop2; sx++) {
if (cc == '\\') {
if (!expr[++sx])
return ABORTED; /* should be impossible */
}
for (t=cp+1; ((e2[p2] = expr[t]) != 0); ++t,++p2) {}
if(_shexp_match(str,e2, case_insensitive) == MATCH) {
PR_Free(e2);
return MATCH;
else if (cc == '[') {
while ((cc = expr[++sx]) && cc != ']') {
if(cc == '\\' && !expr[++sx])
return ABORTED;
}
if (!cc)
return ABORTED; /* should be impossible */
}
if(p1 == cp) {
PR_Free(e2);
return NOMATCH;
}
else ++p1;
}
if (dest && sx) {
/* Copy all but the closing delimiter. */
memcpy(dest, expr, sx);
dest[sx] = 0;
}
return cc ? sx : ABORTED; /* index of closing delimiter */
}
static int
_shexp_match(char *str, char *expr, PRBool case_insensitive)
/* On input, expr[0] is the opening parenthesis of a union.
* See if any of the alternatives in the union matches as a pattern.
* The strategy is to take each of the alternatives, in turn, and append
* the rest of the expression (after the closing ')' that marks the end of
* this union) to that alternative, and then see if the resultant expression
* matches the input string. Repeat this until some alternative matches,
* or we have an abort.
*/
static int
_handle_union(const char *str, const char *expr, PRBool case_insensitive,
unsigned int level)
{
register int x,y;
register int sx; /* source index */
int cp; /* source index of closing parenthesis */
int count;
int ret = NOMATCH;
char *e2;
/* Find the closing parenthesis that ends this union in the expression */
cp = _scan_and_copy(expr, ')', '\0', NULL);
if (cp == ABORTED || cp < 4) /* must be at least "(a|b" before ')' */
return ABORTED;
++cp; /* now index of char after closing parenthesis */
e2 = (char *) PR_Malloc(1 + strlen(expr));
if (!e2)
return ABORTED;
for (sx = 1; ret == NOMATCH && expr[sx] && expr[sx] != ')'; ++sx) {
/* Here, expr[sx] is one character past the preceding '(' or '|'. */
/* Copy everything up to the next delimiter to e2 */
count = _scan_and_copy(expr + sx, ')', '|', e2);
if (count == ABORTED || !count) {
ret = ABORTED;
break;
}
sx += count;
/* Append everything after closing parenthesis to e2. This is safe. */
strcpy(e2+count, expr+cp);
ret = _shexp_match(str, e2, case_insensitive, level + 1);
}
PR_Free(e2);
if (sx < 2)
ret = ABORTED;
return ret;
}
/* Case-insensitive range test.
 * Returns 1 when some character c with start <= c <= end folds (via
 * tolower) to the same value as val, and 0 otherwise.  An empty range
 * (start > end) therefore yields 0.
 */
static int
_is_char_in_range(int start, int end, int val)
{
    const int folded = tolower(val);
    int c;

    /* Walk the range and compare lower-cased forms directly. */
    for (c = start; c <= end; ++c) {
        if (tolower(c) == folded)
            return 1;
    }
    return 0;
}
static int
_shexp_match(const char *str, const char *expr, PRBool case_insensitive,
unsigned int level)
{
register int x; /* input string index */
register int y; /* expression index */
int ret,neg;
ret = 0;
for(x=0,y=0;expr[y];++y,++x) {
if((!str[x]) && (expr[y] != '(') && (expr[y] != '$') && (expr[y] != '*'))
ret = ABORTED;
else {
switch(expr[y]) {
case '$':
if( (str[x]) )
ret = NOMATCH;
else
--x; /* we don't want loop to increment x */
break;
case '*':
while(expr[++y] == '*'){}
if(!expr[y])
if (level > 20) /* Don't let the stack get too deep. */
return ABORTED;
for(x = 0, y = 0; expr[y]; ++y, ++x) {
if((!str[x]) && (expr[y] != '$') && (expr[y] != '*')) {
return NOMATCH;
}
switch(expr[y]) {
case '$':
if(str[x])
return NOMATCH;
--x; /* we don't want loop to increment x */
break;
case '*':
while(expr[++y] == '*'){}
if(!expr[y])
return MATCH;
while(str[x]) {
ret = _shexp_match(&str[x++], &expr[y], case_insensitive,
level + 1);
switch(ret) {
case NOMATCH:
continue;
case ABORTED:
return ABORTED;
default:
return MATCH;
while(str[x]) {
switch(_shexp_match(&str[x++],&expr[y], case_insensitive)) {
case NOMATCH:
continue;
case ABORTED:
ret = ABORTED;
break;
default:
return MATCH;
}
break;
}
if((expr[y] == '$') && (expr[y+1] == '\0') && (!str[x]))
return MATCH;
else
ret = ABORTED;
break;
case '[':
neg = ((expr[++y] == '^') && (expr[y+1] != ']'));
if (neg)
++y;
if ((isalnum(expr[y])) && (expr[y+1] == '-') &&
(isalnum(expr[y+2])) && (expr[y+3] == ']'))
{
int start = expr[y], end = expr[y+2];
/* Droolproofing for pinheads not included */
if(neg ^ ((str[x] < start) || (str[x] > end))) {
ret = NOMATCH;
break;
}
y+=3;
}
if((expr[y] == '$') && (expr[y+1] == '\0') && (!str[x]))
return MATCH;
else
return NOMATCH;
case '[': {
int start, end = 0, i;
neg = ((expr[++y] == '^') && (expr[y+1] != ']'));
if (neg)
++y;
i = y;
start = (unsigned char)(expr[i++]);
if (start == '\\')
start = (unsigned char)(expr[i++]);
if (isalnum(start) && expr[i++] == '-') {
end = (unsigned char)(expr[i++]);
if (end == '\\')
end = (unsigned char)(expr[i++]);
}
if (isalnum(end) && expr[i] == ']') {
/* This is a range form: a-b */
int val = (unsigned char)(str[x]);
if (end < start) { /* swap them */
int tmp = end;
end = start;
start = tmp;
}
if (case_insensitive && isalpha(val)) {
val = _is_char_in_range(start, end, val);
if (neg == val)
return NOMATCH;
}
else if (neg != ((val < start) || (val > end))) {
return NOMATCH;
}
y = i;
}
else {
/* Not range form */
int matched = 0;
for (; expr[y] != ']'; y++) {
if (expr[y] == '\\')
++y;
if(case_insensitive) {
matched |= (toupper(str[x]) == toupper(expr[y]));
}
else {
int matched;
for (matched=0;expr[y] != ']';y++) {
/* match an escaped ']' character */
if('\\' == expr[y] && ']' == expr[y+1]) {
if(']' == str[x])
matched |= 1;
y++; /* move an extra char to compensate for '\\' */
continue;
}
else {
matched |= (str[x] == expr[y]);
}
if (neg ^ (!matched))
ret = NOMATCH;
}
break;
case '(':
return _handle_union(&str[x],&expr[y], case_insensitive);
break;
case '?':
break;
case '\\':
++y;
default:
if(case_insensitive)
{
if(toupper(str[x]) != toupper(expr[y]))
ret = NOMATCH;
}
else
{
if(str[x] != expr[y])
ret = NOMATCH;
}
break;
if (neg == matched)
return NOMATCH;
}
}
if(ret)
break;
case '(':
if (!expr[y+1])
return ABORTED;
return _handle_union(&str[x], &expr[y], case_insensitive, level);
case '?':
break;
}
return (ret ? ret : (str[x] ? NOMATCH : MATCH));
}
int
NS_WildCardMatch(char *str, char *xp, PRBool case_insensitive) {
register int x;
char *expr = PL_strdup(xp);
if(!expr)
return 1;
for(x=strlen(expr)-1;x;--x) {
if((expr[x] == '~') && (expr[x-1] != '\\')) {
expr[x] = '\0';
if(_shexp_match(str,&expr[++x], case_insensitive) == MATCH)
goto punt;
case ')':
case ']':
case '|':
return ABORTED;
case '\\':
++y;
/* fall through */
default:
if(case_insensitive) {
if(toupper(str[x]) != toupper(expr[y]))
return NOMATCH;
}
else {
if(str[x] != expr[y])
return NOMATCH;
}
break;
}
}
if(_shexp_match(str,expr, case_insensitive) == MATCH) {
PR_Free(expr);
return 0;
}
punt:
PR_Free(expr);
return 1;
return (str[x] ? NOMATCH : MATCH);
}
/* Match str against the pattern xp, honoring one top-level "yes~no"
 * exclusion.
 *
 * If xp contains no '~' at all, it is handed straight to _shexp_match.
 * Otherwise a private copy of xp is split at the first '~' located by
 * _scan_and_copy; the part after the '~' is matched first and its
 * MATCH/NOMATCH result is inverted, and only if the string survives
 * that exclusion is the part before the '~' matched.
 *
 * Returns MATCH, NOMATCH, or ABORTED as produced by _shexp_match, or
 * NOMATCH when the copy of the pattern cannot be allocated.
 */
static int
ns_WildCardMatch(const char *str, const char *xp, PRBool case_insensitive)
{
    char *copy;
    int tilde;
    int result;

    /* No '~' anywhere: no exclusion is possible, so no copy is needed. */
    if (strchr(xp, '~') == NULL)
        return _shexp_match(str, xp, case_insensitive, 0);

    copy = PL_strdup(xp);
    if (copy == NULL)
        return NOMATCH;

    result = MATCH;
    tilde = _scan_and_copy(copy, '~', '\0', NULL);
    if (tilde != ABORTED && copy[tilde] == '~') {
        /* Cut the pattern in two at the '~'. */
        copy[tilde] = '\0';
        result = _shexp_match(str, copy + tilde + 1, case_insensitive, 0);
        /* Matching the exclusion means rejection, and vice versa;
         * any other result (ABORTED) is passed through unchanged. */
        if (result == NOMATCH)
            result = MATCH;
        else if (result == MATCH)
            result = NOMATCH;
    }

    /* Only consult the main pattern if the exclusion did not reject. */
    if (result == MATCH)
        result = _shexp_match(str, copy, case_insensitive, 0);

    PR_Free(copy);
    return result;
}
/* Public entry point: validate expr, then match str against it.
 *
 * Returns -1 when NS_WildCardValid reports INVALID_SXP.  When it
 * reports NON_SXP the two strings are compared directly (with
 * PL_strcasecmp if case_insensitive is set, strcmp otherwise) and
 * MATCH or NOMATCH is returned.  For any other validation result the
 * outcome of ns_WildCardMatch is returned.
 */
int
NS_WildCardMatch(const char *str, const char *expr, PRBool case_insensitive)
{
    const int validity = NS_WildCardValid(expr);
    int differs;

    if (validity == INVALID_SXP)
        return -1;

    if (validity != NON_SXP)
        return ns_WildCardMatch(str, expr, case_insensitive);

    /* Plain string: a direct comparison is sufficient. */
    if (case_insensitive)
        differs = PL_strcasecmp(expr, str);
    else
        differs = strcmp(expr, str);
    return differs ? NOMATCH : MATCH;
}

View File

@ -16,10 +16,11 @@
*
* The Initial Developer of the Original Code is
* Netscape Communications Corporation.
* Portions created by the Initial Developer are Copyright (C) 1998
* Portions created by the Initial Developer are Copyright (C) 1998-2009
* the Initial Developer. All Rights Reserved.
*
* Contributor(s):
* Nelson Bolyard <nelson@bolyard.me>
*
* Alternatively, the contents of this file may be used under the terms of
* either the GNU General Public License Version 2 or later (the "GPL"), or
@ -71,7 +72,7 @@
#define INVALID_SXP -2
#define VALID_SXP 1
extern int NS_WildCardValid(char *expr);
extern int NS_WildCardValid(const char *expr);
/* return values for the search routines */
@ -87,6 +88,7 @@ extern int NS_WildCardValid(char *expr);
* Returns 0 on match, 1 on non-match, and -1 if expr is not a valid
* pattern or the match was aborted.
*/
extern int NS_WildCardMatch(char *str, char *expr, PRBool case_insensitive);
extern int
NS_WildCardMatch(const char *str, const char *expr, PRBool case_insensitive);
#endif /* nsWildCard_h__ */