Mozilla/mozilla/js2/src/regexpwrapper.cpp

/* -*- Mode: C++; tab-width: 4; indent-tabs-mode: nil; c-basic-offset: 4 -*-
 *
 * ***** BEGIN LICENSE BLOCK *****
 * Version: MPL 1.1/GPL 2.0/LGPL 2.1
 *
 * The contents of this file are subject to the Mozilla Public License Version
 * 1.1 (the "License"); you may not use this file except in compliance with
 * the License. You may obtain a copy of the License at
 * http://www.mozilla.org/MPL/
 *
 * Software distributed under the License is distributed on an "AS IS" basis,
 * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License
 * for the specific language governing rights and limitations under the
 * License.
 *
 * The Original Code is the JavaScript 2 Prototype.
 *
 * The Initial Developer of the Original Code is
 * Netscape Communications Corporation.
 * Portions created by the Initial Developer are Copyright (C) 1998
 * the Initial Developer. All Rights Reserved.
 *
 * Contributor(s):
 *
 * Alternatively, the contents of this file may be used under the terms of
 * either the GNU General Public License Version 2 or later (the "GPL"), or
 * the GNU Lesser General Public License Version 2.1 or later (the "LGPL"),
 * in which case the provisions of the GPL or the LGPL are applicable instead
 * of those above. If you wish to allow use of your version of this file only
 * under the terms of either the GPL or the LGPL, and not to allow others to
 * use your version of this file under the terms of the MPL, indicate your
 * decision by deleting the provisions above and replace them with the notice
 * and other provisions required by the GPL or the LGPL. If you do not delete
 * the provisions above, a recipient may use your version of this file under
 * the terms of any one of the MPL, the GPL or the LGPL.
 *
 * ***** END LICENSE BLOCK ***** */

#ifdef _WIN32
#include "msvc_pragma.h"
#endif


#include <algorithm>
#include <assert.h>

#include "world.h"
#include "strings.h"
#include "utilities.h"
#include "js2value.h"

#include <map>
#include <algorithm>
#include <list>
#include <stack>

#include "reader.h"
#include "parser.h"
#include "js2engine.h"
#include "regexp.h"
#include "bytecodecontainer.h"
#include "js2metadata.h"

typedef struct RECharSet {
    bool converted;
    bool sense;
    uint16 length;
    union {
        uint8 *bits;
        struct {
            uint16 startIndex;
            uint16 length;
        } src;
    } u;
} RECharSet;

namespace JavaScript {
namespace MetaData {

void JS_ReportOutOfMemory(JS2Metadata *meta)
{
    meta->reportError(Exception::internalError, "out of memory", 0);
}

void js_ReportCompileErrorNumber(JS2Metadata *meta, const char *message, const jschar *extra)
{
    meta->reportError(Exception::syntaxError, message, 0, extra);
}

void JS_ReportErrorNumber(JS2Metadata *meta, const char *message)
{
    meta->reportError(Exception::syntaxError, message, 0);
}


#define JS_FALSE false
#define JS_TRUE true

#define JSMSG_MISSING_PAREN         "Missing parentheses, {0}"
#define JSMSG_TOO_MANY_PARENS       "Too many parentheses, {0}"
#define JSMSG_BAD_QUANTIFIER        "Bad quantifer specification, {0}"
#define JSMSG_MIN_TOO_BIG           "Range minimun too big, {0}"
#define JSMSG_MAX_TOO_BIG           "Range maximum too big, {0}"
#define JSMSG_OUT_OF_ORDER          "Range out of order, {0}"
#define JSMSG_BAD_CLASS_RANGE       "Bad range in class"
#define JSMSG_TRAILING_SLASH        "Trailing slash, {0}"
#define JSMSG_UNTERM_CLASS          "Unterminated class, {0}"
#define JSMSG_UNTERM_QUANTIFIER     "Unterminated quantifier"

#define LINE_SEPARATOR      0x2028
#define PARAGRAPH_SEPARATOR 0x2029


#define JS_ASSERT(x) ASSERT(x)
#define JSSTRING_CHARS(x) (x)


#define JS7_ISDEC(c)    ((c) < 128 && isdigit(c))
#define JS7_UNDEC(c)    ((c) - '0')
#define JS7_ISLET(c)    ((c) < 128 && isalpha(c))

#define JUMP_OFFSET_LEN         2
#define JUMP_OFFSET_HI(off)     ((jsbytecode)((off) >> 8))
#define JUMP_OFFSET_LO(off)     ((jsbytecode)(off))
#define GET_JUMP_OFFSET(pc)     ((int16)(((pc)[1] << 8) | (pc)[2]))
#define SET_JUMP_OFFSET(pc,off) ((pc)[1] = JUMP_OFFSET_HI(off),               \
				 (pc)[2] = JUMP_OFFSET_LO(off))
#define JUMP_OFFSET_MIN         ((int16)0x8000)
#define JUMP_OFFSET_MAX         ((int16)0x7fff)


typedef bool JSBool;
typedef uint32 uintN;
typedef int32 intN;
typedef uint32 jsint;
typedef char16 JSString;
typedef char16 JSSubString;


typedef struct REMatchState {
    const jschar *cp;
    RECapture parens[1];      /* first of 're->parenCount' captures,
                               * allocated at end of this struct.
                               */
} REMatchState;

/* Note : contiguity of 'simple opcodes' is important for simpleMatch() */
typedef enum REOp {
    REOP_EMPTY         = 0,  /* match rest of input against rest of r.e. */
    REOP_ALT           = 1,  /* alternative subexpressions in kid and next */
    REOP_SIMPLE_START  = 2,  /* start of 'simple opcodes' */
    REOP_BOL           = 2,  /* beginning of input (or line if multiline) */
    REOP_EOL           = 3,  /* end of input (or line if multiline) */
    REOP_WBDRY         = 4,  /* match "" at word boundary */
    REOP_WNONBDRY      = 5,  /* match "" at word non-boundary */
    REOP_DOT           = 6,  /* stands for any character */
    REOP_DIGIT         = 7,  /* match a digit char: [0-9] */
    REOP_NONDIGIT      = 8,  /* match a non-digit char: [^0-9] */
    REOP_ALNUM         = 9,  /* match an alphanumeric char: [0-9a-z_A-Z] */
    REOP_NONALNUM      = 10, /* match a non-alphanumeric char: [^0-9a-z_A-Z] */
    REOP_SPACE         = 11, /* match a whitespace char */
    REOP_NONSPACE      = 12, /* match a non-whitespace char */
    REOP_BACKREF       = 13, /* back-reference (e.g., \1) to a parenthetical */
    REOP_FLAT          = 14, /* match a flat string */
    REOP_FLAT1         = 15, /* match a single char */
    REOP_FLATi         = 16, /* case-independent REOP_FLAT */
    REOP_FLAT1i        = 17, /* case-independent REOP_FLAT1 */
    REOP_UCFLAT1       = 18, /* single Unicode char */
    REOP_UCFLAT1i      = 19, /* case-independent REOP_UCFLAT1 */
    REOP_UCFLAT        = 20, /* flat Unicode string; len immediate counts chars */
    REOP_UCFLATi       = 21, /* case-independent REOP_UCFLAT */
    REOP_CLASS         = 22, /* character class with index */
    REOP_NCLASS        = 23, /* negated character class with index */
    REOP_SIMPLE_END    = 23, /* end of 'simple opcodes' */
    REOP_QUANT         = 25, /* quantified atom: atom{1,2} */
    REOP_STAR          = 26, /* zero or more occurrences of kid */
    REOP_PLUS          = 27, /* one or more occurrences of kid */
    REOP_OPT           = 28, /* optional subexpression in kid */
    REOP_LPAREN        = 29, /* left paren bytecode: kid is u.num'th sub-regexp */
    REOP_RPAREN        = 30, /* right paren bytecode */
    REOP_JUMP          = 31, /* for deoptimized closure loops */
    REOP_DOTSTAR       = 32, /* optimize .* to use a single opcode */
    REOP_ANCHOR        = 33, /* like .* but skips left context to unanchored r.e. */
    REOP_EOLONLY       = 34, /* $ not preceded by any pattern */
    REOP_BACKREFi      = 37, /* case-independent REOP_BACKREF */
    REOP_LPARENNON     = 41, /* non-capturing version of REOP_LPAREN */
    REOP_ASSERT        = 43, /* zero width positive lookahead assertion */
    REOP_ASSERT_NOT    = 44, /* zero width negative lookahead assertion */
    REOP_ASSERTTEST    = 45, /* sentinel at end of assertion child */
    REOP_ASSERTNOTTEST = 46, /* sentinel at end of !assertion child */
    REOP_MINIMALSTAR   = 47, /* non-greedy version of * */
    REOP_MINIMALPLUS   = 48, /* non-greedy version of + */
    REOP_MINIMALOPT    = 49, /* non-greedy version of ? */
    REOP_MINIMALQUANT  = 50, /* non-greedy version of {} */
    REOP_ENDCHILD      = 51, /* sentinel at end of quantifier child */
    REOP_REPEAT        = 52, /* directs execution of greedy quantifier */
    REOP_MINIMALREPEAT = 53, /* directs execution of non-greedy quantifier */
    REOP_ALTPREREQ     = 54, /* prerequisite for ALT, either of two chars */
    REOP_ALTPREREQ2    = 55, /* prerequisite for ALT, a char or a class */
    REOP_ENDALT        = 56, /* end of final alternate */
    REOP_CONCAT        = 57, /* concatenation of terms (parse time only) */

    REOP_END
} REOp;

#define REOP_IS_SIMPLE(op)  (((op) >= REOP_SIMPLE_START) && ((op) <= REOP_SIMPLE_END))

struct RENode {
    REOp            op;         /* r.e. op bytecode */
    RENode          *next;      /* next in concatenation order */
    void            *kid;       /* first operand */
    union {
        void        *kid2;      /* second operand */
        jsint       num;        /* could be a number */
        jsint       parenIndex; /* or a parenthesis index */
        struct {                /* or a quantifier range */
            uint16  min;
            uint16  max;
            JSBool  greedy;
        } range;
        struct {                /* or a character class */
            uint16  startIndex;
            uint16  kidlen;     /* length of string at kid, in jschars */
            uint16  bmsize;     /* bitmap size, based on max char code */
            uint16  index;      /* index into class list */
            JSBool  sense;
        } ucclass;
        struct {                /* or a literal sequence */
            jschar  chr;        /* of one character */
            uint16  length;     /* or many (via the kid) */
        } flat;
        struct {
            RENode  *kid2;      /* second operand from ALT */
            jschar  ch1;        /* match char for ALTPREREQ */
            jschar  ch2;        /* ditto, or class index for ALTPREREQ2 */
        } altprereq;
    } u;
};


#define RE_IS_LETTER(c)      ( ((c >= 'A') && (c <= 'Z')) ||               \
                                ((c >= 'a') && (c <= 'z')) )
#define RE_IS_LINE_TERM(c)  ( (c == '\n') || (c == '\r') ||               \
                                (c == LINE_SEPARATOR) || (c == PARAGRAPH_SEPARATOR))

#define CLASS_CACHE_SIZE (4)
typedef struct CompilerState {
    JS2Metadata     *meta;
    Pool<RENode>    *reNodePool;
    bool            strict;
    const jschar    *cpbegin;
    const jschar    *cpend;
    const jschar    *cp;
    uintN           flags;
    uint16          parenCount;
    uint16          classCount;   /* number of [] encountered */
    size_t          progLength;   /* estimated bytecode length */
    uintN           treeDepth;    /* maximum depth of parse tree */
    RENode          *result;
    struct {
        const jschar *start;        /* small cache of class strings */
        uint16 length;              /* since they're often the same */
        uint16 index;
    } classCache[CLASS_CACHE_SIZE];
} CompilerState;

#define NO_MAX ((uint16)(-1))

typedef struct REProgState {
    jsbytecode *continue_pc;        /* current continuation data */
    REOp continue_op;
    uint16 index;                   /* progress in text */
    uintN parenSoFar;               /* highest indexed paren started */
    union {
        struct {
            uint16 min;             /* current quantifier limits */
            uint16 max;
        } quantifier;
        struct {
            size_t top;             /* backtrack stack state */
            size_t sz;
        } assertion;
    } u;
} REProgState;

typedef struct REBackTrackData {
    size_t sz;                      /* size of previous stack entry */
    jsbytecode *backtrack_pc;       /* where to backtrack to */
    REOp backtrack_op;
    const jschar *cp;               /* index in text of match at backtrack */
    intN parenIndex;                /* start index of saved paren contents */
    uint16 parenCount;              /* # of saved paren contents */
    uint16 precedingStateTop;       /* number of parent states */
    /* saved parent states follow */
    /* saved paren contents follow */
} REBackTrackData;

#define INITIAL_STATESTACK (100)
#define INITIAL_BACKTRACK (8000)

typedef struct REGlobalData {
    JSBool globalMultiline;
    JS2RegExp *regexp;              /* the RE in execution */
    JSBool ok;                      /* runtime error (out_of_memory only?) */
    size_t start;                   /* offset to start at */
    ptrdiff_t skipped;              /* chars skipped anchoring this r.e. */
    const jschar *cpbegin, *cpend;  /* text base address and limit */

    REProgState *stateStack;        /* stack of state of current parents */
    uint16 stateStackTop;
    uint16 maxStateStack;

    REBackTrackData *backTrackStack;/* stack of matched-so-far positions */
    REBackTrackData *backTrackSP;
    size_t maxBackTrack;
    size_t cursz;                   /* size of current stack entry */

} REGlobalData;

bool JS_ISWORD(jschar ch)
{
    CharInfo chi(ch);
    return ch == '_' || isAlphanumeric(chi);
}

bool JS_ISSPACE(jschar ch)
{
    CharInfo chi(ch);
    return isSpace(chi);
}

bool JS_ISDIGIT(jschar ch)
{
    CharInfo chi(ch);
    return isDecimalDigit(chi);
}

/*
 * 1. If IgnoreCase is false, return ch.
 * 2. Let u be ch converted to upper case as if by calling
 *    String.prototype.toUpperCase on the one-character string ch.
 * 3. If u does not consist of a single character, return ch.
 * 4. Let cu be u's character.
 * 5. If ch's code point value is greater than or equal to decimal 128 and cu's
 *    code point value is less than decimal 128, then return ch.
 * 6. Return cu.
 */
static jschar
canonicalize(jschar ch)
{
    jschar cu = toUpper(ch);
    if ((ch >= 128) && (cu < 128)) return ch;
    return cu;
}

/* Construct and initialize an RENode, returning NULL for out-of-memory */
static RENode *
NewRENode(CompilerState *state, REOp op)
{
    RENode *ren;
    ren = new (*state->reNodePool) RENode();

    if (!ren) {
        JS_ReportOutOfMemory(state->meta);
        return NULL;
    }
    ren->op = op;
    ren->next = NULL;
    ren->kid = NULL;
    return ren;
}

/*
 * Validates and converts hex ascii value.
 */
static JSBool
isASCIIHexDigit(jschar c, uintN *digit)
{
    uintN cv = c;

    if (cv < '0')
        return JS_FALSE;
    if (cv <= '9') {
        *digit = cv - '0';
        return JS_TRUE;
    }
    cv |= 0x20;
    if (cv >= 'a' && cv <= 'f') {
        *digit = cv - 'a' + 10;
        return JS_TRUE;
    }
    return JS_FALSE;
}


typedef struct {
    REOp op;
    const jschar *errPos;
    uint16 parenIndex;
} REOpData;


/*
 * Process the op against the two top operands, reducing them to a single
 * operand in the penultimate slot. Update progLength and treeDepth.
 */
static JSBool
processOp(CompilerState *state, REOpData *opData, RENode **operandStack, intN operandSP)
{
    RENode *result;

    switch (opData->op) {
    case REOP_ALT:
        result = NewRENode(state, REOP_ALT);
        if (!result)
            return JS_FALSE;
        result->kid = operandStack[operandSP - 2];
        result->u.kid2 = operandStack[operandSP - 1];
        operandStack[operandSP - 2] = result;
        /*
         * look at both alternates to see if there's a FLAT or a CLASS at
         * the start of each. If so, use a prerequisite match
         */
        ++state->treeDepth;
        if ((((RENode *)(result->kid))->op == REOP_FLAT)
                && (((RENode *)(result->u.kid2))->op == REOP_FLAT)
                && ((state->flags & JSREG_FOLD) == 0) ) {
            result->op = REOP_ALTPREREQ;
            result->u.altprereq.ch1
                = ((RENode *)(result->kid))->u.flat.chr;
            result->u.altprereq.ch2
                = ((RENode *)(result->u.kid2))->u.flat.chr;
            /* ALTPREREQ, <end>, uch1, uch2, <next>, ...,
                                            JUMP, <end> ... ENDALT */
            state->progLength += 13;
        }
        else
            if ((((RENode *)(result->kid))->op == REOP_CLASS)
                    && (((RENode *)(result->kid))->u.ucclass.index < 256)
                    && (((RENode *)(result->u.kid2))->op == REOP_FLAT)
                    && ((state->flags & JSREG_FOLD) == 0) ) {
                result->op = REOP_ALTPREREQ2;
                result->u.altprereq.ch1
                    = ((RENode *)(result->u.kid2))->u.flat.chr;
                result->u.altprereq.ch2
                    = ((RENode *)(result->kid))->u.ucclass.index;
                /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
                                                JUMP, <end> ... ENDALT */
                state->progLength += 13;
            }
            else
                if ((((RENode *)(result->kid))->op == REOP_FLAT)
                        && (((RENode *)(result->u.kid2))->op == REOP_CLASS)
                        && (((RENode *)(result->u.kid2))->u.ucclass.index < 256)
                        && ((state->flags & JSREG_FOLD) == 0) ) {
                    result->op = REOP_ALTPREREQ2;
                    result->u.altprereq.ch1
                        = ((RENode *)(result->kid))->u.flat.chr;
                    result->u.altprereq.ch2
                        = ((RENode *)(result->u.kid2))->u.ucclass.index;
                    /* ALTPREREQ2, <end>, uch1, uch2, <next>, ...,
                                                  JUMP, <end> ... ENDALT */
                    state->progLength += 13;
                }
                else
                    /* ALT, <next>, ..., JUMP, <end> ... ENDALT */
                    state->progLength += 7;
        break;
    case REOP_CONCAT:
        result = operandStack[operandSP - 2];
        while (result->next)
            result = result->next;
        result->next = operandStack[operandSP - 1];
        break;
    case REOP_ASSERT:
    case REOP_ASSERT_NOT:
    case REOP_LPARENNON:
    case REOP_LPAREN:
        /* These should have been processed by a close paren. */
        js_ReportCompileErrorNumber(state->meta, JSMSG_MISSING_PAREN, opData->errPos);
        return JS_FALSE;
    }
    return JS_TRUE;
}

/*
 * Parser forward declarations.
 */
static JSBool parseTerm(CompilerState *state);
static JSBool parseQuantifier(CompilerState *state);

/*
 * Top-down regular expression grammar, based closely on Perl4.
 *
 *  regexp:     altern                  A regular expression is one or more
 *              altern '|' regexp       alternatives separated by vertical bar.
 */

#define INITIAL_STACK_SIZE (128)
static JSBool
parseRegExp(CompilerState *state)
{
    const jschar *errPos;
    uint16 parenIndex;
    RENode *operand;
    REOpData *operatorStack;
    RENode **operandStack;
    REOp op;
    intN i;
    JSBool result = JS_FALSE;

    intN operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE;
    intN operandSP = 0, operandStackSize = INITIAL_STACK_SIZE;

    /* Watch out for empty regexp */
    if (state->cp == state->cpend) {
        state->result = NewRENode(state, REOP_EMPTY);
        return JS_TRUE;
    }

    operatorStack = (REOpData *)malloc(sizeof(REOpData) * operatorStackSize);
    if (!operatorStack)
        return JS_FALSE;

    operandStack = (RENode **)malloc(sizeof(RENode *) * operandStackSize);
    if (!operandStack)
        goto out;


    while (JS_TRUE) {
        if (state->cp != state->cpend) {
            switch (*state->cp) {
                /* balance '(' */
            case '(':           /* balance ')' */
                errPos = state->cp;
                ++state->cp;
                if ((state->cp < state->cpend) && (*state->cp == '?')
                        && ( (state->cp[1] == '=')
                                || (state->cp[1] == '!')
                                || (state->cp[1] == ':') )) {
                    ++state->cp;
                    if (state->cp == state->cpend) {
                        js_ReportCompileErrorNumber(state->meta, JSMSG_MISSING_PAREN,
                                                    errPos);
                        goto out;
                    }
                    switch (*state->cp++) {
                    case '=':
                        op = REOP_ASSERT;
                        /* ASSERT, <next>, ... ASSERTTEST */
                        state->progLength += 4;
                        break;
                    case '!':
                        op = REOP_ASSERT_NOT;
                        /* ASSERTNOT, <next>, ... ASSERTNOTTEST */
                        state->progLength += 4;
                        break;
                    case ':':
                        op = REOP_LPARENNON;
                        break;
                    }
                    parenIndex = state->parenCount;
                }
                else {
                    op = REOP_LPAREN;
                    /* LPAREN, <index>, ... RPAREN, <index> */
                    state->progLength += 6;
                    parenIndex = state->parenCount++;
                    if (state->parenCount == 0) {
                        js_ReportCompileErrorNumber(state->meta, JSMSG_TOO_MANY_PARENS,
                                                    errPos);
                        goto out;
                    }
                }
                goto pushOperator;
            case '|':
            case ')':
                /* Expected an operand before these, so make an empty one */
                operand = NewRENode(state, REOP_EMPTY);
                if (!operand)
                    goto out;
                goto pushOperand;
            default:
                if (!parseTerm(state))
                    goto out;
                operand = state->result;
pushOperand:
                if (operandSP == operandStackSize) {
                    operandStackSize += operandStackSize;
                    operandStack =
                      (RENode **)realloc(operandStack,
                                            sizeof(RENode *) * operandStackSize);
                    if (!operandStack)
                        goto out;
                }
                operandStack[operandSP++] = operand;
                break;
            }
        }
            /* At the end; process remaining operators */
restartOperator:
        if (state->cp == state->cpend) {
            while (operatorSP) {
                --operatorSP;
                if (!processOp(state, &operatorStack[operatorSP],
                               operandStack, operandSP))
                    goto out;
                --operandSP;
            }
            JS_ASSERT(operandSP == 1);
            state->result = operandStack[0];
            result = JS_TRUE;
            goto out;
        }
        switch (*state->cp) {
        case '|':
            /* Process any stacked 'concat' operators */
            ++state->cp;
            while (operatorSP
                    && (operatorStack[operatorSP - 1].op == REOP_CONCAT)) {
                --operatorSP;
                if (!processOp(state, &operatorStack[operatorSP],
                               operandStack, operandSP))
                    goto out;
                --operandSP;
            }
            op = REOP_ALT;
            goto pushOperator;

        case ')':
            /* If there's not a stacked open parentheses,we
             * accept the close as a flat.
             */
            for (i = operatorSP - 1; i >= 0; i--)
                if ((operatorStack[i].op == REOP_ASSERT)
                        || (operatorStack[i].op == REOP_ASSERT_NOT)
                        || (operatorStack[i].op == REOP_LPARENNON)
                        || (operatorStack[i].op == REOP_LPAREN))
                    break;
            if (i == -1) {
                if (!parseTerm(state))
                    goto out;
                operand = state->result;
                goto pushOperand;
            }
            ++state->cp;
            /* process everything on the stack until the open */
            while (JS_TRUE) {
                JS_ASSERT(operatorSP);
                --operatorSP;
                switch (operatorStack[operatorSP].op) {
                case REOP_ASSERT:
                case REOP_ASSERT_NOT:
                case REOP_LPAREN:
                    operand = NewRENode(state, operatorStack[operatorSP].op);
                    if (!operand)
                        goto out;
                    operand->u.parenIndex
                                    = operatorStack[operatorSP].parenIndex;
                    JS_ASSERT(operandSP);
                    operand->kid = operandStack[operandSP - 1];
                    operandStack[operandSP - 1] = operand;
                    ++state->treeDepth;
                    /* fall thru... */
                case REOP_LPARENNON:
                    state->result = operandStack[operandSP - 1];
                    if (!parseQuantifier(state))
                        goto out;
                    operandStack[operandSP - 1] = state->result;
                    goto restartOperator;
                default:
                    if (!processOp(state, &operatorStack[operatorSP],
                                   operandStack, operandSP))
                        goto out;
                    --operandSP;
                    break;
                }
            }
            break;
        default:
            /* Anything else is the start of the next term */
            op = REOP_CONCAT;
pushOperator:
            if (operatorSP == operatorStackSize) {
                operatorStackSize += operatorStackSize;
                operatorStack =
                    (REOpData *)realloc(operatorStack,
                                           sizeof(REOpData) * operatorStackSize);
                if (!operatorStack)
                    goto out;
            }
            operatorStack[operatorSP].op = op;
            operatorStack[operatorSP].errPos = errPos;
            operatorStack[operatorSP++].parenIndex = parenIndex;
            break;
        }
    }
out:
    if (operatorStack)
        free(operatorStack);
    if (operandStack)
        free(operandStack);
    return result;
}

/*
 * Extract and return a decimal value at state->cp, the
 * initial character 'c' has already been read.
 */
static intN
getDecimalValue(jschar c, CompilerState *state)
{
    intN value = JS7_UNDEC(c);
    while (state->cp < state->cpend) {
        c = *state->cp;
        if (!JS7_ISDEC(c))
            break;
        value = (10 * value) + JS7_UNDEC(c);
        ++state->cp;
    }
    return value;
}

/*
 * Calculate the total size of the bitmap required for a class expression.
 */
static JSBool
calculateBitmapSize(CompilerState *state, RENode *target, const jschar *src,
                    const jschar *end)
{
    jschar rangeStart, c;
    uintN n, digit, nDigits, i;
    uintN max = 0;
    JSBool inRange = JS_FALSE;

    target->u.ucclass.bmsize = 0;
    target->u.ucclass.sense = JS_TRUE;

    if (src == end)
        return JS_TRUE;

    if (*src == '^') {
        ++src;
        target->u.ucclass.sense = JS_FALSE;
    }

    while (src != end) {
        uintN localMax = 0;
        switch (*src) {
        case '\\':
            ++src;
            c = *src++;
            switch (c) {
            case 'b':
                localMax = 0x8;
                break;
            case 'f':
                localMax = 0xC;
                break;
            case 'n':
                localMax = 0xA;
                break;
            case 'r':
                localMax = 0xD;
                break;
            case 't':
                localMax = 0x9;
                break;
            case 'v':
                localMax = 0xB;
                break;
            case 'c':
                if (((src + 1) < end) && RE_IS_LETTER(src[1]))
                    localMax = (jschar)(*src++ & 0x1F);
                else
                    localMax = '\\';
                break;
            case 'x':
                nDigits = 2;
                goto lexHex;
            case 'u':
                nDigits = 4;
lexHex:
                n = 0;
                for (i = 0; (i < nDigits) && (src < end); i++) {
                    c = *src++;
                    if (!isASCIIHexDigit(c, &digit)) {
                        /*
                         * Back off to accepting the original
                         *'\' as a literal.
                         */
                        src -= (i + 1);
                        n = '\\';
                        break;
                    }
                    n = (n << 4) | digit;
                }
                localMax = n;
                break;
            case 'd':
                if (inRange) {
                    JS_ReportErrorNumber(state->meta, JSMSG_BAD_CLASS_RANGE);
                    return JS_FALSE;
                }
                localMax = '9';
                break;
            case 'D':
            case 's':
            case 'S':
            case 'w':
            case 'W':
                if (inRange) {
                    JS_ReportErrorNumber(state->meta, JSMSG_BAD_CLASS_RANGE);
                    return JS_FALSE;
                }
                target->u.ucclass.bmsize = 65535;
                return JS_TRUE;
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
                /*
                 *  This is a non-ECMA extension - decimal escapes (in this
                 *  case, octal!) are supposed to be an error inside class
                 *  ranges, but supported here for backwards compatibility.
                 *
                 */
                n = JS7_UNDEC(c);
                c = *src;
                if ('0' <= c && c <= '7') {
                    src++;
                    n = 8 * n + JS7_UNDEC(c);
                    c = *src;
                    if ('0' <= c && c <= '7') {
                        src++;
                        i = 8 * n + JS7_UNDEC(c);
                        if (i <= 0377)
                            n = i;
                        else
                            src--;
                    }
                }
                localMax = n;
                break;

            default:
                localMax = c;
                break;
            }
            break;
        default:
            localMax = *src++;
            break;
        }
        if (inRange) {
            if (rangeStart > localMax) {
                JS_ReportErrorNumber(state->meta, JSMSG_BAD_CLASS_RANGE);
                return JS_FALSE;
            }
            inRange = JS_FALSE;
        }
        else {
            if (src < (end - 1)) {
                if (*src == '-') {
                    ++src;
                    inRange = JS_TRUE;
                    rangeStart = (jschar)localMax;
                    continue;
                }
            }
        }
        if (state->flags & JSREG_FOLD) {
            c = canonicalize((jschar)localMax);
            if (c > localMax)
                localMax = c;
        }
        if (localMax > max)
            max = localMax;
    }
    target->u.ucclass.bmsize = max;
    return JS_TRUE;
}

/*
 *  item:       assertion               An item is either an assertion or
 *              quantatom               a quantified atom.
 *
 *  assertion:  '^'                     Assertions match beginning of string
 *                                      (or line if the class static property
 *                                      RegExp.multiline is true).
 *              '$'                     End of string (or line if the class
 *                                      static property RegExp.multiline is
 *                                      true).
 *              '\b'                    Word boundary (between \w and \W).
 *              '\B'                    Word non-boundary.
 *
 *  quantatom:  atom                    An unquantified atom.
 *              quantatom '{' n ',' m '}'
 *                                      Atom must occur between n and m times.
 *              quantatom '{' n ',' '}' Atom must occur at least n times.
 *              quantatom '{' n '}'     Atom must occur exactly n times.
 *              quantatom '*'           Zero or more times (same as {0,}).
 *              quantatom '+'           One or more times (same as {1,}).
 *              quantatom '?'           Zero or one time (same as {0,1}).
 *
 *              any of which can be optionally followed by '?' for ungreedy
 *
 *  atom:       '(' regexp ')'          A parenthesized regexp (what matched
 *                                      can be addressed using a backreference,
 *                                      see '\' n below).
 *              '.'                     Matches any char except '\n'.
 *              '[' classlist ']'       A character class.
 *              '[' '^' classlist ']'   A negated character class.
 *              '\f'                    Form Feed.
 *              '\n'                    Newline (Line Feed).
 *              '\r'                    Carriage Return.
 *              '\t'                    Horizontal Tab.
 *              '\v'                    Vertical Tab.
 *              '\d'                    A digit (same as [0-9]).
 *              '\D'                    A non-digit.
 *              '\w'                    A word character, [0-9a-z_A-Z].
 *              '\W'                    A non-word character.
 *              '\s'                    A whitespace character, [ \b\f\n\r\t\v].
 *              '\S'                    A non-whitespace character.
 *              '\' n                   A backreference to the nth (n decimal
 *                                      and positive) parenthesized expression.
 *              '\' octal               An octal escape sequence (octal must be
 *                                      two or three digits long, unless it is
 *                                      0 for the null character).
 *              '\x' hex                A hex escape (hex must be two digits).
 *              '\u' unicode            A unicode escape (must be four digits).
 *              '\c' ctrl               A control character, ctrl is a letter.
 *              '\' literalatomchar     Any character except one of the above
 *                                      that follow '\' in an atom.
 *              otheratomchar           Any character not first among the other
 *                                      atom right-hand sides.
 */
static JSBool
parseTerm(CompilerState *state)
{
    jschar c = *state->cp++;
    uintN nDigits;
    uintN num, tmp, n, i;
    const jschar *termStart;
    JSBool foundCachedCopy;

    switch (c) {
    /* assertions and atoms */
    case '^':
        state->result = NewRENode(state, REOP_BOL);
        if (!state->result)
            return JS_FALSE;
        state->progLength++;
        return JS_TRUE;
    case '$':
        state->result = NewRENode(state, REOP_EOL);
        if (!state->result)
            return JS_FALSE;
        state->progLength++;
        return JS_TRUE;
    case '\\':
        if (state->cp >= state->cpend) {
            /* a trailing '\' is an error */
            js_ReportCompileErrorNumber(state->meta, JSMSG_TRAILING_SLASH, state->cp);
            return JS_FALSE;
        }
        c = *state->cp++;
        switch (c) {
        /* assertion escapes */
        case 'b' :
            state->result = NewRENode(state, REOP_WBDRY);
            if (!state->result)
                return JS_FALSE;
            state->progLength++;
            return JS_TRUE;
        case 'B':
            state->result = NewRENode(state, REOP_WNONBDRY);
            if (!state->result)
                return JS_FALSE;
            state->progLength++;
            return JS_TRUE;
        /* Decimal escape */
        case '0':
            if (state->strict)
                c = 0;
            else {
     doOctal:
                num = 0;
                while (state->cp < state->cpend) {
                    if ('0' <= (c = *state->cp) && c <= '7') {
                        state->cp++;
                        tmp = 8 * num + (uintN)JS7_UNDEC(c);
                        if (tmp > 0377)
                            break;
                        num = tmp;
                    }
                    else
                        break;
                }
                c = (jschar)(num);
            }
    doFlat:
            state->result = NewRENode(state, REOP_FLAT);
            if (!state->result)
                return JS_FALSE;
            state->result->u.flat.chr = c;
            state->result->u.flat.length = 1;
            state->progLength += 3;
            break;
        case '1':
        case '2':
        case '3':
        case '4':
        case '5':
        case '6':
        case '7':
        case '8':
        case '9':
            termStart = state->cp - 1;
            num = (uintN)getDecimalValue(c, state);
            if (num > 9 &&
                num > state->parenCount &&
                !(state->strict)) {
                state->cp = termStart;
                goto doOctal;
            }
            state->result = NewRENode(state, REOP_BACKREF);
            if (!state->result)
                return JS_FALSE;
            state->result->u.parenIndex = num - 1;
            state->progLength += 3;
            break;
        /* Control escape */
        case 'f':
            c = 0xC;
            goto doFlat;
        case 'n':
            c = 0xA;
            goto doFlat;
        case 'r':
            c = 0xD;
            goto doFlat;
        case 't':
            c = 0x9;
            goto doFlat;
        case 'v':
            c = 0xB;
            goto doFlat;
        /* Control letter */
        case 'c':
            if (((state->cp + 1) < state->cpend) &&
                                RE_IS_LETTER(state->cp[1]))
                c = (jschar)(*state->cp++ & 0x1F);
            else {
                /* back off to accepting the original '\' as a literal */
                --state->cp;
                c = '\\';
            }
            goto doFlat;
        /* HexEscapeSequence */
        case 'x':
            nDigits = 2;
            goto lexHex;
        /* UnicodeEscapeSequence */
        case 'u':
            nDigits = 4;
lexHex:
            n = 0;
            for (i = 0; (i < nDigits)
                    && (state->cp < state->cpend); i++) {
                uintN digit;
                c = *state->cp++;
                if (!isASCIIHexDigit(c, &digit)) {
                    /*
                     *  back off to accepting the original
                     *  'u' or 'x' as a literal
                     */
                    state->cp -= (i + 2);
                    n = *state->cp++;
                    break;
                }
                n = (n << 4) | digit;
            }
            c = (jschar)(n);
            goto doFlat;
        /* Character class escapes */
        case 'd':
            state->result = NewRENode(state, REOP_DIGIT);
doSimple:
            if (!state->result)
                return JS_FALSE;
            state->progLength++;
            break;
        case 'D':
            state->result = NewRENode(state, REOP_NONDIGIT);
            goto doSimple;
        case 's':
            state->result = NewRENode(state, REOP_SPACE);
            goto doSimple;
        case 'S':
            state->result = NewRENode(state, REOP_NONSPACE);
            goto doSimple;
        case 'w':
            state->result = NewRENode(state, REOP_ALNUM);
            goto doSimple;
        case 'W':
            state->result = NewRENode(state, REOP_NONALNUM);
            goto doSimple;
        /* IdentityEscape */
        default:
            state->result = NewRENode(state, REOP_FLAT);
            if (!state->result)
                return JS_FALSE;
            state->result->u.flat.chr = c;
            state->result->u.flat.length = 1;
            state->result->kid = (void *)(state->cp - 1);
            state->progLength += 3;
            break;
        }
        break;
    case '[':
        state->result = NewRENode(state, REOP_CLASS);
        if (!state->result)
            return JS_FALSE;
        termStart = state->cp;
        state->result->u.ucclass.startIndex = termStart - state->cpbegin;
        while (JS_TRUE) {
            if (state->cp == state->cpend) {
                js_ReportCompileErrorNumber(state->meta, JSMSG_UNTERM_CLASS, termStart);
                return JS_FALSE;
            }
            if (*state->cp == '\\')
                state->cp++;
            else {
                if (*state->cp == ']') {
                    state->result->u.ucclass.kidlen = state->cp - termStart;
                    break;
                }
            }
            state->cp++;
        }
        foundCachedCopy = JS_FALSE;
        for (i = 0; i < CLASS_CACHE_SIZE; i++) {
            if (state->classCache[i].start) {
                if (state->classCache[i].length == state->result->u.ucclass.kidlen) {
                    foundCachedCopy = JS_TRUE;
                    for (n = 0; n < state->classCache[i].length; n++) {
                        if (state->classCache[i].start[n] != termStart[n]) {
                            foundCachedCopy = JS_FALSE;
                            break;
                        }
                    }
                    if (foundCachedCopy) {
                        state->result->u.ucclass.index = state->classCache[i].index;
                        break;
                    }
                }
            }
            else {
                state->classCache[i].start = termStart;
                state->classCache[i].length = state->result->u.ucclass.kidlen;
                state->classCache[i].index = state->classCount;
                break;
            }
        }
        if (!foundCachedCopy)
            state->result->u.ucclass.index = state->classCount++;
        /*
         * Call calculateBitmapSize now as we want any errors it finds
         * to be reported during the parse phase, not at execution.
         */
        if (!calculateBitmapSize(state, state->result, termStart, state->cp++))
            return JS_FALSE;
        state->progLength += 3; /* CLASS, <index> */
        break;

    case '.':
        state->result = NewRENode(state, REOP_DOT);
        goto doSimple;
    case '*':
    case '+':
    case '?':
        js_ReportCompileErrorNumber(state->meta, JSMSG_BAD_QUANTIFIER, state->cp - 1);
        return JS_FALSE;
    case '{':
        /* Treat left-curly in a non-quantifier context as an error only
         * if it's followed immediately by a decimal digit.
         * This is an Perl extension.
         */
        if ((state->cp != state->cpend) && JS7_ISDEC(*state->cp)) {
            js_ReportCompileErrorNumber(state->meta,
                                        JSMSG_BAD_QUANTIFIER, state->cp - 1);
            return JS_FALSE;
        }
        /* fall thru... */
    default:
        state->result = NewRENode(state, REOP_FLAT);
        if (!state->result)
            return JS_FALSE;
        state->result->u.flat.chr = c;
        state->result->u.flat.length = 1;
        state->result->kid = (void *)(state->cp - 1);
        state->progLength += 3;
        break;
    }
    return parseQuantifier(state);
}

static JSBool
parseQuantifier(CompilerState *state)
{
    RENode *term;
    term = state->result;
    if (state->cp < state->cpend) {
        switch (*state->cp) {
        case '+':
            state->result = NewRENode(state, REOP_QUANT);
            if (!state->result)
                return JS_FALSE;
            state->result->u.range.min = 1;
            state->result->u.range.max = NO_MAX;
            /* <PLUS>, <next> ... <ENDCHILD> */
            state->progLength += 4;
            goto quantifier;
        case '*':
            state->result = NewRENode(state, REOP_QUANT);
            if (!state->result)
                return JS_FALSE;
            state->result->u.range.min = 0;
            state->result->u.range.max = NO_MAX;
            /* <STAR>, <next> ... <ENDCHILD> */
            state->progLength += 4;
            goto quantifier;
        case '?':
            state->result = NewRENode(state, REOP_QUANT);
            if (!state->result)
                return JS_FALSE;
            state->result->u.range.min = 0;
            state->result->u.range.max = 1;
            /* <OPT>, <next> ... <ENDCHILD> */
            state->progLength += 4;
            goto quantifier;
        case '{':       /* balance '}' */
            {
                const char *err;
                intN min = 0;
                intN max = -1;
                jschar c;
                const jschar *errp = state->cp++;

                c = *state->cp;
                if (JS7_ISDEC(c)) {
                    ++state->cp;
                    min = getDecimalValue(c, state);
                    c = *state->cp;
                }
                else {
                    /* For Perl etc. compatibility, if a curly is not
                     * followed by a proper digit, back off from it
                     * being a quantifier, and chew it up as a literal
                     * atom next time instead.
                     */
                    --state->cp;
                    return JS_TRUE;
                }
                state->result = NewRENode(state, REOP_QUANT);
                if (!state->result)
                    return JS_FALSE;

                if (min >> 16) {
                    err = JSMSG_MIN_TOO_BIG;
                    goto quantError;
                }
                if (c == ',') {
                    c = *++state->cp;
                    if (JS7_ISDEC(c)) {
                        ++state->cp;
                        max = getDecimalValue(c, state);
                        c = *state->cp;
                        if (max >> 16) {
                            err = JSMSG_MAX_TOO_BIG;
                            goto quantError;
                        }
                        if (min > max) {
                            err = JSMSG_OUT_OF_ORDER;
                            goto quantError;
                        }
                    }
                }
                else {
                    max = min;
                }
                state->result->u.range.min = min;
                state->result->u.range.max = max;
                /* QUANT, <min>, <max>, <next> ... <ENDCHILD> */
                state->progLength += 8;
                                            /* balance '{' */
                if (c == '}')
                    goto quantifier;
                else {
                    err = JSMSG_UNTERM_QUANTIFIER;
quantError:
                    js_ReportCompileErrorNumber(state->meta, err, errp);
                    return JS_FALSE;
                }
            }
        }
    }
    return JS_TRUE;

quantifier:
    ++state->treeDepth;
    ++state->cp;
    state->result->kid = term;
    if ((state->cp < state->cpend) && (*state->cp == '?')) {
        ++state->cp;
        state->result->u.range.greedy = JS_FALSE;
    }
    else
        state->result->u.range.greedy = JS_TRUE;
    return JS_TRUE;
}

#define CHECK_OFFSET(diff) (JS_ASSERT(((diff) >= -32768) && ((diff) <= 32767)))
#define SET_OFFSET(pc,off) ((pc)[0] = JUMP_OFFSET_HI(off),                     \
                                 (pc)[1] = JUMP_OFFSET_LO(off))
#define GET_OFFSET(pc)     ((int16)(((pc)[0] << 8) | (pc)[1]))
#define OFFSET_LEN         (2)
#define GET_ARG(pc)        GET_OFFSET(pc)
#define SET_ARG(pc,arg)    SET_OFFSET(pc,arg)
#define ARG_LEN            OFFSET_LEN

/*
 * Recursively generate bytecode for the tree rooted at t. Iteratively.
 */

typedef struct {
    RENode *nextAlt;
    jsbytecode *nextAltFixup, *nextTermFixup, *endTermFixup;
    RENode *continueNode;
    REOp continueOp;
} EmitStateStackEntry;

static jsbytecode *
emitREBytecode(CompilerState *state, JS2RegExp *re, intN treeDepth,
               jsbytecode *pc, RENode *t)
{
    ptrdiff_t diff;
    RECharSet *charSet;
    EmitStateStackEntry *emitStateSP, *emitStateStack = NULL;
    REOp op;

    if (treeDepth) {
        emitStateStack =
            (EmitStateStackEntry *)malloc(sizeof(EmitStateStackEntry)
                                                                * treeDepth);
        if (!emitStateStack)
            return NULL;
    }
    emitStateSP = emitStateStack;
    op = t->op;

    while (JS_TRUE) {
        *pc++ = op;
        switch (op) {
        case REOP_EMPTY:
            --pc;
            break;

        case REOP_ALTPREREQ2:
        case REOP_ALTPREREQ:
            JS_ASSERT(emitStateSP);
            emitStateSP->endTermFixup = pc;
            pc += OFFSET_LEN;
            SET_ARG(pc, t->u.altprereq.ch1);
            pc += ARG_LEN;
            SET_ARG(pc, t->u.altprereq.ch2);
            pc += ARG_LEN;

            emitStateSP->nextAltFixup = pc;    /* address of next alternate */
            pc += OFFSET_LEN;

            emitStateSP->continueNode = t;
            emitStateSP->continueOp = REOP_JUMP;
            ++emitStateSP;
            JS_ASSERT((emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *)(t->kid);
            op = t->op;
            continue;

        case REOP_JUMP:
            emitStateSP->nextTermFixup = pc;    /* address of following term */
            pc += OFFSET_LEN;
            diff = pc - emitStateSP->nextAltFixup;
            CHECK_OFFSET(diff);
            SET_OFFSET(emitStateSP->nextAltFixup, diff);
            emitStateSP->continueOp = REOP_ENDALT;
            ++emitStateSP;
            JS_ASSERT((emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *)(t->u.kid2);
            op = t->op;
            continue;

        case REOP_ENDALT:
            diff = pc - emitStateSP->nextTermFixup;
            CHECK_OFFSET(diff);
            SET_OFFSET(emitStateSP->nextTermFixup, diff);
            if (t->op != REOP_ALT) {
                diff = pc - emitStateSP->endTermFixup;
                CHECK_OFFSET(diff);
                SET_OFFSET(emitStateSP->endTermFixup, diff);
            }
            break;

        case REOP_ALT:
            JS_ASSERT(emitStateSP);
            emitStateSP->nextAltFixup = pc; /* address of pointer to next alternate */
            pc += OFFSET_LEN;
            emitStateSP->continueNode = t;
            emitStateSP->continueOp = REOP_JUMP;
            ++emitStateSP;
            JS_ASSERT((emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *)(t->kid);
            op = t->op;
            continue;

        case REOP_FLAT:
            /*
             * Consecutize FLAT's if possible.
             */
            if (t->kid) {
                while (t->next && (t->next->op == REOP_FLAT)
                        && (((jschar*)(t->kid) + t->u.flat.length)
                                        == (jschar*)(t->next->kid))) {
                    t->u.flat.length += t->next->u.flat.length;
                    t->next = t->next->next;
                }
            }
            if (t->kid && (t->u.flat.length > 1)) {
                if (state->flags & JSREG_FOLD)
                    pc[-1] = REOP_FLATi;
                else
                    pc[-1] = REOP_FLAT;
                SET_ARG(pc, (jschar *)(t->kid) - state->cpbegin);
                pc += ARG_LEN;
                SET_ARG(pc, t->u.flat.length);
                pc += ARG_LEN;
            }
            else {
                if (t->u.flat.chr < 256) {
                    if (state->flags & JSREG_FOLD)
                        pc[-1] = REOP_FLAT1i;
                    else
                        pc[-1] = REOP_FLAT1;
                    *pc++ = (jsbytecode)(t->u.flat.chr);
                }
                else {
                    if (state->flags & JSREG_FOLD)
                        pc[-1] = REOP_UCFLAT1i;
                    else
                        pc[-1] = REOP_UCFLAT1;
                    SET_ARG(pc, t->u.flat.chr);
                    pc += ARG_LEN;
                }
            }
            break;

        case REOP_LPAREN:
            JS_ASSERT(emitStateSP);
            SET_ARG(pc, t->u.parenIndex);
            pc += ARG_LEN;
            emitStateSP->continueNode = t;
            emitStateSP->continueOp = REOP_RPAREN;
            ++emitStateSP;
            JS_ASSERT((emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *)(t->kid);
            op = t->op;
            continue;
        case REOP_RPAREN:
            SET_ARG(pc, t->u.parenIndex);
            pc += ARG_LEN;
            break;

        case REOP_BACKREF:
            SET_ARG(pc, t->u.parenIndex);
            pc += ARG_LEN;
            break;
        case REOP_ASSERT:
            JS_ASSERT(emitStateSP);
            emitStateSP->nextTermFixup = pc;
            pc += OFFSET_LEN;
            emitStateSP->continueNode = t;
            emitStateSP->continueOp = REOP_ASSERTTEST;
            ++emitStateSP;
            JS_ASSERT((emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *)(t->kid);
            op = t->op;
            continue;
        case REOP_ASSERTTEST:
        case REOP_ASSERTNOTTEST:
            diff = pc - emitStateSP->nextTermFixup;
            CHECK_OFFSET(diff);
            SET_OFFSET(emitStateSP->nextTermFixup, diff);
            break;
        case REOP_ASSERT_NOT:
            JS_ASSERT(emitStateSP);
            emitStateSP->nextTermFixup = pc;
            pc += OFFSET_LEN;
            emitStateSP->continueNode = t;
            emitStateSP->continueOp = REOP_ASSERTNOTTEST;
            ++emitStateSP;
            JS_ASSERT((emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *)(t->kid);
            op = t->op;
            continue;
        case REOP_QUANT:
            JS_ASSERT(emitStateSP);
            if ((t->u.range.min == 0) && (t->u.range.max == (uint16)(-1)))
                pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR;
            else
            if ((t->u.range.min == 0) && (t->u.range.max == 1))
                pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT;
            else
            if ((t->u.range.min == 1) && (t->u.range.max == (uint16)(-1)))
                pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS;
            else {
                if (!t->u.range.greedy) pc[-1] = REOP_MINIMALQUANT;
                SET_ARG(pc, t->u.range.min);
                pc += ARG_LEN;
                SET_ARG(pc, t->u.range.max);
                pc += ARG_LEN;
            }
            emitStateSP->nextTermFixup = pc;
            pc += OFFSET_LEN;
            emitStateSP->continueNode = t;
            emitStateSP->continueOp = REOP_ENDCHILD;
            ++emitStateSP;
            JS_ASSERT((emitStateSP - emitStateStack) <= treeDepth);
            t = (RENode *)(t->kid);
            op = t->op;
            continue;
        case REOP_ENDCHILD:
            diff = pc - emitStateSP->nextTermFixup;
            CHECK_OFFSET(diff);
            SET_OFFSET(emitStateSP->nextTermFixup, diff);
            break;
        case REOP_CLASS:
            if (!t->u.ucclass.sense)
                pc[-1] = REOP_NCLASS;
            SET_ARG(pc, t->u.ucclass.index);
            pc += ARG_LEN;
            charSet = &re->classList[t->u.ucclass.index];
            charSet->converted = JS_FALSE;
            charSet->length = t->u.ucclass.bmsize;
            charSet->u.src.startIndex = t->u.ucclass.startIndex;
            charSet->u.src.length = t->u.ucclass.kidlen;
            charSet->sense = t->u.ucclass.sense;
            break;
        default:
            break;
        }
        t = t->next;
        if (t == NULL) {
            if (emitStateSP == emitStateStack)
                break;
            --emitStateSP;
            t = emitStateSP->continueNode;
            op = emitStateSP->continueOp;
        }
        else
            op = t->op;
    }
    if (emitStateStack)
        free(emitStateStack);
    return pc;
}

/*
 * Save the current state of the match - the position in the input
 * text as well as the position in the bytecode. The state of any
 * parent expressions is also saved (preceding state).
 * Contents of parenCount parentheses from parenIndex are also saved.
 */
static REBackTrackData *
pushBackTrackState(REGlobalData *gData, REOp op,
                   jsbytecode *target, REMatchState *x, const jschar *cp,
                   intN parenIndex, intN parenCount)
{
    intN i;
    REBackTrackData *result
        = (REBackTrackData *)((char *)(gData->backTrackSP) + gData->cursz);

    size_t sz = sizeof(REBackTrackData)
                    + gData->stateStackTop * sizeof(REProgState)
                    + parenCount * sizeof(RECapture);


    if (((char *)result + sz)
                    > (char *)gData->backTrackStack + gData->maxBackTrack) {
        ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack;
        gData->backTrackStack
            = (REBackTrackData *)realloc(gData->backTrackStack,
                                              gData->maxBackTrack
                                               + gData->maxBackTrack);
        gData->maxBackTrack <<= 1;
        if (!gData->backTrackStack)
            return NULL;
        result = (REBackTrackData *)((char *)gData->backTrackStack + offset);
    }
    gData->backTrackSP = result;
    result->sz = gData->cursz;
    gData->cursz = sz;

    result->backtrack_op = op;
    result->backtrack_pc = target;
    result->cp = cp;
    result->parenCount = parenCount;

    result->precedingStateTop = gData->stateStackTop;
    JS_ASSERT(gData->stateStackTop);
    memcpy(result + 1, gData->stateStack,
           sizeof(REProgState) * result->precedingStateTop);

    if (parenCount != -1) {
        result->parenIndex = parenIndex;
        memcpy((char *)(result + 1)
                    + sizeof(REProgState) * result->precedingStateTop,
               &x->parens[parenIndex],
               sizeof(RECapture) * parenCount);
        for (i = 0; i < parenCount; i++)
            x->parens[parenIndex + i].index = -1;
    }

    return result;
}


/*
 *   Consecutive literal characters.
 */
static REMatchState *
flatNMatcher(REGlobalData *gData, REMatchState *x, const jschar *matchChars,
             intN length)
{
    intN i;
    if ((x->cp + length) > gData->cpend)
        return NULL;
    for (i = 0; i < length; i++) {
        if (matchChars[i] != x->cp[i])
            return NULL;
    }
    x->cp += length;
    return x;
}

static REMatchState *
flatNIMatcher(REGlobalData *gData, REMatchState *x, const jschar *matchChars,
              intN length)
{
    intN i;
    if ((x->cp + length) > gData->cpend)
        return NULL;
    for (i = 0; i < length; i++) {
        if (canonicalize(matchChars[i])
                != canonicalize(x->cp[i]))
            return NULL;
    }
    x->cp += length;
    return x;
}

/*
 * 1. Evaluate DecimalEscape to obtain an EscapeValue E.
 * 2. If E is not a character then go to step 6.
 * 3. Let ch be E's character.
 * 4. Let A be a one-element RECharSet containing the character ch.
 * 5. Call CharacterSetMatcher(A, false) and return its Matcher result.
 * 6. E must be an integer. Let n be that integer.
 * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception.
 * 8. Return an internal Matcher closure that takes two arguments, a State x
 *    and a Continuation c, and performs the following:
 *     1. Let cap be x's captures internal array.
 *     2. Let s be cap[n].
 *     3. If s is undefined, then call c(x) and return its result.
 *     4. Let e be x's endIndex.
 *     5. Let len be s's length.
 *     6. Let f be e+len.
 *     7. If f>InputLength, return failure.
 *     8. If there exists an integer i between 0 (inclusive) and len (exclusive)
 *        such that Canonicalize(s[i]) is not the same character as
 *        Canonicalize(Input [e+i]), then return failure.
 *     9. Let y be the State (f, cap).
 *     10. Call c(y) and return its result.
 */
static REMatchState *
backrefMatcher(REGlobalData *gData, REMatchState *x, uintN parenIndex)
{
    uintN len;
    uintN i;
    const jschar *parenContent;
    RECapture *s = &x->parens[parenIndex];
    if (s->index == -1)
        return x;

    len = s->length;
    if ((x->cp + len) > gData->cpend)
        return NULL;

    parenContent = &gData->cpbegin[s->index];
    if (gData->regexp->flags & JSREG_FOLD) {
        for (i = 0; i < len; i++) {
            if (canonicalize(parenContent[i])
                                    != canonicalize(x->cp[i]))
                return NULL;
        }
    }
    else {
        for (i = 0; i < len; i++) {
            if (parenContent[i] != x->cp[i])
                return NULL;
        }
    }
    x->cp += len;
    return x;
}


/* Add a single character to the RECharSet */
static void
addCharacterToCharSet(RECharSet *cs, jschar c)
{
    uintN byteIndex = (uintN)(c / 8);
    JS_ASSERT(c <= cs->length);
    cs->u.bits[byteIndex] |= 1 << (c & 0x7);
}


/* Add a character range, c1 to c2 (inclusive) to the RECharSet */
static void
addCharacterRangeToCharSet(RECharSet *cs, jschar c1, jschar c2)
{
    uintN i;

    uintN byteIndex1 = (uintN)(c1 / 8);
    uintN byteIndex2 = (uintN)(c2 / 8);

    JS_ASSERT((c2 <= cs->length) && (c1 <= c2));

    c1 &= 0x7;
    c2 &= 0x7;

    if (byteIndex1 == byteIndex2)
        cs->u.bits[byteIndex1] |= ((uint8)(0xFF) >> (7 - (c2 - c1))) << c1;
    else {
        cs->u.bits[byteIndex1] |= 0xFF << c1;
        for (i = byteIndex1 + 1; i < byteIndex2; i++)
            cs->u.bits[i] = 0xFF;
        cs->u.bits[byteIndex2] |= (uint8)(0xFF) >> (7 - c2);
    }
}

/* Compile the source of the class into a RECharSet */
static JSBool
processCharSet(REGlobalData *gData, RECharSet *charSet)
{
    const jschar *src = JSSTRING_CHARS(gData->regexp->source)
                                        + charSet->u.src.startIndex;
    const jschar *end = src + charSet->u.src.length;

    jschar rangeStart, thisCh;
    uintN byteLength;
    jschar c;
    uintN n;
    intN nDigits;
    intN i;
    JSBool inRange = JS_FALSE;

    JS_ASSERT(!charSet->converted);
    charSet->converted = JS_TRUE;

    byteLength = (charSet->length / 8) + 1;
    charSet->u.bits = (uint8 *)malloc(byteLength);
    if (!charSet->u.bits)
        return JS_FALSE;
    memset(charSet->u.bits, 0, byteLength);

    if (src == end)
        return JS_TRUE;

    if (*src == '^') {
        JS_ASSERT(charSet->sense == JS_FALSE);
        ++src;
    }
    else
        JS_ASSERT(charSet->sense == JS_TRUE);


    while (src != end) {
        switch (*src) {
        case '\\':
            ++src;
            c = *src++;
            switch (c) {
            case 'b':
                thisCh = 0x8;
                break;
            case 'f':
                thisCh = 0xC;
                break;
            case 'n':
                thisCh = 0xA;
                break;
            case 'r':
                thisCh = 0xD;
                break;
            case 't':
                thisCh = 0x9;
                break;
            case 'v':
                thisCh = 0xB;
                break;
            case 'c':
                if (((src + 1) < end) && JS_ISWORD(src[1]))
                    thisCh = (jschar)(*src++ & 0x1F);
                else {
                    --src;
                    thisCh = '\\';
                }
                break;
            case 'x':
                nDigits = 2;
                goto lexHex;
            case 'u':
                nDigits = 4;
lexHex:
                n = 0;
                for (i = 0; (i < nDigits) && (src < end); i++) {
                    uintN digit;
                    c = *src++;
                    if (!isASCIIHexDigit(c, &digit)) {
                        /*
                         * Back off to accepting the original '\'
                         * as a literal
                         */
                        src -= (i + 1);
                        n = '\\';
                        break;
                    }
                    n = (n << 4) | digit;
                }
                thisCh = (jschar)(n);
                break;
            case '0':
            case '1':
            case '2':
            case '3':
            case '4':
            case '5':
            case '6':
            case '7':
                /*
                 *  This is a non-ECMA extension - decimal escapes (in this
                 *  case, octal!) are supposed to be an error inside class
                 *  ranges, but supported here for backwards compatibility.
                 *
                 */
                n = JS7_UNDEC(c);
                c = *src;
                if ('0' <= c && c <= '7') {
                    src++;
                    n = 8 * n + JS7_UNDEC(c);
                    c = *src;
                    if ('0' <= c && c <= '7') {
                        src++;
                        i = 8 * n + JS7_UNDEC(c);
                        if (i <= 0377)
                            n = i;
                        else
                            src--;
                    }
                }
                thisCh = (jschar)(n);
                break;

            case 'd':
                addCharacterRangeToCharSet(charSet, '0', '9');
                continue;   /* don't need range processing */
            case 'D':
                addCharacterRangeToCharSet(charSet, 0, '0' - 1);
                addCharacterRangeToCharSet(charSet, (jschar)('9' + 1),
                                            (jschar)(charSet->length));
                continue;
            case 's':
                for (i = (intN)(charSet->length); i >= 0; i--)
                    if (JS_ISSPACE(i))
                        addCharacterToCharSet(charSet, (jschar)(i));
                continue;
            case 'S':
                for (i = (intN)(charSet->length); i >= 0; i--)
                    if (!JS_ISSPACE(i))
                        addCharacterToCharSet(charSet, (jschar)(i));
                continue;
            case 'w':
                for (i = (intN)(charSet->length); i >= 0; i--)
                    if (JS_ISWORD(i))
                        addCharacterToCharSet(charSet, (jschar)(i));
                continue;
            case 'W':
                for (i = (intN)(charSet->length); i >= 0; i--)
                    if (!JS_ISWORD(i))
                        addCharacterToCharSet(charSet, (jschar)(i));
                continue;
            default:
                thisCh = c;
                break;

            }
            break;

        default:
            thisCh = *src++;
            break;

        }
        if (inRange) {
            if (gData->regexp->flags & JSREG_FOLD) {
                jschar minch = (jschar)65535;
                jschar maxch = 0;
                /*

                    yuk

                */
                if (rangeStart < minch) minch = rangeStart;
                if (thisCh < minch) minch = thisCh;
                if (canonicalize(rangeStart) < minch)
                                            minch = canonicalize(rangeStart);
                if (canonicalize(thisCh) < minch) minch = canonicalize(thisCh);

                if (rangeStart > maxch) maxch = rangeStart;
                if (thisCh > maxch) maxch = thisCh;
                if (canonicalize(rangeStart) > maxch)
                                            maxch = canonicalize(rangeStart);
                if (canonicalize(thisCh) > maxch) maxch = canonicalize(thisCh);
                addCharacterRangeToCharSet(charSet, minch, maxch);
            }
            else
                addCharacterRangeToCharSet(charSet, rangeStart, thisCh);
            inRange = JS_FALSE;
        }
        else {
            if (gData->regexp->flags & JSREG_FOLD)
                addCharacterToCharSet(charSet, canonicalize(thisCh));
            addCharacterToCharSet(charSet, thisCh);
            if (src < (end - 1)) {
                if (*src == '-') {
                    ++src;
                    inRange = JS_TRUE;
                    rangeStart = thisCh;
                }
            }
        }
    }
    return JS_TRUE;
}

void
js_DestroyRegExp(JS2RegExp *re)
{
    uintN i;
    if (re->classList) {
        for (i = 0; i < re->classCount; i++) {
            if (re->classList[i].converted)
                free(re->classList[i].u.bits);
            re->classList[i].u.bits = NULL;
        }
        free(re->classList);
    }
}

static JSBool
reallocStateStack(REGlobalData *gData)
{
    size_t sz = sizeof(REProgState) * gData->maxStateStack;
    gData->maxStateStack <<= 1;
    gData->stateStack
        = (REProgState *)realloc(gData->stateStack, sz + sz);
    if (!gData->stateStack) {
        gData->ok = JS_FALSE;
        return JS_FALSE;
    }
    return JS_TRUE;
}

/*
*   Apply the current op against the given input to see if
*   it's going to match or fail. Return false if we don't
*   get a match, true if we do and update the state of the
*   input and pc if the update flag is true.
*/
static REMatchState *simpleMatch(REGlobalData *gData, REMatchState *x,
                                 REOp op, jsbytecode **startpc, JSBool update)
{
    REMatchState *result = NULL;
    jschar matchCh;
    intN parenIndex;
    intN offset, length, index;
    jsbytecode *pc = *startpc;  /* pc has already been incremented past op */
    const jschar *source;
    const jschar *startcp = x->cp;
    jschar ch;
    RECharSet *charSet;


    switch (op) {
    default:
        JS_ASSERT(JS_FALSE);
    case REOP_BOL:
        if (x->cp != gData->cpbegin) {
            if (gData->globalMultiline ||
                        (gData->regexp->flags & JSREG_MULTILINE)) {
                if (!RE_IS_LINE_TERM(x->cp[-1]))
                    break;
            }
            else
                break;
        }
        result = x;
        break;
    case REOP_EOL:
        if (x->cp != gData->cpend) {
            if (gData->globalMultiline ||
                        (gData->regexp->flags & JSREG_MULTILINE)) {
                if (!RE_IS_LINE_TERM(*x->cp))
                    break;
            }
            else
                break;
        }
        result = x;
        break;
    case REOP_WBDRY:
        if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1]))
                           ^ !((x->cp != gData->cpend) && JS_ISWORD(*x->cp)))
            result = x;
        break;
    case REOP_WNONBDRY:
        if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1]))
                            ^ ((x->cp != gData->cpend) && JS_ISWORD(*x->cp)))
            result = x;
        break;
    case REOP_DOT:
        if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) {
            result = x;
            result->cp++;
        }
        break;
    case REOP_DIGIT:
        if (x->cp != gData->cpend && JS_ISDIGIT(*x->cp)) {
            result = x;
            result->cp++;
        }
        break;
    case REOP_NONDIGIT:
        if (x->cp != gData->cpend && !JS_ISDIGIT(*x->cp)) {
            result = x;
            result->cp++;
        }
        break;
    case REOP_ALNUM:
        if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) {
            result = x;
            result->cp++;
        }
        break;
    case REOP_NONALNUM:
        if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) {
            result = x;
            result->cp++;
        }
        break;
    case REOP_SPACE:
        if (x->cp != gData->cpend && JS_ISSPACE(*x->cp)) {
            result = x;
            result->cp++;
        }
        break;
    case REOP_NONSPACE:
        if (x->cp != gData->cpend && !JS_ISSPACE(*x->cp)) {
            result = x;
            result->cp++;
        }
        break;
    case REOP_BACKREF:
        parenIndex = GET_ARG(pc);
        pc += ARG_LEN;
        result = backrefMatcher(gData, x, parenIndex);
        break;
    case REOP_FLAT:
        offset = GET_ARG(pc);
        pc += ARG_LEN;
        length = GET_ARG(pc);
        pc += ARG_LEN;
        source = JSSTRING_CHARS(gData->regexp->source) + offset;
        if ((x->cp + length) <= gData->cpend) {
            for (index = 0; index < length; index++) {
                if (source[index] != x->cp[index])
                    return NULL;
            }
            x->cp += length;
            result = x;
        }
        break;
    case REOP_FLAT1:
        matchCh = *pc++;
        if ((x->cp != gData->cpend) && (*x->cp == matchCh)) {
            result = x;
            result->cp++;
        }
        break;
    case REOP_FLATi:
        offset = GET_ARG(pc);
        pc += ARG_LEN;
        length = GET_ARG(pc);
        pc += ARG_LEN;
        source = JSSTRING_CHARS(gData->regexp->source);
        result = flatNIMatcher(gData, x, source + offset, length);
        break;
    case REOP_FLAT1i:
        matchCh = *pc++;
        if ((x->cp != gData->cpend)
                && (canonicalize(*x->cp) == canonicalize(matchCh))) {
            result = x;
            result->cp++;
        }
        break;
    case REOP_UCFLAT1:
        matchCh = GET_ARG(pc);
        pc += ARG_LEN;
        if ((x->cp != gData->cpend) && (*x->cp == matchCh)) {
            result = x;
            result->cp++;
        }
        break;
    case REOP_UCFLAT1i:
        matchCh = GET_ARG(pc);
        pc += ARG_LEN;
        if ((x->cp != gData->cpend)
                && (canonicalize(*x->cp) == canonicalize(matchCh))) {
            result = x;
            result->cp++;
        }
        break;
    case REOP_CLASS:
        index = GET_ARG(pc);
        pc += ARG_LEN;
        if (x->cp != gData->cpend) {
            charSet = &gData->regexp->classList[index];
            JS_ASSERT(charSet->converted);
            ch = *x->cp;
            index = ch / 8;
            if ((charSet->length != 0) &&
                   ( (ch <= charSet->length)
                   && ((charSet->u.bits[index] & (1 << (ch & 0x7))) != 0) )) {
                result = x;
                result->cp++;
            }
        }
        break;
    case REOP_NCLASS:
        index = GET_ARG(pc);
        pc += ARG_LEN;
        if (x->cp != gData->cpend) {
            charSet = &gData->regexp->classList[index];
            JS_ASSERT(charSet->converted);
            ch = *x->cp;
            index = ch / 8;
            if ((charSet->length == 0) ||
                   ( (ch > charSet->length)
                   || ((charSet->u.bits[index] & (1 << (ch & 0x7))) == 0) )) {
                result = x;
                result->cp++;
            }
        }
        break;
    }
    if (result != NULL) {
        if (update)
            *startpc = pc;
        else
            x->cp = startcp;
        return result;
    }
    x->cp = startcp;
    return NULL;
}

static REMatchState *
executeREBytecode(REGlobalData *gData, REMatchState *x, JSBool allowSkip)
{
    REMatchState *result;
    REBackTrackData *backTrackData;
    intN offset;
    jsbytecode *nextpc;
    REOp nextop;
    RECapture *cap;
    REProgState *curState;
    const jschar *startcp;
    uintN parenIndex, k;
    uintN parenSoFar = 0;

    jschar matchCh1, matchCh2;
    RECharSet *charSet;

    JSBool anchor;
    jsbytecode *pc = gData->regexp->program;
    REOp op = (REOp)(*pc++);

    /*
     *   If the first node is a simple match, step the index into
     *   the string until that match is made, or fail if it can't be
     *   found at all.
     */
    if (allowSkip && REOP_IS_SIMPLE(op)) {
        anchor = JS_FALSE;
        while (x->cp <= gData->cpend) {
            nextpc = pc;    /* reset back to start each time */
            result = simpleMatch(gData, x, op, &nextpc, JS_TRUE);
            if (result) {
                anchor = JS_TRUE;
                x = result;
                pc = nextpc;    /* accept skip to next opcode */
                op = (REOp)(*pc++);
                break;
            }
            else {
                gData->skipped++;
                x->cp++;
            }
        }
        if (!anchor)
            return NULL;
    }

    while (JS_TRUE) {
        if (REOP_IS_SIMPLE(op))
            result = simpleMatch(gData, x, op, &pc, JS_TRUE);
        else {
            curState = &gData->stateStack[gData->stateStackTop];
            switch (op) {
            case REOP_EMPTY:
                result = x;
                break;

            case REOP_ALTPREREQ2:
                nextpc = pc + GET_OFFSET(pc);   /* start of next op */
                pc += ARG_LEN;
                matchCh2 = GET_ARG(pc);
                pc += ARG_LEN;
                k = GET_ARG(pc);
                pc += ARG_LEN;

                if (x->cp != gData->cpend) {
                    charSet = &gData->regexp->classList[k];
                    if (!charSet->converted)
                        if (!processCharSet(gData, charSet))
                            return JS_FALSE;
                    matchCh1 = *x->cp;
                    k = matchCh1 / 8;
                    if ((charSet->length != 0) &&
                           ( (matchCh1 <= charSet->length)
                           && ((charSet->u.bits[k]
                                    & (1 << (matchCh1 & 0x7))) != 0) )) {
                        result = NULL;
                        break;
                    }
                }
                else {
                    result = NULL;
                    break;
                }

                if ((x->cp == gData->cpend) || (*x->cp != matchCh2)) {
                    result = NULL;
                    break;
                }
                goto doAlt;

            case REOP_ALTPREREQ:
                nextpc = pc + GET_OFFSET(pc);   /* start of next op */
                pc += ARG_LEN;
                matchCh1 = GET_ARG(pc);
                pc += ARG_LEN;
                matchCh2 = GET_ARG(pc);
                pc += ARG_LEN;
                if ((x->cp == gData->cpend)
                        || ((*x->cp != matchCh1) && (*x->cp != matchCh2))) {
                    result = NULL;
                    break;
                }
                /* else false thru... */

            case REOP_ALT:
doAlt:
                nextpc = pc + GET_OFFSET(pc);   /* start of next alternate */
                pc += ARG_LEN;                  /* start of this alternate */
                curState->parenSoFar = parenSoFar;
                ++gData->stateStackTop;
                if (gData->stateStackTop == gData->maxStateStack)
                    if (!reallocStateStack(gData))
                        return NULL;
                op = (REOp)(*pc++);
                startcp = x->cp;
                if (REOP_IS_SIMPLE(op)) {
                    if (!simpleMatch(gData, x, op, &pc, JS_TRUE)) {
                        op = (REOp)(*nextpc++);
                        pc = nextpc;
                        continue;
                    }
                    else { /* accept the match and move on */
                        result = x;
                        op = (REOp)(*pc++);
                    }
                }
                nextop = (REOp)(*nextpc++);
                if (!pushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0))
                    return NULL;
                continue;

            /*
             * Occurs at (successful) end of REOP_ALT,
             */
            case REOP_JUMP:
                --gData->stateStackTop;
                offset = GET_OFFSET(pc);
                pc += offset;
                op = (REOp)(*pc++);
                continue;

            /*
             * Occurs at last (successful) end of REOP_ALT,
             */
            case REOP_ENDALT:
                --gData->stateStackTop;
                op = (REOp)(*pc++);
                continue;

            case REOP_LPAREN:
                parenIndex = GET_ARG(pc);
                if ((parenIndex + 1) > parenSoFar)
                    parenSoFar = parenIndex + 1;
                pc += ARG_LEN;
                x->parens[parenIndex].index = x->cp - gData->cpbegin;
                x->parens[parenIndex].length = 0;
                op = (REOp)(*pc++);
                continue;
            case REOP_RPAREN:
                parenIndex = GET_ARG(pc);
                pc += ARG_LEN;
                cap = &x->parens[parenIndex];
                cap->length = x->cp - (gData->cpbegin + cap->index);
                op = (REOp)(*pc++);
                continue;

            case REOP_ASSERT:
                nextpc = pc + GET_OFFSET(pc);  /* start of term after ASSERT */
                pc += ARG_LEN;                 /* start of ASSERT child */
                op = (REOp)(*pc++);
                if (REOP_IS_SIMPLE(op)
                        && !simpleMatch(gData, x, op, &pc, JS_FALSE)) {
                    result = NULL;
                    break;
                }
                else {
                    curState->u.assertion.top
                        = (char *)gData->backTrackSP
                                    - (char *)gData->backTrackStack;
                    curState->u.assertion.sz = gData->cursz;
                    curState->index = x->cp - gData->cpbegin;
                    curState->parenSoFar = parenSoFar;
                    ++gData->stateStackTop;
                    if (gData->stateStackTop == gData->maxStateStack)
                        if (!reallocStateStack(gData))
                            return NULL;
                    if (!pushBackTrackState(gData, REOP_ASSERTTEST,
                                            nextpc, x, x->cp, 0, 0))
                        return NULL;
                }
                continue;
            case REOP_ASSERT_NOT:
                nextpc = pc + GET_OFFSET(pc);
                pc += ARG_LEN;
                op = (REOp)(*pc++);
                if (REOP_IS_SIMPLE(op)
                    /* Note - fail to fail! */
                        && simpleMatch(gData, x, op, &pc, JS_FALSE)) {
                    result = NULL;
                    break;
                }
                else {
                    curState->u.assertion.top
                        = (char *)gData->backTrackSP
                                    - (char *)gData->backTrackStack;
                    curState->u.assertion.sz = gData->cursz;
                    curState->index = x->cp - gData->cpbegin;
                    curState->parenSoFar = parenSoFar;
                    ++gData->stateStackTop;
                    if (gData->stateStackTop == gData->maxStateStack)
                        if (!reallocStateStack(gData))
                            return NULL;
                    if (!pushBackTrackState(gData, REOP_ASSERTNOTTEST,
                                            nextpc, x, x->cp, 0, 0))
                        return NULL;
                }
                continue;
            case REOP_ASSERTTEST:
                --gData->stateStackTop;
                --curState;
                x->cp = gData->cpbegin + curState->index;
                gData->backTrackSP
                    = (REBackTrackData *)((char *)gData->backTrackStack
                                                + curState->u.assertion.top);
                gData->cursz = curState->u.assertion.sz;
                if (result != NULL)
                    result = x;
                break;
            case REOP_ASSERTNOTTEST:
                --gData->stateStackTop;
                --curState;
                x->cp = gData->cpbegin + curState->index;
                gData->backTrackSP
                    = (REBackTrackData *)((char *)gData->backTrackStack
                                                + curState->u.assertion.top);
                gData->cursz = curState->u.assertion.sz;
                if (result == NULL)
                    result = x;
                else
                    result = NULL;
                break;

            case REOP_END:
                if (x != NULL)
                    return x;
                break;

            case REOP_STAR:
                curState->u.quantifier.min = 0;
                curState->u.quantifier.max = NO_MAX;
                goto quantcommon;
            case REOP_PLUS:
                curState->u.quantifier.min = 1;
                curState->u.quantifier.max = NO_MAX;
                goto quantcommon;
            case REOP_OPT:
                curState->u.quantifier.min = 0;
                curState->u.quantifier.max = 1;
                goto quantcommon;
            case REOP_QUANT:
                curState->u.quantifier.min = GET_ARG(pc);
                pc += ARG_LEN;
                curState->u.quantifier.max = GET_ARG(pc);
                pc += ARG_LEN;
quantcommon:
                if (curState->u.quantifier.max == 0) {
                    pc = pc + GET_OFFSET(pc);
                    op = (REOp)(*pc++);
                    result = x;
                    continue;
                }
                /* Step over <next> */
                nextpc = pc + ARG_LEN;
                op = (REOp)(*nextpc++);
                startcp = x->cp;
                if (REOP_IS_SIMPLE(op)) {
                    if (!simpleMatch(gData, x, op, &nextpc, JS_TRUE)) {
                        if (curState->u.quantifier.min == 0)
                            result = x;
                        else
                            result = NULL;
                        pc = pc + GET_OFFSET(pc);
                        break;
                    }
                    else {
                        op = (REOp)(*nextpc++);
                        result = x;
                    }
                }
                curState->index = startcp - gData->cpbegin;
                curState->continue_op = REOP_REPEAT;
                curState->continue_pc = pc;
                curState->parenSoFar = parenSoFar;
                ++gData->stateStackTop;
                if (gData->stateStackTop == gData->maxStateStack)
                    if (!reallocStateStack(gData))
                        return NULL;
                if (curState->u.quantifier.min == 0)
                    if (!pushBackTrackState(gData, REOP_REPEAT,
                                            pc, x, startcp, 0, 0))
                        return NULL;
                pc = nextpc;
                continue;

            case REOP_ENDCHILD: /* marks the end of a quantifier child */
                pc = curState[-1].continue_pc;
                op = curState[-1].continue_op;
                continue;

            case REOP_REPEAT:
                --curState;
repeatAgain:
                --gData->stateStackTop;
                if (result == NULL) {
                    /*
                     *  There's been a failure, see if we have enough children.
                     */
                    if (curState->u.quantifier.min == 0) {
                        result = x;
                        goto repeatDone;
                    }
                    break;
                }
                else {
                    if ((curState->u.quantifier.min == 0)
                            && (x->cp == gData->cpbegin + curState->index)) {
                        /* matched an empty string, that'll get us nowhere */
                        result = NULL;
                        break;
                    }
                    if (curState->u.quantifier.min != 0)
                        curState->u.quantifier.min--;
                    if (curState->u.quantifier.max != (uint16)(-1))
                        curState->u.quantifier.max--;
                    if (curState->u.quantifier.max == 0) {
                        result = x;
                        goto repeatDone;
                    }
                    nextpc = pc + ARG_LEN;
                    nextop = (REOp)(*nextpc);
                    startcp = x->cp;
                    if (REOP_IS_SIMPLE(nextop)) {
                        nextpc++;
                        if (!simpleMatch(gData, x, nextop, &nextpc, JS_TRUE)) {
                            if (curState->u.quantifier.min == 0) {
                                result = x;
                                goto repeatDone;
                            }
                            else
                                result = NULL;
                            break;
                        }
                        result = x;
                    }
                    curState->index = startcp - gData->cpbegin;
                    ++gData->stateStackTop;
                    if (gData->stateStackTop == gData->maxStateStack)
                        if (!reallocStateStack(gData))
                            return NULL;
                    if (curState->u.quantifier.min == 0)
                        if (!pushBackTrackState(gData, REOP_REPEAT,
                                                pc, x, startcp,
                                                curState->parenSoFar,
                                                parenSoFar
                                                    - curState->parenSoFar))
                            return NULL;
                    if (*nextpc == REOP_ENDCHILD)
                        goto repeatAgain;
                    pc = nextpc;
                    op = (REOp)(*pc++);
                    parenSoFar = curState->parenSoFar;
                }
                continue;
repeatDone:
                pc = pc + GET_OFFSET(pc);
                break;


            case REOP_MINIMALSTAR:
                curState->u.quantifier.min = 0;
                curState->u.quantifier.max = NO_MAX;
                goto minimalquantcommon;
            case REOP_MINIMALPLUS:
                curState->u.quantifier.min = 1;
                curState->u.quantifier.max = NO_MAX;
                goto minimalquantcommon;
            case REOP_MINIMALOPT:
                curState->u.quantifier.min = 0;
                curState->u.quantifier.max = 1;
                goto minimalquantcommon;
            case REOP_MINIMALQUANT:
                curState->u.quantifier.min = GET_ARG(pc);
                pc += ARG_LEN;
                curState->u.quantifier.max = GET_ARG(pc);
                pc += ARG_LEN;
minimalquantcommon:
                curState->index = x->cp - gData->cpbegin;
                curState->parenSoFar = parenSoFar;
                ++gData->stateStackTop;
                if (gData->stateStackTop == gData->maxStateStack)
                    if (!reallocStateStack(gData))
                        return NULL;
                if (curState->u.quantifier.min != 0) {
                    curState->continue_op = REOP_MINIMALREPEAT;
                    curState->continue_pc = pc;
                    /* step over <next> */
                    pc += ARG_LEN;
                    op = (REOp)(*pc++);
                }
                else {
                    if (!pushBackTrackState(gData, REOP_MINIMALREPEAT,
                                            pc, x, x->cp, 0, 0))
                        return NULL;
                    --gData->stateStackTop;
                    pc = pc + GET_OFFSET(pc);
                    op = (REOp)(*pc++);
                }
                continue;

            case REOP_MINIMALREPEAT:
                --gData->stateStackTop;
                --curState;

                if (result == NULL) {
                    /*
                     * Non-greedy failure - try to consume another child.
                     */
                    if ((curState->u.quantifier.max == (uint16)(-1))
                            || (curState->u.quantifier.max > 0)) {
                        curState->index = x->cp - gData->cpbegin;
                        curState->continue_op = REOP_MINIMALREPEAT;
                        curState->continue_pc = pc;
                        pc += ARG_LEN;
                        for (k = curState->parenSoFar; k < parenSoFar; k++)
                            x->parens[k].index = -1;
                        ++gData->stateStackTop;
                        if (gData->stateStackTop == gData->maxStateStack)
                            if (!reallocStateStack(gData))
                                return NULL;
                        op = (REOp)(*pc++);
                        continue;
                    }
                    else {
                        /* Don't need to adjust pc since we're going to pop. */
                        break;
                    }
                }
                else {
                    if ((curState->u.quantifier.min == 0)
                            && (x->cp == gData->cpbegin + curState->index)) {
                        /* Matched an empty string, that'll get us nowhere. */
                        result = NULL;
                        break;
                    }
                    if (curState->u.quantifier.min != 0)
                        curState->u.quantifier.min--;
                    if (curState->u.quantifier.max != (uint16)(-1))
                        curState->u.quantifier.max--;
                    if (curState->u.quantifier.min != 0) {
                        curState->continue_op = REOP_MINIMALREPEAT;
                        curState->continue_pc = pc;
                        pc += ARG_LEN;
                        for (k = curState->parenSoFar; k < parenSoFar; k++)
                            x->parens[k].index = -1;
                        curState->index = x->cp - gData->cpbegin;
                        ++gData->stateStackTop;
                        if (gData->stateStackTop == gData->maxStateStack)
                            if (!reallocStateStack(gData))
                                return NULL;
                        op = (REOp)(*pc++);
                        continue;
                    }
                    else {
                        curState->index = x->cp - gData->cpbegin;
                        curState->parenSoFar = parenSoFar;
                        ++gData->stateStackTop;
                        if (gData->stateStackTop == gData->maxStateStack)
                            if (!reallocStateStack(gData))
                                return NULL;
                        if (!pushBackTrackState(gData, REOP_MINIMALREPEAT,
                                                pc, x, x->cp,
                                                curState->parenSoFar,
                                                parenSoFar
                                                    - curState->parenSoFar))
                            return NULL;
                        --gData->stateStackTop;
                        pc = pc + GET_OFFSET(pc);
                        op = (REOp)(*pc++);
                        continue;
                    }
                }

            default:
                JS_ASSERT(JS_FALSE);

            }
        }
        /*
         *  If the match failed and there's a backtrack option, take it.
         *  Otherwise this is a complete and utter failure.
         */
        if (result == NULL) {
            if (gData->cursz > 0) {
                backTrackData = gData->backTrackSP;
                gData->cursz = backTrackData->sz;
                gData->backTrackSP
                    = (REBackTrackData *)((char *)backTrackData
                                                    - backTrackData->sz);
                x->cp = backTrackData->cp;
                pc = backTrackData->backtrack_pc;
                op = backTrackData->backtrack_op;
                gData->stateStackTop = backTrackData->precedingStateTop;
                JS_ASSERT(gData->stateStackTop);

                memcpy(gData->stateStack, backTrackData + 1,
                       sizeof(REProgState) * backTrackData->precedingStateTop);
                curState = &gData->stateStack[gData->stateStackTop - 1];

                if (backTrackData->parenCount) {
                    memcpy(&x->parens[backTrackData->parenIndex],
                           (char *)(backTrackData + 1) + sizeof(REProgState) * backTrackData->precedingStateTop,
                           sizeof(RECapture) * backTrackData->parenCount);
                    parenSoFar = backTrackData->parenIndex + backTrackData->parenCount;
                }
                else {
                    for (k = curState->parenSoFar; k < parenSoFar; k++)
                        x->parens[k].index = -1;
                    parenSoFar = curState->parenSoFar;
                }
                continue;
            }
            else
                return NULL;
        }
        else
            x = result;

        /*
         *  Continue with the expression.
         */
        op = (REOp)*pc++;
    }
    return NULL;
}

static REMatchState *
MatchRegExp(REGlobalData *gData, REMatchState *x)
{
    REMatchState *result;
    const jschar *cp = x->cp;
    const jschar *cp2;
    uintN j;

    /*
     * Have to include the position beyond the last character
     * in order to detect end-of-input/line condition.
     */
    for (cp2 = cp; cp2 <= gData->cpend; cp2++) {
        gData->skipped = cp2 - cp;
        x->cp = cp2;
        for (j = 0; j < gData->regexp->parenCount; j++)
            x->parens[j].index = -1;
        result = executeREBytecode(gData, x, true);
        if (!gData->ok || result)
            return result;
        gData->backTrackSP = gData->backTrackStack;
        gData->cursz = 0;
        gData->stateStackTop = 0;
        cp2 = cp + gData->skipped;
    }
    return NULL;
}


static REMatchState *
initMatch(REGlobalData *gData, JS2RegExp *re)
{
    REMatchState *result;
    uintN i;

    gData->maxBackTrack = INITIAL_BACKTRACK;
    gData->backTrackStack = (REBackTrackData *)malloc(INITIAL_BACKTRACK);

    if (!gData->backTrackStack)
        return NULL;
    gData->backTrackSP = gData->backTrackStack;
    gData->cursz = 0;


    gData->maxStateStack = INITIAL_STATESTACK;
    gData->stateStack = (REProgState *)malloc(sizeof(REProgState) * INITIAL_STATESTACK);
    if (!gData->stateStack)
        return NULL;
    gData->stateStackTop = 0;

    gData->regexp = re;
    gData->ok = JS_TRUE;

    result = (REMatchState *)malloc(sizeof(REMatchState)
                               + (re->parenCount - 1) * sizeof(RECapture));
    if (!result)
        return NULL;

    for (i = 0; i < re->classCount; i++)
        if (!re->classList[i].converted)
            if (!processCharSet(gData, &re->classList[i]))
                return NULL;

    return result;
}

/*
 * Call the recursive matcher to do the real work.  Return null on mismatch.
 * On match, return the completed MatchResult structure.
 */
REMatchResult *REExecute(JS2Metadata *meta, JS2RegExp *re, const jschar *str, uint32 index, uint32 length, bool globalMultiline)
{
    REGlobalData gData;
    REMatchState *x, *result;
    const jschar *cp;
    uint32 start, p;
    REMatchResult *returnValue = NULL;

    start = index;
    if (start > length)
        start = length;
    cp = JSSTRING_CHARS(str);
    gData.cpbegin = cp;
    gData.cpend = cp + length;
    cp += start;
    gData.start = start;
    gData.skipped = 0;

    x = initMatch(&gData, re);
    gData.globalMultiline = globalMultiline;
    if (!x)
        return JS_FALSE;
    x->cp = cp;

    result = MatchRegExp(&gData, x);
    if (!gData.ok)
        goto out;
    if (!result)
        goto out;

    returnValue = (REMatchResult *)malloc(sizeof(REMatchResult) + (re->parenCount - 1) * sizeof(RECapture));
    returnValue->startIndex = gData.start + gData.skipped;
    returnValue->endIndex = result->cp - str;
    returnValue->parenCount = re->parenCount;
    for (p = 0; p < re->parenCount; p++) {
        returnValue->parens[p] = result->parens[p];
    }
out:
    free(x);
	free(gData.stateStack);
	free(gData.backTrackStack);
    return returnValue;
}

REMatchResult *REMatch(JS2Metadata *meta, JS2RegExp *re, const jschar *str, uint32 length)
{
    REGlobalData gData;
    REMatchState *x, *result;
    const jschar *cp;
    uint32 j, p;
	REMatchResult *returnValue = NULL;

    cp = JSSTRING_CHARS(str);
    gData.cpbegin = cp;
    gData.cpend = cp + length;
    gData.start = 0;
    gData.skipped = 0;

    x = initMatch(&gData, re);
    gData.globalMultiline = false;
    if (!x)
        return JS_FALSE;
    x->cp = cp;

    for (j = 0; j < re->parenCount; j++)
        x->parens[j].index = -1;
    result = executeREBytecode(&gData, x, false);
    if (!gData.ok)
        goto out;
    if (!result)
        goto out;

    returnValue = (REMatchResult *)malloc(sizeof(REMatchResult) + (re->parenCount - 1) * sizeof(RECapture));
    returnValue->startIndex = gData.skipped;
    returnValue->endIndex = result->cp - str;
    returnValue->parenCount = re->parenCount;
    for (p = 0; p < re->parenCount; p++) {
        returnValue->parens[p] = result->parens[p];
    }
out:
    free(x);
	free(gData.stateStack);
	free(gData.backTrackStack);
    return returnValue;
}

// Compile the flag source and build a flag bit set. Return true/false for success/failure
bool parseFlags(JS2Metadata *meta, const jschar *flagStr, uint32 length, uint32 *flags)
{
    uint32 i;
    *flags = 0;
    for (i = 0; i < length; i++) {
        switch (flagStr[i]) {
        case 'g':
            *flags |= JSREG_GLOB; break;
        case 'i':
            *flags |= JSREG_FOLD; break;
        case 'm':
            *flags |= JSREG_MULTILINE; break;
        default:
            return false;
        }
    }
    return true;
}

#define JS_HOWMANY(x,y) (((x)+(y)-1)/(y))
#define JS_ROUNDUP(x,y) (JS_HOWMANY(x,y)*(y))

// Compile the source re, return NULL for failure (error functions called)
JS2RegExp *RECompile(JS2Metadata *meta, const jschar *str, uint32 length, uint32 flags, bool flat)
{
    JS2RegExp *re;
    CompilerState state;
    size_t resize;
    jsbytecode *endPC;
    uint32 i;
    size_t len;

    re = NULL;
    state.meta = meta;
    state.reNodePool = new Pool<RENode>(32);
    state.strict = false;
    state.cpbegin = state.cp = JSSTRING_CHARS(str);
    state.cpend = state.cp + length;
    state.flags = flags;
    state.parenCount = 0;
    state.classCount = 0;
    state.progLength = 0;
    state.treeDepth = 0;
    for (i = 0; i < CLASS_CACHE_SIZE; i++)
        state.classCache[i].start = NULL;

    len = length;
    if (len != 0 && flat) {
        state.result = NewRENode(&state, REOP_FLAT);
        state.result->u.flat.chr = *state.cpbegin;
        state.result->u.flat.length = length;
        state.result->kid = (void *)(state.cpbegin);
        state.progLength += 5;
    }
    else
        if (!parseRegExp(&state))
            goto out;

    resize = sizeof *re + state.progLength + 1;
    re = (JS2RegExp *) malloc(JS_ROUNDUP(resize, sizeof(uint32)));
    if (!re)
        goto out;

    re->classCount = state.classCount;
    if (state.classCount) {
        re->classList = (RECharSet *)malloc(sizeof(RECharSet)
                                                    * state.classCount);
        if (!re->classList)
            goto out;
    }
    else
        re->classList = NULL;
    endPC = emitREBytecode(&state, re, state.treeDepth, re->program, state.result);
    if (!endPC) {
        re = NULL;
        goto out;
    }
    *endPC++ = REOP_END;
    JS_ASSERT(endPC <= (re->program + (state.progLength + 1)));

    re->parenCount = state.parenCount;
    re->flags = flags;
    re->source = str;

out:
    delete state.reNodePool;
    return re;
}

}   // namespace MetaData
}   // namespace JavaScript