diff options
Diffstat (limited to 'tools/node_modules/expresso/deps/jscoverage/js/jsregexp.cpp')
-rw-r--r-- | tools/node_modules/expresso/deps/jscoverage/js/jsregexp.cpp | 4772 |
1 files changed, 4772 insertions, 0 deletions
diff --git a/tools/node_modules/expresso/deps/jscoverage/js/jsregexp.cpp b/tools/node_modules/expresso/deps/jscoverage/js/jsregexp.cpp new file mode 100644 index 0000000..778f342 --- /dev/null +++ b/tools/node_modules/expresso/deps/jscoverage/js/jsregexp.cpp @@ -0,0 +1,4772 @@ +/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*- + * vim: set sw=4 ts=8 et tw=78: + * + * ***** BEGIN LICENSE BLOCK ***** + * Version: MPL 1.1/GPL 2.0/LGPL 2.1 + * + * The contents of this file are subject to the Mozilla Public License Version + * 1.1 (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * http://www.mozilla.org/MPL/ + * + * Software distributed under the License is distributed on an "AS IS" basis, + * WITHOUT WARRANTY OF ANY KIND, either express or implied. See the License + * for the specific language governing rights and limitations under the + * License. + * + * The Original Code is Mozilla Communicator client code, released + * March 31, 1998. + * + * The Initial Developer of the Original Code is + * Netscape Communications Corporation. + * Portions created by the Initial Developer are Copyright (C) 1998 + * the Initial Developer. All Rights Reserved. + * + * Contributor(s): + * + * Alternatively, the contents of this file may be used under the terms of + * either of the GNU General Public License Version 2 or later (the "GPL"), + * or the GNU Lesser General Public License Version 2.1 or later (the "LGPL"), + * in which case the provisions of the GPL or the LGPL are applicable instead + * of those above. If you wish to allow use of your version of this file only + * under the terms of either the GPL or the LGPL, and not to allow others to + * use your version of this file under the terms of the MPL, indicate your + * decision by deleting the provisions above and replace them with the notice + * and other provisions required by the GPL or the LGPL. If you do not delete + * the provisions above, a recipient may use your version of this file under + * the terms of any one of the MPL, the GPL or the LGPL. + * + * ***** END LICENSE BLOCK ***** */ + +/* + * JS regular expressions, after Perl. + */ +#include "jsstddef.h" +#include <stdlib.h> +#include <string.h> +#include <stdarg.h> +#include "jstypes.h" +#include "jsarena.h" /* Added by JSIFY */ +#include "jsutil.h" /* Added by JSIFY */ +#include "jsapi.h" +#include "jsarray.h" +#include "jsatom.h" +#include "jsbuiltins.h" +#include "jscntxt.h" +#include "jsversion.h" +#include "jsfun.h" +#include "jsgc.h" +#include "jsinterp.h" +#include "jslock.h" +#include "jsnum.h" +#include "jsobj.h" +#include "jsopcode.h" +#include "jsregexp.h" +#include "jsscan.h" +#include "jsscope.h" +#include "jsstr.h" + +#ifdef JS_TRACER +#include "jstracer.h" +using namespace avmplus; +using namespace nanojit; + +/* + * FIXME Duplicated with jstracer.cpp, doing it this way for now + * to keep it private to files that need it. + */ +#ifdef JS_JIT_SPEW +static bool verbose_debug = getenv("TRACEMONKEY") && strstr(getenv("TRACEMONKEY"), "verbose"); +#define debug_only_v(x) if (verbose_debug) { x; } +#else +#define debug_only_v(x) +#endif +#endif + +typedef enum REOp { +#define REOP_DEF(opcode, name) opcode, +#include "jsreops.tbl" +#undef REOP_DEF + REOP_LIMIT /* META: no operator >= to this */ +} REOp; + +#define REOP_IS_SIMPLE(op) ((op) <= REOP_NCLASS) + +#ifdef REGEXP_DEBUG +const char *reop_names[] = { +#define REOP_DEF(opcode, name) name, +#include "jsreops.tbl" +#undef REOP_DEF + NULL +}; +#endif + +#ifdef __GNUC__ +static int +re_debug(const char *fmt, ...) __attribute__ ((format(printf, 1, 2))); +#endif + +#ifdef REGEXP_DEBUG +static int +re_debug(const char *fmt, ...) +{ + va_list ap; + int retval; + + va_start(ap, fmt); + retval = vprintf(fmt, ap); + va_end(ap); + return retval; +} + +static void +re_debug_chars(const jschar *chrs, size_t length) +{ + int i = 0; + + printf(" \""); + while (*chrs && i++ < length) { + putchar((char)*chrs++); + } + printf("\""); +} +#else /* !REGEXP_DEBUG */ +/* This should be optimized to a no-op by our tier-1 compilers. */ +static int +re_debug(const char *fmt, ...) +{ + return 0; +} + +static void +re_debug_chars(const jschar *chrs, size_t length) +{ +} +#endif /* !REGEXP_DEBUG */ + +struct RENode { + REOp op; /* r.e. op bytecode */ + RENode *next; /* next in concatenation order */ + void *kid; /* first operand */ + union { + void *kid2; /* second operand */ + jsint num; /* could be a number */ + size_t parenIndex; /* or a parenthesis index */ + struct { /* or a quantifier range */ + uintN min; + uintN max; + JSPackedBool greedy; + } range; + struct { /* or a character class */ + size_t startIndex; + size_t kidlen; /* length of string at kid, in jschars */ + size_t index; /* index into class list */ + uint16 bmsize; /* bitmap size, based on max char code */ + JSPackedBool sense; + } ucclass; + struct { /* or a literal sequence */ + jschar chr; /* of one character */ + size_t length; /* or many (via the kid) */ + } flat; + struct { + RENode *kid2; /* second operand from ALT */ + jschar ch1; /* match char for ALTPREREQ */ + jschar ch2; /* ditto, or class index for ALTPREREQ2 */ + } altprereq; + } u; +}; + +#define RE_IS_LETTER(c) (((c >= 'A') && (c <= 'Z')) || \ + ((c >= 'a') && (c <= 'z')) ) +#define RE_IS_LINE_TERM(c) ((c == '\n') || (c == '\r') || \ + (c == LINE_SEPARATOR) || (c == PARA_SEPARATOR)) + +#define CLASS_CACHE_SIZE 4 + +typedef struct CompilerState { + JSContext *context; + JSTokenStream *tokenStream; /* For reporting errors */ + const jschar *cpbegin; + const jschar *cpend; + const jschar *cp; + size_t parenCount; + size_t classCount; /* number of [] encountered */ + size_t treeDepth; /* maximum depth of parse tree */ + size_t progLength; /* estimated bytecode length */ + RENode *result; + size_t classBitmapsMem; /* memory to hold all class bitmaps */ + struct { + const jschar *start; /* small cache of class strings */ + size_t length; /* since they're often the same */ + size_t index; + } classCache[CLASS_CACHE_SIZE]; + uint16 flags; +} CompilerState; + +typedef struct EmitStateStackEntry { + jsbytecode *altHead; /* start of REOP_ALT* opcode */ + jsbytecode *nextAltFixup; /* fixup pointer to next-alt offset */ + jsbytecode *nextTermFixup; /* fixup ptr. to REOP_JUMP offset */ + jsbytecode *endTermFixup; /* fixup ptr. to REOPT_ALTPREREQ* offset */ + RENode *continueNode; /* original REOP_ALT* node being stacked */ + jsbytecode continueOp; /* REOP_JUMP or REOP_ENDALT continuation */ + JSPackedBool jumpToJumpFlag; /* true if we've patched jump-to-jump to + avoid 16-bit unsigned offset overflow */ +} EmitStateStackEntry; + +/* + * Immediate operand sizes and getter/setters. Unlike the ones in jsopcode.h, + * the getters and setters take the pc of the offset, not of the opcode before + * the offset. + */ +#define ARG_LEN 2 +#define GET_ARG(pc) ((uint16)(((pc)[0] << 8) | (pc)[1])) +#define SET_ARG(pc, arg) ((pc)[0] = (jsbytecode) ((arg) >> 8), \ + (pc)[1] = (jsbytecode) (arg)) + +#define OFFSET_LEN ARG_LEN +#define OFFSET_MAX (JS_BIT(ARG_LEN * 8) - 1) +#define GET_OFFSET(pc) GET_ARG(pc) + +/* + * Maximum supported tree depth is maximum size of EmitStateStackEntry stack. + * For sanity, we limit it to 2^24 bytes. + */ +#define TREE_DEPTH_MAX (JS_BIT(24) / sizeof(EmitStateStackEntry)) + +/* + * The maximum memory that can be allocated for class bitmaps. + * For sanity, we limit it to 2^24 bytes. + */ +#define CLASS_BITMAPS_MEM_LIMIT JS_BIT(24) + +/* + * Functions to get size and write/read bytecode that represent small indexes + * compactly. + * Each byte in the code represent 7-bit chunk of the index. 8th bit when set + * indicates that the following byte brings more bits to the index. Otherwise + * this is the last byte in the index bytecode representing highest index bits. + */ +static size_t +GetCompactIndexWidth(size_t index) +{ + size_t width; + + for (width = 1; (index >>= 7) != 0; ++width) { } + return width; +} + +static JS_ALWAYS_INLINE jsbytecode * +WriteCompactIndex(jsbytecode *pc, size_t index) +{ + size_t next; + + while ((next = index >> 7) != 0) { + *pc++ = (jsbytecode)(index | 0x80); + index = next; + } + *pc++ = (jsbytecode)index; + return pc; +} + +static JS_ALWAYS_INLINE jsbytecode * +ReadCompactIndex(jsbytecode *pc, size_t *result) +{ + size_t nextByte; + + nextByte = *pc++; + if ((nextByte & 0x80) == 0) { + /* + * Short-circuit the most common case when compact index <= 127. + */ + *result = nextByte; + } else { + size_t shift = 7; + *result = 0x7F & nextByte; + do { + nextByte = *pc++; + *result |= (nextByte & 0x7F) << shift; + shift += 7; + } while ((nextByte & 0x80) != 0); + } + return pc; +} + +typedef struct RECapture { + ptrdiff_t index; /* start of contents, -1 for empty */ + size_t length; /* length of capture */ +} RECapture; + +typedef struct REMatchState { + const jschar *cp; + RECapture parens[1]; /* first of 're->parenCount' captures, + allocated at end of this struct */ +} REMatchState; + +struct REBackTrackData; + +typedef struct REProgState { + jsbytecode *continue_pc; /* current continuation data */ + jsbytecode continue_op; + ptrdiff_t index; /* progress in text */ + size_t parenSoFar; /* highest indexed paren started */ + union { + struct { + uintN min; /* current quantifier limits */ + uintN max; + } quantifier; + struct { + size_t top; /* backtrack stack state */ + size_t sz; + } assertion; + } u; +} REProgState; + +typedef struct REBackTrackData { + size_t sz; /* size of previous stack entry */ + jsbytecode *backtrack_pc; /* where to backtrack to */ + jsbytecode backtrack_op; + const jschar *cp; /* index in text of match at backtrack */ + size_t parenIndex; /* start index of saved paren contents */ + size_t parenCount; /* # of saved paren contents */ + size_t saveStateStackTop; /* number of parent states */ + /* saved parent states follow */ + /* saved paren contents follow */ +} REBackTrackData; + +#define INITIAL_STATESTACK 100 +#define INITIAL_BACKTRACK 8000 + +typedef struct REGlobalData { + JSContext *cx; + JSRegExp *regexp; /* the RE in execution */ + JSBool ok; /* runtime error (out_of_memory only?) */ + size_t start; /* offset to start at */ + ptrdiff_t skipped; /* chars skipped anchoring this r.e. */ + const jschar *cpbegin; /* text base address */ + const jschar *cpend; /* text limit address */ + + REProgState *stateStack; /* stack of state of current parents */ + size_t stateStackTop; + size_t stateStackLimit; + + REBackTrackData *backTrackStack;/* stack of matched-so-far positions */ + REBackTrackData *backTrackSP; + size_t backTrackStackSize; + size_t cursz; /* size of current stack entry */ + size_t backTrackCount; /* how many times we've backtracked */ + size_t backTrackLimit; /* upper limit on backtrack states */ +} REGlobalData; + +/* + * 1. If IgnoreCase is false, return ch. + * 2. Let u be ch converted to upper case as if by calling + * String.prototype.toUpperCase on the one-character string ch. + * 3. If u does not consist of a single character, return ch. + * 4. Let cu be u's character. + * 5. If ch's code point value is greater than or equal to decimal 128 and cu's + * code point value is less than decimal 128, then return ch. + * 6. Return cu. + */ +static JS_ALWAYS_INLINE uintN +upcase(uintN ch) +{ + uintN cu; + + JS_ASSERT((uintN) (jschar) ch == ch); + if (ch < 128) { + if (ch - (uintN) 'a' <= (uintN) ('z' - 'a')) + ch -= (uintN) ('a' - 'A'); + return ch; + } + + cu = JS_TOUPPER(ch); + return (cu < 128) ? ch : cu; +} + +static JS_ALWAYS_INLINE uintN +downcase(uintN ch) +{ + JS_ASSERT((uintN) (jschar) ch == ch); + if (ch < 128) { + if (ch - (uintN) 'A' <= (uintN) ('Z' - 'A')) + ch += (uintN) ('a' - 'A'); + return ch; + } + + return JS_TOLOWER(ch); +} + +/* Construct and initialize an RENode, returning NULL for out-of-memory */ +static RENode * +NewRENode(CompilerState *state, REOp op) +{ + JSContext *cx; + RENode *ren; + + cx = state->context; + JS_ARENA_ALLOCATE_CAST(ren, RENode *, &cx->tempPool, sizeof *ren); + if (!ren) { + js_ReportOutOfScriptQuota(cx); + return NULL; + } + ren->op = op; + ren->next = NULL; + ren->kid = NULL; + return ren; +} + +/* + * Validates and converts hex ascii value. + */ +static JSBool +isASCIIHexDigit(jschar c, uintN *digit) +{ + uintN cv = c; + + if (cv < '0') + return JS_FALSE; + if (cv <= '9') { + *digit = cv - '0'; + return JS_TRUE; + } + cv |= 0x20; + if (cv >= 'a' && cv <= 'f') { + *digit = cv - 'a' + 10; + return JS_TRUE; + } + return JS_FALSE; +} + + +typedef struct { + REOp op; + const jschar *errPos; + size_t parenIndex; +} REOpData; + +static JSBool +ReportRegExpErrorHelper(CompilerState *state, uintN flags, uintN errorNumber, + const jschar *arg) +{ + if (state->tokenStream) { + return js_ReportCompileErrorNumber(state->context, state->tokenStream, + NULL, JSREPORT_UC | flags, + errorNumber, arg); + } + return JS_ReportErrorFlagsAndNumberUC(state->context, flags, + js_GetErrorMessage, NULL, + errorNumber, arg); +} + +static JSBool +ReportRegExpError(CompilerState *state, uintN flags, uintN errorNumber) +{ + return ReportRegExpErrorHelper(state, flags, errorNumber, NULL); +} + +/* + * Process the op against the two top operands, reducing them to a single + * operand in the penultimate slot. Update progLength and treeDepth. + */ +static JSBool +ProcessOp(CompilerState *state, REOpData *opData, RENode **operandStack, + intN operandSP) +{ + RENode *result; + + switch (opData->op) { + case REOP_ALT: + result = NewRENode(state, REOP_ALT); + if (!result) + return JS_FALSE; + result->kid = operandStack[operandSP - 2]; + result->u.kid2 = operandStack[operandSP - 1]; + operandStack[operandSP - 2] = result; + + if (state->treeDepth == TREE_DEPTH_MAX) { + ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX); + return JS_FALSE; + } + ++state->treeDepth; + + /* + * Look at both alternates to see if there's a FLAT or a CLASS at + * the start of each. If so, use a prerequisite match. + */ + if (((RENode *) result->kid)->op == REOP_FLAT && + ((RENode *) result->u.kid2)->op == REOP_FLAT && + (state->flags & JSREG_FOLD) == 0) { + result->op = REOP_ALTPREREQ; + result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr; + result->u.altprereq.ch2 = ((RENode *) result->u.kid2)->u.flat.chr; + /* ALTPREREQ, <end>, uch1, uch2, <next>, ..., + JUMP, <end> ... ENDALT */ + state->progLength += 13; + } + else + if (((RENode *) result->kid)->op == REOP_CLASS && + ((RENode *) result->kid)->u.ucclass.index < 256 && + ((RENode *) result->u.kid2)->op == REOP_FLAT && + (state->flags & JSREG_FOLD) == 0) { + result->op = REOP_ALTPREREQ2; + result->u.altprereq.ch1 = ((RENode *) result->u.kid2)->u.flat.chr; + result->u.altprereq.ch2 = ((RENode *) result->kid)->u.ucclass.index; + /* ALTPREREQ2, <end>, uch1, uch2, <next>, ..., + JUMP, <end> ... ENDALT */ + state->progLength += 13; + } + else + if (((RENode *) result->kid)->op == REOP_FLAT && + ((RENode *) result->u.kid2)->op == REOP_CLASS && + ((RENode *) result->u.kid2)->u.ucclass.index < 256 && + (state->flags & JSREG_FOLD) == 0) { + result->op = REOP_ALTPREREQ2; + result->u.altprereq.ch1 = ((RENode *) result->kid)->u.flat.chr; + result->u.altprereq.ch2 = + ((RENode *) result->u.kid2)->u.ucclass.index; + /* ALTPREREQ2, <end>, uch1, uch2, <next>, ..., + JUMP, <end> ... ENDALT */ + state->progLength += 13; + } + else { + /* ALT, <next>, ..., JUMP, <end> ... ENDALT */ + state->progLength += 7; + } + break; + + case REOP_CONCAT: + result = operandStack[operandSP - 2]; + while (result->next) + result = result->next; + result->next = operandStack[operandSP - 1]; + break; + + case REOP_ASSERT: + case REOP_ASSERT_NOT: + case REOP_LPARENNON: + case REOP_LPAREN: + /* These should have been processed by a close paren. */ + ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_MISSING_PAREN, + opData->errPos); + return JS_FALSE; + + default:; + } + return JS_TRUE; +} + +/* + * Parser forward declarations. + */ +static JSBool ParseTerm(CompilerState *state); +static JSBool ParseQuantifier(CompilerState *state); +static intN ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues); + +/* + * Top-down regular expression grammar, based closely on Perl4. + * + * regexp: altern A regular expression is one or more + * altern '|' regexp alternatives separated by vertical bar. + */ +#define INITIAL_STACK_SIZE 128 + +static JSBool +ParseRegExp(CompilerState *state) +{ + size_t parenIndex; + RENode *operand; + REOpData *operatorStack; + RENode **operandStack; + REOp op; + intN i; + JSBool result = JS_FALSE; + + intN operatorSP = 0, operatorStackSize = INITIAL_STACK_SIZE; + intN operandSP = 0, operandStackSize = INITIAL_STACK_SIZE; + + /* Watch out for empty regexp */ + if (state->cp == state->cpend) { + state->result = NewRENode(state, REOP_EMPTY); + return (state->result != NULL); + } + + operatorStack = (REOpData *) + JS_malloc(state->context, sizeof(REOpData) * operatorStackSize); + if (!operatorStack) + return JS_FALSE; + + operandStack = (RENode **) + JS_malloc(state->context, sizeof(RENode *) * operandStackSize); + if (!operandStack) + goto out; + + for (;;) { + parenIndex = state->parenCount; + if (state->cp == state->cpend) { + /* + * If we are at the end of the regexp and we're short one or more + * operands, the regexp must have the form /x|/ or some such, with + * left parentheses making us short more than one operand. + */ + if (operatorSP >= operandSP) { + operand = NewRENode(state, REOP_EMPTY); + if (!operand) + goto out; + goto pushOperand; + } + } else { + switch (*state->cp) { + case '(': + ++state->cp; + if (state->cp + 1 < state->cpend && + *state->cp == '?' && + (state->cp[1] == '=' || + state->cp[1] == '!' || + state->cp[1] == ':')) { + switch (state->cp[1]) { + case '=': + op = REOP_ASSERT; + /* ASSERT, <next>, ... ASSERTTEST */ + state->progLength += 4; + break; + case '!': + op = REOP_ASSERT_NOT; + /* ASSERTNOT, <next>, ... ASSERTNOTTEST */ + state->progLength += 4; + break; + default: + op = REOP_LPARENNON; + break; + } + state->cp += 2; + } else { + op = REOP_LPAREN; + /* LPAREN, <index>, ... RPAREN, <index> */ + state->progLength + += 2 * (1 + GetCompactIndexWidth(parenIndex)); + state->parenCount++; + if (state->parenCount == 65535) { + ReportRegExpError(state, JSREPORT_ERROR, + JSMSG_TOO_MANY_PARENS); + goto out; + } + } + goto pushOperator; + + case ')': + /* + * If there's no stacked open parenthesis, throw syntax error. + */ + for (i = operatorSP - 1; ; i--) { + if (i < 0) { + ReportRegExpError(state, JSREPORT_ERROR, + JSMSG_UNMATCHED_RIGHT_PAREN); + goto out; + } + if (operatorStack[i].op == REOP_ASSERT || + operatorStack[i].op == REOP_ASSERT_NOT || + operatorStack[i].op == REOP_LPARENNON || + operatorStack[i].op == REOP_LPAREN) { + break; + } + } + /* FALL THROUGH */ + + case '|': + /* Expected an operand before these, so make an empty one */ + operand = NewRENode(state, REOP_EMPTY); + if (!operand) + goto out; + goto pushOperand; + + default: + if (!ParseTerm(state)) + goto out; + operand = state->result; +pushOperand: + if (operandSP == operandStackSize) { + RENode **tmp; + operandStackSize += operandStackSize; + tmp = (RENode **) + JS_realloc(state->context, operandStack, + sizeof(RENode *) * operandStackSize); + if (!tmp) + goto out; + operandStack = tmp; + } + operandStack[operandSP++] = operand; + break; + } + } + + /* At the end; process remaining operators. */ +restartOperator: + if (state->cp == state->cpend) { + while (operatorSP) { + --operatorSP; + if (!ProcessOp(state, &operatorStack[operatorSP], + operandStack, operandSP)) + goto out; + --operandSP; + } + JS_ASSERT(operandSP == 1); + state->result = operandStack[0]; + result = JS_TRUE; + goto out; + } + + switch (*state->cp) { + case '|': + /* Process any stacked 'concat' operators */ + ++state->cp; + while (operatorSP && + operatorStack[operatorSP - 1].op == REOP_CONCAT) { + --operatorSP; + if (!ProcessOp(state, &operatorStack[operatorSP], + operandStack, operandSP)) { + goto out; + } + --operandSP; + } + op = REOP_ALT; + goto pushOperator; + + case ')': + /* + * If there's no stacked open parenthesis, throw syntax error. + */ + for (i = operatorSP - 1; ; i--) { + if (i < 0) { + ReportRegExpError(state, JSREPORT_ERROR, + JSMSG_UNMATCHED_RIGHT_PAREN); + goto out; + } + if (operatorStack[i].op == REOP_ASSERT || + operatorStack[i].op == REOP_ASSERT_NOT || + operatorStack[i].op == REOP_LPARENNON || + operatorStack[i].op == REOP_LPAREN) { + break; + } + } + ++state->cp; + + /* Process everything on the stack until the open parenthesis. */ + for (;;) { + JS_ASSERT(operatorSP); + --operatorSP; + switch (operatorStack[operatorSP].op) { + case REOP_ASSERT: + case REOP_ASSERT_NOT: + case REOP_LPAREN: + operand = NewRENode(state, operatorStack[operatorSP].op); + if (!operand) + goto out; + operand->u.parenIndex = + operatorStack[operatorSP].parenIndex; + JS_ASSERT(operandSP); + operand->kid = operandStack[operandSP - 1]; + operandStack[operandSP - 1] = operand; + if (state->treeDepth == TREE_DEPTH_MAX) { + ReportRegExpError(state, JSREPORT_ERROR, + JSMSG_REGEXP_TOO_COMPLEX); + goto out; + } + ++state->treeDepth; + /* FALL THROUGH */ + + case REOP_LPARENNON: + state->result = operandStack[operandSP - 1]; + if (!ParseQuantifier(state)) + goto out; + operandStack[operandSP - 1] = state->result; + goto restartOperator; + default: + if (!ProcessOp(state, &operatorStack[operatorSP], + operandStack, operandSP)) + goto out; + --operandSP; + break; + } + } + break; + + case '{': + { + const jschar *errp = state->cp; + + if (ParseMinMaxQuantifier(state, JS_TRUE) < 0) { + /* + * This didn't even scan correctly as a quantifier, so we should + * treat it as flat. + */ + op = REOP_CONCAT; + goto pushOperator; + } + + state->cp = errp; + /* FALL THROUGH */ + } + + case '+': + case '*': + case '?': + ReportRegExpErrorHelper(state, JSREPORT_ERROR, JSMSG_BAD_QUANTIFIER, + state->cp); + result = JS_FALSE; + goto out; + + default: + /* Anything else is the start of the next term. */ + op = REOP_CONCAT; +pushOperator: + if (operatorSP == operatorStackSize) { + REOpData *tmp; + operatorStackSize += operatorStackSize; + tmp = (REOpData *) + JS_realloc(state->context, operatorStack, + sizeof(REOpData) * operatorStackSize); + if (!tmp) + goto out; + operatorStack = tmp; + } + operatorStack[operatorSP].op = op; + operatorStack[operatorSP].errPos = state->cp; + operatorStack[operatorSP++].parenIndex = parenIndex; + break; + } + } +out: + if (operatorStack) + JS_free(state->context, operatorStack); + if (operandStack) + JS_free(state->context, operandStack); + return result; +} + +/* + * Hack two bits in CompilerState.flags, for use within FindParenCount to flag + * its being on the stack, and to propagate errors to its callers. + */ +#define JSREG_FIND_PAREN_COUNT 0x8000 +#define JSREG_FIND_PAREN_ERROR 0x4000 + +/* + * Magic return value from FindParenCount and GetDecimalValue, to indicate + * overflow beyond GetDecimalValue's max parameter, or a computed maximum if + * its findMax parameter is non-null. + */ +#define OVERFLOW_VALUE ((uintN)-1) + +static uintN +FindParenCount(CompilerState *state) +{ + CompilerState temp; + int i; + + if (state->flags & JSREG_FIND_PAREN_COUNT) + return OVERFLOW_VALUE; + + /* + * Copy state into temp, flag it so we never report an invalid backref, + * and reset its members to parse the entire regexp. This is obviously + * suboptimal, but GetDecimalValue calls us only if a backref appears to + * refer to a forward parenthetical, which is rare. + */ + temp = *state; + temp.flags |= JSREG_FIND_PAREN_COUNT; + temp.cp = temp.cpbegin; + temp.parenCount = 0; + temp.classCount = 0; + temp.progLength = 0; + temp.treeDepth = 0; + temp.classBitmapsMem = 0; + for (i = 0; i < CLASS_CACHE_SIZE; i++) + temp.classCache[i].start = NULL; + + if (!ParseRegExp(&temp)) { + state->flags |= JSREG_FIND_PAREN_ERROR; + return OVERFLOW_VALUE; + } + return temp.parenCount; +} + +/* + * Extract and return a decimal value at state->cp. The initial character c + * has already been read. Return OVERFLOW_VALUE if the result exceeds max. + * Callers who pass a non-null findMax should test JSREG_FIND_PAREN_ERROR in + * state->flags to discover whether an error occurred under findMax. + */ +static uintN +GetDecimalValue(jschar c, uintN max, uintN (*findMax)(CompilerState *state), + CompilerState *state) +{ + uintN value = JS7_UNDEC(c); + JSBool overflow = (value > max && (!findMax || value > findMax(state))); + + /* The following restriction allows simpler overflow checks. */ + JS_ASSERT(max <= ((uintN)-1 - 9) / 10); + while (state->cp < state->cpend) { + c = *state->cp; + if (!JS7_ISDEC(c)) + break; + value = 10 * value + JS7_UNDEC(c); + if (!overflow && value > max && (!findMax || value > findMax(state))) + overflow = JS_TRUE; + ++state->cp; + } + return overflow ? OVERFLOW_VALUE : value; +} + +/* + * Calculate the total size of the bitmap required for a class expression. + */ +static JSBool +CalculateBitmapSize(CompilerState *state, RENode *target, const jschar *src, + const jschar *end) +{ + uintN max = 0; + JSBool inRange = JS_FALSE; + jschar c, rangeStart = 0; + uintN n, digit, nDigits, i; + + target->u.ucclass.bmsize = 0; + target->u.ucclass.sense = JS_TRUE; + + if (src == end) + return JS_TRUE; + + if (*src == '^') { + ++src; + target->u.ucclass.sense = JS_FALSE; + } + + while (src != end) { + JSBool canStartRange = JS_TRUE; + uintN localMax = 0; + + switch (*src) { + case '\\': + ++src; + c = *src++; + switch (c) { + case 'b': + localMax = 0x8; + break; + case 'f': + localMax = 0xC; + break; + case 'n': + localMax = 0xA; + break; + case 'r': + localMax = 0xD; + break; + case 't': + localMax = 0x9; + break; + case 'v': + localMax = 0xB; + break; + case 'c': + if (src < end && RE_IS_LETTER(*src)) { + localMax = (uintN) (*src++) & 0x1F; + } else { + --src; + localMax = '\\'; + } + break; + case 'x': + nDigits = 2; + goto lexHex; + case 'u': + nDigits = 4; +lexHex: + n = 0; + for (i = 0; (i < nDigits) && (src < end); i++) { + c = *src++; + if (!isASCIIHexDigit(c, &digit)) { + /* + * Back off to accepting the original + *'\' as a literal. + */ + src -= i + 1; + n = '\\'; + break; + } + n = (n << 4) | digit; + } + localMax = n; + break; + case 'd': + canStartRange = JS_FALSE; + if (inRange) { + JS_ReportErrorNumber(state->context, + js_GetErrorMessage, NULL, + JSMSG_BAD_CLASS_RANGE); + return JS_FALSE; + } + localMax = '9'; + break; + case 'D': + case 's': + case 'S': + case 'w': + case 'W': + canStartRange = JS_FALSE; + if (inRange) { + JS_ReportErrorNumber(state->context, + js_GetErrorMessage, NULL, + JSMSG_BAD_CLASS_RANGE); + return JS_FALSE; + } + max = 65535; + + /* + * If this is the start of a range, ensure that it's less than + * the end. + */ + localMax = 0; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + /* + * This is a non-ECMA extension - decimal escapes (in this + * case, octal!) are supposed to be an error inside class + * ranges, but supported here for backwards compatibility. + * + */ + n = JS7_UNDEC(c); + c = *src; + if ('0' <= c && c <= '7') { + src++; + n = 8 * n + JS7_UNDEC(c); + c = *src; + if ('0' <= c && c <= '7') { + src++; + i = 8 * n + JS7_UNDEC(c); + if (i <= 0377) + n = i; + else + src--; + } + } + localMax = n; + break; + + default: + localMax = c; + break; + } + break; + default: + localMax = *src++; + break; + } + + if (inRange) { + /* Throw a SyntaxError here, per ECMA-262, 15.10.2.15. */ + if (rangeStart > localMax) { + JS_ReportErrorNumber(state->context, + js_GetErrorMessage, NULL, + JSMSG_BAD_CLASS_RANGE); + return JS_FALSE; + } + inRange = JS_FALSE; + } else { + if (canStartRange && src < end - 1) { + if (*src == '-') { + ++src; + inRange = JS_TRUE; + rangeStart = (jschar)localMax; + continue; + } + } + if (state->flags & JSREG_FOLD) + rangeStart = localMax; /* one run of the uc/dc loop below */ + } + + if (state->flags & JSREG_FOLD) { + jschar maxch = localMax; + + for (i = rangeStart; i <= localMax; i++) { + jschar uch, dch; + + uch = upcase(i); + dch = downcase(i); + maxch = JS_MAX(maxch, uch); + maxch = JS_MAX(maxch, dch); + } + localMax = maxch; + } + + if (localMax > max) + max = localMax; + } + target->u.ucclass.bmsize = max; + return JS_TRUE; +} + +/* + * item: assertion An item is either an assertion or + * quantatom a quantified atom. + * + * assertion: '^' Assertions match beginning of string + * (or line if the class static property + * RegExp.multiline is true). + * '$' End of string (or line if the class + * static property RegExp.multiline is + * true). + * '\b' Word boundary (between \w and \W). + * '\B' Word non-boundary. + * + * quantatom: atom An unquantified atom. + * quantatom '{' n ',' m '}' + * Atom must occur between n and m times. + * quantatom '{' n ',' '}' Atom must occur at least n times. + * quantatom '{' n '}' Atom must occur exactly n times. + * quantatom '*' Zero or more times (same as {0,}). + * quantatom '+' One or more times (same as {1,}). + * quantatom '?' Zero or one time (same as {0,1}). + * + * any of which can be optionally followed by '?' for ungreedy + * + * atom: '(' regexp ')' A parenthesized regexp (what matched + * can be addressed using a backreference, + * see '\' n below). + * '.' Matches any char except '\n'. + * '[' classlist ']' A character class. + * '[' '^' classlist ']' A negated character class. + * '\f' Form Feed. + * '\n' Newline (Line Feed). + * '\r' Carriage Return. + * '\t' Horizontal Tab. + * '\v' Vertical Tab. + * '\d' A digit (same as [0-9]). + * '\D' A non-digit. + * '\w' A word character, [0-9a-z_A-Z]. + * '\W' A non-word character. + * '\s' A whitespace character, [ \b\f\n\r\t\v]. + * '\S' A non-whitespace character. + * '\' n A backreference to the nth (n decimal + * and positive) parenthesized expression. + * '\' octal An octal escape sequence (octal must be + * two or three digits long, unless it is + * 0 for the null character). + * '\x' hex A hex escape (hex must be two digits). + * '\u' unicode A unicode escape (must be four digits). + * '\c' ctrl A control character, ctrl is a letter. + * '\' literalatomchar Any character except one of the above + * that follow '\' in an atom. + * otheratomchar Any character not first among the other + * atom right-hand sides. + */ +static JSBool +ParseTerm(CompilerState *state) +{ + jschar c = *state->cp++; + uintN nDigits; + uintN num, tmp, n, i; + const jschar *termStart; + + switch (c) { + /* assertions and atoms */ + case '^': + state->result = NewRENode(state, REOP_BOL); + if (!state->result) + return JS_FALSE; + state->progLength++; + return JS_TRUE; + case '$': + state->result = NewRENode(state, REOP_EOL); + if (!state->result) + return JS_FALSE; + state->progLength++; + return JS_TRUE; + case '\\': + if (state->cp >= state->cpend) { + /* a trailing '\' is an error */ + ReportRegExpError(state, JSREPORT_ERROR, JSMSG_TRAILING_SLASH); + return JS_FALSE; + } + c = *state->cp++; + switch (c) { + /* assertion escapes */ + case 'b' : + state->result = NewRENode(state, REOP_WBDRY); + if (!state->result) + return JS_FALSE; + state->progLength++; + return JS_TRUE; + case 'B': + state->result = NewRENode(state, REOP_WNONBDRY); + if (!state->result) + return JS_FALSE; + state->progLength++; + return JS_TRUE; + /* Decimal escape */ + case '0': + /* Give a strict warning. See also the note below. */ + if (!ReportRegExpError(state, JSREPORT_WARNING | JSREPORT_STRICT, + JSMSG_INVALID_BACKREF)) { + return JS_FALSE; + } + doOctal: + num = 0; + while (state->cp < state->cpend) { + c = *state->cp; + if (c < '0' || '7' < c) + break; + state->cp++; + tmp = 8 * num + (uintN)JS7_UNDEC(c); + if (tmp > 0377) + break; + num = tmp; + } + c = (jschar)num; + doFlat: + state->result = NewRENode(state, REOP_FLAT); + if (!state->result) + return JS_FALSE; + state->result->u.flat.chr = c; + state->result->u.flat.length = 1; + state->progLength += 3; + break; + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + case '8': + case '9': + termStart = state->cp - 1; + num = GetDecimalValue(c, state->parenCount, FindParenCount, state); + if (state->flags & JSREG_FIND_PAREN_ERROR) + return JS_FALSE; + if (num == OVERFLOW_VALUE) { + /* Give a strict mode warning. */ + if (!ReportRegExpError(state, + JSREPORT_WARNING | JSREPORT_STRICT, + (c >= '8') + ? JSMSG_INVALID_BACKREF + : JSMSG_BAD_BACKREF)) { + return JS_FALSE; + } + + /* + * Note: ECMA 262, 15.10.2.9 says that we should throw a syntax + * error here. However, for compatibility with IE, we treat the + * whole backref as flat if the first character in it is not a + * valid octal character, and as an octal escape otherwise. + */ + state->cp = termStart; + if (c >= '8') { + /* Treat this as flat. termStart - 1 is the \. */ + c = '\\'; + goto asFlat; + } + + /* Treat this as an octal escape. */ + goto doOctal; + } + JS_ASSERT(1 <= num && num <= 0x10000); + state->result = NewRENode(state, REOP_BACKREF); + if (!state->result) + return JS_FALSE; + state->result->u.parenIndex = num - 1; + state->progLength + += 1 + GetCompactIndexWidth(state->result->u.parenIndex); + break; + /* Control escape */ + case 'f': + c = 0xC; + goto doFlat; + case 'n': + c = 0xA; + goto doFlat; + case 'r': + c = 0xD; + goto doFlat; + case 't': + c = 0x9; + goto doFlat; + case 'v': + c = 0xB; + goto doFlat; + /* Control letter */ + case 'c': + if (state->cp < state->cpend && RE_IS_LETTER(*state->cp)) { + c = (jschar) (*state->cp++ & 0x1F); + } else { + /* back off to accepting the original '\' as a literal */ + --state->cp; + c = '\\'; + } + goto doFlat; + /* HexEscapeSequence */ + case 'x': + nDigits = 2; + goto lexHex; + /* UnicodeEscapeSequence */ + case 'u': + nDigits = 4; +lexHex: + n = 0; + for (i = 0; i < nDigits && state->cp < state->cpend; i++) { + uintN digit; + c = *state->cp++; + if (!isASCIIHexDigit(c, &digit)) { + /* + * Back off to accepting the original 'u' or 'x' as a + * literal. + */ + state->cp -= i + 2; + n = *state->cp++; + break; + } + n = (n << 4) | digit; + } + c = (jschar) n; + goto doFlat; + /* Character class escapes */ + case 'd': + state->result = NewRENode(state, REOP_DIGIT); +doSimple: + if (!state->result) + return JS_FALSE; + state->progLength++; + break; + case 'D': + state->result = NewRENode(state, REOP_NONDIGIT); + goto doSimple; + case 's': + state->result = NewRENode(state, REOP_SPACE); + goto doSimple; + case 'S': + state->result = NewRENode(state, REOP_NONSPACE); + goto doSimple; + case 'w': + state->result = NewRENode(state, REOP_ALNUM); + goto doSimple; + case 'W': + state->result = NewRENode(state, REOP_NONALNUM); + goto doSimple; + /* IdentityEscape */ + default: + state->result = NewRENode(state, REOP_FLAT); + if (!state->result) + return JS_FALSE; + state->result->u.flat.chr = c; + state->result->u.flat.length = 1; + state->result->kid = (void *) (state->cp - 1); + state->progLength += 3; + break; + } + break; + case '[': + state->result = NewRENode(state, REOP_CLASS); + if (!state->result) + return JS_FALSE; + termStart = state->cp; + state->result->u.ucclass.startIndex = termStart - state->cpbegin; + for (;;) { + if (state->cp == state->cpend) { + ReportRegExpErrorHelper(state, JSREPORT_ERROR, + JSMSG_UNTERM_CLASS, termStart); + + return JS_FALSE; + } + if (*state->cp == '\\') { + state->cp++; + if (state->cp != state->cpend) + state->cp++; + continue; + } + if (*state->cp == ']') { + state->result->u.ucclass.kidlen = state->cp - termStart; + break; + } + state->cp++; + } + for (i = 0; i < CLASS_CACHE_SIZE; i++) { + if (!state->classCache[i].start) { + state->classCache[i].start = termStart; + state->classCache[i].length = state->result->u.ucclass.kidlen; + state->classCache[i].index = state->classCount; + break; + } + if (state->classCache[i].length == + state->result->u.ucclass.kidlen) { + for (n = 0; ; n++) { + if (n == state->classCache[i].length) { + state->result->u.ucclass.index + = state->classCache[i].index; + goto claim; + } + if (state->classCache[i].start[n] != termStart[n]) + break; + } + } + } + state->result->u.ucclass.index = state->classCount++; + + claim: + /* + * Call CalculateBitmapSize now as we want any errors it finds + * to be reported during the parse phase, not at execution. + */ + if (!CalculateBitmapSize(state, state->result, termStart, state->cp++)) + return JS_FALSE; + /* + * Update classBitmapsMem with number of bytes to hold bmsize bits, + * which is (bitsCount + 7) / 8 or (highest_bit + 1 + 7) / 8 + * or highest_bit / 8 + 1 where highest_bit is u.ucclass.bmsize. + */ + n = (state->result->u.ucclass.bmsize >> 3) + 1; + if (n > CLASS_BITMAPS_MEM_LIMIT - state->classBitmapsMem) { + ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX); + return JS_FALSE; + } + state->classBitmapsMem += n; + /* CLASS, <index> */ + state->progLength + += 1 + GetCompactIndexWidth(state->result->u.ucclass.index); + break; + + case '.': + state->result = NewRENode(state, REOP_DOT); + goto doSimple; + + case '{': + { + const jschar *errp = state->cp--; + intN err; + + err = ParseMinMaxQuantifier(state, JS_TRUE); + state->cp = errp; + + if (err < 0) + goto asFlat; + + /* FALL THROUGH */ + } + case '*': + case '+': + case '?': + ReportRegExpErrorHelper(state, JSREPORT_ERROR, + JSMSG_BAD_QUANTIFIER, state->cp - 1); + return JS_FALSE; + default: +asFlat: + state->result = NewRENode(state, REOP_FLAT); + if (!state->result) + return JS_FALSE; + state->result->u.flat.chr = c; + state->result->u.flat.length = 1; + state->result->kid = (void *) (state->cp - 1); + state->progLength += 3; + break; + } + return ParseQuantifier(state); +} + +static JSBool +ParseQuantifier(CompilerState *state) +{ + RENode *term; + term = state->result; + if (state->cp < state->cpend) { + switch (*state->cp) { + case '+': + state->result = NewRENode(state, REOP_QUANT); + if (!state->result) + return JS_FALSE; + state->result->u.range.min = 1; + state->result->u.range.max = (uintN)-1; + /* <PLUS>, <next> ... <ENDCHILD> */ + state->progLength += 4; + goto quantifier; + case '*': + state->result = NewRENode(state, REOP_QUANT); + if (!state->result) + return JS_FALSE; + state->result->u.range.min = 0; + state->result->u.range.max = (uintN)-1; + /* <STAR>, <next> ... <ENDCHILD> */ + state->progLength += 4; + goto quantifier; + case '?': + state->result = NewRENode(state, REOP_QUANT); + if (!state->result) + return JS_FALSE; + state->result->u.range.min = 0; + state->result->u.range.max = 1; + /* <OPT>, <next> ... <ENDCHILD> */ + state->progLength += 4; + goto quantifier; + case '{': /* balance '}' */ + { + intN err; + const jschar *errp = state->cp; + + err = ParseMinMaxQuantifier(state, JS_FALSE); + if (err == 0) + goto quantifier; + if (err == -1) + return JS_TRUE; + + ReportRegExpErrorHelper(state, JSREPORT_ERROR, err, errp); + return JS_FALSE; + } + default:; + } + } + return JS_TRUE; + +quantifier: + if (state->treeDepth == TREE_DEPTH_MAX) { + ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX); + return JS_FALSE; + } + + ++state->treeDepth; + ++state->cp; + state->result->kid = term; + if (state->cp < state->cpend && *state->cp == '?') { + ++state->cp; + state->result->u.range.greedy = JS_FALSE; + } else { + state->result->u.range.greedy = JS_TRUE; + } + return JS_TRUE; +} + +static intN +ParseMinMaxQuantifier(CompilerState *state, JSBool ignoreValues) +{ + uintN min, max; + jschar c; + const jschar *errp = state->cp++; + + c = *state->cp; + if (JS7_ISDEC(c)) { + ++state->cp; + min = GetDecimalValue(c, 0xFFFF, NULL, state); + c = *state->cp; + + if (!ignoreValues && min == OVERFLOW_VALUE) + return JSMSG_MIN_TOO_BIG; + + if (c == ',') { + c = *++state->cp; + if (JS7_ISDEC(c)) { + ++state->cp; + max = GetDecimalValue(c, 0xFFFF, NULL, state); + c = *state->cp; + if (!ignoreValues && max == OVERFLOW_VALUE) + return JSMSG_MAX_TOO_BIG; + if (!ignoreValues && min > max) + return JSMSG_OUT_OF_ORDER; + } else { + max = (uintN)-1; + } + } else { + max = min; + } + if (c == '}') { + state->result = NewRENode(state, REOP_QUANT); + if (!state->result) + return JSMSG_OUT_OF_MEMORY; + state->result->u.range.min = min; + state->result->u.range.max = max; + /* + * QUANT, <min>, <max>, <next> ... <ENDCHILD> + * where <max> is written as compact(max+1) to make + * (uintN)-1 sentinel to occupy 1 byte, not width_of(max)+1. + */ + state->progLength += (1 + GetCompactIndexWidth(min) + + GetCompactIndexWidth(max + 1) + +3); + return 0; + } + } + + state->cp = errp; + return -1; +} + +static JSBool +SetForwardJumpOffset(jsbytecode *jump, jsbytecode *target) +{ + ptrdiff_t offset = target - jump; + + /* Check that target really points forward. */ + JS_ASSERT(offset >= 2); + if ((size_t)offset > OFFSET_MAX) + return JS_FALSE; + + jump[0] = JUMP_OFFSET_HI(offset); + jump[1] = JUMP_OFFSET_LO(offset); + return JS_TRUE; +} + +/* Copy the charset data from a character class node to the charset list + * in the regexp object. */ +static JS_ALWAYS_INLINE RECharSet * +InitNodeCharSet(JSRegExp *re, RENode *node) +{ + RECharSet *charSet = &re->classList[node->u.ucclass.index]; + charSet->converted = JS_FALSE; + charSet->length = node->u.ucclass.bmsize; + charSet->u.src.startIndex = node->u.ucclass.startIndex; + charSet->u.src.length = node->u.ucclass.kidlen; + charSet->sense = node->u.ucclass.sense; + return charSet; +} + +/* + * Generate bytecode for the tree rooted at t using an explicit stack instead + * of recursion. + */ +static jsbytecode * +EmitREBytecode(CompilerState *state, JSRegExp *re, size_t treeDepth, + jsbytecode *pc, RENode *t) +{ + EmitStateStackEntry *emitStateSP, *emitStateStack; + REOp op; + + if (treeDepth == 0) { + emitStateStack = NULL; + } else { + emitStateStack = + (EmitStateStackEntry *)JS_malloc(state->context, + sizeof(EmitStateStackEntry) * + treeDepth); + if (!emitStateStack) + return NULL; + } + emitStateSP = emitStateStack; + op = t->op; + JS_ASSERT(op < REOP_LIMIT); + + for (;;) { + *pc++ = op; + switch (op) { + case REOP_EMPTY: + --pc; + break; + + case REOP_ALTPREREQ2: + case REOP_ALTPREREQ: + JS_ASSERT(emitStateSP); + emitStateSP->altHead = pc - 1; + emitStateSP->endTermFixup = pc; + pc += OFFSET_LEN; + SET_ARG(pc, t->u.altprereq.ch1); + pc += ARG_LEN; + SET_ARG(pc, t->u.altprereq.ch2); + pc += ARG_LEN; + + emitStateSP->nextAltFixup = pc; /* offset to next alternate */ + pc += OFFSET_LEN; + + emitStateSP->continueNode = t; + emitStateSP->continueOp = REOP_JUMP; + emitStateSP->jumpToJumpFlag = JS_FALSE; + ++emitStateSP; + JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth); + t = (RENode *) t->kid; + op = t->op; + JS_ASSERT(op < REOP_LIMIT); + continue; + + case REOP_JUMP: + emitStateSP->nextTermFixup = pc; /* offset to following term */ + pc += OFFSET_LEN; + if (!SetForwardJumpOffset(emitStateSP->nextAltFixup, pc)) + goto jump_too_big; + emitStateSP->continueOp = REOP_ENDALT; + ++emitStateSP; + JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth); + t = (RENode *) t->u.kid2; + op = t->op; + JS_ASSERT(op < REOP_LIMIT); + continue; + + case REOP_ENDALT: + /* + * If we already patched emitStateSP->nextTermFixup to jump to + * a nearer jump, to avoid 16-bit immediate offset overflow, we + * are done here. + */ + if (emitStateSP->jumpToJumpFlag) + break; + + /* + * Fix up the REOP_JUMP offset to go to the op after REOP_ENDALT. + * REOP_ENDALT is executed only on successful match of the last + * alternate in a group. + */ + if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc)) + goto jump_too_big; + if (t->op != REOP_ALT) { + if (!SetForwardJumpOffset(emitStateSP->endTermFixup, pc)) + goto jump_too_big; + } + + /* + * If the program is bigger than the REOP_JUMP offset range, then + * we must check for alternates before this one that are part of + * the same group, and fix up their jump offsets to target jumps + * close enough to fit in a 16-bit unsigned offset immediate. + */ + if ((size_t)(pc - re->program) > OFFSET_MAX && + emitStateSP > emitStateStack) { + EmitStateStackEntry *esp, *esp2; + jsbytecode *alt, *jump; + ptrdiff_t span, header; + + esp2 = emitStateSP; + alt = esp2->altHead; + for (esp = esp2 - 1; esp >= emitStateStack; --esp) { + if (esp->continueOp == REOP_ENDALT && + !esp->jumpToJumpFlag && + esp->nextTermFixup + OFFSET_LEN == alt && + (size_t)(pc - ((esp->continueNode->op != REOP_ALT) + ? esp->endTermFixup + : esp->nextTermFixup)) > OFFSET_MAX) { + alt = esp->altHead; + jump = esp->nextTermFixup; + + /* + * The span must be 1 less than the distance from + * jump offset to jump offset, so we actually jump + * to a REOP_JUMP bytecode, not to its offset! + */ + for (;;) { + JS_ASSERT(jump < esp2->nextTermFixup); + span = esp2->nextTermFixup - jump - 1; + if ((size_t)span <= OFFSET_MAX) + break; + do { + if (--esp2 == esp) + goto jump_too_big; + } while (esp2->continueOp != REOP_ENDALT); + } + + jump[0] = JUMP_OFFSET_HI(span); + jump[1] = JUMP_OFFSET_LO(span); + + if (esp->continueNode->op != REOP_ALT) { + /* + * We must patch the offset at esp->endTermFixup + * as well, for the REOP_ALTPREREQ{,2} opcodes. + * If we're unlucky and endTermFixup is more than + * OFFSET_MAX bytes from its target, we cheat by + * jumping 6 bytes to the jump whose offset is at + * esp->nextTermFixup, which has the same target. + */ + jump = esp->endTermFixup; + header = esp->nextTermFixup - jump; + span += header; + if ((size_t)span > OFFSET_MAX) + span = header; + + jump[0] = JUMP_OFFSET_HI(span); + jump[1] = JUMP_OFFSET_LO(span); + } + + esp->jumpToJumpFlag = JS_TRUE; + } + } + } + break; + + case REOP_ALT: + JS_ASSERT(emitStateSP); + emitStateSP->altHead = pc - 1; + emitStateSP->nextAltFixup = pc; /* offset to next alternate */ + pc += OFFSET_LEN; + emitStateSP->continueNode = t; + emitStateSP->continueOp = REOP_JUMP; + emitStateSP->jumpToJumpFlag = JS_FALSE; + ++emitStateSP; + JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth); + t = (RENode *) t->kid; + op = t->op; + JS_ASSERT(op < REOP_LIMIT); + continue; + + case REOP_FLAT: + /* + * Coalesce FLATs if possible and if it would not increase bytecode + * beyond preallocated limit. The latter happens only when bytecode + * size for coalesced string with offset p and length 2 exceeds 6 + * bytes preallocated for 2 single char nodes, i.e. when + * 1 + GetCompactIndexWidth(p) + GetCompactIndexWidth(2) > 6 or + * GetCompactIndexWidth(p) > 4. + * Since when GetCompactIndexWidth(p) <= 4 coalescing of 3 or more + * nodes strictly decreases bytecode size, the check has to be + * done only for the first coalescing. + */ + if (t->kid && + GetCompactIndexWidth((jschar *)t->kid - state->cpbegin) <= 4) + { + while (t->next && + t->next->op == REOP_FLAT && + (jschar*)t->kid + t->u.flat.length == + (jschar*)t->next->kid) { + t->u.flat.length += t->next->u.flat.length; + t->next = t->next->next; + } + } + if (t->kid && t->u.flat.length > 1) { + pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLATi : REOP_FLAT; + pc = WriteCompactIndex(pc, (jschar *)t->kid - state->cpbegin); + pc = WriteCompactIndex(pc, t->u.flat.length); + } else if (t->u.flat.chr < 256) { + pc[-1] = (state->flags & JSREG_FOLD) ? REOP_FLAT1i : REOP_FLAT1; + *pc++ = (jsbytecode) t->u.flat.chr; + } else { + pc[-1] = (state->flags & JSREG_FOLD) + ? REOP_UCFLAT1i + : REOP_UCFLAT1; + SET_ARG(pc, t->u.flat.chr); + pc += ARG_LEN; + } + break; + + case REOP_LPAREN: + JS_ASSERT(emitStateSP); + pc = WriteCompactIndex(pc, t->u.parenIndex); + emitStateSP->continueNode = t; + emitStateSP->continueOp = REOP_RPAREN; + ++emitStateSP; + JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth); + t = (RENode *) t->kid; + op = t->op; + continue; + + case REOP_RPAREN: + pc = WriteCompactIndex(pc, t->u.parenIndex); + break; + + case REOP_BACKREF: + pc = WriteCompactIndex(pc, t->u.parenIndex); + break; + + case REOP_ASSERT: + JS_ASSERT(emitStateSP); + emitStateSP->nextTermFixup = pc; + pc += OFFSET_LEN; + emitStateSP->continueNode = t; + emitStateSP->continueOp = REOP_ASSERTTEST; + ++emitStateSP; + JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth); + t = (RENode *) t->kid; + op = t->op; + continue; + + case REOP_ASSERTTEST: + case REOP_ASSERTNOTTEST: + if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc)) + goto jump_too_big; + break; + + case REOP_ASSERT_NOT: + JS_ASSERT(emitStateSP); + emitStateSP->nextTermFixup = pc; + pc += OFFSET_LEN; + emitStateSP->continueNode = t; + emitStateSP->continueOp = REOP_ASSERTNOTTEST; + ++emitStateSP; + JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth); + t = (RENode *) t->kid; + op = t->op; + continue; + + case REOP_QUANT: + JS_ASSERT(emitStateSP); + if (t->u.range.min == 0 && t->u.range.max == (uintN)-1) { + pc[-1] = (t->u.range.greedy) ? REOP_STAR : REOP_MINIMALSTAR; + } else if (t->u.range.min == 0 && t->u.range.max == 1) { + pc[-1] = (t->u.range.greedy) ? REOP_OPT : REOP_MINIMALOPT; + } else if (t->u.range.min == 1 && t->u.range.max == (uintN) -1) { + pc[-1] = (t->u.range.greedy) ? REOP_PLUS : REOP_MINIMALPLUS; + } else { + if (!t->u.range.greedy) + pc[-1] = REOP_MINIMALQUANT; + pc = WriteCompactIndex(pc, t->u.range.min); + /* + * Write max + 1 to avoid using size_t(max) + 1 bytes + * for (uintN)-1 sentinel. + */ + pc = WriteCompactIndex(pc, t->u.range.max + 1); + } + emitStateSP->nextTermFixup = pc; + pc += OFFSET_LEN; + emitStateSP->continueNode = t; + emitStateSP->continueOp = REOP_ENDCHILD; + ++emitStateSP; + JS_ASSERT((size_t)(emitStateSP - emitStateStack) <= treeDepth); + t = (RENode *) t->kid; + op = t->op; + continue; + + case REOP_ENDCHILD: + if (!SetForwardJumpOffset(emitStateSP->nextTermFixup, pc)) + goto jump_too_big; + break; + + case REOP_CLASS: + if (!t->u.ucclass.sense) + pc[-1] = REOP_NCLASS; + pc = WriteCompactIndex(pc, t->u.ucclass.index); + InitNodeCharSet(re, t); + break; + + default: + break; + } + + t = t->next; + if (t) { + op = t->op; + } else { + if (emitStateSP == emitStateStack) + break; + --emitStateSP; + t = emitStateSP->continueNode; + op = (REOp) emitStateSP->continueOp; + } + } + + cleanup: + if (emitStateStack) + JS_free(state->context, emitStateStack); + return pc; + + jump_too_big: + ReportRegExpError(state, JSREPORT_ERROR, JSMSG_REGEXP_TOO_COMPLEX); + pc = NULL; + goto cleanup; +} + +#ifdef JS_TRACER +typedef List<LIns*, LIST_NonGCObjects> LInsList; + +/* Dummy GC for nanojit placement new. */ +static GC gc; + +class RegExpNativeCompiler { + private: + JSRegExp* re; /* Careful: not fully initialized */ + CompilerState* cs; /* RegExp to compile */ + Fragment* fragment; + LirWriter* lir; + + LIns* state; + LIns* gdata; + LIns* cpend; + + JSBool isCaseInsensitive() const { return cs->flags & JSREG_FOLD; } + + void targetCurrentPoint(LIns* ins) { ins->target(lir->ins0(LIR_label)); } + + void targetCurrentPoint(LInsList& fails) + { + LIns* fail = lir->ins0(LIR_label); + for (size_t i = 0; i < fails.size(); ++i) { + fails[i]->target(fail); + } + fails.clear(); + } + + /* + * These functions return the new position after their match operation, + * or NULL if there was an error. + */ + LIns* compileEmpty(RENode* node, LIns* pos, LInsList& fails) + { + return pos; + } + + LIns* compileFlatSingleChar(RENode* node, LIns* pos, LInsList& fails) + { + /* + * Fast case-insensitive test for ASCII letters: convert text + * char to lower case by bit-or-ing in 32 and compare. + */ + JSBool useFastCI = JS_FALSE; + jschar ch = node->u.flat.chr; /* char to test for */ + jschar ch2 = ch; /* 2nd char to test for if ci */ + if (cs->flags & JSREG_FOLD) { + if (L'A' <= ch && ch <= L'Z' || L'a' <= ch || ch <= L'z') { + ch |= 32; + ch2 = ch; + useFastCI = JS_TRUE; + } else if (JS_TOLOWER(ch) != ch) { + ch2 = JS_TOLOWER(ch); + ch = JS_TOUPPER(ch); + } + } + + LIns* to_fail = lir->insBranch(LIR_jf, lir->ins2(LIR_lt, pos, cpend), 0); + fails.add(to_fail); + LIns* text_ch = lir->insLoad(LIR_ldcs, pos, lir->insImm(0)); + LIns* comp_ch = useFastCI ? + lir->ins2(LIR_or, text_ch, lir->insImm(32)) : + text_ch; + if (ch == ch2) { + fails.add(lir->insBranch(LIR_jf, lir->ins2(LIR_eq, comp_ch, lir->insImm(ch)), 0)); + } else { + LIns* to_ok = lir->insBranch(LIR_jt, lir->ins2(LIR_eq, comp_ch, lir->insImm(ch)), 0); + fails.add(lir->insBranch(LIR_jf, lir->ins2(LIR_eq, comp_ch, lir->insImm(ch2)), 0)); + targetCurrentPoint(to_ok); + } + + return lir->ins2(LIR_piadd, pos, lir->insImm(2)); + } + + LIns* compileClass(RENode* node, LIns* pos, LInsList& fails) + { + if (!node->u.ucclass.sense) + return JS_FALSE; + + RECharSet* charSet = InitNodeCharSet(re, node); + LIns* to_fail = lir->insBranch(LIR_jf, lir->ins2(LIR_lt, pos, cpend), 0); + fails.add(to_fail); + LIns* text_ch = lir->insLoad(LIR_ldcs, pos, lir->insImm(0)); + fails.add(lir->insBranch(LIR_jf, lir->ins2(LIR_le, text_ch, lir->insImm(charSet->length)), 0)); + LIns* byteIndex = lir->ins2(LIR_rsh, text_ch, lir->insImm(3)); + LIns* bitmap = lir->insLoad(LIR_ld, lir->insImmPtr(charSet), (int) offsetof(RECharSet, u.bits)); + LIns* byte = lir->insLoad(LIR_ldcb, lir->ins2(LIR_piadd, bitmap, byteIndex), (int) 0); + LIns* bitMask = lir->ins2(LIR_lsh, lir->insImm(1), + lir->ins2(LIR_and, text_ch, lir->insImm(0x7))); + LIns* test = lir->ins2(LIR_eq, lir->ins2(LIR_and, byte, bitMask), lir->insImm(0)); + + LIns* to_next = lir->insBranch(LIR_jt, test, 0); + fails.add(to_next); + return lir->ins2(LIR_piadd, pos, lir->insImm(2)); + } + + LIns* compileAlt(RENode* node, LIns* pos, LInsList& fails) + { + LInsList kidFails(NULL); + if (!compileNode((RENode *) node->kid, pos, kidFails)) + return JS_FALSE; + if (!compileNode(node->next, pos, kidFails)) + return JS_FALSE; + + targetCurrentPoint(kidFails); + if (!compileNode(node->u.altprereq.kid2, pos, fails)) + return JS_FALSE; + /* + * Disable compilation for any regexp where something follows an + * alternation. To make this work, we need to redesign to either + * (a) pass around continuations so we know the right place to go + * when we logically return, or (b) generate explicit backtracking + * code. + */ + if (node->next) + return JS_FALSE; + return pos; + } + + JSBool compileNode(RENode* node, LIns* pos, LInsList& fails) + { + for (; node; node = node->next) { + if (fragment->lirbuf->outOmem()) + return JS_FALSE; + + switch (node->op) { + case REOP_EMPTY: + pos = compileEmpty(node, pos, fails); + break; + case REOP_FLAT: + if (node->u.flat.length != 1) + return JS_FALSE; + pos = compileFlatSingleChar(node, pos, fails); + break; + case REOP_ALT: + case REOP_ALTPREREQ: + pos = compileAlt(node, pos, fails); + break; + case REOP_CLASS: + pos = compileClass(node, pos, fails); + break; + default: + return JS_FALSE; + } + if (!pos) + return JS_FALSE; + } + + lir->insStorei(pos, state, (int) offsetof(REMatchState, cp)); + lir->ins1(LIR_ret, state); + return JS_TRUE; + } + + JSBool compileSticky(RENode* root, LIns* start) + { + LInsList fails(NULL); + if (!compileNode(root, start, fails)) + return JS_FALSE; + targetCurrentPoint(fails); + lir->ins1(LIR_ret, lir->insImm(0)); + return JS_TRUE; + } + + JSBool compileAnchoring(RENode* root, LIns* start) + { + /* Even at the end, the empty regexp would match. */ + LIns* to_next = lir->insBranch(LIR_jf, + lir->ins2(LIR_le, start, cpend), 0); + LInsList fails(NULL); + if (!compileNode(root, start, fails)) + return JS_FALSE; + + targetCurrentPoint(to_next); + lir->ins1(LIR_ret, lir->insImm(0)); + + targetCurrentPoint(fails); + lir->insStorei(lir->ins2(LIR_piadd, start, lir->insImm(2)), gdata, + (int) offsetof(REGlobalData, skipped)); + + return JS_TRUE; + } + + inline LIns* + addName(LirBuffer* lirbuf, LIns* ins, const char* name) + { + debug_only_v(lirbuf->names->addName(ins, name);) + return ins; + } + + public: + RegExpNativeCompiler(JSRegExp *re, CompilerState *cs) + : re(re), cs(cs), fragment(NULL) { } + + JSBool compile(JSContext* cx) + { + GuardRecord* guard; + LIns* skip; + LIns* start; + bool oom = false; + + Fragmento* fragmento = JS_TRACE_MONITOR(cx).reFragmento; + fragment = fragmento->getLoop(re); + if (!fragment) { + fragment = fragmento->getAnchor(re); + fragment->lirbuf = new (&gc) LirBuffer(fragmento, NULL); + /* Scary: required to have the onDestroy method delete the lirbuf. */ + fragment->root = fragment; + } + LirBuffer* lirbuf = fragment->lirbuf; + LirBufWriter* lirb; + if (lirbuf->outOmem()) goto fail2; + /* FIXME Use bug 463260 smart pointer when available. */ + lir = lirb = new (&gc) LirBufWriter(lirbuf); + + /* FIXME Use bug 463260 smart pointer when available. */ + debug_only_v(fragment->lirbuf->names = new (&gc) LirNameMap(&gc, NULL, fragmento->labels);) + /* FIXME Use bug 463260 smart pointer when available. */ + debug_only_v(lir = new (&gc) VerboseWriter(&gc, lir, lirbuf->names);) + + lir->ins0(LIR_start); + lirbuf->state = state = addName(lirbuf, lir->insParam(0, 0), "state"); + lirbuf->param1 = gdata = addName(lirbuf, lir->insParam(1, 0), "gdata"); + start = addName(lirbuf, lir->insLoad(LIR_ldp, lirbuf->param1, (int) offsetof(REGlobalData, skipped)), "start"); + cpend = addName(lirbuf, lir->insLoad(LIR_ldp, lirbuf->param1, offsetof(REGlobalData, cpend)), "cpend"); + + if (cs->flags & JSREG_STICKY) { + if (!compileSticky(cs->result, start)) goto fail; + } else { + if (!compileAnchoring(cs->result, start)) goto fail; + } + + /* Create fake guard record for loop edge. */ + skip = lirb->skip(sizeof(GuardRecord) + sizeof(SideExit)); + guard = (GuardRecord *) skip->payload(); + memset(guard, 0, sizeof(*guard)); + guard->exit = (SideExit *) guard+1; + guard->exit->target = fragment; + fragment->lastIns = lir->insGuard(LIR_loop, lir->insImm(1), skip); + + ::compile(fragmento->assm(), fragment); + if (fragmento->assm()->error() != nanojit::None) { + oom = fragmento->assm()->error() == nanojit::OutOMem; + goto fail; + } + + delete lirb; + debug_only_v(delete lir;) + return JS_TRUE; + fail: + delete lirb; + debug_only_v(delete lir;) + fail2: + if (lirbuf->outOmem() || oom) + fragmento->clearFrags(); + return JS_FALSE; + } +}; + +static inline JSBool +js_CompileRegExpToNative(JSContext *cx, JSRegExp *re, CompilerState *cs) +{ + RegExpNativeCompiler rc(re, cs); + return rc.compile(cx); +} +#endif + +JSRegExp * +js_NewRegExp(JSContext *cx, JSTokenStream *ts, + JSString *str, uintN flags, JSBool flat) +{ + JSRegExp *re; + void *mark; + CompilerState state; + size_t resize; + jsbytecode *endPC; + uintN i; + size_t len; + + re = NULL; + mark = JS_ARENA_MARK(&cx->tempPool); + len = JSSTRING_LENGTH(str); + + state.context = cx; + state.tokenStream = ts; + state.cp = js_UndependString(cx, str); + if (!state.cp) + goto out; + state.cpbegin = state.cp; + state.cpend = state.cp + len; + state.flags = flags; + state.parenCount = 0; + state.classCount = 0; + state.progLength = 0; + state.treeDepth = 0; + state.classBitmapsMem = 0; + for (i = 0; i < CLASS_CACHE_SIZE; i++) + state.classCache[i].start = NULL; + + if (len != 0 && flat) { + state.result = NewRENode(&state, REOP_FLAT); + if (!state.result) + goto out; + state.result->u.flat.chr = *state.cpbegin; + state.result->u.flat.length = len; + state.result->kid = (void *) state.cpbegin; + /* Flat bytecode: REOP_FLAT compact(string_offset) compact(len). */ + state.progLength += 1 + GetCompactIndexWidth(0) + + GetCompactIndexWidth(len); + } else { + if (!ParseRegExp(&state)) + goto out; + } + + resize = offsetof(JSRegExp, program) + state.progLength + 1; + re = (JSRegExp *) JS_malloc(cx, resize); + if (!re) + goto out; + + re->nrefs = 1; + JS_ASSERT(state.classBitmapsMem <= CLASS_BITMAPS_MEM_LIMIT); + re->classCount = state.classCount; + if (re->classCount) { + re->classList = (RECharSet *) + JS_malloc(cx, re->classCount * sizeof(RECharSet)); + if (!re->classList) { + js_DestroyRegExp(cx, re); + re = NULL; + goto out; + } + for (i = 0; i < re->classCount; i++) + re->classList[i].converted = JS_FALSE; + } else { + re->classList = NULL; + } + +#ifdef JS_TRACER + /* + * Try compiling the native code version. For the time being we also + * compile the bytecode version in case we evict the native code + * version from the code cache. + */ + if (TRACING_ENABLED(cx)) + js_CompileRegExpToNative(cx, re, &state); +#endif + /* Compile the bytecode version. */ + endPC = EmitREBytecode(&state, re, state.treeDepth, re->program, state.result); + if (!endPC) { + js_DestroyRegExp(cx, re); + re = NULL; + goto out; + } + *endPC++ = REOP_END; + /* + * Check whether size was overestimated and shrink using realloc. + * This is safe since no pointers to newly parsed regexp or its parts + * besides re exist here. + */ +#if 0 + /* + * FIXME: Until bug 464866 is fixed, we can't move the re object so + * don't shrink it for now. + */ + if ((size_t)(endPC - re->program) != state.progLength + 1) { + JSRegExp *tmp; + JS_ASSERT((size_t)(endPC - re->program) < state.progLength + 1); + resize = offsetof(JSRegExp, program) + (endPC - re->program); + tmp = (JSRegExp *) JS_realloc(cx, re, resize); + if (tmp) + re = tmp; + } +#endif + + re->flags = flags; + re->parenCount = state.parenCount; + re->source = str; + +out: + JS_ARENA_RELEASE(&cx->tempPool, mark); + return re; +} + +JSRegExp * +js_NewRegExpOpt(JSContext *cx, JSString *str, JSString *opt, JSBool flat) +{ + uintN flags; + jschar *s; + size_t i, n; + char charBuf[2]; + + flags = 0; + if (opt) { + JSSTRING_CHARS_AND_LENGTH(opt, s, n); + for (i = 0; i < n; i++) { + switch (s[i]) { + case 'g': + flags |= JSREG_GLOB; + break; + case 'i': + flags |= JSREG_FOLD; + break; + case 'm': + flags |= JSREG_MULTILINE; + break; + case 'y': + flags |= JSREG_STICKY; + break; + default: + charBuf[0] = (char)s[i]; + charBuf[1] = '\0'; + JS_ReportErrorFlagsAndNumber(cx, JSREPORT_ERROR, + js_GetErrorMessage, NULL, + JSMSG_BAD_FLAG, charBuf); + return NULL; + } + } + } + return js_NewRegExp(cx, NULL, str, flags, flat); +} + +/* + * Save the current state of the match - the position in the input + * text as well as the position in the bytecode. The state of any + * parent expressions is also saved (preceding state). + * Contents of parenCount parentheses from parenIndex are also saved. + */ +static REBackTrackData * +PushBackTrackState(REGlobalData *gData, REOp op, + jsbytecode *target, REMatchState *x, const jschar *cp, + size_t parenIndex, size_t parenCount) +{ + size_t i; + REBackTrackData *result = + (REBackTrackData *) ((char *)gData->backTrackSP + gData->cursz); + + size_t sz = sizeof(REBackTrackData) + + gData->stateStackTop * sizeof(REProgState) + + parenCount * sizeof(RECapture); + + ptrdiff_t btsize = gData->backTrackStackSize; + ptrdiff_t btincr = ((char *)result + sz) - + ((char *)gData->backTrackStack + btsize); + + re_debug("\tBT_Push: %lu,%lu", + (unsigned long) parenIndex, (unsigned long) parenCount); + + JS_COUNT_OPERATION(gData->cx, JSOW_JUMP * (1 + parenCount)); + if (btincr > 0) { + ptrdiff_t offset = (char *)result - (char *)gData->backTrackStack; + + JS_COUNT_OPERATION(gData->cx, JSOW_ALLOCATION); + btincr = JS_ROUNDUP(btincr, btsize); + JS_ARENA_GROW_CAST(gData->backTrackStack, REBackTrackData *, + &gData->cx->regexpPool, btsize, btincr); + if (!gData->backTrackStack) { + js_ReportOutOfScriptQuota(gData->cx); + gData->ok = JS_FALSE; + return NULL; + } + gData->backTrackStackSize = btsize + btincr; + result = (REBackTrackData *) ((char *)gData->backTrackStack + offset); + } + gData->backTrackSP = result; + result->sz = gData->cursz; + gData->cursz = sz; + + result->backtrack_op = op; + result->backtrack_pc = target; + result->cp = cp; + result->parenCount = parenCount; + result->parenIndex = parenIndex; + + result->saveStateStackTop = gData->stateStackTop; + JS_ASSERT(gData->stateStackTop); + memcpy(result + 1, gData->stateStack, + sizeof(REProgState) * result->saveStateStackTop); + + if (parenCount != 0) { + memcpy((char *)(result + 1) + + sizeof(REProgState) * result->saveStateStackTop, + &x->parens[parenIndex], + sizeof(RECapture) * parenCount); + for (i = 0; i != parenCount; i++) + x->parens[parenIndex + i].index = -1; + } + + return result; +} + + +/* + * Consecutive literal characters. + */ +#if 0 +static REMatchState * +FlatNMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars, + size_t length) +{ + size_t i; + if (length > gData->cpend - x->cp) + return NULL; + for (i = 0; i != length; i++) { + if (matchChars[i] != x->cp[i]) + return NULL; + } + x->cp += length; + return x; +} +#endif + +static JS_ALWAYS_INLINE REMatchState * +FlatNIMatcher(REGlobalData *gData, REMatchState *x, jschar *matchChars, + size_t length) +{ + size_t i; + JS_ASSERT(gData->cpend >= x->cp); + if (length > (size_t)(gData->cpend - x->cp)) + return NULL; + for (i = 0; i != length; i++) { + if (upcase(matchChars[i]) != upcase(x->cp[i])) + return NULL; + } + x->cp += length; + return x; +} + +/* + * 1. Evaluate DecimalEscape to obtain an EscapeValue E. + * 2. If E is not a character then go to step 6. + * 3. Let ch be E's character. + * 4. Let A be a one-element RECharSet containing the character ch. + * 5. Call CharacterSetMatcher(A, false) and return its Matcher result. + * 6. E must be an integer. Let n be that integer. + * 7. If n=0 or n>NCapturingParens then throw a SyntaxError exception. + * 8. Return an internal Matcher closure that takes two arguments, a State x + * and a Continuation c, and performs the following: + * 1. Let cap be x's captures internal array. + * 2. Let s be cap[n]. + * 3. If s is undefined, then call c(x) and return its result. + * 4. Let e be x's endIndex. + * 5. Let len be s's length. + * 6. Let f be e+len. + * 7. If f>InputLength, return failure. + * 8. If there exists an integer i between 0 (inclusive) and len (exclusive) + * such that Canonicalize(s[i]) is not the same character as + * Canonicalize(Input [e+i]), then return failure. + * 9. Let y be the State (f, cap). + * 10. Call c(y) and return its result. + */ +static REMatchState * +BackrefMatcher(REGlobalData *gData, REMatchState *x, size_t parenIndex) +{ + size_t len, i; + const jschar *parenContent; + RECapture *cap = &x->parens[parenIndex]; + + if (cap->index == -1) + return x; + + len = cap->length; + if (x->cp + len > gData->cpend) + return NULL; + + parenContent = &gData->cpbegin[cap->index]; + if (gData->regexp->flags & JSREG_FOLD) { + for (i = 0; i < len; i++) { + if (upcase(parenContent[i]) != upcase(x->cp[i])) + return NULL; + } + } else { + for (i = 0; i < len; i++) { + if (parenContent[i] != x->cp[i]) + return NULL; + } + } + x->cp += len; + return x; +} + + +/* Add a single character to the RECharSet */ +static void +AddCharacterToCharSet(RECharSet *cs, jschar c) +{ + uintN byteIndex = (uintN)(c >> 3); + JS_ASSERT(c <= cs->length); + cs->u.bits[byteIndex] |= 1 << (c & 0x7); +} + + +/* Add a character range, c1 to c2 (inclusive) to the RECharSet */ +static void +AddCharacterRangeToCharSet(RECharSet *cs, uintN c1, uintN c2) +{ + uintN i; + + uintN byteIndex1 = c1 >> 3; + uintN byteIndex2 = c2 >> 3; + + JS_ASSERT(c2 <= cs->length && c1 <= c2); + + c1 &= 0x7; + c2 &= 0x7; + + if (byteIndex1 == byteIndex2) { + cs->u.bits[byteIndex1] |= ((uint8)0xFF >> (7 - (c2 - c1))) << c1; + } else { + cs->u.bits[byteIndex1] |= 0xFF << c1; + for (i = byteIndex1 + 1; i < byteIndex2; i++) + cs->u.bits[i] = 0xFF; + cs->u.bits[byteIndex2] |= (uint8)0xFF >> (7 - c2); + } +} + +struct CharacterRange { + jschar start; + jschar end; +}; + +/* + * The following characters are taken from the ECMA-262 standard, section 7.2 + * and 7.3, and the Unicode 3 standard, Table 6-1. + */ +static const CharacterRange WhiteSpaceRanges[] = { + /* TAB, LF, VT, FF, CR */ + { 0x0009, 0x000D }, + /* SPACE */ + { 0x0020, 0x0020 }, + /* NO-BREAK SPACE */ + { 0x00A0, 0x00A0 }, + /* + * EN QUAD, EM QUAD, EN SPACE, EM SPACE, THREE-PER-EM SPACE, FOUR-PER-EM + * SPACE, SIX-PER-EM SPACE, FIGURE SPACE, PUNCTUATION SPACE, THIN SPACE, + * HAIR SPACE, ZERO WIDTH SPACE + */ + { 0x2000, 0x200B }, + /* LS, PS */ + { 0x2028, 0x2029 }, + /* NARROW NO-BREAK SPACE */ + { 0x202F, 0x202F }, + /* IDEOGRAPHIC SPACE */ + { 0x3000, 0x3000 } +}; + +/* ECMA-262 standard, section 15.10.2.6. */ +static const CharacterRange WordRanges[] = { + { jschar('0'), jschar('9') }, + { jschar('A'), jschar('Z') }, + { jschar('_'), jschar('_') }, + { jschar('a'), jschar('z') } +}; + +static void +AddCharacterRanges(RECharSet *charSet, + const CharacterRange *range, + const CharacterRange *end) +{ + for (; range < end; ++range) + AddCharacterRangeToCharSet(charSet, range->start, range->end); +} + +static void +AddInvertedCharacterRanges(RECharSet *charSet, + const CharacterRange *range, + const CharacterRange *end) +{ + uint16 previous = 0; + for (; range < end; ++range) { + AddCharacterRangeToCharSet(charSet, previous, range->start - 1); + previous = range->end + 1; + } + AddCharacterRangeToCharSet(charSet, previous, charSet->length); +} + +/* Compile the source of the class into a RECharSet */ +static JSBool +ProcessCharSet(REGlobalData *gData, RECharSet *charSet) +{ + const jschar *src, *end; + JSBool inRange = JS_FALSE; + jschar rangeStart = 0; + uintN byteLength, n; + jschar c, thisCh; + intN nDigits, i; + + JS_ASSERT(!charSet->converted); + /* + * Assert that startIndex and length points to chars inside [] inside + * source string. + */ + JS_ASSERT(1 <= charSet->u.src.startIndex); + JS_ASSERT(charSet->u.src.startIndex + < JSSTRING_LENGTH(gData->regexp->source)); + JS_ASSERT(charSet->u.src.length <= JSSTRING_LENGTH(gData->regexp->source) + - 1 - charSet->u.src.startIndex); + + charSet->converted = JS_TRUE; + src = JSSTRING_CHARS(gData->regexp->source) + charSet->u.src.startIndex; + end = src + charSet->u.src.length; + JS_ASSERT(src[-1] == '['); + JS_ASSERT(end[0] == ']'); + + byteLength = (charSet->length >> 3) + 1; + charSet->u.bits = (uint8 *)JS_malloc(gData->cx, byteLength); + if (!charSet->u.bits) { + JS_ReportOutOfMemory(gData->cx); + gData->ok = JS_FALSE; + return JS_FALSE; + } + memset(charSet->u.bits, 0, byteLength); + + if (src == end) + return JS_TRUE; + + if (*src == '^') { + JS_ASSERT(charSet->sense == JS_FALSE); + ++src; + } else { + JS_ASSERT(charSet->sense == JS_TRUE); + } + + while (src != end) { + switch (*src) { + case '\\': + ++src; + c = *src++; + switch (c) { + case 'b': + thisCh = 0x8; + break; + case 'f': + thisCh = 0xC; + break; + case 'n': + thisCh = 0xA; + break; + case 'r': + thisCh = 0xD; + break; + case 't': + thisCh = 0x9; + break; + case 'v': + thisCh = 0xB; + break; + case 'c': + if (src < end && JS_ISWORD(*src)) { + thisCh = (jschar)(*src++ & 0x1F); + } else { + --src; + thisCh = '\\'; + } + break; + case 'x': + nDigits = 2; + goto lexHex; + case 'u': + nDigits = 4; + lexHex: + n = 0; + for (i = 0; (i < nDigits) && (src < end); i++) { + uintN digit; + c = *src++; + if (!isASCIIHexDigit(c, &digit)) { + /* + * Back off to accepting the original '\' + * as a literal + */ + src -= i + 1; + n = '\\'; + break; + } + n = (n << 4) | digit; + } + thisCh = (jschar)n; + break; + case '0': + case '1': + case '2': + case '3': + case '4': + case '5': + case '6': + case '7': + /* + * This is a non-ECMA extension - decimal escapes (in this + * case, octal!) are supposed to be an error inside class + * ranges, but supported here for backwards compatibility. + */ + n = JS7_UNDEC(c); + c = *src; + if ('0' <= c && c <= '7') { + src++; + n = 8 * n + JS7_UNDEC(c); + c = *src; + if ('0' <= c && c <= '7') { + src++; + i = 8 * n + JS7_UNDEC(c); + if (i <= 0377) + n = i; + else + src--; + } + } + thisCh = (jschar)n; + break; + + case 'd': + AddCharacterRangeToCharSet(charSet, '0', '9'); + continue; /* don't need range processing */ + case 'D': + AddCharacterRangeToCharSet(charSet, 0, '0' - 1); + AddCharacterRangeToCharSet(charSet, + (jschar)('9' + 1), + (jschar)charSet->length); + continue; + case 's': + AddCharacterRanges(charSet, WhiteSpaceRanges, + WhiteSpaceRanges + JS_ARRAY_LENGTH(WhiteSpaceRanges)); + continue; + case 'S': + AddInvertedCharacterRanges(charSet, WhiteSpaceRanges, + WhiteSpaceRanges + JS_ARRAY_LENGTH(WhiteSpaceRanges)); + continue; + case 'w': + AddCharacterRanges(charSet, WordRanges, + WordRanges + JS_ARRAY_LENGTH(WordRanges)); + continue; + case 'W': + AddInvertedCharacterRanges(charSet, WordRanges, + WordRanges + JS_ARRAY_LENGTH(WordRanges)); + continue; + default: + thisCh = c; + break; + + } + break; + + default: + thisCh = *src++; + break; + + } + if (inRange) { + if (gData->regexp->flags & JSREG_FOLD) { + int i; + + JS_ASSERT(rangeStart <= thisCh); + for (i = rangeStart; i <= thisCh; i++) { + jschar uch, dch; + + AddCharacterToCharSet(charSet, i); + uch = upcase(i); + dch = downcase(i); + if (i != uch) + AddCharacterToCharSet(charSet, uch); + if (i != dch) + AddCharacterToCharSet(charSet, dch); + } + } else { + AddCharacterRangeToCharSet(charSet, rangeStart, thisCh); + } + inRange = JS_FALSE; + } else { + if (gData->regexp->flags & JSREG_FOLD) { + AddCharacterToCharSet(charSet, upcase(thisCh)); + AddCharacterToCharSet(charSet, downcase(thisCh)); + } else { + AddCharacterToCharSet(charSet, thisCh); + } + if (src < end - 1) { + if (*src == '-') { + ++src; + inRange = JS_TRUE; + rangeStart = thisCh; + } + } + } + } + return JS_TRUE; +} + +void +js_DestroyRegExp(JSContext *cx, JSRegExp *re) +{ + if (JS_ATOMIC_DECREMENT(&re->nrefs) == 0) { +#ifdef JS_TRACER + /* Don't reuse this compiled code for some new regexp at same addr. */ + Fragment* fragment = JS_TRACE_MONITOR(cx).reFragmento->getLoop(re); + if (fragment) + fragment->blacklist(); +#endif + if (re->classList) { + uintN i; + for (i = 0; i < re->classCount; i++) { + if (re->classList[i].converted) + JS_free(cx, re->classList[i].u.bits); + re->classList[i].u.bits = NULL; + } + JS_free(cx, re->classList); + } + JS_free(cx, re); + } +} + +static JSBool +ReallocStateStack(REGlobalData *gData) +{ + size_t limit = gData->stateStackLimit; + size_t sz = sizeof(REProgState) * limit; + + JS_ARENA_GROW_CAST(gData->stateStack, REProgState *, + &gData->cx->regexpPool, sz, sz); + if (!gData->stateStack) { + js_ReportOutOfScriptQuota(gData->cx); + gData->ok = JS_FALSE; + return JS_FALSE; + } + gData->stateStackLimit = limit + limit; + return JS_TRUE; +} + +#define PUSH_STATE_STACK(data) \ + JS_BEGIN_MACRO \ + ++(data)->stateStackTop; \ + if ((data)->stateStackTop == (data)->stateStackLimit && \ + !ReallocStateStack((data))) { \ + return NULL; \ + } \ + JS_END_MACRO + +/* + * Apply the current op against the given input to see if it's going to match + * or fail. Return false if we don't get a match, true if we do. If updatecp is + * true, then update the current state's cp. Always update startpc to the next + * op. + */ +static JS_ALWAYS_INLINE REMatchState * +SimpleMatch(REGlobalData *gData, REMatchState *x, REOp op, + jsbytecode **startpc, JSBool updatecp) +{ + REMatchState *result = NULL; + jschar matchCh; + size_t parenIndex; + size_t offset, length, index; + jsbytecode *pc = *startpc; /* pc has already been incremented past op */ + jschar *source; + const jschar *startcp = x->cp; + jschar ch; + RECharSet *charSet; + +#ifdef REGEXP_DEBUG + const char *opname = reop_names[op]; + re_debug("\n%06d: %*s%s", pc - gData->regexp->program, + gData->stateStackTop * 2, "", opname); +#endif + switch (op) { + case REOP_EMPTY: + result = x; + break; + case REOP_BOL: + if (x->cp != gData->cpbegin) { + if (!gData->cx->regExpStatics.multiline && + !(gData->regexp->flags & JSREG_MULTILINE)) { + break; + } + if (!RE_IS_LINE_TERM(x->cp[-1])) + break; + } + result = x; + break; + case REOP_EOL: + if (x->cp != gData->cpend) { + if (!gData->cx->regExpStatics.multiline && + !(gData->regexp->flags & JSREG_MULTILINE)) { + break; + } + if (!RE_IS_LINE_TERM(*x->cp)) + break; + } + result = x; + break; + case REOP_WBDRY: + if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^ + !(x->cp != gData->cpend && JS_ISWORD(*x->cp))) { + result = x; + } + break; + case REOP_WNONBDRY: + if ((x->cp == gData->cpbegin || !JS_ISWORD(x->cp[-1])) ^ + (x->cp != gData->cpend && JS_ISWORD(*x->cp))) { + result = x; + } + break; + case REOP_DOT: + if (x->cp != gData->cpend && !RE_IS_LINE_TERM(*x->cp)) { + result = x; + result->cp++; + } + break; + case REOP_DIGIT: + if (x->cp != gData->cpend && JS7_ISDEC(*x->cp)) { + result = x; + result->cp++; + } + break; + case REOP_NONDIGIT: + if (x->cp != gData->cpend && !JS7_ISDEC(*x->cp)) { + result = x; + result->cp++; + } + break; + case REOP_ALNUM: + if (x->cp != gData->cpend && JS_ISWORD(*x->cp)) { + result = x; + result->cp++; + } + break; + case REOP_NONALNUM: + if (x->cp != gData->cpend && !JS_ISWORD(*x->cp)) { + result = x; + result->cp++; + } + break; + case REOP_SPACE: + if (x->cp != gData->cpend && JS_ISSPACE(*x->cp)) { + result = x; + result->cp++; + } + break; + case REOP_NONSPACE: + if (x->cp != gData->cpend && !JS_ISSPACE(*x->cp)) { + result = x; + result->cp++; + } + break; + case REOP_BACKREF: + pc = ReadCompactIndex(pc, &parenIndex); + JS_ASSERT(parenIndex < gData->regexp->parenCount); + result = BackrefMatcher(gData, x, parenIndex); + break; + case REOP_FLAT: + pc = ReadCompactIndex(pc, &offset); + JS_ASSERT(offset < JSSTRING_LENGTH(gData->regexp->source)); + pc = ReadCompactIndex(pc, &length); + JS_ASSERT(1 <= length); + JS_ASSERT(length <= JSSTRING_LENGTH(gData->regexp->source) - offset); + if (length <= (size_t)(gData->cpend - x->cp)) { + source = JSSTRING_CHARS(gData->regexp->source) + offset; + re_debug_chars(source, length); + for (index = 0; index != length; index++) { + if (source[index] != x->cp[index]) + return NULL; + } + x->cp += length; + result = x; + } + break; + case REOP_FLAT1: + matchCh = *pc++; + re_debug(" '%c' == '%c'", (char)matchCh, (char)*x->cp); + if (x->cp != gData->cpend && *x->cp == matchCh) { + result = x; + result->cp++; + } + break; + case REOP_FLATi: + pc = ReadCompactIndex(pc, &offset); + JS_ASSERT(offset < JSSTRING_LENGTH(gData->regexp->source)); + pc = ReadCompactIndex(pc, &length); + JS_ASSERT(1 <= length); + JS_ASSERT(length <= JSSTRING_LENGTH(gData->regexp->source) - offset); + source = JSSTRING_CHARS(gData->regexp->source); + result = FlatNIMatcher(gData, x, source + offset, length); + break; + case REOP_FLAT1i: + matchCh = *pc++; + if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) { + result = x; + result->cp++; + } + break; + case REOP_UCFLAT1: + matchCh = GET_ARG(pc); + re_debug(" '%c' == '%c'", (char)matchCh, (char)*x->cp); + pc += ARG_LEN; + if (x->cp != gData->cpend && *x->cp == matchCh) { + result = x; + result->cp++; + } + break; + case REOP_UCFLAT1i: + matchCh = GET_ARG(pc); + pc += ARG_LEN; + if (x->cp != gData->cpend && upcase(*x->cp) == upcase(matchCh)) { + result = x; + result->cp++; + } + break; + case REOP_CLASS: + pc = ReadCompactIndex(pc, &index); + JS_ASSERT(index < gData->regexp->classCount); + if (x->cp != gData->cpend) { + charSet = &gData->regexp->classList[index]; + JS_ASSERT(charSet->converted); + ch = *x->cp; + index = ch >> 3; + if (charSet->length != 0 && + ch <= charSet->length && + (charSet->u.bits[index] & (1 << (ch & 0x7)))) { + result = x; + result->cp++; + } + } + break; + case REOP_NCLASS: + pc = ReadCompactIndex(pc, &index); + JS_ASSERT(index < gData->regexp->classCount); + if (x->cp != gData->cpend) { + charSet = &gData->regexp->classList[index]; + JS_ASSERT(charSet->converted); + ch = *x->cp; + index = ch >> 3; + if (charSet->length == 0 || + ch > charSet->length || + !(charSet->u.bits[index] & (1 << (ch & 0x7)))) { + result = x; + result->cp++; + } + } + break; + + default: + JS_ASSERT(JS_FALSE); + } + if (result) { + if (!updatecp) + x->cp = startcp; + *startpc = pc; + re_debug(" * "); + return result; + } + x->cp = startcp; + return NULL; +} + +static JS_ALWAYS_INLINE REMatchState * +ExecuteREBytecode(REGlobalData *gData, REMatchState *x) +{ + REMatchState *result = NULL; + REBackTrackData *backTrackData; + jsbytecode *nextpc, *testpc; + REOp nextop; + RECapture *cap; + REProgState *curState; + const jschar *startcp; + size_t parenIndex, k; + size_t parenSoFar = 0; + + jschar matchCh1, matchCh2; + RECharSet *charSet; + + JSBool anchor; + jsbytecode *pc = gData->regexp->program; + REOp op = (REOp) *pc++; + + /* + * If the first node is a simple match, step the index into the string + * until that match is made, or fail if it can't be found at all. + */ + if (REOP_IS_SIMPLE(op) && !(gData->regexp->flags & JSREG_STICKY)) { + anchor = JS_FALSE; + while (x->cp <= gData->cpend) { + nextpc = pc; /* reset back to start each time */ + result = SimpleMatch(gData, x, op, &nextpc, JS_TRUE); + if (result) { + anchor = JS_TRUE; + x = result; + pc = nextpc; /* accept skip to next opcode */ + op = (REOp) *pc++; + JS_ASSERT(op < REOP_LIMIT); + break; + } + gData->skipped++; + x->cp++; + } + if (!anchor) + goto bad; + } + + for (;;) { +#ifdef REGEXP_DEBUG + const char *opname = reop_names[op]; + re_debug("\n%06d: %*s%s", pc - gData->regexp->program, + gData->stateStackTop * 2, "", opname); +#endif + if (REOP_IS_SIMPLE(op)) { + result = SimpleMatch(gData, x, op, &pc, JS_TRUE); + } else { + curState = &gData->stateStack[gData->stateStackTop]; + switch (op) { + case REOP_END: + goto good; + case REOP_ALTPREREQ2: + nextpc = pc + GET_OFFSET(pc); /* start of next op */ + pc += ARG_LEN; + matchCh2 = GET_ARG(pc); + pc += ARG_LEN; + k = GET_ARG(pc); + pc += ARG_LEN; + + if (x->cp != gData->cpend) { + if (*x->cp == matchCh2) + goto doAlt; + + charSet = &gData->regexp->classList[k]; + if (!charSet->converted && !ProcessCharSet(gData, charSet)) + goto bad; + matchCh1 = *x->cp; + k = matchCh1 >> 3; + if ((charSet->length == 0 || + matchCh1 > charSet->length || + !(charSet->u.bits[k] & (1 << (matchCh1 & 0x7)))) ^ + charSet->sense) { + goto doAlt; + } + } + result = NULL; + break; + + case REOP_ALTPREREQ: + nextpc = pc + GET_OFFSET(pc); /* start of next op */ + pc += ARG_LEN; + matchCh1 = GET_ARG(pc); + pc += ARG_LEN; + matchCh2 = GET_ARG(pc); + pc += ARG_LEN; + if (x->cp == gData->cpend || + (*x->cp != matchCh1 && *x->cp != matchCh2)) { + result = NULL; + break; + } + /* else false thru... */ + + case REOP_ALT: + doAlt: + nextpc = pc + GET_OFFSET(pc); /* start of next alternate */ + pc += ARG_LEN; /* start of this alternate */ + curState->parenSoFar = parenSoFar; + PUSH_STATE_STACK(gData); + op = (REOp) *pc++; + startcp = x->cp; + if (REOP_IS_SIMPLE(op)) { + if (!SimpleMatch(gData, x, op, &pc, JS_TRUE)) { + op = (REOp) *nextpc++; + pc = nextpc; + continue; + } + result = x; + op = (REOp) *pc++; + } + nextop = (REOp) *nextpc++; + if (!PushBackTrackState(gData, nextop, nextpc, x, startcp, 0, 0)) + goto bad; + continue; + + /* + * Occurs at (successful) end of REOP_ALT, + */ + case REOP_JUMP: + /* + * If we have not gotten a result here, it is because of an + * empty match. Do the same thing REOP_EMPTY would do. + */ + if (!result) + result = x; + + --gData->stateStackTop; + pc += GET_OFFSET(pc); + op = (REOp) *pc++; + continue; + + /* + * Occurs at last (successful) end of REOP_ALT, + */ + case REOP_ENDALT: + /* + * If we have not gotten a result here, it is because of an + * empty match. Do the same thing REOP_EMPTY would do. + */ + if (!result) + result = x; + + --gData->stateStackTop; + op = (REOp) *pc++; + continue; + + case REOP_LPAREN: + pc = ReadCompactIndex(pc, &parenIndex); + re_debug("[ %lu ]", (unsigned long) parenIndex); + JS_ASSERT(parenIndex < gData->regexp->parenCount); + if (parenIndex + 1 > parenSoFar) + parenSoFar = parenIndex + 1; + x->parens[parenIndex].index = x->cp - gData->cpbegin; + x->parens[parenIndex].length = 0; + op = (REOp) *pc++; + continue; + + case REOP_RPAREN: + { + ptrdiff_t delta; + + pc = ReadCompactIndex(pc, &parenIndex); + JS_ASSERT(parenIndex < gData->regexp->parenCount); + cap = &x->parens[parenIndex]; + delta = x->cp - (gData->cpbegin + cap->index); + cap->length = (delta < 0) ? 0 : (size_t) delta; + op = (REOp) *pc++; + + if (!result) + result = x; + continue; + } + case REOP_ASSERT: + nextpc = pc + GET_OFFSET(pc); /* start of term after ASSERT */ + pc += ARG_LEN; /* start of ASSERT child */ + op = (REOp) *pc++; + testpc = pc; + if (REOP_IS_SIMPLE(op) && + !SimpleMatch(gData, x, op, &testpc, JS_FALSE)) { + result = NULL; + break; + } + curState->u.assertion.top = + (char *)gData->backTrackSP - (char *)gData->backTrackStack; + curState->u.assertion.sz = gData->cursz; + curState->index = x->cp - gData->cpbegin; + curState->parenSoFar = parenSoFar; + PUSH_STATE_STACK(gData); + if (!PushBackTrackState(gData, REOP_ASSERTTEST, + nextpc, x, x->cp, 0, 0)) { + goto bad; + } + continue; + + case REOP_ASSERT_NOT: + nextpc = pc + GET_OFFSET(pc); + pc += ARG_LEN; + op = (REOp) *pc++; + testpc = pc; + if (REOP_IS_SIMPLE(op) /* Note - fail to fail! */ && + SimpleMatch(gData, x, op, &testpc, JS_FALSE) && + *testpc == REOP_ASSERTNOTTEST) { + result = NULL; + break; + } + curState->u.assertion.top + = (char *)gData->backTrackSP - + (char *)gData->backTrackStack; + curState->u.assertion.sz = gData->cursz; + curState->index = x->cp - gData->cpbegin; + curState->parenSoFar = parenSoFar; + PUSH_STATE_STACK(gData); + if (!PushBackTrackState(gData, REOP_ASSERTNOTTEST, + nextpc, x, x->cp, 0, 0)) { + goto bad; + } + continue; + + case REOP_ASSERTTEST: + --gData->stateStackTop; + --curState; + x->cp = gData->cpbegin + curState->index; + gData->backTrackSP = + (REBackTrackData *) ((char *)gData->backTrackStack + + curState->u.assertion.top); + gData->cursz = curState->u.assertion.sz; + if (result) + result = x; + break; + + case REOP_ASSERTNOTTEST: + --gData->stateStackTop; + --curState; + x->cp = gData->cpbegin + curState->index; + gData->backTrackSP = + (REBackTrackData *) ((char *)gData->backTrackStack + + curState->u.assertion.top); + gData->cursz = curState->u.assertion.sz; + result = (!result) ? x : NULL; + break; + case REOP_STAR: + curState->u.quantifier.min = 0; + curState->u.quantifier.max = (uintN)-1; + goto quantcommon; + case REOP_PLUS: + curState->u.quantifier.min = 1; + curState->u.quantifier.max = (uintN)-1; + goto quantcommon; + case REOP_OPT: + curState->u.quantifier.min = 0; + curState->u.quantifier.max = 1; + goto quantcommon; + case REOP_QUANT: + pc = ReadCompactIndex(pc, &k); + curState->u.quantifier.min = k; + pc = ReadCompactIndex(pc, &k); + /* max is k - 1 to use one byte for (uintN)-1 sentinel. */ + curState->u.quantifier.max = k - 1; + JS_ASSERT(curState->u.quantifier.min + <= curState->u.quantifier.max); + quantcommon: + if (curState->u.quantifier.max == 0) { + pc = pc + GET_OFFSET(pc); + op = (REOp) *pc++; + result = x; + continue; + } + /* Step over <next> */ + nextpc = pc + ARG_LEN; + op = (REOp) *nextpc++; + startcp = x->cp; + if (REOP_IS_SIMPLE(op)) { + if (!SimpleMatch(gData, x, op, &nextpc, JS_TRUE)) { + if (curState->u.quantifier.min == 0) + result = x; + else + result = NULL; + pc = pc + GET_OFFSET(pc); + break; + } + op = (REOp) *nextpc++; + result = x; + } + curState->index = startcp - gData->cpbegin; + curState->continue_op = REOP_REPEAT; + curState->continue_pc = pc; + curState->parenSoFar = parenSoFar; + PUSH_STATE_STACK(gData); + if (curState->u.quantifier.min == 0 && + !PushBackTrackState(gData, REOP_REPEAT, pc, x, startcp, + 0, 0)) { + goto bad; + } + pc = nextpc; + continue; + + case REOP_ENDCHILD: /* marks the end of a quantifier child */ + pc = curState[-1].continue_pc; + op = (REOp) curState[-1].continue_op; + + if (!result) + result = x; + continue; + + case REOP_REPEAT: + --curState; + do { + --gData->stateStackTop; + if (!result) { + /* Failed, see if we have enough children. */ + if (curState->u.quantifier.min == 0) + goto repeatDone; + goto break_switch; + } + if (curState->u.quantifier.min == 0 && + x->cp == gData->cpbegin + curState->index) { + /* matched an empty string, that'll get us nowhere */ + result = NULL; + goto break_switch; + } + if (curState->u.quantifier.min != 0) + curState->u.quantifier.min--; + if (curState->u.quantifier.max != (uintN) -1) + curState->u.quantifier.max--; + if (curState->u.quantifier.max == 0) + goto repeatDone; + nextpc = pc + ARG_LEN; + nextop = (REOp) *nextpc; + startcp = x->cp; + if (REOP_IS_SIMPLE(nextop)) { + nextpc++; + if (!SimpleMatch(gData, x, nextop, &nextpc, JS_TRUE)) { + if (curState->u.quantifier.min == 0) + goto repeatDone; + result = NULL; + goto break_switch; + } + result = x; + } + curState->index = startcp - gData->cpbegin; + PUSH_STATE_STACK(gData); + if (curState->u.quantifier.min == 0 && + !PushBackTrackState(gData, REOP_REPEAT, + pc, x, startcp, + curState->parenSoFar, + parenSoFar - + curState->parenSoFar)) { + goto bad; + } + } while (*nextpc == REOP_ENDCHILD); + pc = nextpc; + op = (REOp) *pc++; + parenSoFar = curState->parenSoFar; + continue; + + repeatDone: + result = x; + pc += GET_OFFSET(pc); + goto break_switch; + + case REOP_MINIMALSTAR: + curState->u.quantifier.min = 0; + curState->u.quantifier.max = (uintN)-1; + goto minimalquantcommon; + case REOP_MINIMALPLUS: + curState->u.quantifier.min = 1; + curState->u.quantifier.max = (uintN)-1; + goto minimalquantcommon; + case REOP_MINIMALOPT: + curState->u.quantifier.min = 0; + curState->u.quantifier.max = 1; + goto minimalquantcommon; + case REOP_MINIMALQUANT: + pc = ReadCompactIndex(pc, &k); + curState->u.quantifier.min = k; + pc = ReadCompactIndex(pc, &k); + /* See REOP_QUANT comments about k - 1. */ + curState->u.quantifier.max = k - 1; + JS_ASSERT(curState->u.quantifier.min + <= curState->u.quantifier.max); + minimalquantcommon: + curState->index = x->cp - gData->cpbegin; + curState->parenSoFar = parenSoFar; + PUSH_STATE_STACK(gData); + if (curState->u.quantifier.min != 0) { + curState->continue_op = REOP_MINIMALREPEAT; + curState->continue_pc = pc; + /* step over <next> */ + pc += OFFSET_LEN; + op = (REOp) *pc++; + } else { + if (!PushBackTrackState(gData, REOP_MINIMALREPEAT, + pc, x, x->cp, 0, 0)) { + goto bad; + } + --gData->stateStackTop; + pc = pc + GET_OFFSET(pc); + op = (REOp) *pc++; + } + continue; + + case REOP_MINIMALREPEAT: + --gData->stateStackTop; + --curState; + + re_debug("{%d,%d}", curState->u.quantifier.min, + curState->u.quantifier.max); +#define PREPARE_REPEAT() \ + JS_BEGIN_MACRO \ + curState->index = x->cp - gData->cpbegin; \ + curState->continue_op = REOP_MINIMALREPEAT; \ + curState->continue_pc = pc; \ + pc += ARG_LEN; \ + for (k = curState->parenSoFar; k < parenSoFar; k++) \ + x->parens[k].index = -1; \ + PUSH_STATE_STACK(gData); \ + op = (REOp) *pc++; \ + JS_ASSERT(op < REOP_LIMIT); \ + JS_END_MACRO + + if (!result) { + re_debug(" - "); + /* + * Non-greedy failure - try to consume another child. + */ + if (curState->u.quantifier.max == (uintN) -1 || + curState->u.quantifier.max > 0) { + PREPARE_REPEAT(); + continue; + } + /* Don't need to adjust pc since we're going to pop. */ + break; + } + if (curState->u.quantifier.min == 0 && + x->cp == gData->cpbegin + curState->index) { + /* Matched an empty string, that'll get us nowhere. */ + result = NULL; + break; + } + if (curState->u.quantifier.min != 0) + curState->u.quantifier.min--; + if (curState->u.quantifier.max != (uintN) -1) + curState->u.quantifier.max--; + if (curState->u.quantifier.min != 0) { + PREPARE_REPEAT(); + continue; + } + curState->index = x->cp - gData->cpbegin; + curState->parenSoFar = parenSoFar; + PUSH_STATE_STACK(gData); + if (!PushBackTrackState(gData, REOP_MINIMALREPEAT, + pc, x, x->cp, + curState->parenSoFar, + parenSoFar - curState->parenSoFar)) { + goto bad; + } + --gData->stateStackTop; + pc = pc + GET_OFFSET(pc); + op = (REOp) *pc++; + JS_ASSERT(op < REOP_LIMIT); + continue; + default: + JS_ASSERT(JS_FALSE); + result = NULL; + } + break_switch:; + } + + /* + * If the match failed and there's a backtrack option, take it. + * Otherwise this is a complete and utter failure. + */ + if (!result) { + if (gData->cursz == 0) + return NULL; + if (!JS_CHECK_OPERATION_LIMIT(gData->cx, JSOW_JUMP)) { + gData->ok = JS_FALSE; + return NULL; + } + + /* Potentially detect explosive regex here. */ + gData->backTrackCount++; + if (gData->backTrackLimit && + gData->backTrackCount >= gData->backTrackLimit) { + JS_ReportErrorNumber(gData->cx, js_GetErrorMessage, NULL, + JSMSG_REGEXP_TOO_COMPLEX); + gData->ok = JS_FALSE; + return NULL; + } + + backTrackData = gData->backTrackSP; + gData->cursz = backTrackData->sz; + gData->backTrackSP = + (REBackTrackData *) ((char *)backTrackData - backTrackData->sz); + x->cp = backTrackData->cp; + pc = backTrackData->backtrack_pc; + op = (REOp) backTrackData->backtrack_op; + JS_ASSERT(op < REOP_LIMIT); + gData->stateStackTop = backTrackData->saveStateStackTop; + JS_ASSERT(gData->stateStackTop); + + memcpy(gData->stateStack, backTrackData + 1, + sizeof(REProgState) * backTrackData->saveStateStackTop); + curState = &gData->stateStack[gData->stateStackTop - 1]; + + if (backTrackData->parenCount) { + memcpy(&x->parens[backTrackData->parenIndex], + (char *)(backTrackData + 1) + + sizeof(REProgState) * backTrackData->saveStateStackTop, + sizeof(RECapture) * backTrackData->parenCount); + parenSoFar = backTrackData->parenIndex + backTrackData->parenCount; + } else { + for (k = curState->parenSoFar; k < parenSoFar; k++) + x->parens[k].index = -1; + parenSoFar = curState->parenSoFar; + } + + re_debug("\tBT_Pop: %ld,%ld", + (unsigned long) backTrackData->parenIndex, + (unsigned long) backTrackData->parenCount); + continue; + } + x = result; + + /* + * Continue with the expression. + */ + op = (REOp)*pc++; + JS_ASSERT(op < REOP_LIMIT); + } + +bad: + re_debug("\n"); + return NULL; + +good: + re_debug("\n"); + return x; +} + +static REMatchState * +MatchRegExp(REGlobalData *gData, REMatchState *x) +{ + REMatchState *result; + const jschar *cp = x->cp; + const jschar *cp2; + uintN j; +#ifdef JS_TRACER + Fragment *fragment; + + /* Run with native regexp if possible. */ + if (TRACING_ENABLED(gData->cx) && + ((fragment = JS_TRACE_MONITOR(gData->cx).reFragmento->getLoop(gData->regexp)) != NULL) + && fragment->code() && !fragment->isBlacklisted()) { + union { NIns *code; REMatchState* (FASTCALL *func)(void*, void*); } u; + u.code = fragment->code(); + REMatchState *lr; + gData->skipped = (ptrdiff_t) x->cp; + + debug_only_v(printf("entering REGEXP trace at %s:%u@%u, code: %p\n", + gData->cx->fp->script->filename, + js_FramePCToLineNumber(gData->cx, gData->cx->fp), + FramePCOffset(gData->cx->fp), + fragment->code());); + +#if defined(JS_NO_FASTCALL) && defined(NANOJIT_IA32) + SIMULATE_FASTCALL(lr, x, gData, u.func); +#else + lr = u.func(x, gData); +#endif + + debug_only_v(printf("leaving REGEXP trace\n")); + + gData->skipped = ((const jschar *) gData->skipped) - cp; + return lr; + } +#endif + /* + * Have to include the position beyond the last character + * in order to detect end-of-input/line condition. + */ + for (cp2 = cp; cp2 <= gData->cpend; cp2++) { + gData->skipped = cp2 - cp; + x->cp = cp2; + for (j = 0; j < gData->regexp->parenCount; j++) + x->parens[j].index = -1; + result = ExecuteREBytecode(gData, x); + if (!gData->ok || result || (gData->regexp->flags & JSREG_STICKY)) + return result; + gData->backTrackSP = gData->backTrackStack; + gData->cursz = 0; + gData->stateStackTop = 0; + cp2 = cp + gData->skipped; + } + return NULL; +} + +#define MIN_BACKTRACK_LIMIT 400000 + +static REMatchState * +InitMatch(JSContext *cx, REGlobalData *gData, JSRegExp *re, size_t length) +{ + REMatchState *result; + uintN i; + + gData->backTrackStackSize = INITIAL_BACKTRACK; + JS_ARENA_ALLOCATE_CAST(gData->backTrackStack, REBackTrackData *, + &cx->regexpPool, + INITIAL_BACKTRACK); + if (!gData->backTrackStack) + goto bad; + + gData->backTrackSP = gData->backTrackStack; + gData->cursz = 0; + gData->backTrackCount = 0; + gData->backTrackLimit = 0; + if (JS_GetOptions(cx) & JSOPTION_RELIMIT) { + gData->backTrackLimit = length * length * length; /* O(n^3) */ + if (gData->backTrackLimit < MIN_BACKTRACK_LIMIT) + gData->backTrackLimit = MIN_BACKTRACK_LIMIT; + } + + gData->stateStackLimit = INITIAL_STATESTACK; + JS_ARENA_ALLOCATE_CAST(gData->stateStack, REProgState *, + &cx->regexpPool, + sizeof(REProgState) * INITIAL_STATESTACK); + if (!gData->stateStack) + goto bad; + + gData->stateStackTop = 0; + gData->cx = cx; + gData->regexp = re; + gData->ok = JS_TRUE; + + JS_ARENA_ALLOCATE_CAST(result, REMatchState *, + &cx->regexpPool, + offsetof(REMatchState, parens) + + re->parenCount * sizeof(RECapture)); + if (!result) + goto bad; + + for (i = 0; i < re->classCount; i++) { + if (!re->classList[i].converted && + !ProcessCharSet(gData, &re->classList[i])) { + return NULL; + } + } + + return result; + +bad: + js_ReportOutOfScriptQuota(cx); + gData->ok = JS_FALSE; + return NULL; +} + +JSBool +js_ExecuteRegExp(JSContext *cx, JSRegExp *re, JSString *str, size_t *indexp, + JSBool test, jsval *rval) +{ + REGlobalData gData; + REMatchState *x, *result; + + const jschar *cp, *ep; + size_t i, length, start; + JSSubString *morepar; + JSBool ok; + JSRegExpStatics *res; + ptrdiff_t matchlen; + uintN num, morenum; + JSString *parstr, *matchstr; + JSObject *obj; + + RECapture *parsub = NULL; + void *mark; + int64 *timestamp; + + /* + * It's safe to load from cp because JSStrings have a zero at the end, + * and we never let cp get beyond cpend. + */ + start = *indexp; + JSSTRING_CHARS_AND_LENGTH(str, cp, length); + if (start > length) + start = length; + gData.cpbegin = cp; + gData.cpend = cp + length; + cp += start; + gData.start = start; + gData.skipped = 0; + + if (!cx->regexpPool.first.next) { + /* + * The first arena in the regexpPool must have a timestamp at its base. + */ + JS_ARENA_ALLOCATE_CAST(timestamp, int64 *, + &cx->regexpPool, sizeof *timestamp); + if (!timestamp) + return JS_FALSE; + *timestamp = JS_Now(); + } + mark = JS_ARENA_MARK(&cx->regexpPool); + + x = InitMatch(cx, &gData, re, length); + + if (!x) { + ok = JS_FALSE; + goto out; + } + x->cp = cp; + + /* + * Call the recursive matcher to do the real work. Return null on mismatch + * whether testing or not. On match, return an extended Array object. + */ + result = MatchRegExp(&gData, x); + ok = gData.ok; + if (!ok) + goto out; + if (!result) { + *rval = JSVAL_NULL; + goto out; + } + cp = result->cp; + i = cp - gData.cpbegin; + *indexp = i; + matchlen = i - (start + gData.skipped); + ep = cp; + cp -= matchlen; + + if (test) { + /* + * Testing for a match and updating cx->regExpStatics: don't allocate + * an array object, do return true. + */ + *rval = JSVAL_TRUE; + + /* Avoid warning. (gcc doesn't detect that obj is needed iff !test); */ + obj = NULL; + } else { + /* + * The array returned on match has element 0 bound to the matched + * string, elements 1 through state.parenCount bound to the paren + * matches, an index property telling the length of the left context, + * and an input property referring to the input string. + */ + obj = js_NewSlowArrayObject(cx); + if (!obj) { + ok = JS_FALSE; + goto out; + } + *rval = OBJECT_TO_JSVAL(obj); + +#define DEFVAL(val, id) { \ + ok = js_DefineProperty(cx, obj, id, val, \ + JS_PropertyStub, JS_PropertyStub, \ + JSPROP_ENUMERATE, NULL); \ + if (!ok) { \ + cx->weakRoots.newborn[GCX_OBJECT] = NULL; \ + cx->weakRoots.newborn[GCX_STRING] = NULL; \ + goto out; \ + } \ +} + + matchstr = js_NewStringCopyN(cx, cp, matchlen); + if (!matchstr) { + cx->weakRoots.newborn[GCX_OBJECT] = NULL; + ok = JS_FALSE; + goto out; + } + DEFVAL(STRING_TO_JSVAL(matchstr), INT_TO_JSID(0)); + } + + res = &cx->regExpStatics; + res->input = str; + res->parenCount = re->parenCount; + if (re->parenCount == 0) { + res->lastParen = js_EmptySubString; + } else { + for (num = 0; num < re->parenCount; num++) { + parsub = &result->parens[num]; + if (num < 9) { + if (parsub->index == -1) { + res->parens[num].chars = NULL; + res->parens[num].length = 0; + } else { + res->parens[num].chars = gData.cpbegin + parsub->index; + res->parens[num].length = parsub->length; + } + } else { + morenum = num - 9; + morepar = res->moreParens; + if (!morepar) { + res->moreLength = 10; + morepar = (JSSubString*) + JS_malloc(cx, 10 * sizeof(JSSubString)); + } else if (morenum >= res->moreLength) { + res->moreLength += 10; + morepar = (JSSubString*) + JS_realloc(cx, morepar, + res->moreLength * sizeof(JSSubString)); + } + if (!morepar) { + cx->weakRoots.newborn[GCX_OBJECT] = NULL; + cx->weakRoots.newborn[GCX_STRING] = NULL; + ok = JS_FALSE; + goto out; + } + res->moreParens = morepar; + if (parsub->index == -1) { + morepar[morenum].chars = NULL; + morepar[morenum].length = 0; + } else { + morepar[morenum].chars = gData.cpbegin + parsub->index; + morepar[morenum].length = parsub->length; + } + } + if (test) + continue; + if (parsub->index == -1) { + ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1), + JSVAL_VOID, NULL, NULL, + JSPROP_ENUMERATE, NULL); + } else { + parstr = js_NewStringCopyN(cx, gData.cpbegin + parsub->index, + parsub->length); + if (!parstr) { + cx->weakRoots.newborn[GCX_OBJECT] = NULL; + cx->weakRoots.newborn[GCX_STRING] = NULL; + ok = JS_FALSE; + goto out; + } + ok = js_DefineProperty(cx, obj, INT_TO_JSID(num + 1), + STRING_TO_JSVAL(parstr), NULL, NULL, + JSPROP_ENUMERATE, NULL); + } + if (!ok) { + cx->weakRoots.newborn[GCX_OBJECT] = NULL; + cx->weakRoots.newborn[GCX_STRING] = NULL; + goto out; + } + } + if (parsub->index == -1) { + res->lastParen = js_EmptySubString; + } else { + res->lastParen.chars = gData.cpbegin + parsub->index; + res->lastParen.length = parsub->length; + } + } + + if (!test) { + /* + * Define the index and input properties last for better for/in loop + * order (so they come after the elements). + */ + DEFVAL(INT_TO_JSVAL(start + gData.skipped), + ATOM_TO_JSID(cx->runtime->atomState.indexAtom)); + DEFVAL(STRING_TO_JSVAL(str), + ATOM_TO_JSID(cx->runtime->atomState.inputAtom)); + } + +#undef DEFVAL + + res->lastMatch.chars = cp; + res->lastMatch.length = matchlen; + + /* + * For JS1.3 and ECMAv2, emulate Perl5 exactly: + * + * js1.3 "hi", "hi there" "hihitherehi therebye" + */ + res->leftContext.chars = JSSTRING_CHARS(str); + res->leftContext.length = start + gData.skipped; + res->rightContext.chars = ep; + res->rightContext.length = gData.cpend - ep; + +out: + JS_ARENA_RELEASE(&cx->regexpPool, mark); + return ok; +} + +/************************************************************************/ + +#define REGEXP_PROP_ATTRS (JSPROP_PERMANENT | JSPROP_SHARED) +#define RO_REGEXP_PROP_ATTRS (REGEXP_PROP_ATTRS | JSPROP_READONLY) + +static JSPropertySpec regexp_props[] = { + {"source", REGEXP_SOURCE, RO_REGEXP_PROP_ATTRS,0,0}, + {"global", REGEXP_GLOBAL, RO_REGEXP_PROP_ATTRS,0,0}, + {"ignoreCase", REGEXP_IGNORE_CASE, RO_REGEXP_PROP_ATTRS,0,0}, + {"lastIndex", REGEXP_LAST_INDEX, REGEXP_PROP_ATTRS,0,0}, + {"multiline", REGEXP_MULTILINE, RO_REGEXP_PROP_ATTRS,0,0}, + {"sticky", REGEXP_STICKY, RO_REGEXP_PROP_ATTRS,0,0}, + {0,0,0,0,0} +}; + +static JSBool +regexp_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp) +{ + jsint slot; + JSRegExp *re; + + if (!JSVAL_IS_INT(id)) + return JS_TRUE; + while (OBJ_GET_CLASS(cx, obj) != &js_RegExpClass) { + obj = OBJ_GET_PROTO(cx, obj); + if (!obj) + return JS_TRUE; + } + slot = JSVAL_TO_INT(id); + if (slot == REGEXP_LAST_INDEX) + return JS_GetReservedSlot(cx, obj, 0, vp); + + JS_LOCK_OBJ(cx, obj); + re = (JSRegExp *) JS_GetPrivate(cx, obj); + if (re) { + switch (slot) { + case REGEXP_SOURCE: + *vp = STRING_TO_JSVAL(re->source); + break; + case REGEXP_GLOBAL: + *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_GLOB) != 0); + break; + case REGEXP_IGNORE_CASE: + *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_FOLD) != 0); + break; + case REGEXP_MULTILINE: + *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_MULTILINE) != 0); + break; + case REGEXP_STICKY: + *vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_STICKY) != 0); + break; + } + } + JS_UNLOCK_OBJ(cx, obj); + return JS_TRUE; +} + +static JSBool +regexp_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp) +{ + JSBool ok; + jsint slot; + jsdouble lastIndex; + + ok = JS_TRUE; + if (!JSVAL_IS_INT(id)) + return ok; + while (OBJ_GET_CLASS(cx, obj) != &js_RegExpClass) { + obj = OBJ_GET_PROTO(cx, obj); + if (!obj) + return JS_TRUE; + } + slot = JSVAL_TO_INT(id); + if (slot == REGEXP_LAST_INDEX) { + if (!JS_ValueToNumber(cx, *vp, &lastIndex)) + return JS_FALSE; + lastIndex = js_DoubleToInteger(lastIndex); + ok = JS_NewNumberValue(cx, lastIndex, vp) && + JS_SetReservedSlot(cx, obj, 0, *vp); + } + return ok; +} + +/* + * RegExp class static properties and their Perl counterparts: + * + * RegExp.input $_ + * RegExp.multiline $* + * RegExp.lastMatch $& + * RegExp.lastParen $+ + * RegExp.leftContext $` + * RegExp.rightContext $' + */ +enum regexp_static_tinyid { + REGEXP_STATIC_INPUT = -1, + REGEXP_STATIC_MULTILINE = -2, + REGEXP_STATIC_LAST_MATCH = -3, + REGEXP_STATIC_LAST_PAREN = -4, + REGEXP_STATIC_LEFT_CONTEXT = -5, + REGEXP_STATIC_RIGHT_CONTEXT = -6 +}; + +JSBool +js_InitRegExpStatics(JSContext *cx, JSRegExpStatics *res) +{ + JS_ClearRegExpStatics(cx); + return js_AddRoot(cx, &res->input, "res->input"); +} + +void +js_FreeRegExpStatics(JSContext *cx, JSRegExpStatics *res) +{ + if (res->moreParens) { + JS_free(cx, res->moreParens); + res->moreParens = NULL; + } + js_RemoveRoot(cx->runtime, &res->input); +} + +static JSBool +regexp_static_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp) +{ + jsint slot; + JSRegExpStatics *res; + JSString *str; + JSSubString *sub; + + res = &cx->regExpStatics; + if (!JSVAL_IS_INT(id)) + return JS_TRUE; + slot = JSVAL_TO_INT(id); + switch (slot) { + case REGEXP_STATIC_INPUT: + *vp = res->input ? STRING_TO_JSVAL(res->input) + : JS_GetEmptyStringValue(cx); + return JS_TRUE; + case REGEXP_STATIC_MULTILINE: + *vp = BOOLEAN_TO_JSVAL(res->multiline); + return JS_TRUE; + case REGEXP_STATIC_LAST_MATCH: + sub = &res->lastMatch; + break; + case REGEXP_STATIC_LAST_PAREN: + sub = &res->lastParen; + break; + case REGEXP_STATIC_LEFT_CONTEXT: + sub = &res->leftContext; + break; + case REGEXP_STATIC_RIGHT_CONTEXT: + sub = &res->rightContext; + break; + default: + sub = REGEXP_PAREN_SUBSTRING(res, slot); + break; + } + str = js_NewStringCopyN(cx, sub->chars, sub->length); + if (!str) + return JS_FALSE; + *vp = STRING_TO_JSVAL(str); + return JS_TRUE; +} + +static JSBool +regexp_static_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp) +{ + JSRegExpStatics *res; + + if (!JSVAL_IS_INT(id)) + return JS_TRUE; + res = &cx->regExpStatics; + /* XXX use if-else rather than switch to keep MSVC1.52 from crashing */ + if (JSVAL_TO_INT(id) == REGEXP_STATIC_INPUT) { + if (!JSVAL_IS_STRING(*vp) && + !JS_ConvertValue(cx, *vp, JSTYPE_STRING, vp)) { + return JS_FALSE; + } + res->input = JSVAL_TO_STRING(*vp); + } else if (JSVAL_TO_INT(id) == REGEXP_STATIC_MULTILINE) { + if (!JSVAL_IS_BOOLEAN(*vp) && + !JS_ConvertValue(cx, *vp, JSTYPE_BOOLEAN, vp)) { + return JS_FALSE; + } + res->multiline = JSVAL_TO_BOOLEAN(*vp); + } + return JS_TRUE; +} +#define REGEXP_STATIC_PROP_ATTRS (REGEXP_PROP_ATTRS | JSPROP_ENUMERATE) +#define RO_REGEXP_STATIC_PROP_ATTRS (REGEXP_STATIC_PROP_ATTRS | JSPROP_READONLY) + +static JSPropertySpec regexp_static_props[] = { + {"input", + REGEXP_STATIC_INPUT, + REGEXP_STATIC_PROP_ATTRS, + regexp_static_getProperty, regexp_static_setProperty}, + {"multiline", + REGEXP_STATIC_MULTILINE, + REGEXP_STATIC_PROP_ATTRS, + regexp_static_getProperty, regexp_static_setProperty}, + {"lastMatch", + REGEXP_STATIC_LAST_MATCH, + RO_REGEXP_STATIC_PROP_ATTRS, + regexp_static_getProperty, regexp_static_getProperty}, + {"lastParen", + REGEXP_STATIC_LAST_PAREN, + RO_REGEXP_STATIC_PROP_ATTRS, + regexp_static_getProperty, regexp_static_getProperty}, + {"leftContext", + REGEXP_STATIC_LEFT_CONTEXT, + RO_REGEXP_STATIC_PROP_ATTRS, + regexp_static_getProperty, regexp_static_getProperty}, + {"rightContext", + REGEXP_STATIC_RIGHT_CONTEXT, + RO_REGEXP_STATIC_PROP_ATTRS, + regexp_static_getProperty, regexp_static_getProperty}, + + /* XXX should have block scope and local $1, etc. */ + {"$1", 0, RO_REGEXP_STATIC_PROP_ATTRS, + regexp_static_getProperty, regexp_static_getProperty}, + {"$2", 1, RO_REGEXP_STATIC_PROP_ATTRS, + regexp_static_getProperty, regexp_static_getProperty}, + {"$3", 2, RO_REGEXP_STATIC_PROP_ATTRS, + regexp_static_getProperty, regexp_static_getProperty}, + {"$4", 3, RO_REGEXP_STATIC_PROP_ATTRS, + regexp_static_getProperty, regexp_static_getProperty}, + {"$5", 4, RO_REGEXP_STATIC_PROP_ATTRS, + regexp_static_getProperty, regexp_static_getProperty}, + {"$6", 5, RO_REGEXP_STATIC_PROP_ATTRS, + regexp_static_getProperty, regexp_static_getProperty}, + {"$7", 6, RO_REGEXP_STATIC_PROP_ATTRS, + regexp_static_getProperty, regexp_static_getProperty}, + {"$8", 7, RO_REGEXP_STATIC_PROP_ATTRS, + regexp_static_getProperty, regexp_static_getProperty}, + {"$9", 8, RO_REGEXP_STATIC_PROP_ATTRS, + regexp_static_getProperty, regexp_static_getProperty}, + + {0,0,0,0,0} +}; + +static void +regexp_finalize(JSContext *cx, JSObject *obj) +{ + JSRegExp *re; + + re = (JSRegExp *) JS_GetPrivate(cx, obj); + if (!re) + return; + js_DestroyRegExp(cx, re); +} + +/* Forward static prototype. */ +static JSBool +regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, + JSBool test, jsval *rval); + +static JSBool +regexp_call(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval) +{ + return regexp_exec_sub(cx, JSVAL_TO_OBJECT(argv[-2]), argc, argv, + JS_FALSE, rval); +} + +#if JS_HAS_XDR + +#include "jsxdrapi.h" + +static JSBool +regexp_xdrObject(JSXDRState *xdr, JSObject **objp) +{ + JSRegExp *re; + JSString *source; + uint32 flagsword; + JSObject *obj; + + if (xdr->mode == JSXDR_ENCODE) { + re = (JSRegExp *) JS_GetPrivate(xdr->cx, *objp); + if (!re) + return JS_FALSE; + source = re->source; + flagsword = (uint32)re->flags; + } + if (!JS_XDRString(xdr, &source) || + !JS_XDRUint32(xdr, &flagsword)) { + return JS_FALSE; + } + if (xdr->mode == JSXDR_DECODE) { + obj = js_NewObject(xdr->cx, &js_RegExpClass, NULL, NULL, 0); + if (!obj) + return JS_FALSE; + STOBJ_CLEAR_PARENT(obj); + STOBJ_CLEAR_PROTO(obj); + re = js_NewRegExp(xdr->cx, NULL, source, (uint8)flagsword, JS_FALSE); + if (!re) + return JS_FALSE; + if (!JS_SetPrivate(xdr->cx, obj, re) || + !js_SetLastIndex(xdr->cx, obj, 0)) { + js_DestroyRegExp(xdr->cx, re); + return JS_FALSE; + } + *objp = obj; + } + return JS_TRUE; +} + +#else /* !JS_HAS_XDR */ + +#define regexp_xdrObject NULL + +#endif /* !JS_HAS_XDR */ + +static void +regexp_trace(JSTracer *trc, JSObject *obj) +{ + JSRegExp *re; + + re = (JSRegExp *) JS_GetPrivate(trc->context, obj); + if (re && re->source) + JS_CALL_STRING_TRACER(trc, re->source, "source"); +} + +JSClass js_RegExpClass = { + js_RegExp_str, + JSCLASS_HAS_PRIVATE | JSCLASS_HAS_RESERVED_SLOTS(1) | + JSCLASS_MARK_IS_TRACE | JSCLASS_HAS_CACHED_PROTO(JSProto_RegExp), + JS_PropertyStub, JS_PropertyStub, + regexp_getProperty, regexp_setProperty, + JS_EnumerateStub, JS_ResolveStub, + JS_ConvertStub, regexp_finalize, + NULL, NULL, + regexp_call, NULL, + regexp_xdrObject, NULL, + JS_CLASS_TRACE(regexp_trace), 0 +}; + +static const jschar empty_regexp_ucstr[] = {'(', '?', ':', ')', 0}; + +JSBool +js_regexp_toString(JSContext *cx, JSObject *obj, jsval *vp) +{ + JSRegExp *re; + const jschar *source; + jschar *chars; + size_t length, nflags; + uintN flags; + JSString *str; + + if (!JS_InstanceOf(cx, obj, &js_RegExpClass, vp + 2)) + return JS_FALSE; + JS_LOCK_OBJ(cx, obj); + re = (JSRegExp *) JS_GetPrivate(cx, obj); + if (!re) { + JS_UNLOCK_OBJ(cx, obj); + *vp = STRING_TO_JSVAL(cx->runtime->emptyString); + return JS_TRUE; + } + + JSSTRING_CHARS_AND_LENGTH(re->source, source, length); + if (length == 0) { + source = empty_regexp_ucstr; + length = JS_ARRAY_LENGTH(empty_regexp_ucstr) - 1; + } + length += 2; + nflags = 0; + for (flags = re->flags; flags != 0; flags &= flags - 1) + nflags++; + chars = (jschar*) JS_malloc(cx, (length + nflags + 1) * sizeof(jschar)); + if (!chars) { + JS_UNLOCK_OBJ(cx, obj); + return JS_FALSE; + } + + chars[0] = '/'; + js_strncpy(&chars[1], source, length - 2); + chars[length-1] = '/'; + if (nflags) { + if (re->flags & JSREG_GLOB) + chars[length++] = 'g'; + if (re->flags & JSREG_FOLD) + chars[length++] = 'i'; + if (re->flags & JSREG_MULTILINE) + chars[length++] = 'm'; + if (re->flags & JSREG_STICKY) + chars[length++] = 'y'; + } + JS_UNLOCK_OBJ(cx, obj); + chars[length] = 0; + + str = js_NewString(cx, chars, length); + if (!str) { + JS_free(cx, chars); + return JS_FALSE; + } + *vp = STRING_TO_JSVAL(str); + return JS_TRUE; +} + +static JSBool +regexp_toString(JSContext *cx, uintN argc, jsval *vp) +{ + JSObject *obj; + + obj = JS_THIS_OBJECT(cx, vp); + return obj && js_regexp_toString(cx, obj, vp); +} + +static JSBool +regexp_compile_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, + jsval *rval) +{ + JSString *opt, *str; + JSRegExp *oldre, *re; + JSBool ok, ok2; + JSObject *obj2; + size_t length, nbytes; + const jschar *cp, *start, *end; + jschar *nstart, *ncp, *tmp; + + if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv)) + return JS_FALSE; + opt = NULL; + if (argc == 0) { + str = cx->runtime->emptyString; + } else { + if (JSVAL_IS_OBJECT(argv[0])) { + /* + * If we get passed in a RegExp object we construct a new + * RegExp that is a duplicate of it by re-compiling the + * original source code. ECMA requires that it be an error + * here if the flags are specified. (We must use the flags + * from the original RegExp also). + */ + obj2 = JSVAL_TO_OBJECT(argv[0]); + if (obj2 && OBJ_GET_CLASS(cx, obj2) == &js_RegExpClass) { + if (argc >= 2 && !JSVAL_IS_VOID(argv[1])) { /* 'flags' passed */ + JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, + JSMSG_NEWREGEXP_FLAGGED); + return JS_FALSE; + } + JS_LOCK_OBJ(cx, obj2); + re = (JSRegExp *) JS_GetPrivate(cx, obj2); + if (!re) { + JS_UNLOCK_OBJ(cx, obj2); + return JS_FALSE; + } + re = js_NewRegExp(cx, NULL, re->source, re->flags, JS_FALSE); + JS_UNLOCK_OBJ(cx, obj2); + goto created; + } + } + str = js_ValueToString(cx, argv[0]); + if (!str) + return JS_FALSE; + argv[0] = STRING_TO_JSVAL(str); + if (argc > 1) { + if (JSVAL_IS_VOID(argv[1])) { + opt = NULL; + } else { + opt = js_ValueToString(cx, argv[1]); + if (!opt) + return JS_FALSE; + argv[1] = STRING_TO_JSVAL(opt); + } + } + + /* Escape any naked slashes in the regexp source. */ + JSSTRING_CHARS_AND_LENGTH(str, start, length); + end = start + length; + nstart = ncp = NULL; + for (cp = start; cp < end; cp++) { + if (*cp == '/' && (cp == start || cp[-1] != '\\')) { + nbytes = (++length + 1) * sizeof(jschar); + if (!nstart) { + nstart = (jschar *) JS_malloc(cx, nbytes); + if (!nstart) + return JS_FALSE; + ncp = nstart + (cp - start); + js_strncpy(nstart, start, cp - start); + } else { + tmp = (jschar *) JS_realloc(cx, nstart, nbytes); + if (!tmp) { + JS_free(cx, nstart); + return JS_FALSE; + } + ncp = tmp + (ncp - nstart); + nstart = tmp; + } + *ncp++ = '\\'; + } + if (nstart) + *ncp++ = *cp; + } + + if (nstart) { + /* Don't forget to store the backstop after the new string. */ + JS_ASSERT((size_t)(ncp - nstart) == length); + *ncp = 0; + str = js_NewString(cx, nstart, length); + if (!str) { + JS_free(cx, nstart); + return JS_FALSE; + } + argv[0] = STRING_TO_JSVAL(str); + } + } + + re = js_NewRegExpOpt(cx, str, opt, JS_FALSE); +created: + if (!re) + return JS_FALSE; + JS_LOCK_OBJ(cx, obj); + oldre = (JSRegExp *) JS_GetPrivate(cx, obj); + ok = JS_SetPrivate(cx, obj, re); + ok2 = js_SetLastIndex(cx, obj, 0); + JS_UNLOCK_OBJ(cx, obj); + if (!ok) { + js_DestroyRegExp(cx, re); + return JS_FALSE; + } + if (oldre) + js_DestroyRegExp(cx, oldre); + *rval = OBJECT_TO_JSVAL(obj); + return ok2; +} + +static JSBool +regexp_compile(JSContext *cx, uintN argc, jsval *vp) +{ + JSObject *obj; + + obj = JS_THIS_OBJECT(cx, vp); + return obj && regexp_compile_sub(cx, obj, argc, vp + 2, vp); +} + +static JSBool +regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, + JSBool test, jsval *rval) +{ + JSBool ok, sticky; + JSRegExp *re; + jsdouble lastIndex; + JSString *str; + size_t i; + + ok = JS_InstanceOf(cx, obj, &js_RegExpClass, argv); + if (!ok) + return JS_FALSE; + JS_LOCK_OBJ(cx, obj); + re = (JSRegExp *) JS_GetPrivate(cx, obj); + if (!re) { + JS_UNLOCK_OBJ(cx, obj); + return JS_TRUE; + } + + /* NB: we must reach out: after this paragraph, in order to drop re. */ + HOLD_REGEXP(cx, re); + sticky = (re->flags & JSREG_STICKY) != 0; + if (re->flags & (JSREG_GLOB | JSREG_STICKY)) { + ok = js_GetLastIndex(cx, obj, &lastIndex); + } else { + lastIndex = 0; + } + JS_UNLOCK_OBJ(cx, obj); + if (!ok) + goto out; + + /* Now that obj is unlocked, it's safe to (potentially) grab the GC lock. */ + if (argc == 0) { + str = cx->regExpStatics.input; + if (!str) { + const char *bytes = js_GetStringBytes(cx, re->source); + + if (bytes) { + JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL, + JSMSG_NO_INPUT, + bytes, + (re->flags & JSREG_GLOB) ? "g" : "", + (re->flags & JSREG_FOLD) ? "i" : "", + (re->flags & JSREG_MULTILINE) ? "m" : "", + (re->flags & JSREG_STICKY) ? "y" : ""); + } + ok = JS_FALSE; + goto out; + } + } else { + str = js_ValueToString(cx, argv[0]); + if (!str) { + ok = JS_FALSE; + goto out; + } + argv[0] = STRING_TO_JSVAL(str); + } + + if (lastIndex < 0 || JSSTRING_LENGTH(str) < lastIndex) { + ok = js_SetLastIndex(cx, obj, 0); + *rval = JSVAL_NULL; + } else { + i = (size_t) lastIndex; + ok = js_ExecuteRegExp(cx, re, str, &i, test, rval); + if (ok && + ((re->flags & JSREG_GLOB) || (*rval != JSVAL_NULL && sticky))) { + ok = js_SetLastIndex(cx, obj, (*rval == JSVAL_NULL) ? 0 : i); + } + } + +out: + DROP_REGEXP(cx, re); + return ok; +} + +static JSBool +regexp_exec(JSContext *cx, uintN argc, jsval *vp) +{ + return regexp_exec_sub(cx, JS_THIS_OBJECT(cx, vp), argc, vp + 2, JS_FALSE, + vp); +} + +static JSBool +regexp_test(JSContext *cx, uintN argc, jsval *vp) +{ + if (!regexp_exec_sub(cx, JS_THIS_OBJECT(cx, vp), argc, vp + 2, JS_TRUE, vp)) + return JS_FALSE; + if (*vp != JSVAL_TRUE) + *vp = JSVAL_FALSE; + return JS_TRUE; +} + +#ifdef JS_TRACER +static jsint FASTCALL +Regexp_p_test(JSContext* cx, JSObject* regexp, JSString* str) +{ + jsval vp[3] = { JSVAL_NULL, OBJECT_TO_JSVAL(regexp), STRING_TO_JSVAL(str) }; + if (!regexp_exec_sub(cx, regexp, 1, vp + 2, JS_TRUE, vp)) + return JSVAL_TO_BOOLEAN(JSVAL_VOID); + return *vp == JSVAL_TRUE; +} + +JS_DEFINE_TRCINFO_1(regexp_test, + (3, (static, BOOL_FAIL, Regexp_p_test, CONTEXT, THIS, STRING, 1, 1))) + +#endif + +static JSFunctionSpec regexp_methods[] = { +#if JS_HAS_TOSOURCE + JS_FN(js_toSource_str, regexp_toString, 0,0), +#endif + JS_FN(js_toString_str, regexp_toString, 0,0), + JS_FN("compile", regexp_compile, 2,0), + JS_FN("exec", regexp_exec, 1,0), + JS_TN("test", regexp_test, 1,0, regexp_test_trcinfo), + JS_FS_END +}; + +static JSBool +RegExp(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval) +{ + if (!(cx->fp->flags & JSFRAME_CONSTRUCTING)) { + /* + * If first arg is regexp and no flags are given, just return the arg. + * (regexp_compile_sub detects the regexp + flags case and throws a + * TypeError.) See 10.15.3.1. + */ + if ((argc < 2 || JSVAL_IS_VOID(argv[1])) && + !JSVAL_IS_PRIMITIVE(argv[0]) && + OBJ_GET_CLASS(cx, JSVAL_TO_OBJECT(argv[0])) == &js_RegExpClass) { + *rval = argv[0]; + return JS_TRUE; + } + + /* Otherwise, replace obj with a new RegExp object. */ + obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL, 0); + if (!obj) + return JS_FALSE; + + /* + * regexp_compile_sub does not use rval to root its temporaries so we + * can use it to root obj. + */ + *rval = OBJECT_TO_JSVAL(obj); + } + return regexp_compile_sub(cx, obj, argc, argv, rval); +} + +JSObject * +js_InitRegExpClass(JSContext *cx, JSObject *obj) +{ + JSObject *proto, *ctor; + jsval rval; + + proto = JS_InitClass(cx, obj, NULL, &js_RegExpClass, RegExp, 1, + regexp_props, regexp_methods, + regexp_static_props, NULL); + + if (!proto || !(ctor = JS_GetConstructor(cx, proto))) + return NULL; + if (!JS_AliasProperty(cx, ctor, "input", "$_") || + !JS_AliasProperty(cx, ctor, "multiline", "$*") || + !JS_AliasProperty(cx, ctor, "lastMatch", "$&") || + !JS_AliasProperty(cx, ctor, "lastParen", "$+") || + !JS_AliasProperty(cx, ctor, "leftContext", "$`") || + !JS_AliasProperty(cx, ctor, "rightContext", "$'")) { + goto bad; + } + + /* Give RegExp.prototype private data so it matches the empty string. */ + if (!regexp_compile_sub(cx, proto, 0, NULL, &rval)) + goto bad; + return proto; + +bad: + JS_DeleteProperty(cx, obj, js_RegExpClass.name); + return NULL; +} + +JSObject * +js_NewRegExpObject(JSContext *cx, JSTokenStream *ts, + jschar *chars, size_t length, uintN flags) +{ + JSString *str; + JSObject *obj; + JSRegExp *re; + JSTempValueRooter tvr; + + str = js_NewStringCopyN(cx, chars, length); + if (!str) + return NULL; + re = js_NewRegExp(cx, ts, str, flags, JS_FALSE); + if (!re) + return NULL; + JS_PUSH_TEMP_ROOT_STRING(cx, str, &tvr); + obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL, 0); + if (!obj || !JS_SetPrivate(cx, obj, re)) { + js_DestroyRegExp(cx, re); + obj = NULL; + } + if (obj && !js_SetLastIndex(cx, obj, 0)) + obj = NULL; + JS_POP_TEMP_ROOT(cx, &tvr); + return obj; +} + +JSObject * +js_CloneRegExpObject(JSContext *cx, JSObject *obj, JSObject *parent) +{ + JSObject *clone; + JSRegExp *re; + + JS_ASSERT(OBJ_GET_CLASS(cx, obj) == &js_RegExpClass); + clone = js_NewObject(cx, &js_RegExpClass, NULL, parent, 0); + if (!clone) + return NULL; + re = (JSRegExp *) JS_GetPrivate(cx, obj); + if (!JS_SetPrivate(cx, clone, re) || !js_SetLastIndex(cx, clone, 0)) { + cx->weakRoots.newborn[GCX_OBJECT] = NULL; + return NULL; + } + HOLD_REGEXP(cx, re); + return clone; +} + +JSBool +js_GetLastIndex(JSContext *cx, JSObject *obj, jsdouble *lastIndex) +{ + jsval v; + + return JS_GetReservedSlot(cx, obj, 0, &v) && + JS_ValueToNumber(cx, v, lastIndex); +} + +JSBool +js_SetLastIndex(JSContext *cx, JSObject *obj, jsdouble lastIndex) +{ + jsval v; + + return JS_NewNumberValue(cx, lastIndex, &v) && + JS_SetReservedSlot(cx, obj, 0, v); +} + |