pjs/js/src/jsregexp.c

2847 строки
79 KiB
C

/* -*- Mode: C; tab-width: 8; indent-tabs-mode: nil; c-basic-offset: 4 -*-
*
* The contents of this file are subject to the Netscape Public
* License Version 1.1 (the "License"); you may not use this file
* except in compliance with the License. You may obtain a copy of
* the License at http://www.mozilla.org/NPL/
*
* Software distributed under the License is distributed on an "AS
* IS" basis, WITHOUT WARRANTY OF ANY KIND, either express oqr
* implied. See the License for the specific language governing
* rights and limitations under the License.
*
* The Original Code is Mozilla Communicator client code, released
* March 31, 1998.
*
* The Initial Developer of the Original Code is Netscape
* Communications Corporation. Portions created by Netscape are
* Copyright (C) 1998 Netscape Communications Corporation. All
* Rights Reserved.
*
* Contributor(s):
*
* Alternatively, the contents of this file may be used under the
* terms of the GNU Public License (the "GPL"), in which case the
* provisions of the GPL are applicable instead of those above.
* If you wish to allow use of your version of this file only
* under the terms of the GPL and not to allow others to use your
* version of this file under the NPL, indicate your decision by
* deleting the provisions above and replace them with the notice
* and other provisions required by the GPL. If you do not delete
* the provisions above, a recipient may use your version of this
* file under either the NPL or the GPL.
*/
/*
* JS regular expressions, after Perl.
*/
#include "jsstddef.h"
#include <stdlib.h>
#include <string.h>
#include "jstypes.h"
#include "jsarena.h" /* Added by JSIFY */
#include "jsutil.h" /* Added by JSIFY */
#include "jsapi.h"
#include "jsarray.h"
#include "jsatom.h"
#include "jscntxt.h"
#include "jsconfig.h"
#include "jsfun.h"
#include "jsgc.h"
#include "jsinterp.h"
#include "jslock.h"
#include "jsnum.h"
#include "jsobj.h"
#include "jsopcode.h"
#include "jsregexp.h"
#include "jsscan.h"
#include "jsstr.h"
#ifdef XP_MAC
#include <MacMemory.h>
#endif
#if JS_HAS_REGEXPS
typedef struct RENode RENode;
typedef enum REOp {
REOP_EMPTY = 0, /* match rest of input against rest of r.e. */
REOP_ALT = 1, /* alternative subexpressions in kid and next */
REOP_BOL = 2, /* beginning of input (or line if multiline) */
REOP_EOL = 3, /* end of input (or line if multiline) */
REOP_WBDRY = 4, /* match "" at word boundary */
REOP_WNONBDRY = 5, /* match "" at word non-boundary */
REOP_QUANT = 6, /* quantified atom: atom{1,2} */
REOP_STAR = 7, /* zero or more occurrences of kid */
REOP_PLUS = 8, /* one or more occurrences of kid */
REOP_OPT = 9, /* optional subexpression in kid */
REOP_LPAREN = 10, /* left paren bytecode: kid is u.num'th sub-regexp */
REOP_RPAREN = 11, /* right paren bytecode */
REOP_DOT = 12, /* stands for any character */
REOP_CCLASS = 13, /* character class: [a-f] */
REOP_DIGIT = 14, /* match a digit char: [0-9] */
REOP_NONDIGIT = 15, /* match a non-digit char: [^0-9] */
REOP_ALNUM = 16, /* match an alphanumeric char: [0-9a-z_A-Z] */
REOP_NONALNUM = 17, /* match a non-alphanumeric char: [^0-9a-z_A-Z] */
REOP_SPACE = 18, /* match a whitespace char */
REOP_NONSPACE = 19, /* match a non-whitespace char */
REOP_BACKREF = 20, /* back-reference (e.g., \1) to a parenthetical */
REOP_FLAT = 21, /* match a flat string */
REOP_FLAT1 = 22, /* match a single char */
REOP_JUMP = 23, /* for deoptimized closure loops */
REOP_DOTSTAR = 24, /* optimize .* to use a single opcode */
REOP_ANCHOR = 25, /* like .* but skips left context to unanchored r.e. */
REOP_EOLONLY = 26, /* $ not preceded by any pattern */
REOP_UCFLAT = 27, /* flat Unicode string; len immediate counts chars */
REOP_UCFLAT1 = 28, /* single Unicode char */
REOP_UCCLASS = 29, /* Unicode character class, vector of chars to match */
REOP_NUCCLASS = 30, /* negated Unicode character class */
REOP_BACKREFi = 31, /* case-independent REOP_BACKREF */
REOP_FLATi = 32, /* case-independent REOP_FLAT */
REOP_FLAT1i = 33, /* case-independent REOP_FLAT1 */
REOP_UCFLATi = 34, /* case-independent REOP_UCFLAT */
REOP_UCFLAT1i = 35, /* case-independent REOP_UCFLAT1 */
REOP_ANCHOR1 = 36, /* first-char discriminating REOP_ANCHOR */
REOP_NCCLASS = 37, /* negated 8-bit character class */
REOP_DOTSTARMIN = 38, /* ungreedy version of REOP_DOTSTAR */
REOP_LPARENNON = 39, /* non-capturing version of REOP_LPAREN */
REOP_RPARENNON = 40, /* non-capturing version of REOP_RPAREN */
REOP_ASSERT = 41, /* zero width positive lookahead assertion */
REOP_ASSERT_NOT = 42, /* zero width negative lookahead assertion */
REOP_END
} REOp;
#define REOP_FLATLEN_MAX 255 /* maximum length of FLAT string */
struct RENode {
uint8 op; /* packed r.e. op bytecode */
uint8 flags; /* flags, see below */
#ifdef DEBUG
uint16 offset; /* bytecode offset */
#endif
RENode *next; /* next in concatenation order */
void *kid; /* first operand */
union {
void *kid2; /* second operand */
jsint num; /* could be a number */
jschar chr; /* or a character */
struct { /* or a quantifier range */
uint16 min;
uint16 max;
} range;
struct { /* or a Unicode character class */
uint16 kidlen; /* length of string at kid, in jschars */
uint16 bmsize; /* bitmap size, based on max char code */
uint8 *bitmap;
} ucclass;
} u;
};
#define REOP(ren) ((REOp)(ren)->op)
#define RENODE_ANCHORED 0x01 /* anchored at the front */
#define RENODE_SINGLE 0x02 /* matches a single char */
#define RENODE_NONEMPTY 0x04 /* does not match empty string */
#define RENODE_ISNEXT 0x08 /* ren is next after at least one node */
#define RENODE_GOODNEXT 0x10 /* ren->next is a tree-like edge in the graph */
#define RENODE_ISJOIN 0x20 /* ren is a join point in the graph */
#define RENODE_REALLOK 0x40 /* REOP_FLAT owns tempPool space to realloc */
#define RENODE_MINIMAL 0x80 /* un-greedy matching for ? * + {} */
typedef struct CompilerState {
JSContext *context;
JSTokenStream *tokenStream; /* For reporting errors */
const jschar *cpbegin;
const jschar *cpend;
const jschar *cp;
uintN flags;
uintN parenCount;
size_t progLength;
} CompilerState;
#ifdef DEBUG
#include <stdio.h>
static char *reopname[] = {
"empty",
"alt",
"bol",
"eol",
"wbdry",
"wnonbdry",
"quant",
"star",
"plus",
"opt",
"lparen",
"rparen",
"dot",
"cclass",
"digit",
"nondigit",
"alnum",
"nonalnum",
"space",
"nonspace",
"backref",
"flat",
"flat1",
"jump",
"dotstar",
"anchor",
"eolonly",
"ucflat",
"ucflat1",
"ucclass",
"nucclass",
"backrefi",
"flati",
"flat1i",
"ucflati",
"ucflat1i",
"anchor1",
"ncclass",
"dotstar_min",
"lparen_non",
"rparen_non",
"assert",
"assert_not",
"end"
};
static void
PrintChar(jschar c)
{
if (c >> 8)
printf("\\u%04X", c);
else
#if !defined XP_PC || !defined _MSC_VER || _MSC_VER > 800
putchar((char)c);
#else
/* XXX is there a better way with MSVC1.52? */
printf("%c", c);
#endif
}
static int gOffset = 0;
static JSBool
DumpRegExp(JSContext *cx, RENode *ren)
{
static int level;
JSBool ok;
int i, len;
jschar *cp;
char *cstr;
if (level == 0)
printf("level offset flags description\n");
level++;
ok = JS_TRUE;
do {
printf("%5d %6d %6d %c%c%c%c%c%c%c %s",
level,
(int)ren->offset, (ren->next) ? (int)ren->next->offset : -1,
(ren->flags & RENODE_ANCHORED) ? 'A' : '-',
(ren->flags & RENODE_SINGLE) ? 'S' : '-',
(ren->flags & RENODE_NONEMPTY) ? 'F' : '-', /* F for full */
(ren->flags & RENODE_ISNEXT) ? 'N' : '-', /* N for next */
(ren->flags & RENODE_GOODNEXT) ? 'G' : '-',
(ren->flags & RENODE_ISJOIN) ? 'J' : '-',
(ren->flags & RENODE_MINIMAL) ? 'M' : '-',
reopname[ren->op]);
switch (REOP(ren)) {
case REOP_ALT:
printf(" %d\n", ren->next->offset);
ok = DumpRegExp(cx, (RENode*) ren->kid);
if (!ok)
goto out;
break;
case REOP_STAR:
case REOP_PLUS:
case REOP_OPT:
case REOP_ANCHOR1:
case REOP_ASSERT:
case REOP_ASSERT_NOT:
printf("\n");
ok = DumpRegExp(cx, (RENode*) ren->kid);
if (!ok)
goto out;
break;
case REOP_QUANT:
printf(" next %d min %d max %d\n",
ren->next->offset, ren->u.range.min, ren->u.range.max);
ok = DumpRegExp(cx, (RENode*) ren->kid);
if (!ok)
goto out;
break;
case REOP_LPAREN:
printf(" num %d\n", (int)ren->u.num);
ok = DumpRegExp(cx, (RENode*) ren->kid);
if (!ok)
goto out;
break;
case REOP_LPARENNON:
printf("\n");
ok = DumpRegExp(cx, (RENode*) ren->kid);
if (!ok)
goto out;
break;
case REOP_RPAREN:
printf(" num %d\n", (int)ren->u.num);
break;
case REOP_CCLASS:
len = (jschar *)ren->u.kid2 - (jschar *)ren->kid;
cstr = js_DeflateString(cx, (jschar *)ren->kid, len);
if (!cstr) {
ok = JS_FALSE;
goto out;
}
printf(" [%s]\n", cstr);
JS_free(cx, cstr);
break;
case REOP_BACKREF:
printf(" num %d\n", (int)ren->u.num);
break;
case REOP_FLAT:
len = (jschar *)ren->u.kid2 - (jschar *)ren->kid;
cstr = js_DeflateString(cx, (jschar *)ren->kid, len);
if (!cstr) {
ok = JS_FALSE;
goto out;
}
printf(" %s (%d)\n", cstr, len);
JS_free(cx, cstr);
break;
case REOP_FLAT1:
printf(" %c ('\\%o')\n", (char)ren->u.chr, ren->u.chr);
break;
case REOP_JUMP:
printf(" %d\n", ren->next->offset);
break;
case REOP_UCFLAT:
cp = (jschar*) ren->kid;
len = (jschar *)ren->u.kid2 - cp;
for (i = 0; i < len; i++)
PrintChar(cp[i]);
printf("\n");
break;
case REOP_UCFLAT1:
PrintChar(ren->u.chr);
printf("\n");
break;
case REOP_UCCLASS:
cp = (jschar*) ren->kid;
len = ren->u.ucclass.kidlen;
printf(" [");
for (i = 0; i < len; i++)
PrintChar(cp[i]);
printf("]\n");
break;
default:
printf("\n");
break;
}
if (!(ren->flags & RENODE_GOODNEXT))
break;
} while ((ren = ren->next) != NULL);
out:
level--;
return ok;
}
#endif /* DEBUG */
static RENode *
NewRENode(CompilerState *state, REOp op, void *kid)
{
JSContext *cx;
RENode *ren;
cx = state->context;
ren = (RENode*) JS_malloc(cx, sizeof *ren);
if (!ren) {
JS_ReportOutOfMemory(cx);
return NULL;
}
ren->op = (uint8)op;
ren->flags = 0;
#ifdef DEBUG
ren->offset = gOffset++;
#endif
ren->next = NULL;
ren->kid = kid;
return ren;
}
static JSBool
FixNext(CompilerState *state, RENode *ren1, RENode *ren2, RENode *oldnext)
{
JSBool goodnext;
RENode *next, *kid, *ren;
goodnext = ren2 && !(ren2->flags & RENODE_ISNEXT);
/*
* Find the final node in a list of alternatives, or concatenations, or
* even a concatenation of alternatives followed by non-alternatives (e.g.
* ((x|y)z)w where ((x|y)z) is ren1 and w is ren2).
*/
for (; (next = ren1->next) != NULL && next != oldnext; ren1 = next) {
if (REOP(ren1) == REOP_ALT) {
/* Find the end of this alternative's operand list. */
kid = (RENode*) ren1->kid;
if (REOP(kid) == REOP_JUMP)
continue;
for (ren = kid; ren->next; ren = ren->next)
JS_ASSERT(REOP(ren) != REOP_ALT);
/* Append a jump node to all but the last alternative. */
ren->next = NewRENode(state, REOP_JUMP, NULL);
if (!ren->next)
return JS_FALSE;
ren->next->flags |= RENODE_ISNEXT;
ren->flags |= RENODE_GOODNEXT;
/* Recur to fix all descendent nested alternatives. */
if (!FixNext(state, kid, ren2, oldnext))
return JS_FALSE;
}
}
/*
* Now ren1 points to the last alternative, or to the final node on a
* concatenation list. Set its next link to ren2, flagging a join point
* if appropriate.
*/
if (ren2) {
if (!(ren2->flags & RENODE_ISNEXT))
ren2->flags |= RENODE_ISNEXT;
else
ren2->flags |= RENODE_ISJOIN;
}
ren1->next = ren2;
if (goodnext)
ren1->flags |= RENODE_GOODNEXT;
/*
* The following ops have a kid subtree through which to recur. Here is
* where we fix the next links under the final ALT node's kid.
*/
switch (REOP(ren1)) {
case REOP_ALT:
case REOP_QUANT:
case REOP_STAR:
case REOP_PLUS:
case REOP_OPT:
case REOP_LPAREN:
case REOP_LPARENNON:
case REOP_ASSERT:
case REOP_ASSERT_NOT:
if (!FixNext(state, (RENode*) ren1->kid, ren2, oldnext))
return JS_FALSE;
break;
default:;
}
return JS_TRUE;
}
static JSBool
SetNext(CompilerState *state, RENode *ren1, RENode *ren2)
{
return FixNext(state, ren1, ren2, NULL);
}
/*
* Parser forward declarations.
*/
typedef RENode *REParser(CompilerState *state);
static REParser ParseRegExp;
static REParser ParseAltern;
static REParser ParseItem;
static REParser ParseQuantAtom;
static REParser ParseAtom;
/*
* Top-down regular expression grammar, based closely on Perl4.
*
* regexp: altern A regular expression is one or more
* altern '|' regexp alternatives separated by vertical bar.
*/
static RENode *
ParseRegExp(CompilerState *state)
{
RENode *ren, *kid, *ren1, *ren2;
const jschar *cp;
ren = ParseAltern(state);
if (!ren)
return NULL;
cp = state->cp;
if ((cp < state->cpend) && (*cp == '|')) {
kid = ren;
ren = NewRENode(state, REOP_ALT, kid);
if (!ren)
return NULL;
ren->flags = kid->flags & (RENODE_ANCHORED | RENODE_NONEMPTY);
ren1 = ren;
do {
/* (balance: */
state->cp = ++cp;
if (*cp == '|' || *cp == ')') {
kid = NewRENode(state, REOP_EMPTY, NULL);
} else {
kid = ParseAltern(state);
cp = state->cp;
}
if (!kid)
return NULL;
ren2 = NewRENode(state, REOP_ALT, kid);
if (!ren2)
return NULL;
ren1->next = ren2;
ren1->flags |= RENODE_GOODNEXT;
ren2->flags = (kid->flags & (RENODE_ANCHORED | RENODE_NONEMPTY))
| RENODE_ISNEXT;
ren1 = ren2;
} while ((cp < state->cpend) && (*cp == '|'));
}
return ren;
}
/*
* altern: item An alternative is one or more items,
* item altern concatenated together.
*/
static RENode *
ParseAltern(CompilerState *state)
{
RENode *ren, *ren1, *ren2;
uintN flags;
const jschar *cp;
jschar c;
ren = ren1 = ParseItem(state);
if (!ren)
return NULL;
flags = 0;
cp = state->cp;
/* (balance: */
while ((cp < state->cpend) && (c = *cp) != '|' && c != ')') {
ren2 = ParseItem(state);
if (!ren2)
return NULL;
if (!SetNext(state, ren1, ren2))
return NULL;
flags |= ren2->flags;
ren1 = ren2;
cp = state->cp;
}
/*
* Propagate NONEMPTY to the front of a concatenation list, so that the
* first alternative in (^a|b) is considered non-empty. The first node
* in a list may match the empty string (as ^ does), but if the list is
* non-empty, then the first node's NONEMPTY flag must be set.
*/
ren->flags |= flags & RENODE_NONEMPTY;
return ren;
}
/*
* item: assertion An item is either an assertion or
* quantatom a quantified atom.
*
* assertion: '^' Assertions match beginning of string
* (or line if the class static property
* RegExp.multiline is true).
* '$' End of string (or line if the class
* static property RegExp.multiline is
* true).
* '\b' Word boundary (between \w and \W).
* '\B' Word non-boundary.
*/
static RENode *
ParseItem(CompilerState *state)
{
const jschar *cp;
RENode *ren;
REOp op;
cp = state->cp;
if (cp < state->cpend)
switch (*cp) {
case '^':
state->cp = cp + 1;
ren = NewRENode(state, REOP_BOL, NULL);
if (ren)
ren->flags |= RENODE_ANCHORED;
return ren;
case '$':
state->cp = cp + 1;
return NewRENode(state,
(cp == state->cpbegin ||
((cp[-1] == '(' || cp[-1] == '|') && /*balance)*/
(cp - 1 == state->cpbegin || cp[-2] != '\\')))
? REOP_EOLONLY
: REOP_EOL,
NULL);
case '\\':
switch (*++cp) {
case 'b':
op = REOP_WBDRY;
break;
case 'B':
op = REOP_WNONBDRY;
break;
default:
return ParseQuantAtom(state);
}
/*
* Word boundaries and non-boundaries are flagged as non-empty so they
* will be prefixed by an anchoring node.
*/
state->cp = cp + 1;
ren = NewRENode(state, op, NULL);
if (ren)
ren->flags |= RENODE_NONEMPTY;
return ren;
default:;
}
return ParseQuantAtom(state);
}
/*
* quantatom: atom An unquantified atom.
* quantatom '{' n ',' m '}'
* Atom must occur between n and m times.
* quantatom '{' n ',' '}' Atom must occur at least n times.
* quantatom '{' n '}' Atom must occur exactly n times.
* quantatom '*' Zero or more times (same as {0,}).
* quantatom '+' One or more times (same as {1,}).
* quantatom '?' Zero or one time (same as {0,1}).
*
* any of which can be optionally followed by '?' for ungreedy
*/
static RENode *
ParseQuantAtom(CompilerState *state)
{
RENode *ren, *ren2;
const jschar *cp, *up;
jschar c;
uint32 min, max;
ren = ParseAtom(state);
if (!ren)
return NULL;
cp = state->cp;
loop:
if (cp < state->cpend)
switch (*cp) {
case '{':
c = *++cp;
if (!JS7_ISDEC(c)) {
js_ReportCompileErrorNumber(state->context, state->tokenStream,
NULL, JSREPORT_ERROR,
JSMSG_BAD_QUANTIFIER, state->cp);
return NULL;
}
min = (uint32)JS7_UNDEC(c);
for (c = *++cp; JS7_ISDEC(c); c = *++cp) {
min = 10 * min + (uint32)JS7_UNDEC(c);
if (min >> 16) {
js_ReportCompileErrorNumber(state->context,
state->tokenStream, NULL,
JSREPORT_ERROR,
JSMSG_MIN_TOO_BIG, state->cp);
return NULL;
}
}
if (*cp == ',') {
up = ++cp;
if (JS7_ISDEC(*cp)) {
max = (uint32)JS7_UNDEC(*cp);
for (c = *++cp; JS7_ISDEC(c); c = *++cp) {
max = 10 * max + (uint32)JS7_UNDEC(c);
if (max >> 16) {
js_ReportCompileErrorNumber(state->context,
state->tokenStream,
NULL,
JSREPORT_ERROR,
JSMSG_MAX_TOO_BIG, up);
return NULL;
}
}
if (max == 0)
goto zero_quant;
if (min > max) {
js_ReportCompileErrorNumber(state->context,
state->tokenStream,
NULL,
JSREPORT_ERROR,
JSMSG_OUT_OF_ORDER, up);
return NULL;
}
} else {
/* 0 means no upper bound. */
max = 0;
}
} else {
/* Exactly n times. */
if (min == 0) {
zero_quant:
js_ReportCompileErrorNumber(state->context,
state->tokenStream,
NULL,
JSREPORT_ERROR,
JSMSG_ZERO_QUANTIFIER,
state->cp);
return NULL;
}
max = min;
}
if (*cp != '}') {
js_ReportCompileErrorNumber(state->context,
state->tokenStream,
NULL,
JSREPORT_ERROR,
JSMSG_UNTERM_QUANTIFIER, state->cp);
return NULL;
}
cp++;
ren2 = NewRENode(state, REOP_QUANT, ren);
if (!ren2)
return NULL;
if (min > 0 && (ren->flags & RENODE_NONEMPTY))
ren2->flags |= RENODE_NONEMPTY;
ren2->u.range.min = (uint16)min;
ren2->u.range.max = (uint16)max;
ren = ren2;
goto parseMinimalFlag;
case '*':
cp++;
ren = NewRENode(state, REOP_STAR, ren);
parseMinimalFlag :
if (*cp == '?') {
ren->flags |= RENODE_MINIMAL;
cp++;
}
goto loop;
case '+':
cp++;
ren2 = NewRENode(state, REOP_PLUS, ren);
if (!ren2)
return NULL;
if (ren->flags & RENODE_NONEMPTY)
ren2->flags |= RENODE_NONEMPTY;
ren = ren2;
goto parseMinimalFlag;
case '?':
cp++;
ren = NewRENode(state, REOP_OPT, ren);
goto parseMinimalFlag;
}
state->cp = cp;
return ren;
}
/*
* atom: '(' regexp ')' A parenthesized regexp (what matched
* can be addressed using a backreference,
* see '\' n below).
* '.' Matches any char except '\n'.
* '[' classlist ']' A character class.
* '[' '^' classlist ']' A negated character class.
* '\f' Form Feed.
* '\n' Newline (Line Feed).
* '\r' Carriage Return.
* '\t' Horizontal Tab.
* '\v' Vertical Tab.
* '\d' A digit (same as [0-9]).
* '\D' A non-digit.
* '\w' A word character, [0-9a-z_A-Z].
* '\W' A non-word character.
* '\s' A whitespace character, [ \b\f\n\r\t\v].
* '\S' A non-whitespace character.
* '\' n A backreference to the nth (n decimal
* and positive) parenthesized expression.
* '\' octal An octal escape sequence (octal must be
* two or three digits long, unless it is
* 0 for the null character).
* '\x' hex A hex escape (hex must be two digits).
* '\c' ctrl A control character, ctrl is a letter.
* '\' literalatomchar Any character except one of the above
* that follow '\' in an atom.
* otheratomchar Any character not first among the other
* atom right-hand sides.
*/
static jschar metachars[] = {
'|', '^', '$', '{', '*', '+', '?', '(', ')', '.', '[', '\\', '}', 0
};
static jschar closurechars[] = {
'{', '*', '+', '?', 0 /* balance} */
};
static RENode *
ParseAtom(CompilerState *state)
{
const jschar *cp, *ocp;
uintN tmp, num, len;
RENode *ren, *ren2;
jschar c;
REOp op;
cp = ocp = state->cp;
/* handle /|a/ by returning an empty node for the leftside */
if ((cp == state->cpend) || (*cp == '|'))
return NewRENode(state, REOP_EMPTY, NULL);
ren = NULL; /* suppress warning */
switch (*cp) {
case '(':
num = -1; /* suppress warning */
op = REOP_END;
if (cp[1] == '?') {
switch (cp[2]) {
case ':' :
op = REOP_LPARENNON;
break;
case '=' :
op = REOP_ASSERT;
break;
case '!' :
op = REOP_ASSERT_NOT;
break;
}
}
if (op == REOP_END) {
num = state->parenCount++; /* \1 is numbered 0, etc. */
op = REOP_LPAREN;
cp++;
}
else
cp += 3;
state->cp = cp;
/* Handle empty paren */
if (*cp == ')') {
ren2 = NewRENode(state, REOP_EMPTY, NULL);
}
else {
ren2 = ParseRegExp(state);
if (!ren2)
return NULL;
cp = state->cp;
if (*cp != ')') {
js_ReportCompileErrorNumber(state->context, state->tokenStream,
NULL,
JSREPORT_ERROR,
JSMSG_MISSING_PAREN, ocp);
return NULL;
}
}
cp++;
ren = NewRENode(state, op, ren2);
if (!ren)
return NULL;
ren->flags = ren2->flags & (RENODE_ANCHORED | RENODE_NONEMPTY);
ren->u.num = num;
if ((op == REOP_LPAREN) || (op == REOP_LPARENNON)) {
/* Assume RPAREN ops immediately succeed LPAREN ops */
ren2 = NewRENode(state, (REOp)(op + 1), NULL);
if (!ren2 || !SetNext(state, ren, ren2))
return NULL;
ren2->u.num = num;
}
break;
case '.':
cp++;
op = REOP_DOT;
if (*cp == '*') {
cp++;
op = REOP_DOTSTAR;
if (*cp == '?') {
cp++;
op = REOP_DOTSTARMIN;
}
}
ren = NewRENode(state, op, NULL);
if (ren && op == REOP_DOT)
ren->flags = RENODE_SINGLE | RENODE_NONEMPTY;
break;
case '[':
++cp;
ren = NewRENode(state, REOP_CCLASS, (void *)cp);
if (!ren)
return NULL;
while ((c = *++cp) != ']') {
if (cp == state->cpend) {
js_ReportCompileErrorNumber(state->context, state->tokenStream,
NULL,
JSREPORT_ERROR,
JSMSG_UNTERM_CLASS, ocp);
return NULL;
}
if (c == '\\' && (cp+1 != state->cpend))
cp++;
}
ren->u.kid2 = (void *)cp++;
ren->u.ucclass.bitmap = NULL;
/* Since we rule out [] and [^], we can set the non-empty flag. */
ren->flags = RENODE_SINGLE | RENODE_NONEMPTY;
break;
case '\\':
c = *++cp;
if (cp == state->cpend) {
js_ReportCompileErrorNumber(state->context, state->tokenStream,
NULL,
JSREPORT_ERROR,
JSMSG_TRAILING_SLASH);
return NULL;
}
switch (c) {
case 'f':
case 'n':
case 'r':
case 't':
case 'v':
ren = NewRENode(state, REOP_FLAT1, NULL);
c = js_strchr(js_EscapeMap, c)[-1];
break;
case 'd':
ren = NewRENode(state, REOP_DIGIT, NULL);
break;
case 'D':
ren = NewRENode(state, REOP_NONDIGIT, NULL);
break;
case 'w':
ren = NewRENode(state, REOP_ALNUM, NULL);
break;
case 'W':
ren = NewRENode(state, REOP_NONALNUM, NULL);
break;
case 's':
ren = NewRENode(state, REOP_SPACE, NULL);
break;
case 'S':
ren = NewRENode(state, REOP_NONSPACE, NULL);
break;
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
/*
Yuk. Keeping the old style \n interpretation for 1.2
compatibility.
*/
if (state->context->version != JSVERSION_DEFAULT &&
state->context->version <= JSVERSION_1_4) {
switch (c) {
case '0':
do_octal:
num = 0;
while ('0' <= (c = *++cp) && c <= '7') {
tmp = 8 * num + (uintN)JS7_UNDEC(c);
if (tmp > 0377)
break;
num = tmp;
}
cp--;
ren = NewRENode(state, REOP_FLAT1, NULL);
c = (jschar)num;
break;
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
case '8':
case '9':
num = (uintN)JS7_UNDEC(c);
tmp = 1;
for (c = *++cp; JS7_ISDEC(c); c = *++cp, tmp++)
num = 10 * num + (uintN)JS7_UNDEC(c);
/* n in [8-9] and > count of parenetheses, then revert to
'8' or '9', ignoring the '\' */
if (((num == 8) || (num == 9)) && (num > state->parenCount)) {
ocp = --cp; /* skip beyond the '\' */
goto do_flat;
}
/* more than 1 digit, or a number greater than
the count of parentheses => it's an octal */
if ((tmp > 1) || (num > state->parenCount)) {
cp = ocp;
goto do_octal;
}
cp--;
ren = NewRENode(state, REOP_BACKREF, NULL);
if (!ren)
return NULL;
ren->u.num = num - 1; /* \1 is numbered 0, etc. */
/* Avoid common chr- and flags-setting code after switch. */
ren->flags = RENODE_NONEMPTY;
goto bump_cp;
}
}
else {
if (c == '0') {
ren = NewRENode(state, REOP_FLAT1, NULL);
c = 0;
}
else {
num = (uintN)JS7_UNDEC(c);
for (c = *++cp; JS7_ISDEC(c); c = *++cp)
num = 10 * num + (uintN)JS7_UNDEC(c);
cp--;
ren = NewRENode(state, REOP_BACKREF, NULL);
if (!ren)
return NULL;
ren->u.num = num - 1; /* \1 is numbered 0, etc. */
/* Avoid common chr- and flags-setting code after switch. */
ren->flags = RENODE_NONEMPTY;
goto bump_cp;
}
}
break;
case 'x':
ocp = cp;
c = *++cp;
if (JS7_ISHEX(c)) {
num = JS7_UNHEX(c);
c = *++cp;
if (JS7_ISHEX(c)) {
num <<= 4;
num += JS7_UNHEX(c);
} else {
if (state->context->version != JSVERSION_DEFAULT &&
state->context->version <= JSVERSION_1_4)
cp--; /* back up so cp points to last hex char */
else
goto nothex; /* ECMA 2 insists on pairs */
}
} else {
nothex:
cp = ocp; /* \xZZ is xZZ (Perl does \0ZZ!) */
num = 'x';
}
ren = NewRENode(state, REOP_FLAT1, NULL);
c = (jschar)num;
break;
case 'c':
c = *++cp;
if (!JS7_ISLET(c)) {
cp -= 2;
ocp = cp;
goto do_flat;
}
c = JS_TOUPPER(c);
c = JS_TOCTRL(c);
ren = NewRENode(state, REOP_FLAT1, NULL);
break;
case 'u':
if (JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4])) {
c = (((((JS7_UNHEX(cp[1]) << 4) + JS7_UNHEX(cp[2])) << 4)
+ JS7_UNHEX(cp[3])) << 4) + JS7_UNHEX(cp[4]);
cp += 4;
ren = NewRENode(state, REOP_FLAT1, NULL);
break;
}
/* Unlike Perl \xZZ, we take \uZZZ to be literal-u then ZZZ. */
ocp = cp;
goto do_flat;
default:
ocp = cp;
goto do_flat;
}
/* Common chr- and flags-setting code for escape opcodes. */
if (ren) {
ren->u.chr = c;
ren->flags = RENODE_SINGLE | RENODE_NONEMPTY;
}
bump_cp:
/* Skip to next unparsed char. */
cp++;
break;
default:
do_flat:
while ((c = *++cp) && (cp != state->cpend) && !js_strchr(metachars, c))
;
len = (uintN)(cp - ocp);
if ((cp != state->cpend) && len > 1 && js_strchr(closurechars, c)) {
cp--;
len--;
}
if (len > REOP_FLATLEN_MAX) {
len = REOP_FLATLEN_MAX;
cp = ocp + len;
}
ren = NewRENode(state, (len == 1) ? REOP_FLAT1 : REOP_FLAT,
(void *)ocp);
if (!ren)
return NULL;
ren->flags = RENODE_NONEMPTY;
if (len > 1) {
ren->u.kid2 = (void *)cp;
} else {
ren->flags |= RENODE_SINGLE;
ren->u.chr = *ocp;
}
break;
}
state->cp = cp;
return ren;
}
JSRegExp *
js_NewRegExp(JSContext *cx, JSTokenStream *ts,
JSString *str, uintN flags, JSBool flat)
{
JSRegExp *re;
void *mark;
CompilerState state;
RENode *ren, *ren2, *end;
size_t resize;
const jschar *cp;
uintN len;
re = NULL;
mark = JS_ARENA_MARK(&cx->tempPool);
state.context = cx;
state.tokenStream = ts;
state.cpbegin = state.cp = str->chars;
state.cpend = state.cp + str->length;
state.flags = flags;
state.parenCount = 0;
state.progLength = 0;
if (flat) {
len = str->length;
cp = str->chars;
ren = NULL;
while (len > 0) {
ren2 = NewRENode(&state,
(len == 1) ? REOP_FLAT1 : REOP_FLAT, (void *)cp);
if (!ren2)
goto out;
ren2->flags = RENODE_NONEMPTY;
if (len > 1) {
if (len > REOP_FLATLEN_MAX) {
cp += REOP_FLATLEN_MAX;
len -= REOP_FLATLEN_MAX;
}
else {
cp += len;
len = 0;
}
ren2->u.kid2 = (void *)cp;
} else {
ren2->flags |= RENODE_SINGLE;
ren2->u.chr = *cp;
len = 0;
}
if (ren) {
if (!SetNext(&state, ren, ren2))
goto out;
}
else
ren = ren2;
}
}
else
ren = ParseRegExp(&state);
if (!ren)
goto out;
end = NewRENode(&state, REOP_END, NULL);
if (!end || !SetNext(&state, ren, end))
goto out;
#ifdef DEBUG_notme
if (!DumpRegExp(cx, ren))
goto out;
#endif
resize = sizeof *re + state.progLength - 1;
re = (JSRegExp*) JS_malloc(cx, JS_ROUNDUP(resize, sizeof(jsword)));
if (!re)
goto out;
re->source = str;
re->lastIndex = 0;
re->parenCount = state.parenCount;
re->flags = flags;
re->ren = ren;
/* Success: lock re->source string. */
(void) js_LockGCThing(cx, str);
out:
JS_ARENA_RELEASE(&cx->tempPool, mark);
return re;
}
JSRegExp *
js_NewRegExpOpt(JSContext *cx, JSTokenStream *ts,
JSString *str, JSString *opt, JSBool flat)
{
uintN flags;
jschar *cp;
flags = 0;
if (opt) {
for (cp = opt->chars; *cp; cp++) {
switch (*cp) {
case 'g':
flags |= JSREG_GLOB;
break;
case 'i':
flags |= JSREG_FOLD;
break;
case 'm':
flags |= JSREG_MULTILINE;
break;
default: {
char charBuf[2] = " ";
charBuf[0] = (char)*cp;
js_ReportCompileErrorNumber(cx, ts, NULL, JSREPORT_ERROR,
JSMSG_BAD_FLAG, charBuf);
return NULL;
}
}
}
}
return js_NewRegExp(cx, ts, str, flags, flat);
}
static void freeRENtree(JSContext *cx, RENode *ren, RENode *stop)
{
while (ren && (ren != stop)) {
RENode *n;
switch (ren->op) {
case REOP_ALT: {
RENode *altStop = (RENode *)(ren->next);
while (altStop && (altStop->op == REOP_ALT))
altStop = altStop->next;
freeRENtree(cx, (RENode *)(ren->kid), altStop);
}
break;
case REOP_QUANT:
case REOP_PLUS:
case REOP_STAR:
case REOP_OPT:
case REOP_LPAREN:
case REOP_LPARENNON:
case REOP_ASSERT:
case REOP_ASSERT_NOT:
freeRENtree(cx, (RENode *)(ren->kid), (RENode *)(ren->next));
break;
case REOP_CCLASS:
if (ren->u.ucclass.bitmap)
JS_free(cx, ren->u.ucclass.bitmap);
break;
}
n = ren->next;
JS_free(cx, ren);
ren = n;
}
}
void
js_DestroyRegExp(JSContext *cx, JSRegExp *re)
{
js_UnlockGCThing(cx, re->source);
freeRENtree(cx, re->ren, NULL);
JS_free(cx, re);
}
typedef struct MatchState {
JSContext *context; /* for access to regExpStatics */
JSBool anchoring; /* true if multiline anchoring ^/$ */
jsbytecode *pcend; /* pc limit (fencepost) */
const jschar *cpbegin, *cpend; /* cp base address and limit */
size_t start; /* offset from cpbegin to start at */
ptrdiff_t skipped; /* chars skipped anchoring this r.e. */
uint8 flags; /* flags */
uintN parenCount; /* number of paren substring matches */
JSSubString *maybeParens; /* possible paren substring pointers */
JSSubString *parens; /* certain paren substring matches */
JSBool ok; /* indicates runtime error during matching */
} MatchState;
/*
* Returns updated cp on match, null on mismatch.
*/
static JSBool matchChar(int flags, jschar c, jschar c2)
{
if (c == c2)
return JS_TRUE;
else
if ((flags & JSREG_FOLD) != 0) {
return ((c = JS_TOUPPER(c)) == (c2 = JS_TOUPPER(c2)))
|| (JS_TOLOWER(c) == JS_TOLOWER(c2));
}
else
return JS_FALSE;
}
static const jschar *matchRENodes(MatchState *state, RENode *ren, RENode *stop,
const jschar *cp);
typedef struct {
MatchState *state;
RENode *kid;
RENode *next;
RENode *stop;
int kidCount;
int maxKid;
} GreedyState;
static const jschar *greedyRecurse(GreedyState *grState, const jschar *cp, const jschar *previousKid)
{
const jschar *kidMatch;
const jschar *match;
int num = grState->state->parenCount;
#ifdef XP_MAC
if (StackSpace() < 16384) {
JS_ReportOutOfMemory (grState->state->context);
grState->state->ok = JS_FALSE;
return NULL;
}
#endif
/*
* When the kid match fails, we reset the parencount and run any
* previously succesful kid in order to restablish it's paren
* contents.
*/
kidMatch = matchRENodes(grState->state, grState->kid, grState->next, cp);
if (kidMatch == NULL) {
match = matchRENodes(grState->state, grState->next, grState->stop, cp);
if (match) {
grState->state->parenCount = num;
if (previousKid != NULL)
matchRENodes(grState->state, grState->kid, grState->next, previousKid);
return cp;
}
return NULL;
}
else {
if (kidMatch == cp) {
if (previousKid != NULL)
matchRENodes(grState->state, grState->kid, grState->next, previousKid);
return kidMatch; /* no point pursuing an empty match forever */
}
if (!grState->maxKid || (++grState->kidCount < grState->maxKid)) {
match = greedyRecurse(grState, kidMatch, cp);
if (match)
return match;
if (grState->maxKid)
--grState->kidCount;
}
grState->state->parenCount = num;
if (matchRENodes(grState->state, grState->next, grState->stop, kidMatch)) {
matchRENodes(grState->state, grState->kid, grState->next, cp);
return kidMatch;
}
return NULL;
}
}
static const jschar *matchGreedyKid(MatchState *state, RENode *ren, RENode *stop,
int kidCount, const jschar *cp, const jschar *previousKid)
{
GreedyState grState;
const jschar *match;
grState.state = state;
grState.kid = (RENode *)ren->kid;
grState.next = ren->next;
grState.kidCount = kidCount;
grState.maxKid = (ren->op == REOP_QUANT) ? ren->u.range.max : 0;
/*
We try to match the sub-tree to completion first, and if that
doesn't work, match only up to the given end of the sub-tree.
*/
grState.stop = NULL;
match = greedyRecurse(&grState, cp, previousKid);
if (match || !stop)
return match;
grState.kidCount = kidCount;
grState.stop = stop;
return greedyRecurse(&grState, cp, previousKid);
}
static const jschar *matchNonGreedyKid(MatchState *state, RENode *ren,
int kidCount, int maxKid,
const jschar *cp)
{
const jschar *kidMatch;
const jschar *match;
match = matchRENodes(state, ren->next, NULL, cp);
if (match != NULL) return cp;
kidMatch = matchRENodes(state, (RENode *)ren->kid, ren->next, cp);
if (kidMatch == NULL)
return NULL;
if (kidMatch == cp)
return kidMatch; /* no point pursuing an empty match forever */
return matchNonGreedyKid(state, ren, kidCount, maxKid, kidMatch);
}
static void calcBMSize(MatchState *state, RENode *ren)
{
const jschar *cp = (const jschar *) ren->kid;
const jschar *cp2 = (const jschar *) ren->u.kid2;
uintN maxc = 0;
jschar c, c2;
while (cp < cp2) {
c = *cp++;
if (c == '\\') {
if (cp + 5 <= cp2 && *cp == 'u' &&
JS7_ISHEX(cp[1]) && JS7_ISHEX(cp[2]) &&
JS7_ISHEX(cp[3]) && JS7_ISHEX(cp[4])) {
c = (((((JS7_UNHEX(cp[1]) << 4)
+ JS7_UNHEX(cp[2])) << 4)
+ JS7_UNHEX(cp[3])) << 4)
+ JS7_UNHEX(cp[4]);
cp += 5;
} else {
if (maxc < 255) maxc = 255;
/*
* Octal and hex escapes can't be > 255. Skip this
* backslash and let the loop pass over the remaining
* escape sequence as if it were text to match.
*/
continue;
}
}
if (state->flags & JSREG_FOLD) {
/*
* Don't assume that lowercase are above uppercase, or
* that c is either even when c has upper and lowercase
* versions.
*/
if ((c2 = JS_TOUPPER(c)) > maxc)
maxc = c2;
if ((c2 = JS_TOLOWER(c2)) > maxc)
maxc = c2;
}
if (c > maxc)
maxc = c;
}
ren->u.ucclass.bmsize = (uint16)((size_t)(maxc + JS_BITS_PER_BYTE)
/ JS_BITS_PER_BYTE);
}
static JSBool buildBitmap(MatchState *state, RENode *ren)
{
uintN i, n, b, c, lastc, foldc, nchars;
uint8 *bitmap;
uint8 fill;
JSBool inrange;
const jschar *cp = (const jschar *) ren->kid;
const jschar *end = (const jschar *) ren->u.kid2;
const jschar *ocp;
calcBMSize(state, ren);
ren->u.ucclass.bitmap = bitmap = (uint8*) JS_malloc(state->context,
ren->u.ucclass.bmsize);
if (!bitmap) {
JS_ReportOutOfMemory(state->context);
return JS_FALSE;
}
if (*cp == '^') {
fill = 0xff;
cp++;
} else {
fill = 0;
}
n = (uintN)ren->u.ucclass.bmsize;
for (i = 0; i < n; i++)
bitmap[i] = fill;
nchars = n * JS_BITS_PER_BYTE;
/* Split ops up into statements to keep MSVC1.52 from crashing. */
#define MATCH_BIT(c) { i = (c) >> 3; b = (c) & 7; b = 1 << b; \
if (fill) bitmap[i] &= ~b; else bitmap[i] |= b; }
lastc = nchars;
inrange = JS_FALSE;
while (cp < end) {
c = (uintN) *cp++;
if (c == '\\') {
c = *cp++;
switch (c) {
case 'b':
case 'f':
case 'n':
case 'r':
case 't':
case 'v':
c = js_strchr(js_EscapeMap, (jschar)c)[-1];
break;
#define CHECK_RANGE() if (inrange) { MATCH_BIT(lastc); MATCH_BIT('-'); \
inrange = JS_FALSE; }
case 'd':
CHECK_RANGE();
for (c = '0'; c <= '9'; c++)
MATCH_BIT(c);
continue;
case 'D':
CHECK_RANGE();
for (c = 0; c < '0'; c++)
MATCH_BIT(c);
for (c = '9' + 1; c < nchars; c++)
MATCH_BIT(c);
continue;
case 'w':
CHECK_RANGE();
for (c = 0; c < nchars; c++)
if (JS_ISWORD(c))
MATCH_BIT(c);
continue;
case 'W':
CHECK_RANGE();
for (c = 0; c < nchars; c++)
if (!JS_ISWORD(c))
MATCH_BIT(c);
continue;
case 's':
CHECK_RANGE();
for (c = 0; c < nchars; c++)
if (JS_ISSPACE(c))
MATCH_BIT(c);
continue;
case 'S':
CHECK_RANGE();
for (c = 0; c < nchars; c++)
if (!JS_ISSPACE(c))
MATCH_BIT(c);
continue;
#undef CHECK_RANGE
case '0':
case '1':
case '2':
case '3':
case '4':
case '5':
case '6':
case '7':
n = JS7_UNDEC(c);
ocp = cp - 2;
c = *cp;
if ('0' <= c && c <= '7') {
cp++;
n = 8 * n + JS7_UNDEC(c);
c = *cp;
if ('0' <= c && c <= '7') {
cp++;
i = 8 * n + JS7_UNDEC(c);
if (i <= 0377)
n = i;
else
cp--;
}
}
c = n;
break;
case 'x':
ocp = cp;
c = *cp++;
if (JS7_ISHEX(c)) {
n = JS7_UNHEX(c);
c = *cp++;
if (JS7_ISHEX(c)) {
n <<= 4;
n += JS7_UNHEX(c);
}
} else {
cp = ocp; /* \xZZ is xZZ (Perl does \0ZZ!) */
n = 'x';
}
c = n;
break;
case 'u':
if (JS7_ISHEX(cp[0]) && JS7_ISHEX(cp[1]) &&
JS7_ISHEX(cp[2]) && JS7_ISHEX(cp[3])) {
n = (((((JS7_UNHEX(cp[0]) << 4)
+ JS7_UNHEX(cp[1])) << 4)
+ JS7_UNHEX(cp[2])) << 4)
+ JS7_UNHEX(cp[3]);
c = n;
cp += 4;
}
break;
case 'c':
c = *cp++;
c = JS_TOUPPER(c);
c = JS_TOCTRL(c);
break;
}
}
if (inrange) {
if (lastc > c) {
JS_ReportErrorNumber(state->context,
js_GetErrorMessage, NULL,
JSMSG_BAD_CLASS_RANGE);
return JS_FALSE;
}
inrange = JS_FALSE;
} else {
/* Set lastc so we match just c's bit in the for loop. */
lastc = c;
/* [balance: */
if (*cp == '-' && cp + 1 < end && cp[1] != ']') {
cp++;
inrange = JS_TRUE;
continue;
}
}
/* Match characters in the range [lastc, c]. */
for (; lastc <= c; lastc++) {
MATCH_BIT(lastc);
if (state->flags & JSREG_FOLD) {
/*
* Must do both upper and lower for Turkish dotless i,
* Georgian, etc.
*/
foldc = JS_TOUPPER(lastc);
MATCH_BIT(foldc);
foldc = JS_TOLOWER(foldc);
MATCH_BIT(foldc);
}
}
lastc = c;
}
return JS_TRUE;
}
static const jschar *matchRENodes(MatchState *state, RENode *ren, RENode *stop,
const jschar *cp)
{
const jschar *cp2, *kidMatch, *lastKid, *source, *cpend = state->cpend;
jschar c;
JSSubString *parsub;
uintN i, b, bit, num, length;
while ((ren != stop) && (ren != NULL))
{
switch (ren->op) {
case REOP_EMPTY:
break;
case REOP_ALT:
if (ren->next->op != REOP_ALT) {
ren = (RENode *)ren->kid;
continue;
}
num = state->parenCount;
kidMatch = matchRENodes(state, (RENode *)ren->kid, stop, cp);
if (kidMatch != NULL)
return kidMatch;
for (i = num; i < state->parenCount; i++)
state->parens[i].length = 0;
state->parenCount = num;
break;
case REOP_QUANT:
lastKid = NULL;
for (num = 0; num < ren->u.range.min; num++) {
kidMatch = matchRENodes(state, (RENode *)ren->kid,
ren->next, cp);
if (kidMatch == NULL)
return NULL;
lastKid = cp;
cp = kidMatch;
}
if (num == ren->u.range.max) break;
if ((ren->flags & RENODE_MINIMAL) == 0) {
cp2 = matchGreedyKid(state, ren, stop, num, cp, lastKid);
if (cp2 == NULL) {
if (lastKid)
cp = matchRENodes(state, (RENode *)ren->kid, ren->next,
lastKid);
}
else
cp = cp2;
}
else {
cp = matchNonGreedyKid(state, ren, num,
ren->u.range.max, cp);
if (cp == NULL)
return NULL;
}
break;
case REOP_PLUS:
kidMatch = matchRENodes(state, (RENode *)ren->kid,
ren->next, cp);
if (kidMatch == NULL)
return NULL;
if ((ren->flags & RENODE_MINIMAL) == 0) {
cp2 = matchGreedyKid(state, ren, stop, 1, kidMatch, cp);
if (cp2 == NULL)
cp = matchRENodes(state, (RENode *)ren->kid, ren->next, cp);
else
cp = cp2;
}
else
cp = matchNonGreedyKid(state, ren, 1, 0, kidMatch);
if (cp == NULL)
return NULL;
break;
case REOP_STAR:
if ((ren->flags & RENODE_MINIMAL) == 0) {
cp2 = matchGreedyKid(state, ren, stop, 0, cp, NULL);
if (cp2)
cp = cp2;
}
else {
cp = matchNonGreedyKid(state, ren, 0, 0, cp);
if (cp == NULL)
return NULL;
}
break;
case REOP_OPT:
num = state->parenCount;
if (ren->flags & RENODE_MINIMAL) {
const jschar *restMatch = matchRENodes(state, ren->next,
stop, cp);
if (restMatch != NULL)
return restMatch;
}
kidMatch = matchRENodes(state, (RENode *)ren->kid,
ren->next, cp);
if (kidMatch == NULL) {
state->parenCount = num;
break;
}
else {
const jschar *restMatch = matchRENodes(state, ren->next,
stop, kidMatch);
if (restMatch == NULL) {
/* need to undo the result of running the kid */
state->parenCount = num;
break;
}
else
return restMatch;
}
case REOP_LPARENNON:
ren = (RENode *)ren->kid;
continue;
case REOP_RPARENNON:
break;
case REOP_LPAREN:
num = ren->u.num;
ren = (RENode *)ren->kid;
parsub = &state->parens[num];
parsub->chars = cp;
parsub->length = 0;
if (num >= state->parenCount)
state->parenCount = num + 1;
continue;
case REOP_RPAREN:
num = ren->u.num;
parsub = &state->parens[num];
parsub->length = cp - parsub->chars;
break;
case REOP_ASSERT:
kidMatch = matchRENodes(state, (RENode *)ren->kid,
ren->next, cp);
if (kidMatch == NULL)
return NULL;
break;
case REOP_ASSERT_NOT:
kidMatch = matchRENodes(state, (RENode *)ren->kid,
ren->next, cp);
if (kidMatch != NULL)
return NULL;
break;
case REOP_BACKREF:
num = ren->u.num;
if (num >= state->parenCount) {
JS_ReportErrorNumber(state->context, js_GetErrorMessage, NULL,
JSMSG_BAD_BACKREF);
state->ok = JS_FALSE;
return NULL;
}
parsub = &state->parens[num];
if ((cp + parsub->length) > cpend)
return NULL;
else {
cp2 = parsub->chars;
for (i = 0; i < parsub->length; i++) {
if (!matchChar(state->flags, *cp++, *cp2++))
return NULL;
}
}
break;
case REOP_CCLASS:
if (cp != cpend) {
if (ren->u.ucclass.bitmap == NULL) {
if (!buildBitmap(state, ren)) {
state->ok = JS_FALSE;
return NULL;
}
}
c = *cp;
b = (c >> 3);
if (b >= ren->u.ucclass.bmsize) {
cp2 = (jschar*) ren->kid;
if (*cp2 == '^')
cp++;
else
return NULL;
}
else {
bit = c & 7;
bit = 1 << bit;
if ((ren->u.ucclass.bitmap[b] & bit) != 0)
cp++;
else
return NULL;
}
}
else
return NULL;
break;
case REOP_DOT:
if ((cp != cpend) && (*cp != '\n'))
cp++;
else
return NULL;
break;
case REOP_DOTSTARMIN:
for (cp2 = cp; cp2 < cpend; cp2++) {
const jschar *cp3 = matchRENodes(state, ren->next, stop, cp2);
if (cp3 != NULL)
return cp3;
if (*cp2 == '\n')
return NULL;
}
return NULL;
case REOP_DOTSTAR:
for (cp2 = cp; cp2 < cpend; cp2++)
if (*cp2 == '\n')
break;
while (cp2 >= cp) {
const jschar *cp3 = matchRENodes(state, ren->next, NULL, cp2);
if (cp3 != NULL) {
cp = cp2;
break;
}
cp2--;
}
break;
case REOP_WBDRY:
if (((cp == state->cpbegin) || !JS_ISWORD(cp[-1]))
^ ((cp >= cpend) || !JS_ISWORD(*cp)))
; /* leave cp */
else
return NULL;
break;
case REOP_WNONBDRY:
if (((cp == state->cpbegin) || !JS_ISWORD(cp[-1]))
^ ((cp < cpend) && JS_ISWORD(*cp)))
; /* leave cp */
else
return NULL;
break;
case REOP_EOLONLY:
case REOP_EOL:
if (cp == cpend)
; /* leave cp */
else {
if (state->context->regExpStatics.multiline
|| ((state->flags & JSREG_MULTILINE) != 0))
if (*cp == '\n')
;/* leave cp */
else
return NULL;
else
return NULL;
}
break;
case REOP_BOL:
if (cp != state->cpbegin) {
if ((cp < cpend)
&& (state->context->regExpStatics.multiline
|| ((state->flags & JSREG_MULTILINE) != 0))) {
if (cp[-1] == '\n') {
break;
}
}
return NULL;
}
/* leave cp */
break;
case REOP_DIGIT:
if ((cp != cpend) && JS_ISDIGIT(*cp))
cp++;
else
return NULL;
break;
case REOP_NONDIGIT:
if ((cp != cpend) && !JS_ISDIGIT(*cp))
cp++;
else
return NULL;
break;
case REOP_ALNUM:
if ((cp != cpend) && JS_ISWORD(*cp))
cp++;
else
return NULL;
break;
case REOP_NONALNUM:
if ((cp != cpend) && !JS_ISWORD(*cp))
cp++;
else
return NULL;
break;
case REOP_SPACE:
if ((cp != cpend) && JS_ISSPACE(*cp))
cp++;
else
return NULL;
break;
case REOP_NONSPACE:
if ((cp != cpend) && !JS_ISSPACE(*cp))
cp++;
else
return NULL;
break;
case REOP_FLAT1:
if ((cp != cpend)
&& matchChar(state->flags, ren->u.chr, *cp))
cp++;
else
return NULL;
break;
case REOP_FLAT:
source = (jschar*) ren->kid;
length = (const jschar *)ren->u.kid2 - source;
if ((cp + length) > cpend)
return NULL;
else {
for (i = 0; i < length; i++) {
if (!matchChar(state->flags, *cp++, *source++))
return NULL;
}
}
break;
case REOP_JUMP:
break;
case REOP_END:
break;
default :
JS_ASSERT(JS_FALSE);
break;
}
ren = ren->next;
}
return cp;
}
static const jschar *
MatchRegExp(MatchState *state, RENode *ren, const jschar *cp)
{
const jschar *cp2, *cp3;
/* have to include the position beyond the last character
in order to detect end-of-input/line condition */
for (cp2 = cp; cp2 <= state->cpend; cp2++) {
state->skipped = cp2 - cp;
state->parenCount = 0;
cp3 = matchRENodes(state, ren, NULL, cp2);
if (!state->ok) return NULL;
if (cp3 != NULL)
return cp3;
}
return NULL;
}
JSBool
js_ExecuteRegExp(JSContext *cx, JSRegExp *re, JSString *str, size_t *indexp,
JSBool test, jsval *rval)
{
MatchState state;
const jschar *cp, *ep;
size_t i, length, start;
void *mark;
JSSubString *parsub, *morepar;
JSBool ok;
JSRegExpStatics *res;
ptrdiff_t matchlen;
uintN num, morenum;
JSString *parstr, *matchstr;
JSObject *obj;
/*
* Initialize a state struct to minimize recursive argument traffic.
*/
state.context = cx;
state.anchoring = JS_FALSE;
state.flags = re->flags;
/*
* It's safe to load from cp because JSStrings have a zero at the end,
* and we never let cp get beyond cpend.
*/
start = *indexp;
if (start > str->length)
start = str->length;
cp = str->chars + start;
state.cpbegin = str->chars;
state.cpend = str->chars + str->length;
state.start = start;
state.skipped = 0;
/*
* Use the temporary arena pool to grab space for parenthetical matches.
* After the JS_ARENA_ALLOCATE early return on error, goto out to be sure
* to free this memory.
*/
length = 2 * sizeof(JSSubString) * re->parenCount;
mark = JS_ARENA_MARK(&cx->tempPool);
JS_ARENA_ALLOCATE_CAST(parsub, JSSubString *, &cx->tempPool, length);
if (!parsub) {
JS_ReportOutOfMemory(cx);
return JS_FALSE;
}
memset(parsub, 0, length);
state.parenCount = 0;
state.maybeParens = parsub;
state.parens = parsub + re->parenCount;
state.ok = JS_TRUE;
/*
* Call the recursive matcher to do the real work. Return null on mismatch
* whether testing or not. On match, return an extended Array object.
*/
cp = MatchRegExp(&state, re->ren, cp);
if (!(ok = state.ok)) goto out;
if (!cp) {
*rval = JSVAL_NULL;
goto out;
}
i = PTRDIFF(cp, state.cpbegin, jschar);
*indexp = i;
matchlen = i - (start + state.skipped);
ep = cp;
cp -= matchlen;
if (test) {
/*
* Testing for a match and updating cx->regExpStatics: don't allocate
* an array object, do return true.
*/
*rval = JSVAL_TRUE;
/* Avoid warning. (gcc doesn't detect that obj is needed iff !test); */
obj = NULL;
} else {
/*
* The array returned on match has element 0 bound to the matched
* string, elements 1 through state.parenCount bound to the paren
* matches, an index property telling the length of the left context,
* and an input property referring to the input string.
*/
obj = js_NewArrayObject(cx, 0, NULL);
if (!obj) {
ok = JS_FALSE;
goto out;
}
*rval = OBJECT_TO_JSVAL(obj);
#define DEFVAL(val, id) { \
ok = js_DefineProperty(cx, obj, id, val, \
JS_PropertyStub, JS_PropertyStub, \
JSPROP_ENUMERATE, NULL); \
if (!ok) { \
cx->newborn[GCX_OBJECT] = NULL; \
cx->newborn[GCX_STRING] = NULL; \
goto out; \
} \
}
matchstr = js_NewStringCopyN(cx, cp, matchlen, 0);
if (!matchstr) {
cx->newborn[GCX_OBJECT] = NULL;
ok = JS_FALSE;
goto out;
}
DEFVAL(STRING_TO_JSVAL(matchstr), INT_TO_JSVAL(0));
}
res = &cx->regExpStatics;
JS_ASSERT(state.parenCount <= re->parenCount);
if (state.parenCount == 0) {
res->parenCount = 0;
res->lastParen = js_EmptySubString;
} else {
for (num = 0; num < state.parenCount; num++) {
parsub = &state.parens[num];
if (num < 9) {
res->parens[num] = *parsub;
} else {
morenum = num - 9;
morepar = res->moreParens;
if (!morepar) {
res->moreLength = 10;
morepar = (JSSubString*) JS_malloc(cx,
10 * sizeof(JSSubString));
} else if (morenum > res->moreLength) {
res->moreLength += 10;
morepar = (JSSubString*) JS_realloc(cx, morepar,
res->moreLength * sizeof(JSSubString));
}
if (!morepar) {
cx->newborn[GCX_OBJECT] = NULL;
cx->newborn[GCX_STRING] = NULL;
ok = JS_FALSE;
goto out;
}
res->moreParens = morepar;
morepar[morenum] = *parsub;
}
if (test)
continue;
parstr = js_NewStringCopyN(cx, parsub->chars, parsub->length, 0);
if (!parstr) {
cx->newborn[GCX_OBJECT] = NULL;
cx->newborn[GCX_STRING] = NULL;
ok = JS_FALSE;
goto out;
}
ok = js_DefineProperty(cx, obj, INT_TO_JSVAL(num + 1),
STRING_TO_JSVAL(parstr), NULL, NULL,
JSPROP_ENUMERATE, NULL);
if (!ok) {
cx->newborn[GCX_OBJECT] = NULL;
cx->newborn[GCX_STRING] = NULL;
goto out;
}
}
res->parenCount = num;
res->lastParen = *parsub;
}
if (!test) {
/*
* Define the index and input properties last for better for/in loop
* order (so they come after the elements).
*/
DEFVAL(INT_TO_JSVAL(start + state.skipped),
(jsid)cx->runtime->atomState.indexAtom);
DEFVAL(STRING_TO_JSVAL(str),
(jsid)cx->runtime->atomState.inputAtom);
}
#undef DEFVAL
res->lastMatch.chars = cp;
res->lastMatch.length = matchlen;
if (cx->version == JSVERSION_1_2) {
/*
* JS1.2 emulated Perl4.0.1.8 (patch level 36) for global regexps used
* in scalar contexts, and unintentionally for the string.match "list"
* psuedo-context. On "hi there bye", the following would result:
*
* Language while(/ /g){print("$`");} s/ /$`/g
* perl4.036 "hi", "there" "hihitherehi therebye"
* perl5 "hi", "hi there" "hihitherehi therebye"
* js1.2 "hi", "there" "hihitheretherebye"
*/
res->leftContext.chars = str->chars + start;
res->leftContext.length = state.skipped;
} else {
/*
* For JS1.3 and ECMAv2, emulate Perl5 exactly:
*
* js1.3 "hi", "hi there" "hihitherehi therebye"
*/
res->leftContext.chars = str->chars;
res->leftContext.length = start + state.skipped;
}
res->rightContext.chars = ep;
res->rightContext.length = state.cpend - ep;
out:
JS_ARENA_RELEASE(&cx->tempPool, mark);
return ok;
}
/************************************************************************/
enum regexp_tinyid {
REGEXP_SOURCE = -1,
REGEXP_GLOBAL = -2,
REGEXP_IGNORE_CASE = -3,
REGEXP_LAST_INDEX = -4,
REGEXP_MULTILINE = -5
};
#define REGEXP_PROP_ATTRS (JSPROP_ENUMERATE|JSPROP_PERMANENT|JSPROP_SHARED)
static JSPropertySpec regexp_props[] = {
{"source", REGEXP_SOURCE, REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
{"global", REGEXP_GLOBAL, REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
{"ignoreCase", REGEXP_IGNORE_CASE, REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
{"lastIndex", REGEXP_LAST_INDEX, REGEXP_PROP_ATTRS,0,0},
{"multiline", REGEXP_MULTILINE, REGEXP_PROP_ATTRS | JSPROP_READONLY,0,0},
{0,0,0,0,0}
};
static JSBool
regexp_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
{
jsint slot;
JSRegExp *re;
if (!JSVAL_IS_INT(id))
return JS_TRUE;
slot = JSVAL_TO_INT(id);
JS_LOCK_OBJ(cx, obj);
re = (JSRegExp*) JS_GetInstancePrivate(cx, obj, &js_RegExpClass, NULL);
if (re) {
switch (slot) {
case REGEXP_SOURCE:
*vp = STRING_TO_JSVAL(re->source);
break;
case REGEXP_GLOBAL:
*vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_GLOB) != 0);
break;
case REGEXP_IGNORE_CASE:
*vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_FOLD) != 0);
break;
case REGEXP_LAST_INDEX:
if (!js_NewNumberValue(cx, (jsdouble)re->lastIndex, vp))
return JS_FALSE;
break;
case REGEXP_MULTILINE:
*vp = BOOLEAN_TO_JSVAL((re->flags & JSREG_MULTILINE) != 0);
break;
}
}
JS_UNLOCK_OBJ(cx, obj);
return JS_TRUE;
}
static JSBool
regexp_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
{
jsint slot;
JSRegExp *re;
jsdouble d;
if (!JSVAL_IS_INT(id))
return JS_TRUE;
slot = JSVAL_TO_INT(id);
JS_LOCK_OBJ(cx, obj);
re = (JSRegExp*) JS_GetInstancePrivate(cx, obj, &js_RegExpClass, NULL);
if (re && slot == REGEXP_LAST_INDEX) {
if (!js_ValueToNumber(cx, *vp, &d))
return JS_FALSE;
re->lastIndex = (uintN)d;
}
JS_UNLOCK_OBJ(cx, obj);
return JS_TRUE;
}
/*
* RegExp class static properties and their Perl counterparts:
*
* RegExp.input $_
* RegExp.multiline $*
* RegExp.lastMatch $&
* RegExp.lastParen $+
* RegExp.leftContext $`
* RegExp.rightContext $'
*/
enum regexp_static_tinyid {
REGEXP_STATIC_INPUT = -1,
REGEXP_STATIC_MULTILINE = -2,
REGEXP_STATIC_LAST_MATCH = -3,
REGEXP_STATIC_LAST_PAREN = -4,
REGEXP_STATIC_LEFT_CONTEXT = -5,
REGEXP_STATIC_RIGHT_CONTEXT = -6
};
JSBool
js_InitRegExpStatics(JSContext *cx, JSRegExpStatics *res)
{
JS_ClearRegExpStatics(cx);
return js_AddRoot(cx, &res->input, "res->input");
}
void
js_FreeRegExpStatics(JSContext *cx, JSRegExpStatics *res)
{
if (res->moreParens) {
JS_free(cx, res->moreParens);
res->moreParens = NULL;
}
js_RemoveRoot(cx->runtime, &res->input);
}
static JSBool
regexp_static_getProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
{
jsint slot;
JSRegExpStatics *res;
JSString *str;
JSSubString *sub;
res = &cx->regExpStatics;
if (!JSVAL_IS_INT(id))
return JS_TRUE;
slot = JSVAL_TO_INT(id);
switch (slot) {
case REGEXP_STATIC_INPUT:
*vp = res->input ? STRING_TO_JSVAL(res->input)
: JS_GetEmptyStringValue(cx);
return JS_TRUE;
case REGEXP_STATIC_MULTILINE:
*vp = BOOLEAN_TO_JSVAL(res->multiline);
return JS_TRUE;
case REGEXP_STATIC_LAST_MATCH:
sub = &res->lastMatch;
break;
case REGEXP_STATIC_LAST_PAREN:
sub = &res->lastParen;
break;
case REGEXP_STATIC_LEFT_CONTEXT:
sub = &res->leftContext;
break;
case REGEXP_STATIC_RIGHT_CONTEXT:
sub = &res->rightContext;
break;
default:
sub = REGEXP_PAREN_SUBSTRING(res, slot);
break;
}
str = js_NewStringCopyN(cx, sub->chars, sub->length, 0);
if (!str)
return JS_FALSE;
*vp = STRING_TO_JSVAL(str);
return JS_TRUE;
}
static JSBool
regexp_static_setProperty(JSContext *cx, JSObject *obj, jsval id, jsval *vp)
{
JSRegExpStatics *res;
if (!JSVAL_IS_INT(id))
return JS_TRUE;
res = &cx->regExpStatics;
/* XXX use if-else rather than switch to keep MSVC1.52 from crashing */
if (JSVAL_TO_INT(id) == REGEXP_STATIC_INPUT) {
if (!JSVAL_IS_STRING(*vp) &&
!JS_ConvertValue(cx, *vp, JSTYPE_STRING, vp)) {
return JS_FALSE;
}
res->input = JSVAL_TO_STRING(*vp);
} else if (JSVAL_TO_INT(id) == REGEXP_STATIC_MULTILINE) {
if (!JSVAL_IS_BOOLEAN(*vp) &&
!JS_ConvertValue(cx, *vp, JSTYPE_BOOLEAN, vp)) {
return JS_FALSE;
}
res->multiline = JSVAL_TO_BOOLEAN(*vp);
}
return JS_TRUE;
}
static JSPropertySpec regexp_static_props[] = {
{"input",
REGEXP_STATIC_INPUT,
JSPROP_ENUMERATE|JSPROP_SHARED,
regexp_static_getProperty, regexp_static_setProperty},
{"multiline",
REGEXP_STATIC_MULTILINE,
JSPROP_ENUMERATE|JSPROP_SHARED,
regexp_static_getProperty, regexp_static_setProperty},
{"lastMatch",
REGEXP_STATIC_LAST_MATCH,
JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
regexp_static_getProperty, regexp_static_getProperty},
{"lastParen",
REGEXP_STATIC_LAST_PAREN,
JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
regexp_static_getProperty, regexp_static_getProperty},
{"leftContext",
REGEXP_STATIC_LEFT_CONTEXT,
JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
regexp_static_getProperty, regexp_static_getProperty},
{"rightContext",
REGEXP_STATIC_RIGHT_CONTEXT,
JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
regexp_static_getProperty, regexp_static_getProperty},
/* XXX should have block scope and local $1, etc. */
{"$1", 0, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
regexp_static_getProperty, regexp_static_getProperty},
{"$2", 1, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
regexp_static_getProperty, regexp_static_getProperty},
{"$3", 2, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
regexp_static_getProperty, regexp_static_getProperty},
{"$4", 3, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
regexp_static_getProperty, regexp_static_getProperty},
{"$5", 4, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
regexp_static_getProperty, regexp_static_getProperty},
{"$6", 5, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
regexp_static_getProperty, regexp_static_getProperty},
{"$7", 6, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
regexp_static_getProperty, regexp_static_getProperty},
{"$8", 7, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
regexp_static_getProperty, regexp_static_getProperty},
{"$9", 8, JSPROP_ENUMERATE|JSPROP_READONLY|JSPROP_SHARED,
regexp_static_getProperty, regexp_static_getProperty},
{0,0,0,0,0}
};
static void
regexp_finalize(JSContext *cx, JSObject *obj)
{
JSRegExp *re;
re = (JSRegExp*) JS_GetPrivate(cx, obj);
if (!re)
return;
js_DestroyRegExp(cx, re);
}
/* Forward static prototype. */
static JSBool
regexp_exec(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
jsval *rval);
static JSBool
regexp_call(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
{
return regexp_exec(cx, JSVAL_TO_OBJECT(argv[-2]), argc, argv, rval);
}
#if JS_HAS_XDR
#include "jsxdrapi.h"
static JSBool
regexp_xdrObject(JSXDRState *xdr, JSObject **objp)
{
JSRegExp *re;
JSString *source;
uint8 flags;
if (xdr->mode == JSXDR_ENCODE) {
re = (JSRegExp*) JS_GetPrivate(xdr->cx, *objp);
if (!re)
return JS_FALSE;
source = re->source;
flags = re->flags;
}
if (!JS_XDRString(xdr, &source) ||
!JS_XDRUint8(xdr, &flags)) {
return JS_FALSE;
}
if (xdr->mode == JSXDR_DECODE) {
*objp = js_NewObject(xdr->cx, &js_RegExpClass, NULL, NULL);
if (!*objp)
return JS_FALSE;
re = js_NewRegExp(xdr->cx, NULL, source, flags, JS_FALSE);
if (!re)
return JS_FALSE;
if (!JS_SetPrivate(xdr->cx, *objp, re)) {
js_DestroyRegExp(xdr->cx, re);
return JS_FALSE;
}
}
return JS_TRUE;
}
#else /* !JS_HAS_XDR */
#define regexp_xdrObject NULL
#endif /* !JS_HAS_XDR */
JSClass js_RegExpClass = {
js_RegExp_str,
JSCLASS_HAS_PRIVATE,
JS_PropertyStub, JS_PropertyStub, regexp_getProperty, regexp_setProperty,
JS_EnumerateStub, JS_ResolveStub, JS_ConvertStub, regexp_finalize,
NULL, NULL, regexp_call, NULL,
regexp_xdrObject, NULL, NULL, 0
};
static JSBool
regexp_toString(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
jsval *rval)
{
JSBool ok;
JSRegExp *re;
jschar *chars;
size_t length, nflags;
uintN flags;
JSString *str;
if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
return JS_FALSE;
ok = JS_TRUE;
JS_LOCK_OBJ(cx, obj);
re = (JSRegExp*) JS_GetPrivate(cx, obj);
if (!re) {
*rval = STRING_TO_JSVAL(cx->runtime->emptyString);
goto out;
}
length = re->source->length + 2;
nflags = 0;
for (flags = re->flags; flags != 0; flags &= flags - 1)
nflags++;
chars = (jschar*) JS_malloc(cx, (length + nflags + 1) * sizeof(jschar));
if (!chars) {
ok = JS_FALSE;
goto out;
}
chars[0] = '/';
js_strncpy(&chars[1], re->source->chars, length - 2);
chars[length-1] = '/';
if (nflags) {
if (re->flags & JSREG_GLOB)
chars[length++] = 'g';
if (re->flags & JSREG_FOLD)
chars[length++] = 'i';
if (re->flags & JSREG_MULTILINE)
chars[length++] = 'm';
}
chars[length] = 0;
str = js_NewString(cx, chars, length, 0);
if (!str) {
JS_free(cx, chars);
ok = JS_FALSE;
goto out;
}
*rval = STRING_TO_JSVAL(str);
out:
JS_UNLOCK_OBJ(cx, obj);
return ok;
}
static JSBool
regexp_compile(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
jsval *rval)
{
JSString *opt, *str;
JSRegExp *oldre, *re;
JSBool ok;
JSObject *obj2;
if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
return JS_FALSE;
opt = NULL;
JS_LOCK_OBJ(cx, obj);
if (argc == 0) {
str = js_NewStringCopyN(cx, cx->runtime->emptyString->chars,
cx->runtime->emptyString->length, 0);
if (!str) {
ok = JS_FALSE;
goto out;
}
} else {
if (JSVAL_IS_OBJECT(argv[0])) {
obj2 = JSVAL_TO_OBJECT(argv[0]);
/*
* If we get passed in a RegExp object we construct a new
* RegExp that is a duplicate of it by re-compiling the
* original source code. ECMA requires that it be an error
* here if the flags are specified. (We must use the flags
* from the original RegExp also).
*/
if (obj2 && OBJ_GET_CLASS(cx, obj2) == &js_RegExpClass) {
if (argc >= 2 && !JSVAL_IS_VOID(argv[1])) { /* 'flags' defined */
JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
JSMSG_NEWREGEXP_FLAGGED);
ok = JS_FALSE;
goto out;
}
re = (JSRegExp*) JS_GetPrivate(cx, obj2);
if (!re) {
ok = JS_FALSE;
goto out;
}
re = js_NewRegExp(cx, NULL, re->source, re->flags, JS_FALSE);
goto madeit;
}
}
str = js_ValueToString(cx, argv[0]);
if (!str) {
ok = JS_FALSE;
goto out;
}
argv[0] = STRING_TO_JSVAL(str);
if (argc > 1) {
if (JSVAL_IS_VOID(argv[1]))
opt = NULL;
else {
opt = js_ValueToString(cx, argv[1]);
if (!opt) {
ok = JS_FALSE;
goto out;
}
argv[1] = STRING_TO_JSVAL(opt);
}
}
}
re = js_NewRegExpOpt(cx, NULL, str, opt, JS_FALSE);
madeit:
if (!re) {
ok = JS_FALSE;
goto out;
}
oldre = (JSRegExp*) JS_GetPrivate(cx, obj);
ok = JS_SetPrivate(cx, obj, re);
if (!ok) {
js_DestroyRegExp(cx, re);
goto out;
}
if (oldre)
js_DestroyRegExp(cx, oldre);
*rval = OBJECT_TO_JSVAL(obj);
out:
JS_UNLOCK_OBJ(cx, obj);
return ok;
}
static JSBool
regexp_exec_sub(JSContext *cx, JSObject *obj, uintN argc, jsval *argv,
JSBool test, jsval *rval)
{
JSBool ok, locked;
JSRegExp *re;
JSString *str;
size_t i;
if (!JS_InstanceOf(cx, obj, &js_RegExpClass, argv))
return JS_FALSE;
re = (JSRegExp*) JS_GetPrivate(cx, obj);
if (!re)
return JS_TRUE;
ok = locked = JS_FALSE;
if (argc == 0) {
str = cx->regExpStatics.input;
if (!str) {
JS_ReportErrorNumber(cx, js_GetErrorMessage, NULL,
JSMSG_NO_INPUT,
JS_GetStringBytes(re->source),
(re->flags & JSREG_GLOB) ? "g" : "",
(re->flags & JSREG_FOLD) ? "i" : "",
(re->flags & JSREG_MULTILINE) ? "m" : "");
goto out;
}
} else {
str = js_ValueToString(cx, argv[0]);
if (!str)
goto out;
argv[0] = STRING_TO_JSVAL(str);
}
if (re->flags & JSREG_GLOB) {
JS_LOCK_OBJ(cx, obj);
locked = JS_TRUE;
i = re->lastIndex;
} else {
i = 0;
}
ok = js_ExecuteRegExp(cx, re, str, &i, test, rval);
if (re->flags & JSREG_GLOB)
re->lastIndex = (*rval == JSVAL_NULL) ? 0 : i;
out:
if (locked)
JS_UNLOCK_OBJ(cx, obj);
return ok;
}
static JSBool
regexp_exec(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
{
return regexp_exec_sub(cx, obj, argc, argv, JS_FALSE, rval);
}
static JSBool
regexp_test(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
{
if (!regexp_exec_sub(cx, obj, argc, argv, JS_TRUE, rval))
return JS_FALSE;
if (*rval != JSVAL_TRUE)
*rval = JSVAL_FALSE;
return JS_TRUE;
}
static JSFunctionSpec regexp_methods[] = {
#if JS_HAS_TOSOURCE
{js_toSource_str, regexp_toString, 0,0,0},
#endif
{js_toString_str, regexp_toString, 0,0,0},
{"compile", regexp_compile, 1,0,0},
{"exec", regexp_exec, 0,0,0},
{"test", regexp_test, 0,0,0},
{0,0,0,0,0}
};
static JSBool
RegExp(JSContext *cx, JSObject *obj, uintN argc, jsval *argv, jsval *rval)
{
if (!cx->fp->constructing) {
/*
* If first arg is regexp and no flags are given, just return the arg.
* (regexp_compile detects the regexp + flags case and throws a
* TypeError.) See 10.15.3.1.
*/
if ((argc < 2 || JSVAL_IS_VOID(argv[1])) && JSVAL_IS_OBJECT(argv[0]) &&
OBJ_GET_CLASS(cx, JSVAL_TO_OBJECT(argv[0])) == &js_RegExpClass) {
*rval = argv[0];
return JS_TRUE;
}
/* Otherwise, replace obj with a new RegExp object. */
obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL);
if (!obj)
return JS_FALSE;
}
return regexp_compile(cx, obj, argc, argv, rval);
}
JSObject *
js_InitRegExpClass(JSContext *cx, JSObject *obj)
{
JSObject *proto, *ctor;
proto = JS_InitClass(cx, obj, NULL, &js_RegExpClass, RegExp, 1,
regexp_props, regexp_methods,
regexp_static_props, NULL);
if (!proto || !(ctor = JS_GetConstructor(cx, proto)))
return NULL;
if (!JS_AliasProperty(cx, ctor, "input", "$_") ||
!JS_AliasProperty(cx, ctor, "multiline", "$*") ||
!JS_AliasProperty(cx, ctor, "lastMatch", "$&") ||
!JS_AliasProperty(cx, ctor, "lastParen", "$+") ||
!JS_AliasProperty(cx, ctor, "leftContext", "$`") ||
!JS_AliasProperty(cx, ctor, "rightContext", "$'")) {
goto bad;
}
return proto;
bad:
JS_DeleteProperty(cx, obj, js_RegExpClass.name);
return NULL;
}
JSObject *
js_NewRegExpObject(JSContext *cx, JSTokenStream *ts,
jschar *chars, size_t length, uintN flags)
{
JSString *str;
JSObject *obj;
JSRegExp *re;
str = js_NewStringCopyN(cx, chars, length, 0);
if (!str)
return NULL;
re = js_NewRegExp(cx, ts, str, flags, JS_FALSE);
if (!re)
return NULL;
obj = js_NewObject(cx, &js_RegExpClass, NULL, NULL);
if (!obj || !JS_SetPrivate(cx, obj, re)) {
js_DestroyRegExp(cx, re);
return NULL;
}
return obj;
}
#endif /* JS_HAS_REGEXPS */