зеркало из https://github.com/mozilla/pjs.git
Bug 751845 - Remove pcre source, and cleanup js/src/Makefile.in after bug 691898. r=dmandelin
This commit is contained in:
Родитель
a7b353d0fa
Коммит
a779df4135
|
@ -321,47 +321,19 @@ endif
|
||||||
|
|
||||||
endif
|
endif
|
||||||
|
|
||||||
ifeq (,$(filter arm% sparc %86 x86_64 mips%,$(TARGET_CPU)))
|
|
||||||
|
|
||||||
VPATH += $(srcdir)/assembler \
|
|
||||||
$(srcdir)/assembler/wtf \
|
|
||||||
$(srcdir)/assembler/jit \
|
|
||||||
$(srcdir)/yarr \
|
|
||||||
$(NULL)
|
|
||||||
|
|
||||||
CPPSRCS += ExecutableAllocator.cpp \
|
|
||||||
ExecutableAllocatorPosix.cpp \
|
|
||||||
OSAllocatorOS2.cpp \
|
|
||||||
OSAllocatorPosix.cpp \
|
|
||||||
OSAllocatorWin.cpp \
|
|
||||||
PageBlock.cpp \
|
|
||||||
YarrInterpreter.cpp \
|
|
||||||
YarrPattern.cpp \
|
|
||||||
YarrSyntaxChecker.cpp \
|
|
||||||
$(NULL)
|
|
||||||
else
|
|
||||||
|
|
||||||
###############################################
|
###############################################
|
||||||
# BEGIN include sources for the Nitro assembler
|
# BEGIN include sources for the Nitro assembler
|
||||||
#
|
#
|
||||||
|
|
||||||
ENABLE_YARR_JIT = 1
|
|
||||||
|
|
||||||
VPATH += $(srcdir)/assembler \
|
VPATH += $(srcdir)/assembler \
|
||||||
$(srcdir)/assembler/wtf \
|
$(srcdir)/assembler/wtf \
|
||||||
$(srcdir)/assembler/jit \
|
$(srcdir)/assembler/jit \
|
||||||
$(srcdir)/assembler/assembler \
|
|
||||||
$(srcdir)/methodjit \
|
|
||||||
$(srcdir)/yarr \
|
$(srcdir)/yarr \
|
||||||
$(NONE)
|
$(NONE)
|
||||||
|
|
||||||
CPPSRCS += ExecutableAllocator.cpp \
|
CPPSRCS += ExecutableAllocator.cpp \
|
||||||
ARMAssembler.cpp \
|
|
||||||
MacroAssemblerARM.cpp \
|
|
||||||
MacroAssemblerX86Common.cpp \
|
|
||||||
PageBlock.cpp \
|
PageBlock.cpp \
|
||||||
YarrInterpreter.cpp \
|
YarrInterpreter.cpp \
|
||||||
YarrJIT.cpp \
|
|
||||||
YarrPattern.cpp \
|
YarrPattern.cpp \
|
||||||
YarrSyntaxChecker.cpp \
|
YarrSyntaxChecker.cpp \
|
||||||
$(NONE)
|
$(NONE)
|
||||||
|
@ -386,6 +358,19 @@ CPPSRCS += ExecutableAllocatorOS2.cpp \
|
||||||
$(NONE)
|
$(NONE)
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
ifneq (,$(filter arm% sparc %86 x86_64 mips%,$(TARGET_CPU)))
|
||||||
|
ENABLE_YARR_JIT = 1
|
||||||
|
|
||||||
|
VPATH += $(srcdir)/assembler/assembler \
|
||||||
|
$(srcdir)/methodjit \
|
||||||
|
$(NONE)
|
||||||
|
|
||||||
|
CPPSRCS += ARMAssembler.cpp \
|
||||||
|
MacroAssemblerARM.cpp \
|
||||||
|
MacroAssemblerX86Common.cpp \
|
||||||
|
YarrJIT.cpp \
|
||||||
|
$(NONE)
|
||||||
|
|
||||||
ifeq (86, $(findstring 86,$(TARGET_CPU)))
|
ifeq (86, $(findstring 86,$(TARGET_CPU)))
|
||||||
ifeq (x86_64, $(TARGET_CPU))
|
ifeq (x86_64, $(TARGET_CPU))
|
||||||
#CPPSRCS += only_on_x86_64.cpp
|
#CPPSRCS += only_on_x86_64.cpp
|
||||||
|
@ -396,12 +381,13 @@ endif
|
||||||
ifeq (arm, $(TARGET_CPU))
|
ifeq (arm, $(TARGET_CPU))
|
||||||
#CPPSRCS += only_on_arm.cpp
|
#CPPSRCS += only_on_arm.cpp
|
||||||
endif
|
endif
|
||||||
|
|
||||||
|
endif
|
||||||
|
|
||||||
#
|
#
|
||||||
# END include sources for the Nitro assembler
|
# END include sources for the Nitro assembler
|
||||||
#############################################
|
#############################################
|
||||||
|
|
||||||
endif
|
|
||||||
|
|
||||||
ifdef JS_HAS_CTYPES
|
ifdef JS_HAS_CTYPES
|
||||||
VPATH += $(srcdir)/ctypes
|
VPATH += $(srcdir)/ctypes
|
||||||
|
|
||||||
|
|
|
@ -1,12 +0,0 @@
|
||||||
Originally written by: Philip Hazel
|
|
||||||
Email local part: ph10
|
|
||||||
Email domain: cam.ac.uk
|
|
||||||
|
|
||||||
University of Cambridge Computing Service,
|
|
||||||
Cambridge, England. Phone: +44 1223 334714.
|
|
||||||
|
|
||||||
Copyright (c) 1997-2005 University of Cambridge. All rights reserved.
|
|
||||||
|
|
||||||
Adapted for JavaScriptCore and WebKit by Apple Inc.
|
|
||||||
|
|
||||||
Copyright (c) 2005, 2006, 2007 Apple Inc. All rights reserved.
|
|
|
@ -1,35 +0,0 @@
|
||||||
PCRE is a library of functions to support regular expressions whose syntax
|
|
||||||
and semantics are as close as possible to those of the Perl 5 language.
|
|
||||||
|
|
||||||
This is JavaScriptCore's variant of the PCRE library. While this library
|
|
||||||
started out as a copy of PCRE, many of the features of PCRE have been
|
|
||||||
removed.
|
|
||||||
|
|
||||||
Copyright (c) 1997-2005 University of Cambridge. All rights reserved.
|
|
||||||
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer.
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
|
|
||||||
* Neither the name of the University of Cambridge nor the name of Apple
|
|
||||||
Inc. nor the names of their contributors may be used to endorse or
|
|
||||||
promote products derived from this software without specific prior
|
|
||||||
written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
POSSIBILITY OF SUCH DAMAGE.
|
|
|
@ -1,96 +0,0 @@
|
||||||
/*************************************************
|
|
||||||
* Perl-Compatible Regular Expressions *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* This file is automatically written by the dftables auxiliary
|
|
||||||
program. If you edit it by hand, you might like to edit the Makefile to
|
|
||||||
prevent its ever being regenerated.
|
|
||||||
|
|
||||||
This file contains the default tables for characters with codes less than
|
|
||||||
128 (ASCII characters). These tables are used when no external tables are
|
|
||||||
passed to PCRE. */
|
|
||||||
|
|
||||||
const unsigned char jsc_pcre_default_tables[480] = {
|
|
||||||
|
|
||||||
/* This table is a lower casing table. */
|
|
||||||
|
|
||||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
|
||||||
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
|
|
||||||
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
|
|
||||||
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
|
|
||||||
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
|
|
||||||
0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
|
|
||||||
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
|
|
||||||
0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
|
|
||||||
0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
|
|
||||||
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
|
|
||||||
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
|
|
||||||
0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
|
|
||||||
0x60, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
|
|
||||||
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
|
|
||||||
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
|
|
||||||
0x78, 0x79, 0x7A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
|
|
||||||
|
|
||||||
/* This table is a case flipping table. */
|
|
||||||
|
|
||||||
0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
|
|
||||||
0x08, 0x09, 0x0A, 0x0B, 0x0C, 0x0D, 0x0E, 0x0F,
|
|
||||||
0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
|
|
||||||
0x18, 0x19, 0x1A, 0x1B, 0x1C, 0x1D, 0x1E, 0x1F,
|
|
||||||
0x20, 0x21, 0x22, 0x23, 0x24, 0x25, 0x26, 0x27,
|
|
||||||
0x28, 0x29, 0x2A, 0x2B, 0x2C, 0x2D, 0x2E, 0x2F,
|
|
||||||
0x30, 0x31, 0x32, 0x33, 0x34, 0x35, 0x36, 0x37,
|
|
||||||
0x38, 0x39, 0x3A, 0x3B, 0x3C, 0x3D, 0x3E, 0x3F,
|
|
||||||
0x40, 0x61, 0x62, 0x63, 0x64, 0x65, 0x66, 0x67,
|
|
||||||
0x68, 0x69, 0x6A, 0x6B, 0x6C, 0x6D, 0x6E, 0x6F,
|
|
||||||
0x70, 0x71, 0x72, 0x73, 0x74, 0x75, 0x76, 0x77,
|
|
||||||
0x78, 0x79, 0x7A, 0x5B, 0x5C, 0x5D, 0x5E, 0x5F,
|
|
||||||
0x60, 0x41, 0x42, 0x43, 0x44, 0x45, 0x46, 0x47,
|
|
||||||
0x48, 0x49, 0x4A, 0x4B, 0x4C, 0x4D, 0x4E, 0x4F,
|
|
||||||
0x50, 0x51, 0x52, 0x53, 0x54, 0x55, 0x56, 0x57,
|
|
||||||
0x58, 0x59, 0x5A, 0x7B, 0x7C, 0x7D, 0x7E, 0x7F,
|
|
||||||
|
|
||||||
/* This table contains bit maps for various character classes.
|
|
||||||
Each map is 32 bytes long and the bits run from the least
|
|
||||||
significant end of each byte. The classes are: space, digit, word. */
|
|
||||||
|
|
||||||
0x00, 0x3E, 0x00, 0x00, 0x01, 0x00, 0x00, 0x00,
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0xFF, 0x03,
|
|
||||||
0xFE, 0xFF, 0xFF, 0x87, 0xFE, 0xFF, 0xFF, 0x07,
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00,
|
|
||||||
|
|
||||||
/* This table identifies various classes of character by individual bits:
|
|
||||||
0x01 white space character
|
|
||||||
0x08 hexadecimal digit
|
|
||||||
0x10 alphanumeric or '_'
|
|
||||||
*/
|
|
||||||
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 0- 7 */
|
|
||||||
0x00, 0x01, 0x01, 0x01, 0x01, 0x01, 0x00, 0x00, /* 8- 15 */
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 16- 23 */
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 24- 31 */
|
|
||||||
0x01, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* - ' */
|
|
||||||
0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* ( - / */
|
|
||||||
0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, /* 0 - 7 */
|
|
||||||
0x18, 0x18, 0x00, 0x00, 0x00, 0x00, 0x00, 0x00, /* 8 - ? */
|
|
||||||
0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x10, /* @ - G */
|
|
||||||
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, /* H - O */
|
|
||||||
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, /* P - W */
|
|
||||||
0x10, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, 0x10, /* X - _ */
|
|
||||||
0x00, 0x18, 0x18, 0x18, 0x18, 0x18, 0x18, 0x10, /* ` - g */
|
|
||||||
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, /* h - o */
|
|
||||||
0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, 0x10, /* p - w */
|
|
||||||
0x10, 0x10, 0x10, 0x00, 0x00, 0x00, 0x00, 0x00}; /* x -127 */
|
|
||||||
|
|
||||||
|
|
||||||
/* End of chartables.c */
|
|
|
@ -1,273 +0,0 @@
|
||||||
#!/usr/bin/perl -w
|
|
||||||
#
|
|
||||||
# This is JavaScriptCore's variant of the PCRE library. While this library
|
|
||||||
# started out as a copy of PCRE, many of the features of PCRE have been
|
|
||||||
# removed. This library now supports only the regular expression features
|
|
||||||
# required by the JavaScript language specification, and has only the functions
|
|
||||||
# needed by JavaScriptCore and the rest of WebKit.
|
|
||||||
#
|
|
||||||
# Originally written by Philip Hazel
|
|
||||||
# Copyright (c) 1997-2006 University of Cambridge
|
|
||||||
# Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
|
|
||||||
#
|
|
||||||
# -----------------------------------------------------------------------------
|
|
||||||
# Redistribution and use in source and binary forms, with or without
|
|
||||||
# modification, are permitted provided that the following conditions are met:
|
|
||||||
#
|
|
||||||
# * Redistributions of source code must retain the above copyright notice,
|
|
||||||
# this list of conditions and the following disclaimer.
|
|
||||||
#
|
|
||||||
# * Redistributions in binary form must reproduce the above copyright
|
|
||||||
# notice, this list of conditions and the following disclaimer in the
|
|
||||||
# documentation and/or other materials provided with the distribution.
|
|
||||||
#
|
|
||||||
# * Neither the name of the University of Cambridge nor the names of its
|
|
||||||
# contributors may be used to endorse or promote products derived from
|
|
||||||
# this software without specific prior written permission.
|
|
||||||
#
|
|
||||||
# THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
# AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
# IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
# ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
# LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
# CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
# SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
# INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
# CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
# ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
# POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
# -----------------------------------------------------------------------------
|
|
||||||
|
|
||||||
# This is a freestanding support program to generate a file containing
|
|
||||||
# character tables. The tables are built according to the default C
|
|
||||||
# locale.
|
|
||||||
|
|
||||||
use strict;
|
|
||||||
|
|
||||||
use File::Basename;
|
|
||||||
use File::Spec;
|
|
||||||
use File::Temp qw(tempfile);
|
|
||||||
use Getopt::Long;
|
|
||||||
|
|
||||||
sub readHeaderValues();
|
|
||||||
|
|
||||||
my %pcre_internal;
|
|
||||||
|
|
||||||
if (scalar(@ARGV) < 1) {
|
|
||||||
print STDERR "Usage: ", basename($0), " [--preprocessor=program] output-file\n";
|
|
||||||
exit 1;
|
|
||||||
}
|
|
||||||
|
|
||||||
my $outputFile;
|
|
||||||
my $preprocessor;
|
|
||||||
GetOptions('preprocessor=s' => \$preprocessor);
|
|
||||||
if (not $preprocessor) {
|
|
||||||
$preprocessor = "cpp";
|
|
||||||
}
|
|
||||||
|
|
||||||
$outputFile = $ARGV[0];
|
|
||||||
die('Must specify output file.') unless defined($outputFile);
|
|
||||||
|
|
||||||
readHeaderValues();
|
|
||||||
|
|
||||||
open(OUT, ">", $outputFile) or die "$!";
|
|
||||||
binmode(OUT);
|
|
||||||
|
|
||||||
printf(OUT
|
|
||||||
"/*************************************************\n" .
|
|
||||||
"* Perl-Compatible Regular Expressions *\n" .
|
|
||||||
"*************************************************/\n\n" .
|
|
||||||
"/* This file is automatically written by the dftables auxiliary \n" .
|
|
||||||
"program. If you edit it by hand, you might like to edit the Makefile to \n" .
|
|
||||||
"prevent its ever being regenerated.\n\n");
|
|
||||||
printf(OUT
|
|
||||||
"This file contains the default tables for characters with codes less than\n" .
|
|
||||||
"128 (ASCII characters). These tables are used when no external tables are\n" .
|
|
||||||
"passed to PCRE. */\n\n" .
|
|
||||||
"const unsigned char jsc_pcre_default_tables[%d] = {\n\n" .
|
|
||||||
"/* This table is a lower casing table. */\n\n", $pcre_internal{tables_length});
|
|
||||||
|
|
||||||
if ($pcre_internal{lcc_offset} != 0) {
|
|
||||||
die "lcc_offset != 0";
|
|
||||||
}
|
|
||||||
|
|
||||||
printf(OUT " ");
|
|
||||||
for (my $i = 0; $i < 128; $i++) {
|
|
||||||
if (($i & 7) == 0 && $i != 0) {
|
|
||||||
printf(OUT "\n ");
|
|
||||||
}
|
|
||||||
printf(OUT "0x%02X", ord(lc(chr($i))));
|
|
||||||
if ($i != 127) {
|
|
||||||
printf(OUT ", ");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
printf(OUT ",\n\n");
|
|
||||||
|
|
||||||
printf(OUT "/* This table is a case flipping table. */\n\n");
|
|
||||||
|
|
||||||
if ($pcre_internal{fcc_offset} != 128) {
|
|
||||||
die "fcc_offset != 128";
|
|
||||||
}
|
|
||||||
|
|
||||||
printf(OUT " ");
|
|
||||||
for (my $i = 0; $i < 128; $i++) {
|
|
||||||
if (($i & 7) == 0 && $i != 0) {
|
|
||||||
printf(OUT "\n ");
|
|
||||||
}
|
|
||||||
my $c = chr($i);
|
|
||||||
printf(OUT "0x%02X", $c =~ /[[:lower:]]/ ? ord(uc($c)) : ord(lc($c)));
|
|
||||||
if ($i != 127) {
|
|
||||||
printf(OUT ", ");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
printf(OUT ",\n\n");
|
|
||||||
|
|
||||||
printf(OUT
|
|
||||||
"/* This table contains bit maps for various character classes.\n" .
|
|
||||||
"Each map is 32 bytes long and the bits run from the least\n" .
|
|
||||||
"significant end of each byte. The classes are: space, digit, word. */\n\n");
|
|
||||||
|
|
||||||
if ($pcre_internal{cbits_offset} != $pcre_internal{fcc_offset} + 128) {
|
|
||||||
die "cbits_offset != fcc_offset + 128";
|
|
||||||
}
|
|
||||||
|
|
||||||
my @cbit_table = (0) x $pcre_internal{cbit_length};
|
|
||||||
for (my $i = ord('0'); $i <= ord('9'); $i++) {
|
|
||||||
$cbit_table[$pcre_internal{cbit_digit} + $i / 8] |= 1 << ($i & 7);
|
|
||||||
}
|
|
||||||
$cbit_table[$pcre_internal{cbit_word} + ord('_') / 8] |= 1 << (ord('_') & 7);
|
|
||||||
for (my $i = 0; $i < 128; $i++) {
|
|
||||||
my $c = chr($i);
|
|
||||||
if ($c =~ /[[:alnum:]]/) {
|
|
||||||
$cbit_table[$pcre_internal{cbit_word} + $i / 8] |= 1 << ($i & 7);
|
|
||||||
}
|
|
||||||
if ($c =~ /[[:space:]]/) {
|
|
||||||
$cbit_table[$pcre_internal{cbit_space} + $i / 8] |= 1 << ($i & 7);
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
printf(OUT " ");
|
|
||||||
for (my $i = 0; $i < $pcre_internal{cbit_length}; $i++) {
|
|
||||||
if (($i & 7) == 0 && $i != 0) {
|
|
||||||
if (($i & 31) == 0) {
|
|
||||||
printf(OUT "\n");
|
|
||||||
}
|
|
||||||
printf(OUT "\n ");
|
|
||||||
}
|
|
||||||
printf(OUT "0x%02X", $cbit_table[$i]);
|
|
||||||
if ($i != $pcre_internal{cbit_length} - 1) {
|
|
||||||
printf(OUT ", ");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
printf(OUT ",\n\n");
|
|
||||||
|
|
||||||
printf(OUT
|
|
||||||
"/* This table identifies various classes of character by individual bits:\n" .
|
|
||||||
" 0x%02x white space character\n" .
|
|
||||||
" 0x%02x hexadecimal digit\n" .
|
|
||||||
" 0x%02x alphanumeric or '_'\n*/\n\n",
|
|
||||||
$pcre_internal{ctype_space}, $pcre_internal{ctype_xdigit}, $pcre_internal{ctype_word});
|
|
||||||
|
|
||||||
if ($pcre_internal{ctypes_offset} != $pcre_internal{cbits_offset} + $pcre_internal{cbit_length}) {
|
|
||||||
die "ctypes_offset != cbits_offset + cbit_length";
|
|
||||||
}
|
|
||||||
|
|
||||||
printf(OUT " ");
|
|
||||||
for (my $i = 0; $i < 128; $i++) {
|
|
||||||
my $x = 0;
|
|
||||||
my $c = chr($i);
|
|
||||||
if ($c =~ /[[:space:]]/) {
|
|
||||||
$x += $pcre_internal{ctype_space};
|
|
||||||
}
|
|
||||||
if ($c =~ /[[:xdigit:]]/) {
|
|
||||||
$x += $pcre_internal{ctype_xdigit};
|
|
||||||
}
|
|
||||||
if ($c =~ /[[:alnum:]_]/) {
|
|
||||||
$x += $pcre_internal{ctype_word};
|
|
||||||
}
|
|
||||||
printf(OUT "0x%02X", $x);
|
|
||||||
if ($i != 127) {
|
|
||||||
printf(OUT ", ");
|
|
||||||
} else {
|
|
||||||
printf(OUT "};");
|
|
||||||
}
|
|
||||||
if (($i & 7) == 7) {
|
|
||||||
printf(OUT " /* ");
|
|
||||||
my $d = chr($i - 7);
|
|
||||||
if ($d =~ /[[:print:]]/) {
|
|
||||||
printf(OUT " %c -", $i - 7);
|
|
||||||
} else {
|
|
||||||
printf(OUT "%3d-", $i - 7);
|
|
||||||
}
|
|
||||||
if ($c =~ m/[[:print:]]/) {
|
|
||||||
printf(OUT " %c ", $i);
|
|
||||||
} else {
|
|
||||||
printf(OUT "%3d", $i);
|
|
||||||
}
|
|
||||||
printf(OUT " */\n");
|
|
||||||
if ($i != 127) {
|
|
||||||
printf(OUT " ");
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
if ($pcre_internal{tables_length} != $pcre_internal{ctypes_offset} + 128) {
|
|
||||||
die "tables_length != ctypes_offset + 128";
|
|
||||||
}
|
|
||||||
|
|
||||||
printf(OUT "\n\n/* End of chartables.c */\n");
|
|
||||||
|
|
||||||
close(OUT);
|
|
||||||
|
|
||||||
exit 0;
|
|
||||||
|
|
||||||
# Populates the file-level %pcre_internal hash with the numeric table constants
# defined in pcre_internal.h (looked up in the same directory as this script).
#
# Method: a temporary file is written containing "#define DFTABLES", the text of
# the header, and one Perl assignment per constant name. That file is run
# through the file-level $preprocessor so each constant name is macro-expanded,
# and the preprocessor output is then evaluated as Perl.
#
# Takes no arguments and returns nothing useful. Dies if the header cannot be
# read, the temporary file cannot be written, the preprocessor cannot be run or
# exits unsuccessfully, or its output is not valid Perl. The temporary file is
# removed on every path.
#
# NOTE: the preprocessor output is executed with a string eval, so both the
# header and the program named by --preprocessor must be trusted.
sub readHeaderValues()
{
    my @variables = qw(
        cbit_digit
        cbit_length
        cbit_space
        cbit_word
        cbits_offset
        ctype_space
        ctype_word
        ctype_xdigit
        ctypes_offset
        fcc_offset
        lcc_offset
        tables_length
    );

    # Slurp mode: every read below returns the whole stream at once.
    local $/ = undef;

    my $headerPath = File::Spec->catfile(dirname($0), "pcre_internal.h");

    my ($fh, $tempFile) = tempfile(
        basename($0) . "-XXXXXXXX",
        DIR => File::Spec->tmpdir(),
        SUFFIX => ".in",
        UNLINK => 0,
    );

    # Everything that can fail while the temporary file exists runs inside this
    # block so that the file is always cleaned up afterwards.
    my $content = eval {
        print {$fh} "#define DFTABLES\n\n";

        open(my $header, "<", $headerPath) or die "$!";
        print {$fh} readline($header);
        close($header);

        print {$fh} "\n\n";

        for my $v (@variables) {
            print {$fh} "\$pcre_internal{\"$v\"} = $v;\n";
        }

        # close() reports buffered write errors (e.g. a full disk).
        close($fh) or die "Cannot write $tempFile: $!";

        open(my $cpp, "$preprocessor \"$tempFile\" |") or die "$!";
        my $output = readline($cpp);

        # For a piped open, close() returns false when the child exited with a
        # non-zero status; without this check a missing or failing preprocessor
        # would silently leave every constant undefined.
        close($cpp) or die "Preprocessor '$preprocessor' failed (wait status $?, error '$!')";

        $output;
    };
    my $error = $@;

    close($fh) if $error && defined fileno($fh);
    unlink $tempFile;
    die $error if $error;

    # The string eval stays directly in this sub so the generated assignments
    # resolve %pcre_internal exactly as before.
    eval $content;
    die "$@" if $@;
}
|
|
|
@ -1,76 +0,0 @@
|
||||||
/* This is the public header file for JavaScriptCore's variant of the PCRE
|
|
||||||
library. While this library started out as a copy of PCRE, many of the
|
|
||||||
features of PCRE have been removed. This library now supports only the
|
|
||||||
regular expression features required by the JavaScript language
|
|
||||||
specification, and has only the functions needed by JavaScriptCore and the
|
|
||||||
rest of WebKit.
|
|
||||||
|
|
||||||
Copyright (c) 1997-2005 University of Cambridge
|
|
||||||
Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer.
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
|
|
||||||
* Neither the name of the University of Cambridge nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived from
|
|
||||||
this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
|
|
||||||
// FIXME: This file needs to be renamed to JSRegExp.h; it's no longer PCRE.
|
|
||||||
|
|
||||||
#ifndef JSRegExp_h
|
|
||||||
#define JSRegExp_h
|
|
||||||
|
|
||||||
// Last remnants from the JSWTFBridge.
|
|
||||||
#include "assembler/wtf/Platform.h"
|
|
||||||
#include "jsstr.h"
|
|
||||||
#include "jsprvtd.h"
|
|
||||||
#include "jstl.h"
|
|
||||||
|
|
||||||
typedef jschar UChar;
|
|
||||||
typedef JSLinearString UString;
|
|
||||||
|
|
||||||
struct JSRegExp;
|
|
||||||
struct JSContext;
|
|
||||||
|
|
||||||
enum JSRegExpIgnoreCaseOption { JSRegExpDoNotIgnoreCase, JSRegExpIgnoreCase };
|
|
||||||
enum JSRegExpMultilineOption { JSRegExpSingleLine, JSRegExpMultiline };
|
|
||||||
|
|
||||||
/* jsRegExpExecute error codes */
|
|
||||||
const int JSRegExpErrorNoMatch = -1;
|
|
||||||
const int JSRegExpErrorHitLimit = -2;
|
|
||||||
const int JSRegExpErrorInternal = -4;
|
|
||||||
|
|
||||||
JSRegExp* jsRegExpCompile(
|
|
||||||
const UChar* pattern, int patternLength,
|
|
||||||
JSRegExpIgnoreCaseOption, JSRegExpMultilineOption,
|
|
||||||
unsigned* numSubpatterns, int *error);
|
|
||||||
|
|
||||||
int jsRegExpExecute(JSContext *, const JSRegExp*,
|
|
||||||
const UChar* subject, int subjectLength, int startOffset,
|
|
||||||
int* offsetsVector, int offsetsVectorLength);
|
|
||||||
|
|
||||||
void jsRegExpFree(JSRegExp*);
|
|
||||||
|
|
||||||
#endif
|
|
|
@ -1,12 +0,0 @@
|
||||||
# Perl Compatible Regular Expressions - Qt4 build info
|
|
||||||
VPATH += $$PWD
|
|
||||||
INCLUDEPATH += $$PWD $$OUTPUT_DIR/JavaScriptCore/tmp
|
|
||||||
DEPENDPATH += $$PWD
|
|
||||||
|
|
||||||
SOURCES += \
|
|
||||||
pcre_compile.cpp \
|
|
||||||
pcre_exec.cpp \
|
|
||||||
pcre_tables.cpp \
|
|
||||||
pcre_ucp_searchfuncs.cpp \
|
|
||||||
pcre_xclass.cpp
|
|
||||||
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
|
@ -1,434 +0,0 @@
|
||||||
/* This is JavaScriptCore's variant of the PCRE library. While this library
|
|
||||||
started out as a copy of PCRE, many of the features of PCRE have been
|
|
||||||
removed. This library now supports only the regular expression features
|
|
||||||
required by the JavaScript language specification, and has only the functions
|
|
||||||
needed by JavaScriptCore and the rest of WebKit.
|
|
||||||
|
|
||||||
Originally written by Philip Hazel
|
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
|
||||||
Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer.
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
|
|
||||||
* Neither the name of the University of Cambridge nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived from
|
|
||||||
this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* This header contains definitions that are shared between the different
|
|
||||||
modules, but which are not relevant to the exported API. This includes some
|
|
||||||
functions whose names all begin with "_pcre_". */
|
|
||||||
|
|
||||||
#ifndef PCRE_INTERNAL_H
|
|
||||||
#define PCRE_INTERNAL_H
|
|
||||||
|
|
||||||
/* Bit definitions for entries in the pcre_ctypes table. */
|
|
||||||
|
|
||||||
#define ctype_space 0x01
|
|
||||||
#define ctype_xdigit 0x08
|
|
||||||
#define ctype_word 0x10 /* alphameric or '_' */
|
|
||||||
|
|
||||||
/* Offsets for the bitmap tables in pcre_cbits. Each table contains a set
|
|
||||||
of bits for a class map. Some classes are built by combining these tables. */
|
|
||||||
|
|
||||||
#define cbit_space 0 /* \s */
|
|
||||||
#define cbit_digit 32 /* \d */
|
|
||||||
#define cbit_word 64 /* \w */
|
|
||||||
#define cbit_length 96 /* Length of the cbits table */
|
|
||||||
|
|
||||||
/* Offsets of the various tables from the base tables pointer, and
|
|
||||||
total length. */
|
|
||||||
|
|
||||||
#define lcc_offset 0
|
|
||||||
#define fcc_offset 128
|
|
||||||
#define cbits_offset 256
|
|
||||||
#define ctypes_offset (cbits_offset + cbit_length)
|
|
||||||
#define tables_length (ctypes_offset + 128)
|
|
||||||
|
|
||||||
#ifndef DFTABLES
|
|
||||||
|
|
||||||
#include "pcre.h"
|
|
||||||
|
|
||||||
/* The value of LINK_SIZE determines the number of bytes used to store links as
|
|
||||||
offsets within the compiled regex. The default is 2, which allows for compiled
|
|
||||||
patterns up to 64K long. */
|
|
||||||
|
|
||||||
#define LINK_SIZE 3
|
|
||||||
|
|
||||||
/* Use a macro for debugging printing, 'cause that eliminates the use of #ifdef
|
|
||||||
inline, and there are *still* stupid compilers about that don't like indented
|
|
||||||
pre-processor statements, or at least there were when I first wrote this. After
|
|
||||||
all, it had only been about 10 years then... */
|
|
||||||
|
|
||||||
#ifdef DEBUG
|
|
||||||
#define DPRINTF(p) /*printf p; fflush(stdout);*/
|
|
||||||
#else
|
|
||||||
#define DPRINTF(p) /*nothing*/
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* PCRE keeps offsets in its compiled code as 2-byte quantities (always stored
|
|
||||||
in big-endian order) by default. These are used, for example, to link from the
|
|
||||||
start of a subpattern to its alternatives and its end. The use of 2 bytes per
|
|
||||||
offset limits the size of the compiled regex to around 64K, which is big enough
|
|
||||||
for almost everybody. However, I received a request for an even bigger limit.
|
|
||||||
For this reason, and also to make the code easier to maintain, the storing and
|
|
||||||
loading of offsets from the byte string is now handled by the functions that are
|
|
||||||
defined here. */
|
|
||||||
|
|
||||||
/* PCRE uses some other 2-byte quantities that do not change when the size of
|
|
||||||
offsets changes. These are used for repeat counts and for other things such as
|
|
||||||
capturing parenthesis numbers in back references. */
|
|
||||||
|
|
||||||
/* Stores value as two bytes at opcodePtr[0] and opcodePtr[1], most significant
   byte first. The JS_ASSERT states the expected range 0..0xFFFF; the macro's
   definition is not visible here (presumably a debug-only check -- confirm). */
static inline void put2ByteValue(unsigned char* opcodePtr, int value)
|
|
||||||
{
|
|
||||||
JS_ASSERT(value >= 0 && value <= 0xFFFF);
|
|
||||||
opcodePtr[0] = value >> 8;
|
|
||||||
opcodePtr[1] = value;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void put3ByteValue(unsigned char* opcodePtr, int value)
|
|
||||||
{
|
|
||||||
JS_ASSERT(value >= 0 && value <= 0xFFFFFF);
|
|
||||||
opcodePtr[0] = value >> 16;
|
|
||||||
opcodePtr[1] = value >> 8;
|
|
||||||
opcodePtr[2] = value;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Reads the 2-byte quantity stored high byte first at opcodePtr[0..1]. */
static inline int get2ByteValue(const unsigned char* opcodePtr)
{
    const int highByte = opcodePtr[0];
    const int lowByte = opcodePtr[1];
    return (highByte << 8) | lowByte;
}
|
|
||||||
|
|
||||||
/* Reads the 3-byte quantity stored high byte first at opcodePtr[0..2]. */
static inline int get3ByteValue(const unsigned char* opcodePtr)
{
    const int highByte = opcodePtr[0];
    const int middleByte = opcodePtr[1];
    const int lowByte = opcodePtr[2];
    return (highByte << 16) | (middleByte << 8) | lowByte;
}
|
|
||||||
|
|
||||||
static inline void put2ByteValueAndAdvance(unsigned char*& opcodePtr, int value)
|
|
||||||
{
|
|
||||||
put2ByteValue(opcodePtr, value);
|
|
||||||
opcodePtr += 2;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void put3ByteValueAndAdvance(unsigned char*& opcodePtr, int value)
|
|
||||||
{
|
|
||||||
put3ByteValue(opcodePtr, value);
|
|
||||||
opcodePtr += 3;
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Stores a link offset using the width selected by LINK_SIZE (2 or 3 bytes).
   Unlike putLinkValue(), a zero value is accepted. Any other LINK_SIZE is a
   compile-time error. */
static inline void putLinkValueAllowZero(unsigned char* opcodePtr, int value)
{
#if LINK_SIZE == 2
    put2ByteValue(opcodePtr, value);
#elif LINK_SIZE == 3
    put3ByteValue(opcodePtr, value);
#else
#   error LINK_SIZE not supported.
#endif
}
|
|
||||||
|
|
||||||
/* Loads a link offset using the width selected by LINK_SIZE (2 or 3 bytes).
   Unlike getLinkValue(), a zero result is not asserted against. Any other
   LINK_SIZE is a compile-time error. */
static inline int getLinkValueAllowZero(const unsigned char* opcodePtr)
{
#if LINK_SIZE == 2
    return get2ByteValue(opcodePtr);
#elif LINK_SIZE == 3
    return get3ByteValue(opcodePtr);
#else
#   error LINK_SIZE not supported.
#endif
}
|
|
||||||
|
|
||||||
/* Upper bound on pattern size. The replacement list is parenthesized so the
   macro expands as a single value inside any expression (e.g. x % MAX_PATTERN_SIZE). */
#define MAX_PATTERN_SIZE (4096 * 1024) // Derived by empirical testing of compile time in PCRE and WREC.
|
|
||||||
JS_STATIC_ASSERT(MAX_PATTERN_SIZE < (1 << (8 * LINK_SIZE)));
|
|
||||||
|
|
||||||
static inline void putLinkValue(unsigned char* opcodePtr, int value)
|
|
||||||
{
|
|
||||||
JS_ASSERT(value);
|
|
||||||
putLinkValueAllowZero(opcodePtr, value);
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline int getLinkValue(const unsigned char* opcodePtr)
|
|
||||||
{
|
|
||||||
int value = getLinkValueAllowZero(opcodePtr);
|
|
||||||
JS_ASSERT(value);
|
|
||||||
return value;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void putLinkValueAndAdvance(unsigned char*& opcodePtr, int value)
|
|
||||||
{
|
|
||||||
putLinkValue(opcodePtr, value);
|
|
||||||
opcodePtr += LINK_SIZE;
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void putLinkValueAllowZeroAndAdvance(unsigned char*& opcodePtr, int value)
|
|
||||||
{
|
|
||||||
putLinkValueAllowZero(opcodePtr, value);
|
|
||||||
opcodePtr += LINK_SIZE;
|
|
||||||
}
|
|
||||||
|
|
||||||
// FIXME: These are really more of a "compiled regexp state" than "regexp options"
|
|
||||||
enum RegExpOptions {
|
|
||||||
UseFirstByteOptimizationOption = 0x40000000, /* firstByte is set */
|
|
||||||
UseRequiredByteOptimizationOption = 0x20000000, /* reqByte is set */
|
|
||||||
UseMultiLineFirstByteOptimizationOption = 0x10000000, /* start after \n for multiline */
|
|
||||||
IsAnchoredOption = 0x02000000, /* can't use partial with this regex */
|
|
||||||
IgnoreCaseOption = 0x00000001,
|
|
||||||
MatchAcrossMultipleLinesOption = 0x00000002
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Flags added to firstByte or reqByte; a "non-literal" item is either a
|
|
||||||
variable-length repeat, or anything other than literal characters. */
|
|
||||||
|
|
||||||
#define REQ_IGNORE_CASE 0x0100 /* indicates should ignore case */
|
|
||||||
#define REQ_VARY 0x0200 /* reqByte followed non-literal item */
|
|
||||||
|
|
||||||
/* Miscellaneous definitions */
|
|
||||||
|
|
||||||
/* Flag bits and data types for the extended class (OP_XCLASS) for classes that
|
|
||||||
contain UTF-8 characters with values greater than 255. */
|
|
||||||
|
|
||||||
#define XCL_NOT 0x01 /* Flag: this is a negative class */
|
|
||||||
#define XCL_MAP 0x02 /* Flag: a 32-byte map is present */
|
|
||||||
|
|
||||||
#define XCL_END 0 /* Marks end of individual items */
|
|
||||||
#define XCL_SINGLE 1 /* Single item (one multibyte char) follows */
|
|
||||||
#define XCL_RANGE 2 /* A range (two multibyte chars) follows */
|
|
||||||
|
|
||||||
/* These are escaped items that aren't just an encoding of a particular data
|
|
||||||
value such as \n. They must have non-zero values, as check_escape() returns
|
|
||||||
their negation. Also, they must appear in the same order as in the opcode
|
|
||||||
definitions below, up to ESC_w. The final one must be
|
|
||||||
ESC_REF as subsequent values are used for \1, \2, \3, etc. There are two
|
|
||||||
tests in the code for an escape > ESC_b and <= ESC_w to
|
|
||||||
detect the types that may be repeated. These are the types that consume
|
|
||||||
characters. If any new escapes are put in between that don't consume a
|
|
||||||
character, that code will have to change. */
|
|
||||||
|
|
||||||
enum { ESC_B = 1, ESC_b, ESC_D, ESC_d, ESC_S, ESC_s, ESC_W, ESC_w, ESC_REF };
|
|
||||||
|
|
||||||
/* Opcode table: OP_BRA must be last, as all values >= it are used for brackets
|
|
||||||
that extract substrings. Starting from 1 (i.e. after OP_END), the values up to
|
|
||||||
OP_WORDCHAR must correspond in order to the list of escapes immediately above.
|
|
||||||
Note that whenever this list is updated, the two macro definitions that follow
|
|
||||||
must also be updated to match. */
|
|
||||||
|
|
||||||
#define FOR_EACH_OPCODE(macro) \
|
|
||||||
macro(END) \
|
|
||||||
\
|
|
||||||
, macro(NOT_WORD_BOUNDARY) \
|
|
||||||
, macro(WORD_BOUNDARY) \
|
|
||||||
, macro(NOT_DIGIT) \
|
|
||||||
, macro(DIGIT) \
|
|
||||||
, macro(NOT_WHITESPACE) \
|
|
||||||
, macro(WHITESPACE) \
|
|
||||||
, macro(NOT_WORDCHAR) \
|
|
||||||
, macro(WORDCHAR) \
|
|
||||||
\
|
|
||||||
, macro(NOT_NEWLINE) \
|
|
||||||
\
|
|
||||||
, macro(CIRC) \
|
|
||||||
, macro(DOLL) \
|
|
||||||
, macro(BOL) \
|
|
||||||
, macro(EOL) \
|
|
||||||
, macro(CHAR) \
|
|
||||||
, macro(CHAR_IGNORING_CASE) \
|
|
||||||
, macro(ASCII_CHAR) \
|
|
||||||
, macro(ASCII_LETTER_IGNORING_CASE) \
|
|
||||||
, macro(NOT) \
|
|
||||||
\
|
|
||||||
, macro(STAR) \
|
|
||||||
, macro(MINSTAR) \
|
|
||||||
, macro(PLUS) \
|
|
||||||
, macro(MINPLUS) \
|
|
||||||
, macro(QUERY) \
|
|
||||||
, macro(MINQUERY) \
|
|
||||||
, macro(UPTO) \
|
|
||||||
, macro(MINUPTO) \
|
|
||||||
, macro(EXACT) \
|
|
||||||
\
|
|
||||||
, macro(NOTSTAR) \
|
|
||||||
, macro(NOTMINSTAR) \
|
|
||||||
, macro(NOTPLUS) \
|
|
||||||
, macro(NOTMINPLUS) \
|
|
||||||
, macro(NOTQUERY) \
|
|
||||||
, macro(NOTMINQUERY) \
|
|
||||||
, macro(NOTUPTO) \
|
|
||||||
, macro(NOTMINUPTO) \
|
|
||||||
, macro(NOTEXACT) \
|
|
||||||
\
|
|
||||||
, macro(TYPESTAR) \
|
|
||||||
, macro(TYPEMINSTAR) \
|
|
||||||
, macro(TYPEPLUS) \
|
|
||||||
, macro(TYPEMINPLUS) \
|
|
||||||
, macro(TYPEQUERY) \
|
|
||||||
, macro(TYPEMINQUERY) \
|
|
||||||
, macro(TYPEUPTO) \
|
|
||||||
, macro(TYPEMINUPTO) \
|
|
||||||
, macro(TYPEEXACT) \
|
|
||||||
\
|
|
||||||
, macro(CRSTAR) \
|
|
||||||
, macro(CRMINSTAR) \
|
|
||||||
, macro(CRPLUS) \
|
|
||||||
, macro(CRMINPLUS) \
|
|
||||||
, macro(CRQUERY) \
|
|
||||||
, macro(CRMINQUERY) \
|
|
||||||
, macro(CRRANGE) \
|
|
||||||
, macro(CRMINRANGE) \
|
|
||||||
\
|
|
||||||
, macro(CLASS) \
|
|
||||||
, macro(NCLASS) \
|
|
||||||
, macro(XCLASS) \
|
|
||||||
\
|
|
||||||
, macro(REF) \
|
|
||||||
\
|
|
||||||
, macro(ALT) \
|
|
||||||
, macro(KET) \
|
|
||||||
, macro(KETRMAX) \
|
|
||||||
, macro(KETRMIN) \
|
|
||||||
\
|
|
||||||
, macro(ASSERT) \
|
|
||||||
, macro(ASSERT_NOT) \
|
|
||||||
\
|
|
||||||
, macro(BRAZERO) \
|
|
||||||
, macro(BRAMINZERO) \
|
|
||||||
, macro(BRANUMBER) \
|
|
||||||
, macro(BRA)
|
|
||||||
|
|
||||||
#define OPCODE_ENUM_VALUE(opcode) OP_##opcode
|
|
||||||
enum { FOR_EACH_OPCODE(OPCODE_ENUM_VALUE) };
|
|
||||||
|
|
||||||
/* WARNING WARNING WARNING: There is an implicit assumption in pcre.c and
|
|
||||||
study.c that all opcodes are less than 128 in value. This makes handling UTF-8
|
|
||||||
character sequences easier. */
|
|
||||||
|
|
||||||
/* The highest extraction number before we have to start using additional
|
|
||||||
bytes. (Originally PCRE didn't have support for extraction counts higher than
|
|
||||||
this number.) The value is limited by the number of opcodes left after OP_BRA,
|
|
||||||
i.e. 255 - OP_BRA. We actually set it a bit lower to leave room for additional
|
|
||||||
opcodes. */
|
|
||||||
|
|
||||||
/* FIXME: Note that OP_BRA + 100 is > 128, so the two comments above
|
|
||||||
are in conflict! */
|
|
||||||
|
|
||||||
#define EXTRACT_BASIC_MAX 100
|
|
||||||
|
|
||||||
/* The code vector runs on as long as necessary after the end. */
|
|
||||||
|
|
||||||
struct JSRegExp {
|
|
||||||
unsigned options;
|
|
||||||
|
|
||||||
unsigned short topBracket;
|
|
||||||
unsigned short topBackref;
|
|
||||||
|
|
||||||
unsigned short firstByte;
|
|
||||||
unsigned short reqByte;
|
|
||||||
};
|
|
||||||
|
|
||||||
/* Internal shared data tables. These are tables that are used by more than one
|
|
||||||
of the exported public functions. They have to be "external" in the C sense,
|
|
||||||
but are not part of the PCRE public API. The data for these tables is in the
|
|
||||||
pcre_tables.c module. */
|
|
||||||
|
|
||||||
#define jsc_pcre_utf8_table1_size 6
|
|
||||||
|
|
||||||
extern const int jsc_pcre_utf8_table1[6];
|
|
||||||
extern const int jsc_pcre_utf8_table2[6];
|
|
||||||
extern const int jsc_pcre_utf8_table3[6];
|
|
||||||
extern const unsigned char jsc_pcre_utf8_table4[0x40];
|
|
||||||
|
|
||||||
extern const unsigned char jsc_pcre_default_tables[tables_length];
|
|
||||||
|
|
||||||
static inline unsigned char toLowerCase(unsigned char c)
|
|
||||||
{
|
|
||||||
static const unsigned char* lowerCaseChars = jsc_pcre_default_tables + lcc_offset;
|
|
||||||
return lowerCaseChars[c];
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline unsigned char flipCase(unsigned char c)
|
|
||||||
{
|
|
||||||
static const unsigned char* flippedCaseChars = jsc_pcre_default_tables + fcc_offset;
|
|
||||||
return flippedCaseChars[c];
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline unsigned char classBitmapForChar(unsigned char c)
|
|
||||||
{
|
|
||||||
static const unsigned char* charClassBitmaps = jsc_pcre_default_tables + cbits_offset;
|
|
||||||
return charClassBitmaps[c];
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline unsigned char charTypeForChar(unsigned char c)
|
|
||||||
{
|
|
||||||
const unsigned char* charTypeMap = jsc_pcre_default_tables + ctypes_offset;
|
|
||||||
return charTypeMap[c];
|
|
||||||
}
|
|
||||||
|
|
||||||
/* True when `c` is below 128 and its character-type byte has the ctype_word
   bit set. Characters at or above 128 are never word characters here. */
static inline bool isWordChar(UChar c)
{
    if (c >= 128)
        return false;
    return (charTypeForChar(c) & ctype_word) != 0;
}
|
|
||||||
|
|
||||||
/* True for U+00A0, and for any character below 128 whose character-type byte
   has the ctype_space bit set. */
static inline bool isSpaceChar(UChar c)
{
    if (c == 0x00A0)
        return true;
    if (c >= 128)
        return false;
    return (charTypeForChar(c) & ctype_space) != 0;
}
|
|
||||||
|
|
||||||
/* True when `nl` is one of the four recognised line terminators:
   0x000A, 0x000D, 0x2028 or 0x2029. */
static inline bool isNewline(UChar nl)
{
    switch (nl) {
    case 0xA:
    case 0xD:
    case 0x2028:
    case 0x2029:
        return true;
    default:
        return false;
    }
}
|
|
||||||
|
|
||||||
static inline bool isBracketStartOpcode(unsigned char opcode)
|
|
||||||
{
|
|
||||||
if (opcode >= OP_BRA)
|
|
||||||
return true;
|
|
||||||
switch (opcode) {
|
|
||||||
case OP_ASSERT:
|
|
||||||
case OP_ASSERT_NOT:
|
|
||||||
return true;
|
|
||||||
default:
|
|
||||||
return false;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
static inline void advanceToEndOfBracket(const unsigned char*& opcodePtr)
|
|
||||||
{
|
|
||||||
JS_ASSERT(isBracketStartOpcode(*opcodePtr) || *opcodePtr == OP_ALT);
|
|
||||||
do
|
|
||||||
opcodePtr += getLinkValue(opcodePtr + 1);
|
|
||||||
while (*opcodePtr == OP_ALT);
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Internal shared functions. These are functions that are used in more
|
|
||||||
than one of the source files. They have to have external linkage, but
|
|
||||||
are not part of the public API and so not exported from the library. */
|
|
||||||
|
|
||||||
extern int jsc_pcre_ucp_othercase(unsigned);
|
|
||||||
extern bool jsc_pcre_xclass(int, const unsigned char*);
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
#endif
|
|
||||||
|
|
||||||
/* End of pcre_internal.h */
|
|
|
@ -1,71 +0,0 @@
|
||||||
/* This is JavaScriptCore's variant of the PCRE library. While this library
|
|
||||||
started out as a copy of PCRE, many of the features of PCRE have been
|
|
||||||
removed. This library now supports only the regular expression features
|
|
||||||
required by the JavaScript language specification, and has only the functions
|
|
||||||
needed by JavaScriptCore and the rest of WebKit.
|
|
||||||
|
|
||||||
Originally written by Philip Hazel
|
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
|
||||||
Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer.
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
|
|
||||||
* Neither the name of the University of Cambridge nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived from
|
|
||||||
this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* This module contains some fixed tables that are used by more than one of the
|
|
||||||
PCRE code modules. */
|
|
||||||
|
|
||||||
#include "pcre_internal.h"
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Tables for UTF-8 support *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* These are the breakpoints for different numbers of bytes in a UTF-8
|
|
||||||
character. */
|
|
||||||
|
|
||||||
const int jsc_pcre_utf8_table1[6] =
|
|
||||||
{ 0x7f, 0x7ff, 0xffff, 0x1fffff, 0x3ffffff, 0x7fffffff};
|
|
||||||
|
|
||||||
/* These are the indicator bits and the mask for the data bits to set in the
|
|
||||||
first byte of a character, indexed by the number of additional bytes. */
|
|
||||||
|
|
||||||
const int jsc_pcre_utf8_table2[6] = { 0, 0xc0, 0xe0, 0xf0, 0xf8, 0xfc};
|
|
||||||
const int jsc_pcre_utf8_table3[6] = { 0xff, 0x1f, 0x0f, 0x07, 0x03, 0x01};
|
|
||||||
|
|
||||||
/* Table of the number of extra characters, indexed by the first character
|
|
||||||
masked with 0x3f. The highest number for a valid UTF-8 character is in fact
|
|
||||||
0x3d. */
|
|
||||||
|
|
||||||
const unsigned char jsc_pcre_utf8_table4[0x40] = {
|
|
||||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
||||||
1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,1,
|
|
||||||
2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,2,
|
|
||||||
3,3,3,3,3,3,3,3,4,4,4,4,5,5,5,5 };
|
|
||||||
|
|
||||||
#include "chartables.c"
|
|
|
@ -1,98 +0,0 @@
|
||||||
/* This is JavaScriptCore's variant of the PCRE library. While this library
|
|
||||||
started out as a copy of PCRE, many of the features of PCRE have been
|
|
||||||
removed. This library now supports only the regular expression features
|
|
||||||
required by the JavaScript language specification, and has only the functions
|
|
||||||
needed by JavaScriptCore and the rest of WebKit.
|
|
||||||
|
|
||||||
Originally written by Philip Hazel
|
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
|
||||||
Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer.
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
|
|
||||||
* Neither the name of the University of Cambridge nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived from
|
|
||||||
this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
|
|
||||||
|
|
||||||
/* This module contains code for searching the table of Unicode character
|
|
||||||
properties. */
|
|
||||||
|
|
||||||
#include "pcre_internal.h"
|
|
||||||
|
|
||||||
#include "ucpinternal.h" /* Internal table details */
|
|
||||||
#include "ucptable.cpp" /* The table itself */
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Search table and return other case *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* If the given character is a letter, and there is another case for the
|
|
||||||
letter, return the other case. Otherwise, return -1.
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
c the character value
|
|
||||||
|
|
||||||
Returns: the other case or -1 if none
|
|
||||||
*/
|
|
||||||
|
|
||||||
int jsc_pcre_ucp_othercase(unsigned c)
|
|
||||||
{
|
|
||||||
int bot = 0;
|
|
||||||
int top = sizeof(ucp_table) / sizeof(cnode);
|
|
||||||
int mid;
|
|
||||||
|
|
||||||
/* The table is searched using a binary chop. You might think that using
|
|
||||||
intermediate variables to hold some of the common expressions would speed
|
|
||||||
things up, but tests with gcc 3.4.4 on Linux showed that, on the contrary, it
|
|
||||||
makes things a lot slower. */
|
|
||||||
|
|
||||||
for (;;) {
|
|
||||||
if (top <= bot)
|
|
||||||
return -1;
|
|
||||||
mid = (bot + top) >> 1;
|
|
||||||
if (c == (ucp_table[mid].f0 & f0_charmask))
|
|
||||||
break;
|
|
||||||
if (c < (ucp_table[mid].f0 & f0_charmask))
|
|
||||||
top = mid;
|
|
||||||
else {
|
|
||||||
if ((ucp_table[mid].f0 & f0_rangeflag) && (c <= (ucp_table[mid].f0 & f0_charmask) + (ucp_table[mid].f1 & f1_rangemask)))
|
|
||||||
break;
|
|
||||||
bot = mid + 1;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
/* Found an entry in the table. Return -1 for a range entry. Otherwise return
|
|
||||||
the other case if there is one, else -1. */
|
|
||||||
|
|
||||||
if (ucp_table[mid].f0 & f0_rangeflag)
|
|
||||||
return -1;
|
|
||||||
|
|
||||||
int offset = ucp_table[mid].f1 & f1_casemask;
|
|
||||||
if (offset & f1_caseneg)
|
|
||||||
offset |= f1_caseneg;
|
|
||||||
return !offset ? -1 : c + offset;
|
|
||||||
}
|
|
|
@ -1,114 +0,0 @@
|
||||||
/* This is JavaScriptCore's variant of the PCRE library. While this library
|
|
||||||
started out as a copy of PCRE, many of the features of PCRE have been
|
|
||||||
removed. This library now supports only the regular expression features
|
|
||||||
required by the JavaScript language specification, and has only the functions
|
|
||||||
needed by JavaScriptCore and the rest of WebKit.
|
|
||||||
|
|
||||||
Originally written by Philip Hazel
|
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
|
||||||
Copyright (C) 2002, 2004, 2006, 2007, 2008, 2009 Apple Inc. All rights reserved.
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer.
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
|
|
||||||
* Neither the name of the University of Cambridge nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived from
|
|
||||||
this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* This module contains an internal function that is used to match an extended
|
|
||||||
class (one that contains characters whose values are > 255). */
|
|
||||||
|
|
||||||
#include "pcre_internal.h"
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Match character against an XCLASS *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* This function is called to match a character against an extended class that
|
|
||||||
might contain values > 255.
|
|
||||||
|
|
||||||
Arguments:
|
|
||||||
c the character
|
|
||||||
data points to the flag byte of the XCLASS data
|
|
||||||
|
|
||||||
Returns: true if character matches, else false
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* Get the next UTF-8 character, advancing the pointer. This is called when we
|
|
||||||
know we are in UTF-8 mode. */
|
|
||||||
|
|
||||||
static inline void getUTF8CharAndAdvancePointer(int& c, const unsigned char*& subjectPtr)
|
|
||||||
{
|
|
||||||
c = *subjectPtr++;
|
|
||||||
if ((c & 0xc0) == 0xc0) {
|
|
||||||
int gcaa = jsc_pcre_utf8_table4[c & 0x3f]; /* Number of additional bytes */
|
|
||||||
int gcss = 6 * gcaa;
|
|
||||||
c = (c & jsc_pcre_utf8_table3[gcaa]) << gcss;
|
|
||||||
while (gcaa-- > 0) {
|
|
||||||
gcss -= 6;
|
|
||||||
c |= (*subjectPtr++ & 0x3f) << gcss;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
bool jsc_pcre_xclass(int c, const unsigned char* data)
|
|
||||||
{
|
|
||||||
bool negated = (*data & XCL_NOT);
|
|
||||||
|
|
||||||
/* Character values < 256 are matched against a bitmap, if one is present. If
|
|
||||||
not, we still carry on, because there may be ranges that start below 256 in the
|
|
||||||
additional data. */
|
|
||||||
|
|
||||||
if (c < 256) {
|
|
||||||
if ((*data & XCL_MAP) != 0 && (data[1 + c/8] & (1 << (c&7))) != 0)
|
|
||||||
return !negated; /* char found */
|
|
||||||
}
|
|
||||||
|
|
||||||
/* First skip the bit map if present. Then match against the list of Unicode
|
|
||||||
properties or large chars or ranges that end with a large char. We won't ever
|
|
||||||
encounter XCL_PROP or XCL_NOTPROP when UCP support is not compiled. */
|
|
||||||
|
|
||||||
if ((*data++ & XCL_MAP) != 0)
|
|
||||||
data += 32;
|
|
||||||
|
|
||||||
int t;
|
|
||||||
while ((t = *data++) != XCL_END) {
|
|
||||||
if (t == XCL_SINGLE) {
|
|
||||||
int x;
|
|
||||||
getUTF8CharAndAdvancePointer(x, data);
|
|
||||||
if (c == x)
|
|
||||||
return !negated;
|
|
||||||
}
|
|
||||||
else if (t == XCL_RANGE) {
|
|
||||||
int x, y;
|
|
||||||
getUTF8CharAndAdvancePointer(x, data);
|
|
||||||
getUTF8CharAndAdvancePointer(y, data);
|
|
||||||
if (c >= x && c <= y)
|
|
||||||
return !negated;
|
|
||||||
}
|
|
||||||
}
|
|
||||||
|
|
||||||
return negated; /* char did not match */
|
|
||||||
}
|
|
|
@ -1,126 +0,0 @@
|
||||||
/* This is JavaScriptCore's variant of the PCRE library. While this library
|
|
||||||
started out as a copy of PCRE, many of the features of PCRE have been
|
|
||||||
removed. This library now supports only the regular expression features
|
|
||||||
required by the JavaScript language specification, and has only the functions
|
|
||||||
needed by JavaScriptCore and the rest of WebKit.
|
|
||||||
|
|
||||||
Originally written by Philip Hazel
|
|
||||||
Copyright (c) 1997-2006 University of Cambridge
|
|
||||||
Copyright (C) 2002, 2004, 2006, 2007 Apple Inc. All rights reserved.
|
|
||||||
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
Redistribution and use in source and binary forms, with or without
|
|
||||||
modification, are permitted provided that the following conditions are met:
|
|
||||||
|
|
||||||
* Redistributions of source code must retain the above copyright notice,
|
|
||||||
this list of conditions and the following disclaimer.
|
|
||||||
|
|
||||||
* Redistributions in binary form must reproduce the above copyright
|
|
||||||
notice, this list of conditions and the following disclaimer in the
|
|
||||||
documentation and/or other materials provided with the distribution.
|
|
||||||
|
|
||||||
* Neither the name of the University of Cambridge nor the names of its
|
|
||||||
contributors may be used to endorse or promote products derived from
|
|
||||||
this software without specific prior written permission.
|
|
||||||
|
|
||||||
THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS "AS IS"
|
|
||||||
AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT LIMITED TO, THE
|
|
||||||
IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR A PARTICULAR PURPOSE
|
|
||||||
ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT OWNER OR CONTRIBUTORS BE
|
|
||||||
LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL, SPECIAL, EXEMPLARY, OR
|
|
||||||
CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT LIMITED TO, PROCUREMENT OF
|
|
||||||
SUBSTITUTE GOODS OR SERVICES; LOSS OF USE, DATA, OR PROFITS; OR BUSINESS
|
|
||||||
INTERRUPTION) HOWEVER CAUSED AND ON ANY THEORY OF LIABILITY, WHETHER IN
|
|
||||||
CONTRACT, STRICT LIABILITY, OR TORT (INCLUDING NEGLIGENCE OR OTHERWISE)
|
|
||||||
ARISING IN ANY WAY OUT OF THE USE OF THIS SOFTWARE, EVEN IF ADVISED OF THE
|
|
||||||
POSSIBILITY OF SUCH DAMAGE.
|
|
||||||
-----------------------------------------------------------------------------
|
|
||||||
*/
|
|
||||||
|
|
||||||
/*************************************************
|
|
||||||
* Unicode Property Table handler *
|
|
||||||
*************************************************/
|
|
||||||
|
|
||||||
/* Internal header file defining the layout of the bits in each pair of 32-bit
|
|
||||||
words that form a data item in the table. */
|
|
||||||
|
|
||||||
typedef struct cnode {
|
|
||||||
unsigned f0;
|
|
||||||
unsigned f1;
|
|
||||||
} cnode;
|
|
||||||
|
|
||||||
/* Things for the f0 field */
|
|
||||||
|
|
||||||
#define f0_scriptmask 0xff000000 /* Mask for script field */
|
|
||||||
#define f0_scriptshift 24 /* Shift for script value */
|
|
||||||
#define f0_rangeflag 0x00f00000 /* Flag for a range item. NOTE(review): the layout notes in this file give the range bit as 0x00800000 with 0x00600000 spare, and 0x00100000 overlaps f0_charmask -- confirm the intended mask */
|
|
||||||
#define f0_charmask 0x001fffff /* Mask for code point value */
|
|
||||||
|
|
||||||
/* Things for the f1 field */
|
|
||||||
|
|
||||||
#define f1_typemask 0xfc000000 /* Mask for char type field */
|
|
||||||
#define f1_typeshift 26 /* Shift for the type field */
|
|
||||||
#define f1_rangemask 0x0000ffff /* Mask for a range offset */
|
|
||||||
#define f1_casemask 0x0000ffff /* Mask for a case offset */
|
|
||||||
#define f1_caseneg 0xffff8000 /* Bits for negation */
|
|
||||||
|
|
||||||
/* The data consists of a vector of structures of type cnode. The two unsigned
|
|
||||||
32-bit integers are used as follows:
|
|
||||||
|
|
||||||
(f0) (1) The most significant byte holds the script number. The numbers are
|
|
||||||
defined by the enum in ucp.h.
|
|
||||||
|
|
||||||
(2) The 0x00800000 bit is set if this entry defines a range of characters.
|
|
||||||
It is not set if this entry defines a single character.
|
|
||||||
|
|
||||||
(3) The 0x00600000 bits are spare.
|
|
||||||
|
|
||||||
(4) The 0x001fffff bits contain the code point. No Unicode code point will
|
|
||||||
ever be greater than 0x0010ffff, so this should be OK for ever.
|
|
||||||
|
|
||||||
(f1) (1) The 0xfc000000 bits contain the character type number. The numbers are
|
|
||||||
defined by an enum in ucp.h.
|
|
||||||
|
|
||||||
(2) The 0x03ff0000 bits are spare.
|
|
||||||
|
|
||||||
(3) The 0x0000ffff bits contain EITHER the unsigned offset to the top of
|
|
||||||
range if this entry defines a range, OR the *signed* offset to the
|
|
||||||
character's "other case" partner if this entry defines a single
|
|
||||||
character. There is no partner if the value is zero.
|
|
||||||
|
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
| script (8) |.|.|.| codepoint (21) || type (6) |.|.| spare (8) | offset (16) |
|
|
||||||
-------------------------------------------------------------------------------
|
|
||||||
| | | | |
|
|
||||||
| | |-> spare | |-> spare
|
|
||||||
| | |
|
|
||||||
| |-> spare |-> spare
|
|
||||||
|
|
|
||||||
|-> range flag
|
|
||||||
|
|
||||||
The upper/lower casing information is set only for characters that come in
|
|
||||||
pairs. The non-one-to-one mappings in the Unicode data are ignored.
|
|
||||||
|
|
||||||
When searching the data, proceed as follows:
|
|
||||||
|
|
||||||
(1) Set up for a binary chop search.
|
|
||||||
|
|
||||||
(2) If the top is not greater than the bottom, the character is not in the
|
|
||||||
table. Its type must therefore be "Cn" ("Undefined").
|
|
||||||
|
|
||||||
(3) Find the middle vector element.
|
|
||||||
|
|
||||||
(4) Extract the code point and compare. If equal, we are done.
|
|
||||||
|
|
||||||
(5) If the test character is smaller, set the top to the current point, and
|
|
||||||
goto (2).
|
|
||||||
|
|
||||||
(6) If the current entry defines a range, compute the last character by adding
|
|
||||||
the offset, and see if the test character is within the range. If it is,
|
|
||||||
we are done.
|
|
||||||
|
|
||||||
(7) Otherwise, set the bottom to one element past the current point and goto
|
|
||||||
(2).
|
|
||||||
*/
|
|
||||||
|
|
||||||
/* End of ucpinternal.h */
|
|
Разница между файлами не показана из-за своего большого размера
Загрузить разницу
Загрузка…
Ссылка в новой задаче