1998-01-16 15:13:05 +03:00
|
|
|
|
/* Extended regular expression matching and search library.
|
1999-08-13 09:45:20 +04:00
|
|
|
|
Copyright (C) 1993, 94, 95, 96, 97, 98 Free Software Foundation, Inc.
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
The GNU C Library is free software; you can redistribute it and/or
|
|
|
|
|
modify it under the terms of the GNU Library General Public License as
|
|
|
|
|
published by the Free Software Foundation; either version 2 of the
|
|
|
|
|
License, or (at your option) any later version.
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
The GNU C Library is distributed in the hope that it will be useful,
|
1998-01-16 15:13:05 +03:00
|
|
|
|
but WITHOUT ANY WARRANTY; without even the implied warranty of
|
1999-08-13 09:45:20 +04:00
|
|
|
|
MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the GNU
|
|
|
|
|
Library General Public License for more details.
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
You should have received a copy of the GNU Library General Public
|
2001-07-11 23:25:52 +04:00
|
|
|
|
License along with the GNU C Library; see the file LGPL. If not,
|
1999-08-13 09:45:20 +04:00
|
|
|
|
write to the Free Software Foundation, Inc., 59 Temple Place - Suite 330,
|
|
|
|
|
Boston, MA 02111-1307, USA. */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
/* Multi-byte extension added May, 1993 by t^2 (Takahiro Tanimoto)
|
|
|
|
|
Last change: May 21, 1993 by t^2 */
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* removed gapped buffer support, multiple syntax support by matz <matz@nts.co.jp> */
|
|
|
|
|
/* Perl5 extension added by matz <matz@caelum.co.jp> */
|
|
|
|
|
/* UTF-8 extension added Jan 16 1999 by Yoshida Masato <yoshidam@tau.bekkoame.ne.jp> */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
#include "config.h"
|
2000-05-12 13:07:57 +04:00
|
|
|
|
|
|
|
|
|
#ifdef HAVE_STRING_H
|
|
|
|
|
# include <string.h>
|
|
|
|
|
#else
|
|
|
|
|
# include <strings.h>
|
1999-08-13 09:45:20 +04:00
|
|
|
|
#endif
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
/* We write fatal error messages on standard error. */
|
|
|
|
|
#include <stdio.h>
|
|
|
|
|
|
|
|
|
|
/* isalpha(3) etc. are used for the character classes. */
|
|
|
|
|
#include <ctype.h>
|
|
|
|
|
#include <sys/types.h>
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
#ifndef PARAMS
|
|
|
|
|
# if defined __GNUC__ || (defined __STDC__ && __STDC__)
|
|
|
|
|
# define PARAMS(args) args
|
|
|
|
|
# else
|
|
|
|
|
# define PARAMS(args) ()
|
|
|
|
|
# endif /* GCC. */
|
|
|
|
|
#endif /* Not PARAMS. */
|
|
|
|
|
|
|
|
|
|
#if defined(STDC_HEADERS)
|
|
|
|
|
# include <stddef.h>
|
1998-01-16 15:13:05 +03:00
|
|
|
|
#else
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* We need this for `regex.h', and perhaps for the Emacs include files. */
|
|
|
|
|
# include <sys/types.h>
|
1998-01-16 15:13:05 +03:00
|
|
|
|
#endif
|
|
|
|
|
|
2001-07-19 06:46:28 +04:00
|
|
|
|
#if !defined(__STDC__) && !defined(_MSC_VER)
|
1999-08-13 09:45:20 +04:00
|
|
|
|
# define volatile
|
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#ifdef HAVE_PROTOTYPES
|
|
|
|
|
# define _(args) args
|
|
|
|
|
#else
|
|
|
|
|
# define _(args) ()
|
|
|
|
|
#endif
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
2000-05-12 13:07:57 +04:00
|
|
|
|
#ifdef RUBY_PLATFORM
|
2000-09-25 21:51:29 +04:00
|
|
|
|
#include "defines.h"
|
|
|
|
|
|
2000-05-12 13:07:57 +04:00
|
|
|
|
# define RUBY
|
|
|
|
|
extern int rb_prohibit_interrupt;
|
|
|
|
|
extern int rb_trap_pending;
|
|
|
|
|
void rb_trap_exec _((void));
|
|
|
|
|
|
2002-04-25 17:57:01 +04:00
|
|
|
|
/* Poll for a pending trap: when interrupts are not prohibited and
   rb_trap_pending is set, run rb_trap_exec(). */
# define CHECK_INTS do {\
    if (!rb_prohibit_interrupt && rb_trap_pending)\
        rb_trap_exec();\
} while (0)
|
1999-08-13 09:45:20 +04:00
|
|
|
|
#endif
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
|
/* Make alloca work the best possible way. */
|
|
|
|
|
#ifdef __GNUC__
|
1999-01-20 07:59:39 +03:00
|
|
|
|
# ifndef atarist
|
|
|
|
|
# ifndef alloca
|
|
|
|
|
# define alloca __builtin_alloca
|
|
|
|
|
# endif
|
|
|
|
|
# endif /* atarist */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
#else
|
1999-01-20 07:59:39 +03:00
|
|
|
|
# if defined(HAVE_ALLOCA_H)
|
|
|
|
|
# include <alloca.h>
|
2000-09-27 07:43:15 +04:00
|
|
|
|
# elif !defined(alloca)
|
1998-01-16 15:13:05 +03:00
|
|
|
|
char *alloca();
|
1999-01-20 07:59:39 +03:00
|
|
|
|
# endif
|
1998-01-16 15:13:05 +03:00
|
|
|
|
#endif /* __GNUC__ */
|
|
|
|
|
|
|
|
|
|
#ifdef _AIX
|
|
|
|
|
#pragma alloca
|
|
|
|
|
#endif
|
|
|
|
|
|
1998-01-16 15:19:22 +03:00
|
|
|
|
#ifdef HAVE_STRING_H
|
|
|
|
|
# include <string.h>
|
|
|
|
|
#else
|
|
|
|
|
# include <strings.h>
|
|
|
|
|
#endif
|
|
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
|
#ifdef C_ALLOCA
|
1998-01-16 15:13:05 +03:00
|
|
|
|
#define FREE_VARIABLES() alloca(0)
|
1999-01-20 07:59:39 +03:00
|
|
|
|
#else
|
|
|
|
|
#define FREE_VARIABLES()
|
|
|
|
|
#endif
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
2000-09-25 21:51:29 +04:00
|
|
|
|
/* Leave the enclosing void function, releasing temporaries first.
   STACKB is passed to xfree() only when it differs from `stacka';
   the `stacka' buffer itself is never freed here.  Expects `stacka'
   to be in scope at the point of use. */
#define FREE_AND_RETURN_VOID(stackb) do { \
    FREE_VARIABLES(); \
    if ((stackb) != stacka) xfree(stackb); \
    return; \
} while(0)

/* Same as FREE_AND_RETURN_VOID, but return VAL from the enclosing
   function. */
#define FREE_AND_RETURN(stackb,val) do { \
    FREE_VARIABLES(); \
    if ((stackb) != stacka) xfree(stackb); \
    return(val); \
} while(0)
|
|
|
|
|
|
2000-09-26 11:07:13 +04:00
|
|
|
|
/* Double the capacity of the stack described by the caller's
   `stackb' (base), `stackp' (current position) and `stacke' (end),
   whose elements have type TYPE.

   While the stack still lives in `stacka' a fresh block is obtained
   with xmalloc() and the old contents are copied over; afterwards the
   block is grown with xrealloc().  The position offset is captured
   before reallocating, so the old base pointer is never used once it
   may have been released. */
#define DOUBLE_STACK(type) do { \
    type *stackx; \
    unsigned int xlen = stacke - stackb; \
    unsigned int xoff = stackp - stackb; \
    if (stackb == stacka) { \
        stackx = (type*)xmalloc(2 * xlen * sizeof(type)); \
        memcpy(stackx, stackb, xlen * sizeof(type)); \
    } \
    else { \
        stackx = (type*)xrealloc(stackb, 2 * xlen * sizeof(type)); \
    } \
    /* Rearrange the pointers. */ \
    stackp = stackx + xoff; \
    stackb = stackx; \
    stacke = stackb + 2 * xlen; \
} while (0)
|
2000-09-25 21:51:29 +04:00
|
|
|
|
|
2000-09-26 11:07:13 +04:00
|
|
|
|
/* Typed allocation helpers for N elements of type T:
   RE_TALLOC uses alloca(), TMALLOC uses xmalloc(), and TREALLOC
   resizes S with xrealloc() and assigns the result back to S. */
#define RE_TALLOC(n,t)  ((t*)alloca(sizeof(t) * (n)))
#define TMALLOC(n,t)    ((t*)xmalloc(sizeof(t) * (n)))
#define TREALLOC(s,n,t) (s = ((t*)xrealloc(s, sizeof(t) * (n))))
|
|
|
|
|
|
2000-09-26 11:07:13 +04:00
|
|
|
|
/* Grow the failure stack (elements are unsigned char pointers). */
#define EXPAND_FAIL_STACK() DOUBLE_STACK(unsigned char*)

/* Make room on the failure stack: when the space left between
   `stackp' and `stacke' is N entries or fewer, roughly double the
   stack once.  No upper limit is enforced here; a former
   re_max_failures check is disabled. */
#define ENSURE_FAIL_STACK(n) \
    do { \
        if (stacke - stackp <= (n)) { \
            EXPAND_FAIL_STACK(); \
        } \
    } while (0)
|
|
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
|
/* Get the interface, including the syntax bits. */
|
|
|
|
|
#include "regex.h"
|
|
|
|
|
|
1998-01-16 15:19:22 +03:00
|
|
|
|
/* Subroutines for re_compile_pattern. */
|
1999-08-13 09:45:20 +04:00
|
|
|
|
static void store_jump _((char*, int, char*));
|
|
|
|
|
static void insert_jump _((int, char*, char*, char*));
|
|
|
|
|
static void store_jump_n _((char*, int, char*, unsigned));
|
|
|
|
|
static void insert_jump_n _((int, char*, char*, char*, unsigned));
|
|
|
|
|
static void insert_op _((int, char*, char*));
|
|
|
|
|
static void insert_op_2 _((int, char*, char*, int, int));
|
|
|
|
|
static int memcmp_translate _((unsigned char*, unsigned char*, int));
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
/* Define the syntax stuff, so we can do the \<, \>, etc. */
|
|
|
|
|
|
|
|
|
|
/* This must be nonzero for the wordchar and notwordchar pattern
|
1999-01-20 07:59:39 +03:00
|
|
|
|
commands in re_match. */
|
|
|
|
|
#define Sword 1
|
|
|
|
|
#define Sword2 2
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
#define SYNTAX(c) re_syntax_table[c]
|
|
|
|
|
|
|
|
|
|
static char re_syntax_table[256];
|
1999-08-13 09:45:20 +04:00
|
|
|
|
static void init_syntax_once _((void));
|
|
|
|
|
static const unsigned char *translate = 0;
|
|
|
|
|
static void init_regs _((struct re_registers*, unsigned int));
|
2000-05-01 13:42:38 +04:00
|
|
|
|
static void bm_init_skip _((int *, unsigned char*, int, const unsigned char*));
|
1999-01-20 07:59:39 +03:00
|
|
|
|
static int current_mbctype = MBCTYPE_ASCII;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
#undef P
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
#ifdef RUBY
|
1998-01-16 15:13:05 +03:00
|
|
|
|
#include "util.h"
|
2003-07-24 22:33:50 +04:00
|
|
|
|
void rb_warn _((char*));
|
* regex.c (re_compile_pattern): fix previous change.
* instruby.rb, ext/extmk.rb, ext/tk/lib/tk.rb, lib/benchmark.rb,
lib/cgi.rb, lib/debug.rb, lib/getoptlong.rb, lib/jcode.rb,
lib/optparse.rb, lib/time.rb, lib/date/format.rb,
lib/irb/ruby-lex.rb: escape `[', `]', `-' in chracter class in
regexp to avoid warning.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3595 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2003-03-21 18:13:23 +03:00
|
|
|
|
# define re_warning(x) rb_warn(x)
|
2003-03-21 17:37:32 +03:00
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
#ifndef re_warning
|
|
|
|
|
# define re_warning(x)
|
1999-08-13 09:45:20 +04:00
|
|
|
|
#endif
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
init_syntax_once()
|
|
|
|
|
{
|
|
|
|
|
register int c;
|
|
|
|
|
static int done = 0;
|
|
|
|
|
|
|
|
|
|
if (done)
|
|
|
|
|
return;
|
|
|
|
|
|
|
|
|
|
memset(re_syntax_table, 0, sizeof re_syntax_table);
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
for (c=0; c<=0x7f; c++)
|
1999-01-20 07:59:39 +03:00
|
|
|
|
if (isalnum(c))
|
|
|
|
|
re_syntax_table[c] = Sword;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
re_syntax_table['_'] = Sword;
|
|
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
|
for (c=0x80; c<=0xff; c++)
|
|
|
|
|
if (isalnum(c))
|
|
|
|
|
re_syntax_table[c] = Sword2;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
done = 1;
|
|
|
|
|
}
|
|
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
|
void
|
|
|
|
|
re_set_casetable(table)
|
1999-08-13 09:45:20 +04:00
|
|
|
|
const char *table;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
{
|
1999-08-13 09:45:20 +04:00
|
|
|
|
translate = (const unsigned char*)table;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Jim Meyering writes:
|
|
|
|
|
|
|
|
|
|
"... Some ctype macros are valid only for character codes that
|
|
|
|
|
isascii says are ASCII (SGI's IRIX-4.0.5 is one such system --when
|
|
|
|
|
using /bin/cc or gcc but without giving an ansi option). So, all
|
|
|
|
|
ctype uses should be through macros like ISPRINT... If
|
|
|
|
|
STDC_HEADERS is defined, then autoconf has verified that the ctype
|
|
|
|
|
macros don't need to be guarded with references to isascii. ...
|
|
|
|
|
Defining isascii to 1 should let any compiler worth its salt
|
1999-08-13 09:45:20 +04:00
|
|
|
|
eliminate the && through constant folding."
|
|
|
|
|
Solaris defines some of these symbols so we must undefine them first. */
|
|
|
|
|
|
|
|
|
|
/* ISASCII(c) guards the ctype wrappers.  It calls isascii() only when
   STDC_HEADERS is not defined and isascii is available (as a macro or
   per HAVE_ISASCII); otherwise it is the constant 1. */
#undef ISASCII
#if !defined STDC_HEADERS && (defined isascii || defined HAVE_ISASCII)
# define ISASCII(c) isascii(c)
#else
# define ISASCII(c) 1
#endif
|
|
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
|
/* Character-class predicates.  Each one consults ISASCII() before
   calling the matching <ctype.h> routine. */

/* Without an isblank macro, blank means space or tab. */
#ifndef isblank
# define ISBLANK(c) ((c) == ' ' || (c) == '\t')
#else
# define ISBLANK(c) (ISASCII(c) && isblank(c))
#endif

/* Without an isgraph macro, graph means printable and not space. */
#ifndef isgraph
# define ISGRAPH(c) (ISASCII(c) && isprint(c) && !isspace(c))
#else
# define ISGRAPH(c) (ISASCII(c) && isgraph(c))
#endif

#undef ISPRINT
#define ISPRINT(c)  (ISASCII(c) && isprint(c))
#define ISDIGIT(c)  (ISASCII(c) && isdigit(c))
#define ISALNUM(c)  (ISASCII(c) && isalnum(c))
#define ISALPHA(c)  (ISASCII(c) && isalpha(c))
#define ISCNTRL(c)  (ISASCII(c) && iscntrl(c))
#define ISLOWER(c)  (ISASCII(c) && islower(c))
#define ISPUNCT(c)  (ISASCII(c) && ispunct(c))
#define ISSPACE(c)  (ISASCII(c) && isspace(c))
#define ISUPPER(c)  (ISASCII(c) && isupper(c))
#define ISXDIGIT(c) (ISASCII(c) && isxdigit(c))
|
1999-08-13 09:45:20 +04:00
|
|
|
|
|
|
|
|
|
/* Fallback null pointer constant for environments whose headers do
   not provide one.  The expansion is fully parenthesized so it stays
   a single operand wherever it is used. */
#ifndef NULL
# define NULL ((void *)0)
#endif
|
|
|
|
|
|
|
|
|
|
/* We remove any previous definition of `SIGN_EXTEND_CHAR',
|
|
|
|
|
since ours (we hope) works properly with all combinations of
|
|
|
|
|
machines, compilers, `char' and `unsigned char' argument types.
|
|
|
|
|
(Per Bothner suggested the basic approach.) */
|
|
|
|
|
#undef SIGN_EXTEND_CHAR
#if !__STDC__
/* No `signed char' to rely on: flip and subtract the sign bit, as in
   Harbison and Steele. */
# define SIGN_EXTEND_CHAR(c) ((((unsigned char)(c)) ^ 128) - 128)
#else
/* Standard C: a cast to signed char does the job. */
# define SIGN_EXTEND_CHAR(c) ((signed char)(c))
#endif
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
/* These are the command codes that appear in compiled regular
|
|
|
|
|
expressions, one per byte. Some command codes are followed by
|
|
|
|
|
argument bytes. A command code can specify any interpretation
|
|
|
|
|
whatsoever for its arguments. Zero-bytes may appear in the compiled
|
|
|
|
|
regular expression.
|
|
|
|
|
|
|
|
|
|
The value of `exactn' is needed in search.c (search_buffer) in emacs.
|
|
|
|
|
So regex.h defines a symbol `RE_EXACTN_VALUE' to be 1; the value of
|
|
|
|
|
`exactn' we use here must also be 1. */
|
|
|
|
|
|
|
|
|
|
enum regexpcode
  {
    unused = 0,
    exactn = 1,           /* Followed by one byte giving n, then by n
                             literal bytes. */
    begline,              /* Fail unless at beginning of line. */
    endline,              /* Fail unless at end of line. */
    begbuf,               /* Succeeds if at beginning of buffer (if emacs)
                             or at beginning of string to be matched
                             (if not). */
    endbuf,               /* Analogously, for end of buffer/string. */
    endbuf2,              /* End of buffer/string, or newline just
                             before it. */
    begpos,               /* Matches where last scan//gsub left off. */
    jump,                 /* Followed by two bytes giving relative address
                             to jump to. */
    jump_past_alt,        /* Same as jump, but marks the end of an
                             alternative. */
    on_failure_jump,      /* Followed by two bytes giving relative address
                             of place to resume at in case of failure. */
    finalize_jump,        /* Throw away latest failure point and then jump
                             to address. */
    maybe_finalize_jump,  /* Like jump but finalize if safe to do so.
                             This is used to jump back to the beginning
                             of a repeat.  If the command that follows
                             this jump is clearly incompatible with the
                             one at the beginning of the repeat, such
                             that we can be sure that there is no use
                             backtracking out of repetitions already
                             completed, then we finalize. */
    dummy_failure_jump,   /* Jump, and push a dummy failure point.  This
                             failure point will be thrown away if an
                             attempt is made to use it for a failure.
                             A + construct makes this before the first
                             repeat.  Also use it as an intermediary kind
                             of jump when compiling an or construct. */
    push_dummy_failure,   /* Push a dummy failure point and continue.
                             Used at the end of alternatives. */
    succeed_n,            /* Used like on_failure_jump except has to
                             succeed n times; then gets turned into an
                             on_failure_jump.  The relative address
                             following it is useless until then.  The
                             address is followed by two bytes
                             containing n. */
    jump_n,               /* Similar to jump, but jump n times only; also
                             the relative address following is in turn
                             followed by yet two more bytes
                             containing n. */
    try_next,             /* Jump to next pattern for the first time,
                             leaving this pattern on the failure stack. */
    finalize_push,        /* Finalize stack and push the beginning of the
                             pattern on the stack to retry (used for
                             non-greedy match). */
    finalize_push_n,      /* Similar to finalize_push, but finalize n
                             times only. */
    set_number_at,        /* Set the following relative location to the
                             subsequent number. */
    anychar,              /* Matches any (more or less) one character
                             excluding newlines. */
    anychar_repeat,       /* Matches sequence of characters excluding
                             newlines. */
    charset,              /* Matches any one char belonging to specified
                             set.  First following byte is number of
                             bitmap bytes.  Then come bytes for a bitmap
                             saying which chars are in.  Bits in each
                             byte are ordered low-bit-first.  A character
                             is in the set if its bit is 1.  A character
                             too large to have a bit in the map is
                             automatically not in the set. */
    charset_not,          /* Same parameters as charset, but match any
                             character that is not one of those
                             specified. */
    start_memory,         /* Start remembering the text that is matched,
                             for storing in a memory register.  Followed
                             by one byte containing the register number.
                             Register numbers must be in the range 0
                             through RE_NREGS. */
    stop_memory,          /* Stop remembering the text that is matched
                             and store it in a memory register.  Followed
                             by one byte containing the register number.
                             Register numbers must be in the range 0
                             through RE_NREGS. */
    start_paren,          /* Place holder at the start of (?:..). */
    stop_paren,           /* Place holder at the end of (?:..). */
    casefold_on,          /* Turn on casefold flag. */
    casefold_off,         /* Turn off casefold flag. */
    option_set,           /* Turn on multi line match (match with
                             newlines). */
    start_nowidth,        /* Save string point to the stack. */
    stop_nowidth,         /* Restore string place at the point
                             start_nowidth. */
    pop_and_fail,         /* Fail after popping nowidth entry from
                             stack. */
    stop_backtrack,       /* Restore backtrack stack at the point
                             start_nowidth. */
    duplicate,            /* Match a duplicate of something remembered.
                             Followed by one byte containing the index
                             of the memory register. */
    wordchar,             /* Matches any word-constituent character. */
    notwordchar,          /* Matches any char that is not a
                             word-constituent. */
    wordbeg,              /* Succeeds if at word beginning. */
    wordend,              /* Succeeds if at word end. */
    wordbound,            /* Succeeds if at a word boundary. */
    notwordbound          /* Succeeds if not at a word boundary. */
  };
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Number of failure points to allocate space for initially,
|
|
|
|
|
when matching. If this number is exceeded, more space is allocated,
|
|
|
|
|
so it is not a hard limit. */
|
|
|
|
|
|
|
|
|
|
#ifndef NFAILURES
|
2000-02-08 11:54:01 +03:00
|
|
|
|
#define NFAILURES 160
|
1998-01-16 15:13:05 +03:00
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
/* Write NUMBER into the two bytes at DESTINATION: the low eight bits
   go to byte 0 and the remaining bits (NUMBER >> 8) to byte 1. */
#define STORE_NUMBER(destination, number) \
    do { \
        (destination)[0] = (number) & 0xff; \
        (destination)[1] = (number) >> 8; \
    } while (0)

/* Like STORE_NUMBER, then advance DESTINATION past the two bytes just
   written.  DESTINATION must therefore be a modifiable pointer such
   as p; an expression like p + 1 will not work. */
#define STORE_NUMBER_AND_INCR(destination, number) \
    do { \
        STORE_NUMBER(destination, number); \
        (destination) += 2; \
    } while (0)
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Put into DESTINATION the number stored in the two contiguous bytes
   starting at SOURCE: byte 0 is the low-order byte and byte 1 is the
   sign-extended high-order byte, so negative numbers come back
   negative.  The high byte is scaled by multiplication rather than a
   left shift, because left-shifting a negative value is undefined in
   C. */
#define EXTRACT_NUMBER(destination, source) \
    do { \
        (destination) = *(source) & 0377; \
        (destination) += SIGN_EXTEND_CHAR(*(char*)((source) + 1)) * (1 << 8); \
    } while (0)

/* Like EXTRACT_NUMBER, then advance SOURCE past the two bytes just
   read.  SOURCE must therefore be a modifiable pointer such as p,
   not, e.g., p + 1. */
#define EXTRACT_NUMBER_AND_INCR(destination, source) \
    do { \
        EXTRACT_NUMBER(destination, source); \
        (source) += 2; \
    } while (0)
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Specify the precise syntax of regexps for compilation. This provides
|
|
|
|
|
for compatibility for various utilities which historically have
|
|
|
|
|
different, incompatible syntaxes.
|
|
|
|
|
|
|
|
|
|
The argument SYNTAX is a bit-mask comprised of the various bits
|
|
|
|
|
defined in regex.h. */
|
|
|
|
|
|
|
|
|
|
/* Obsolete entry point: SYNTAX is ignored and 0 is always returned. */
long
re_set_syntax(syntax)
    long syntax;
{
    (void)syntax;
    return 0L;
}
|
|
|
|
|
|
|
|
|
|
/* Macros for re_compile_pattern, which is found below these definitions. */
|
|
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
|
/* True when the caller's `options' has RE_OPTION_IGNORECASE set and a
   case table has been installed in `translate'. */
#define TRANSLATE_P() \
    ((options & RE_OPTION_IGNORECASE) && translate)

/* True when `bufp->options' has RE_OPTION_IGNORECASE or
   RE_MAY_IGNORECASE set and a case table has been installed. */
#define MAY_TRANSLATE() \
    ((bufp->options & (RE_OPTION_IGNORECASE|RE_MAY_IGNORECASE)) && translate)
|
1998-01-16 15:13:05 +03:00
|
|
|
|
/* Fetch the next character in the uncompiled pattern---translating it
|
|
|
|
|
if necessary. Also cast from a signed character in the constant
|
|
|
|
|
string passed to us by the user to an unsigned char that we can use
|
|
|
|
|
as an array index (in, e.g., `translate'). */
|
|
|
|
|
#define PATFETCH(c) \
    do { \
        if (p == pend) goto end_of_pattern; \
        c = (unsigned char) *p++; \
        /* Map through the case table only when TRANSLATE_P() holds. */ \
        if (TRANSLATE_P()) c = (unsigned char)translate[c]; \
    } while (0)
|
|
|
|
|
|
|
|
|
|
/* Fetch the next character in the uncompiled pattern, with no
|
|
|
|
|
translation. */
|
|
|
|
|
#define PATFETCH_RAW(c) \
    do { \
        if (p == pend) goto end_of_pattern; \
        c = (unsigned char)*p++; \
    } while (0)

/* Step the pattern pointer back by one byte. */
#define PATUNFETCH p--
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Finish converting a multi-byte character to a wide value.
   On entry C holds the first byte and P points just past it; on exit
   C holds the combined value and P has moved past the bytes consumed.

   UTF-8 mode: with n = mbclen(C) - 1, the low BYTEWIDTH-2-n bits of
   the first byte are kept and the low six bits of each of the next n
   bytes are appended.
   Other modes: exactly one more byte is read and C becomes
   (C << 8) | byte.

   Both arguments are evaluated more than once and must be modifiable
   lvalues.  They are parenthesized in every use so that an argument
   such as *pp behaves the same in both branches. */
#define MBC2WC(c, p) \
    do { \
        if (current_mbctype == MBCTYPE_UTF8) { \
            int n = mbclen(c) - 1; \
            (c) &= (1<<(BYTEWIDTH-2-n)) - 1; \
            while (n--) { \
                (c) = (c) << 6 | (*(p)++ & ((1<<6)-1)); \
            } \
        } \
        else { \
            (c) <<= 8; \
            (c) |= (unsigned char)*(p)++; \
        } \
    } while (0)
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Complete a multi-byte character whose first byte is already in C,
   reading the rest from the pattern.  Jumps to end_of_pattern when
   p + mbclen(C) - 1 reaches or passes `pend'. */
#define PATFETCH_MBC(c) \
    do { \
        if (p + mbclen(c) - 1 >= pend) \
            goto end_of_pattern; \
        MBC2WC(c, p); \
    } while(0)
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* First byte of the multi-byte encoding of wide value C.
   Outside UTF-8 mode a value below 0x100 is its own first byte, and
   otherwise the first byte is bits 8..15.  In UTF-8 mode the answer
   comes from utf8_firstbyte().  C is parenthesized at every use so
   that compound arguments (e.g. a | b) are compared as a whole. */
#define WC2MBC1ST(c) \
    ((current_mbctype != MBCTYPE_UTF8) \
        ? (((c) < 0x100) ? (c) : (((c)>>8)&0xff)) \
        : utf8_firstbyte(c))
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
2002-02-01 11:49:02 +03:00
|
|
|
|
typedef unsigned int (*mbc_startpos_func_t) _((const char *string, unsigned int pos));
|
2002-03-25 12:08:15 +03:00
|
|
|
|
|
|
|
|
|
static unsigned int asc_startpos _((const char *string, unsigned int pos));
|
|
|
|
|
static unsigned int euc_startpos _((const char *string, unsigned int pos));
|
|
|
|
|
static unsigned int sjis_startpos _((const char *string, unsigned int pos));
|
|
|
|
|
static unsigned int utf8_startpos _((const char *string, unsigned int pos));
|
|
|
|
|
|
|
|
|
|
static const mbc_startpos_func_t mbc_startpos_func[4] = {
|
|
|
|
|
asc_startpos, euc_startpos, sjis_startpos, utf8_startpos
|
|
|
|
|
};
|
|
|
|
|
|
2002-02-01 11:49:02 +03:00
|
|
|
|
#define mbc_startpos(start, pos) (*mbc_startpos_func[current_mbctype])((start), (pos))
|
2002-01-29 22:33:11 +03:00
|
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
|
/* Return the lead byte of the UTF-8 style encoding of code C.

   Codes below 0x80 are returned unchanged.  Larger codes get the
   two- to six-byte lead byte (0xc0, 0xe0, 0xf0, 0xf8 or 0xfc marker
   plus the top bits of C).  Anything above 0x7fffffff yields 0xfe.

   The final return is unconditional: the earlier version selected it
   with SIZEOF_INT although C is an unsigned long, and on the other
   preprocessor branch control could reach the end of the function
   without returning a value. */
static unsigned int
utf8_firstbyte(c)
    unsigned long c;
{
    if (c < 0x80) return c;
    if (c <= 0x7ff) return ((c>>6)&0xff)|0xc0;
    if (c <= 0xffff) return ((c>>12)&0xff)|0xe0;
    if (c <= 0x1fffff) return ((c>>18)&0xff)|0xf0;
    if (c <= 0x3ffffff) return ((c>>24)&0xff)|0xf8;
    if (c <= 0x7fffffff) return ((c>>30)&0xff)|0xfc;
    return 0xfe;
}
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
print_mbc(c)
|
2000-05-12 13:07:57 +04:00
|
|
|
|
unsigned int c;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
{
|
|
|
|
|
if (current_mbctype == MBCTYPE_UTF8) {
|
|
|
|
|
if (c < 0x80)
|
2002-01-23 10:30:43 +03:00
|
|
|
|
printf("%c", (int)c);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
else if (c <= 0x7ff)
|
2002-01-23 10:30:43 +03:00
|
|
|
|
printf("%c%c", (int)utf8_firstbyte(c), (int)(c & 0x3f));
|
1999-08-13 09:45:20 +04:00
|
|
|
|
else if (c <= 0xffff)
|
2002-01-23 10:30:43 +03:00
|
|
|
|
printf("%c%c%c", (int)utf8_firstbyte(c), (int)((c >> 6) & 0x3f),
|
|
|
|
|
(int)(c & 0x3f));
|
1999-08-13 09:45:20 +04:00
|
|
|
|
else if (c <= 0x1fffff)
|
2002-01-23 10:30:43 +03:00
|
|
|
|
printf("%c%c%c%c", (int)utf8_firstbyte(c), (int)((c >> 12) & 0x3f),
|
|
|
|
|
(int)((c >> 6) & 0x3f), (int)(c & 0x3f));
|
1999-08-13 09:45:20 +04:00
|
|
|
|
else if (c <= 0x3ffffff)
|
2002-01-23 10:30:43 +03:00
|
|
|
|
printf("%c%c%c%c%c", (int)utf8_firstbyte(c), (int)((c >> 18) & 0x3f),
|
|
|
|
|
(int)((c >> 12) & 0x3f), (int)((c >> 6) & 0x3f), (int)(c & 0x3f));
|
1999-08-13 09:45:20 +04:00
|
|
|
|
else if (c <= 0x7fffffff)
|
2002-01-23 10:30:43 +03:00
|
|
|
|
printf("%c%c%c%c%c%c", (int)utf8_firstbyte(c), (int)((c >> 24) & 0x3f),
|
|
|
|
|
(int)((c >> 18) & 0x3f), (int)((c >> 12) & 0x3f),
|
|
|
|
|
(int)((c >> 6) & 0x3f), (int)(c & 0x3f));
|
1999-01-20 07:59:39 +03:00
|
|
|
|
}
|
1999-09-01 13:48:03 +04:00
|
|
|
|
else if (c < 0xff) {
|
2002-01-23 10:30:43 +03:00
|
|
|
|
printf("\\%o", (int)c);
|
1999-09-01 13:48:03 +04:00
|
|
|
|
}
|
1999-01-20 07:59:39 +03:00
|
|
|
|
else {
|
2002-01-23 10:30:43 +03:00
|
|
|
|
printf("%c%c", (int)(c >> BYTEWIDTH), (int)(c &0xff));
|
1999-01-20 07:59:39 +03:00
|
|
|
|
}
|
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
/* If the buffer isn't allocated when it comes in, use this. */
|
|
|
|
|
#define INIT_BUF_SIZE 28
|
|
|
|
|
|
|
|
|
|
/* Make sure we have at least N more bytes of space in buffer. */
|
|
|
|
|
#define GET_BUFFER_SPACE(n) \
    do { \
        /* Keep extending until the bytes used plus N fit below the \
           allocated size. */ \
        while (b - bufp->buffer + (n) >= bufp->allocated) \
            EXTEND_BUFFER; \
    } while (0)

/* Append the single byte CH to the compiled pattern, growing the
   buffer first if there is no room for it. */
#define BUFPUSH(ch) \
    do { \
        GET_BUFFER_SPACE(1); \
        *b++ = (char)(ch); \
    } while (0)
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
/* Extend the buffer by twice its current size via reallocation and
|
|
|
|
|
reset the pointers that pointed into the old allocation to point to
|
|
|
|
|
the correct places in the new allocation. If extending the buffer
|
|
|
|
|
results in it being larger than 1 << 16, then flag memory exhausted. */
|
2001-06-05 11:19:39 +04:00
|
|
|
|
/* Grow bufp->buffer and re-base every pointer into it.
   A buffer already at 1 << 16 bytes jumps to too_big; otherwise the
   size is doubled and capped at 1 << 16.  A null result from
   xrealloc() jumps to memory_exhausted.  `b' and `begalt' are always
   relocated; `fixup_alt_jump', `laststart' and `pending_exact' are
   relocated only when they are non-null. */
#define EXTEND_BUFFER \
    do { \
        char *old_buffer = bufp->buffer; \
        if (bufp->allocated == (1L<<16)) goto too_big; \
        bufp->allocated *= 2; \
        if (bufp->allocated > (1L<<16)) bufp->allocated = (1L<<16); \
        bufp->buffer = (char*)xrealloc(bufp->buffer, bufp->allocated); \
        if (bufp->buffer == 0) \
            goto memory_exhausted; \
        b = bufp->buffer + (b - old_buffer); \
        if (fixup_alt_jump) \
            fixup_alt_jump = bufp->buffer + (fixup_alt_jump - old_buffer); \
        if (laststart) \
            laststart = bufp->buffer + (laststart - old_buffer); \
        begalt = bufp->buffer + (begalt - old_buffer); \
        if (pending_exact) \
            pending_exact = bufp->buffer + (pending_exact - old_buffer); \
    } while (0)
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Set the bit for character C in a character set list. */
|
|
|
|
|
#define SET_LIST_BIT(c) \
    (b[(unsigned char)(c) / BYTEWIDTH] |= 1 << ((unsigned char)(c) % BYTEWIDTH))
|
|
|
|
|
|
|
|
|
|
/* Get the next unsigned number in the uncompiled pattern. */
|
|
|
|
|
/* Parse a run of decimal digits from the pattern into NUM.
   NUM is left untouched when no digit is found; a negative NUM is
   reset to 0 before the first digit is added.  On exit the caller's
   `c' holds the last character fetched.  Uses the caller's `c', `p'
   and `pend'. */
#define GET_UNSIGNED_NUMBER(num) \
    do { \
        if (p != pend) { \
            PATFETCH(c); \
            while (ISDIGIT(c)) { \
                if (num < 0) num = 0; \
                num = num * 10 + c - '0'; \
                if (p == pend) break; \
                PATFETCH(c); \
            } \
        } \
    } while (0)
|
|
|
|
|
|
2000-05-24 08:34:26 +04:00
|
|
|
|
/* Nonzero when the two C strings are identical. */
#define STREQ(s1, s2) (strcmp((s1), (s2)) == 0)

/* Length of the longest class name below, namely `xdigit'. */
#define CHAR_CLASS_MAX_LENGTH 6

/* Nonzero when STRING names one of the twelve bracket character
   classes.  STRING is evaluated once per comparison. */
#define IS_CHAR_CLASS(string) \
    (   STREQ(string, "alpha") || STREQ(string, "upper") \
     || STREQ(string, "lower") || STREQ(string, "digit") \
     || STREQ(string, "alnum") || STREQ(string, "xdigit") \
     || STREQ(string, "space") || STREQ(string, "print") \
     || STREQ(string, "punct") || STREQ(string, "graph") \
     || STREQ(string, "cntrl") || STREQ(string, "blank"))
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
|
#define STORE_MBC(p, c) \
|
|
|
|
|
do { \
|
|
|
|
|
(p)[0] = (unsigned char)(((c) >>24) & 0xff); \
|
|
|
|
|
(p)[1] = (unsigned char)(((c) >>16) & 0xff); \
|
|
|
|
|
(p)[2] = (unsigned char)(((c) >> 8) & 0xff); \
|
|
|
|
|
(p)[3] = (unsigned char)(((c) >> 0) & 0xff); \
|
|
|
|
|
} while (0)
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
|
/* Like STORE_MBC, but writes through P with post-increment, so P ends
   up four bytes further on.  P must be a modifiable pointer lvalue. */
#define STORE_MBC_AND_INCR(p, c) \
  do { \
    *(p)++ = (unsigned char)(((c) >>24) & 0xff); \
    *(p)++ = (unsigned char)(((c) >>16) & 0xff); \
    *(p)++ = (unsigned char)(((c) >> 8) & 0xff); \
    *(p)++ = (unsigned char)(((c) >> 0) & 0xff); \
  } while (0)
|
|
|
|
|
|
|
|
|
|
/* Read the four bytes starting at P as a big-endian unsigned int
   (the inverse of STORE_MBC).  Each byte is widened to unsigned int
   before shifting so that a leading byte >= 0x80 is never shifted
   into the sign bit of a promoted int. */
#define EXTRACT_MBC(p) \
  ((unsigned int)((unsigned int)(unsigned char)(p)[0] << 24 | \
                  (unsigned int)(unsigned char)(p)[1] << 16 | \
                  (unsigned int)(unsigned char)(p)[2] <<  8 | \
                  (unsigned int)(unsigned char)(p)[3]))
|
|
|
|
|
|
|
|
|
|
/* Like EXTRACT_MBC, but first advances P by four bytes and then reads
   the four bytes just skipped.  P must be a modifiable pointer lvalue.
   Each byte is widened to unsigned int before shifting so that a
   leading byte >= 0x80 is never shifted into the sign bit of an int. */
#define EXTRACT_MBC_AND_INCR(p) \
  ((unsigned int)((p) += 4, \
                  (unsigned int)(unsigned char)(p)[-4] << 24 | \
                  (unsigned int)(unsigned char)(p)[-3] << 16 | \
                  (unsigned int)(unsigned char)(p)[-2] <<  8 | \
                  (unsigned int)(unsigned char)(p)[-1]))
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
/* Read the two bytes at P as an unsigned 16-bit value, low byte first. */
#define EXTRACT_UNSIGNED(p) \
  ((unsigned char)(p)[0] | (unsigned char)(p)[1] << 8)
|
|
|
|
|
/* Like EXTRACT_UNSIGNED, but first advances P by two bytes and then
   reads the two bytes just skipped.  P must be a modifiable pointer
   lvalue. */
#define EXTRACT_UNSIGNED_AND_INCR(p) \
  ((p) += 2, (unsigned char)(p)[-2] | (unsigned char)(p)[-1] << 8)
|
|
|
|
|
|
|
|
|
|
/* Handle (mb)?charset(_not)?.

   Structure of mbcharset(_not)? in compiled pattern.

     struct {
       unsigned char id;                 mbcharset(_not)?
       unsigned char sbc_size;
       unsigned char sbc_map[sbc_size];  same as charset(_not)? up to here.
       unsigned short mbc_size;          number of intervals.
       struct {
         unsigned long beg;              beginning of interval.
         unsigned long end;              end of interval.
       } intervals[mbc_size];
     }; */
|
|
|
|
|
|
|
|
|
|
/* Add the inclusive interval [C1, C2] to the multi-byte interval table
   of a charset(_not)? command.

   B points at the single-byte bitmap: b[-1] is the bitmap length in
   bytes, the two-byte interval count follows the bitmap, and the
   intervals follow the count as 8-byte (beg, end) pairs.  The table is
   binary-searched, so it is expected to be sorted.  Existing intervals
   that overlap or directly adjoin the new one are merged into it, and
   the stored count is updated.

   Nothing is done when C1 > C2.  The function does not allocate; the
   caller must have made room for one more 8-byte interval. */
static void
set_list_bits(c1, c2, b)
    unsigned long c1, c2;
    unsigned char *b;
{
  unsigned char sbc_size = b[-1];
  unsigned short mbc_size = EXTRACT_UNSIGNED(&b[sbc_size]);
  unsigned short beg, end, upb;

  if (c1 > c2)
    return;
  /* From here on B addresses the first interval; b[-2] is the count. */
  b = &b[sbc_size + 2];

  /* BEG: first interval whose end is not more than one below C1. */
  for (beg = 0, upb = mbc_size; beg < upb; ) {
    unsigned short mid = (unsigned short)(beg + upb) >> 1;

    if ((int)c1 - 1 > (int)EXTRACT_MBC(&b[mid*8+4]))
      beg = mid + 1;
    else
      upb = mid;
  }

  /* END: one past the last interval whose start is not more than one
     above C2. */
  for (end = beg, upb = mbc_size; end < upb; ) {
    unsigned short mid = (unsigned short)(end + upb) >> 1;

    if ((int)c2 >= (int)EXTRACT_MBC(&b[mid*8]) - 1)
      end = mid + 1;
    else
      upb = mid;
  }

  /* Intervals [BEG, END) get absorbed: widen [C1, C2] to cover them. */
  if (beg != end) {
    if (c1 > EXTRACT_MBC(&b[beg*8]))
      c1 = EXTRACT_MBC(&b[beg*8]);
    if (c2 < EXTRACT_MBC(&b[(end - 1)*8+4]))
      c2 = EXTRACT_MBC(&b[(end - 1)*8+4]);
  }
  /* Shift the tail so that exactly one slot remains at BEG. */
  if (end < mbc_size && end != beg + 1)
    /* NOTE: memcpy() would not work here.  */
    memmove(&b[(beg + 1)*8], &b[end*8], (mbc_size - end)*8);
  STORE_MBC(&b[beg*8 + 0], c1);
  STORE_MBC(&b[beg*8 + 4], c2);
  mbc_size += beg - end + 1;
  STORE_NUMBER(&b[-2], mbc_size);
}
|
|
|
|
|
|
|
|
|
|
static int
|
2003-01-20 11:29:24 +03:00
|
|
|
|
is_in_list_sbc(c, b)
|
|
|
|
|
unsigned long c;
|
|
|
|
|
const unsigned char *b;
|
|
|
|
|
{
|
|
|
|
|
unsigned short size;
|
|
|
|
|
|
|
|
|
|
size = *b++;
|
|
|
|
|
return ((int)c / BYTEWIDTH < (int)size && b[c / BYTEWIDTH] & 1 << c % BYTEWIDTH);
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Return 1 if C lies inside one of the multi-byte intervals of a
   charset(_not)? command, 0 otherwise.

   B points at the bitmap length byte.  The bitmap is skipped, the
   two-byte interval count is read, and the 8-byte (beg, end) pairs
   that follow are binary-searched for the first interval whose end is
   not below C. */
static int
is_in_list_mbc(c, b)
    unsigned long c;
    const unsigned char *b;
{
  unsigned short size;
  unsigned short i, j;

  size = *b++;
  /* Step over the bitmap and the count; B now addresses interval 0. */
  b += size + 2;
  size = EXTRACT_UNSIGNED(&b[-2]);
  if (size == 0) return 0;

  for (i = 0, j = size; i < j; ) {
    unsigned short k = (unsigned short)(i + j) >> 1;

    if (c > EXTRACT_MBC(&b[k*8+4]))
      i = k + 1;
    else
      j = k;
  }
  /* Interval I ends at or after C; it matches if it also starts at or
     before C. */
  if (i < size && EXTRACT_MBC(&b[i*8]) <= c)
    return 1;

  return 0;
}
|
|
|
|
|
|
2003-01-20 11:29:24 +03:00
|
|
|
|
static int
|
|
|
|
|
is_in_list(c, b)
|
|
|
|
|
unsigned long c;
|
|
|
|
|
const unsigned char *b;
|
|
|
|
|
{
|
2003-01-23 06:39:25 +03:00
|
|
|
|
return is_in_list_sbc(c, b) || (current_mbctype ? is_in_list_mbc(c, b) : 0);
|
2003-01-20 11:29:24 +03:00
|
|
|
|
}
|
|
|
|
|
|
1998-01-16 15:19:22 +03:00
|
|
|
|
static void
|
|
|
|
|
print_partial_compiled_pattern(start, end)
|
|
|
|
|
unsigned char *start;
|
|
|
|
|
unsigned char *end;
|
|
|
|
|
{
|
|
|
|
|
int mcnt, mcnt2;
|
|
|
|
|
unsigned char *p = start;
|
|
|
|
|
unsigned char *pend = end;
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (start == NULL) {
|
|
|
|
|
printf("(null)\n");
|
|
|
|
|
return;
|
|
|
|
|
}
|
|
|
|
|
|
1998-01-16 15:19:22 +03:00
|
|
|
|
/* Loop over pattern commands. */
|
1999-08-13 09:45:20 +04:00
|
|
|
|
while (p < pend) {
|
|
|
|
|
switch ((enum regexpcode)*p++) {
|
|
|
|
|
case unused:
|
|
|
|
|
printf("/unused");
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case exactn:
|
|
|
|
|
mcnt = *p++;
|
|
|
|
|
printf("/exactn/%d", mcnt);
|
|
|
|
|
do {
|
|
|
|
|
putchar('/');
|
|
|
|
|
printf("%c", *p++);
|
|
|
|
|
}
|
|
|
|
|
while (--mcnt);
|
|
|
|
|
break;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case start_memory:
|
|
|
|
|
mcnt = *p++;
|
|
|
|
|
printf("/start_memory/%d/%d", mcnt, *p++);
|
|
|
|
|
break;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case stop_memory:
|
|
|
|
|
mcnt = *p++;
|
|
|
|
|
printf("/stop_memory/%d/%d", mcnt, *p++);
|
|
|
|
|
break;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case start_paren:
|
|
|
|
|
printf("/start_paren");
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case stop_paren:
|
|
|
|
|
printf("/stop_paren");
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case casefold_on:
|
|
|
|
|
printf("/casefold_on");
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case casefold_off:
|
|
|
|
|
printf("/casefold_off");
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
2000-05-24 08:34:26 +04:00
|
|
|
|
case option_set:
|
|
|
|
|
printf("/option_set/%d", *p++);
|
2000-05-17 10:33:50 +04:00
|
|
|
|
break;
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case start_nowidth:
|
2000-05-24 08:34:26 +04:00
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
printf("/start_nowidth//%d", mcnt);
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case stop_nowidth:
|
|
|
|
|
printf("/stop_nowidth//");
|
|
|
|
|
p += 2;
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case pop_and_fail:
|
|
|
|
|
printf("/pop_and_fail");
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
2000-02-08 11:54:01 +03:00
|
|
|
|
case stop_backtrack:
|
|
|
|
|
printf("/stop_backtrack//");
|
|
|
|
|
p += 2;
|
|
|
|
|
break;
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case duplicate:
|
|
|
|
|
printf("/duplicate/%d", *p++);
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case anychar:
|
|
|
|
|
printf("/anychar");
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case anychar_repeat:
|
|
|
|
|
printf("/anychar_repeat");
|
|
|
|
|
break;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case charset:
|
|
|
|
|
case charset_not:
|
|
|
|
|
{
|
|
|
|
|
register int c;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
printf("/charset%s",
|
|
|
|
|
(enum regexpcode)*(p - 1) == charset_not ? "_not" : "");
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
mcnt = *p++;
|
|
|
|
|
printf("/%d", mcnt);
|
|
|
|
|
for (c = 0; c < mcnt; c++) {
|
|
|
|
|
unsigned bit;
|
|
|
|
|
unsigned char map_byte = p[c];
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
2002-04-24 08:54:16 +04:00
|
|
|
|
putchar('/');
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
for (bit = 0; bit < BYTEWIDTH; bit++)
|
|
|
|
|
if (map_byte & (1 << bit))
|
|
|
|
|
printf("%c", c * BYTEWIDTH + bit);
|
|
|
|
|
}
|
|
|
|
|
p += mcnt;
|
|
|
|
|
mcnt = EXTRACT_UNSIGNED_AND_INCR(p);
|
2002-04-24 08:54:16 +04:00
|
|
|
|
putchar('/');
|
1999-08-13 09:45:20 +04:00
|
|
|
|
while (mcnt--) {
|
|
|
|
|
print_mbc(EXTRACT_MBC_AND_INCR(p));
|
2002-04-24 08:54:16 +04:00
|
|
|
|
putchar('-');
|
1999-08-13 09:45:20 +04:00
|
|
|
|
print_mbc(EXTRACT_MBC_AND_INCR(p));
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case begline:
|
|
|
|
|
printf("/begline");
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case endline:
|
|
|
|
|
printf("/endline");
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case on_failure_jump:
|
2000-05-24 08:34:26 +04:00
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
printf("/on_failure_jump//%d", mcnt);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case dummy_failure_jump:
|
2000-05-24 08:34:26 +04:00
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
printf("/dummy_failure_jump//%d", mcnt);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case push_dummy_failure:
|
|
|
|
|
printf("/push_dummy_failure");
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case finalize_jump:
|
2000-05-24 08:34:26 +04:00
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
printf("/finalize_jump//%d", mcnt);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case maybe_finalize_jump:
|
2000-05-24 08:34:26 +04:00
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
printf("/maybe_finalize_jump//%d", mcnt);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case jump_past_alt:
|
2000-05-24 08:34:26 +04:00
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
printf("/jump_past_alt//%d", mcnt);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case jump:
|
2000-05-24 08:34:26 +04:00
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
printf("/jump//%d", mcnt);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case succeed_n:
|
2000-05-24 08:34:26 +04:00
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt2, p);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
printf("/succeed_n//%d//%d", mcnt, mcnt2);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case jump_n:
|
2000-05-24 08:34:26 +04:00
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt2, p);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
printf("/jump_n//%d//%d", mcnt, mcnt2);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case set_number_at:
|
2000-05-24 08:34:26 +04:00
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt2, p);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
printf("/set_number_at//%d//%d", mcnt, mcnt2);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case try_next:
|
2000-05-24 08:34:26 +04:00
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
printf("/try_next//%d", mcnt);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case finalize_push:
|
2000-05-24 08:34:26 +04:00
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
printf("/finalize_push//%d", mcnt);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case finalize_push_n:
|
2000-05-24 08:34:26 +04:00
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt2, p);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
printf("/finalize_push_n//%d//%d", mcnt, mcnt2);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case wordbound:
|
|
|
|
|
printf("/wordbound");
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case notwordbound:
|
|
|
|
|
printf("/notwordbound");
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case wordbeg:
|
|
|
|
|
printf("/wordbeg");
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case wordend:
|
|
|
|
|
printf("/wordend");
|
|
|
|
|
|
|
|
|
|
case wordchar:
|
|
|
|
|
printf("/wordchar");
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case notwordchar:
|
|
|
|
|
printf("/notwordchar");
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case begbuf:
|
|
|
|
|
printf("/begbuf");
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case endbuf:
|
|
|
|
|
printf("/endbuf");
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case endbuf2:
|
|
|
|
|
printf("/endbuf2");
|
|
|
|
|
break;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
2000-02-08 11:54:01 +03:00
|
|
|
|
case begpos:
|
|
|
|
|
printf("/begpos");
|
|
|
|
|
break;
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
default:
|
|
|
|
|
printf("?%d", *(p-1));
|
1998-01-16 15:19:22 +03:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
printf("/\n");
|
1998-01-16 15:19:22 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static void
|
|
|
|
|
print_compiled_pattern(bufp)
|
1999-08-13 09:45:20 +04:00
|
|
|
|
struct re_pattern_buffer *bufp;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
{
|
1999-01-20 07:59:39 +03:00
|
|
|
|
unsigned char *buffer = (unsigned char*)bufp->buffer;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
2000-05-24 08:34:26 +04:00
|
|
|
|
print_partial_compiled_pattern(buffer, buffer + bufp->used);
|
1998-01-16 15:19:22 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
static char*
|
|
|
|
|
calculate_must_string(start, end)
|
1999-08-13 09:45:20 +04:00
|
|
|
|
char *start;
|
|
|
|
|
char *end;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
{
|
1999-01-20 07:59:39 +03:00
|
|
|
|
int mcnt;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
int max = 0;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
char *p = start;
|
|
|
|
|
char *pend = end;
|
|
|
|
|
char *must = 0;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
|
|
|
|
if (start == NULL) return 0;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
|
1998-01-16 15:19:22 +03:00
|
|
|
|
/* Loop over pattern commands. */
|
1999-08-13 09:45:20 +04:00
|
|
|
|
while (p < pend) {
|
|
|
|
|
switch ((enum regexpcode)*p++) {
|
|
|
|
|
case unused:
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case exactn:
|
|
|
|
|
mcnt = *p;
|
|
|
|
|
if (mcnt > max) {
|
|
|
|
|
must = p;
|
|
|
|
|
max = mcnt;
|
|
|
|
|
}
|
|
|
|
|
p += mcnt+1;
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case start_memory:
|
|
|
|
|
case stop_memory:
|
|
|
|
|
p += 2;
|
|
|
|
|
break;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case duplicate:
|
2003-06-16 08:49:25 +04:00
|
|
|
|
case option_set:
|
1999-08-13 09:45:20 +04:00
|
|
|
|
p++;
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case casefold_on:
|
|
|
|
|
case casefold_off:
|
|
|
|
|
return 0; /* should not check must_string */
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case pop_and_fail:
|
|
|
|
|
case anychar:
|
|
|
|
|
case anychar_repeat:
|
|
|
|
|
case begline:
|
|
|
|
|
case endline:
|
|
|
|
|
case wordbound:
|
|
|
|
|
case notwordbound:
|
|
|
|
|
case wordbeg:
|
|
|
|
|
case wordend:
|
|
|
|
|
case wordchar:
|
|
|
|
|
case notwordchar:
|
|
|
|
|
case begbuf:
|
|
|
|
|
case endbuf:
|
|
|
|
|
case endbuf2:
|
2000-02-08 11:54:01 +03:00
|
|
|
|
case begpos:
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case push_dummy_failure:
|
|
|
|
|
case start_paren:
|
|
|
|
|
case stop_paren:
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case charset:
|
|
|
|
|
case charset_not:
|
|
|
|
|
mcnt = *p++;
|
|
|
|
|
p += mcnt;
|
|
|
|
|
mcnt = EXTRACT_UNSIGNED_AND_INCR(p);
|
|
|
|
|
while (mcnt--) {
|
2002-05-21 09:39:19 +04:00
|
|
|
|
p += 8;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case on_failure_jump:
|
2000-05-24 08:34:26 +04:00
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (mcnt > 0) p += mcnt;
|
|
|
|
|
if ((enum regexpcode)p[-3] == jump) {
|
2001-05-02 08:22:21 +04:00
|
|
|
|
p -= 2;
|
2000-05-24 08:34:26 +04:00
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (mcnt > 0) p += mcnt;
|
|
|
|
|
}
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case dummy_failure_jump:
|
|
|
|
|
case succeed_n:
|
|
|
|
|
case try_next:
|
|
|
|
|
case jump:
|
2000-05-24 08:34:26 +04:00
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (mcnt > 0) p += mcnt;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case start_nowidth:
|
|
|
|
|
case stop_nowidth:
|
2000-02-08 11:54:01 +03:00
|
|
|
|
case stop_backtrack:
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case finalize_jump:
|
|
|
|
|
case maybe_finalize_jump:
|
|
|
|
|
case finalize_push:
|
|
|
|
|
p += 2;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case jump_n:
|
|
|
|
|
case set_number_at:
|
|
|
|
|
case finalize_push_n:
|
|
|
|
|
p += 4;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
1998-01-16 15:19:22 +03:00
|
|
|
|
return must;
|
|
|
|
|
}
|
|
|
|
|
|
2000-03-08 09:25:19 +03:00
|
|
|
|
/* Translate C, the character following a backslash, into the control
   character it denotes: n, t, r, f, v, a (bell), b (backspace) and
   e (escape).  Any other character is returned unchanged. */
static unsigned int
read_backslash(c)
    int c;
{
  switch (c) {
  case 'n':
    return '\n';

  case 't':
    return '\t';

  case 'r':
    return '\r';

  case 'f':
    return '\f';

  case 'v':
    return '\v';

  case 'a':
    return '\007';

  case 'b':
    return '\010';

  case 'e':
    return '\033';
  }
  return c;
}
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
2000-03-08 09:25:19 +03:00
|
|
|
|
/* Decode the escape whose text (after the backslash) starts at P and
   return the resulting character value.

   \M-x sets bit 0x80 on x, \C-x and \cx mask x with 0x9f, \c? gives
   0177; in the \M- and \C- forms x may itself be a backslash escape,
   handled recursively.  Anything else is passed to read_backslash.
   For the \M-, \C- and \c forms *PP is set to the position reached
   after fetching x.

   Returns ~0 on a malformed sequence or when the pattern ends early
   (PATFETCH_RAW is expected to jump to `end_of_pattern' in that case
   -- confirm against its definition).
   NOTE(review): in the \C-/\c branch a ~0 from the recursive call is
   masked to 0x9f, so a nested error is not propagated. */
static unsigned int
read_special(p, pend, pp)
    const char *p, *pend, **pp;
{
  int c;

  PATFETCH_RAW(c);
  switch (c) {
  case 'M':
    PATFETCH_RAW(c);
    if (c != '-') return ~0;
    PATFETCH_RAW(c);
    *pp = p;
    if (c == '\\') {
      return read_special(p, pend, pp) | 0x80;
    }
    else if (c == -1) return ~0;
    else {
      return ((c & 0xff) | 0x80);
    }

  case 'C':
    PATFETCH_RAW(c);
    if (c != '-') return ~0;
    /* fall through: \C-x is handled like \cx */
  case 'c':
    PATFETCH_RAW(c);
    *pp = p;
    if (c == '\\') {
      c = read_special(p, pend, pp);
    }
    else if (c == '?') return 0177;
    else if (c == -1) return ~0;
    return c & 0x9f;
  default:
    return read_backslash(c);
  }

 end_of_pattern:
  return ~0;
}
|
|
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
|
/* re_compile_pattern takes a regular-expression string
   and converts it into a buffer full of byte commands for matching.

   PATTERN   is the address of the pattern string
   SIZE      is the length of it.
   BUFP      is a  struct re_pattern_buffer *  which points to the info
             on where to store the byte commands.
             This structure contains a  char *  which points to the
             actual space, which should have been obtained with malloc.
             re_compile_pattern may use realloc to grow the buffer space.

   The number of bytes of commands can be found out by looking in
   the `struct re_pattern_buffer' that bufp pointed to, after
   re_compile_pattern returns. */
|
|
|
|
|
|
|
|
|
|
char *
|
|
|
|
|
re_compile_pattern(pattern, size, bufp)
|
1999-08-13 09:45:20 +04:00
|
|
|
|
const char *pattern;
|
|
|
|
|
int size;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
struct re_pattern_buffer *bufp;
|
|
|
|
|
{
|
1999-08-13 09:45:20 +04:00
|
|
|
|
register char *b = bufp->buffer;
|
|
|
|
|
register const char *p = pattern;
|
|
|
|
|
const char *nextp;
|
|
|
|
|
const char *pend = pattern + size;
|
2002-01-23 10:30:43 +03:00
|
|
|
|
register unsigned int c, c1 = 0;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
const char *p0;
|
|
|
|
|
int numlen;
|
2000-09-25 13:15:08 +04:00
|
|
|
|
#define ERROR_MSG_MAX_SIZE 200
|
|
|
|
|
static char error_msg[ERROR_MSG_MAX_SIZE+1];
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Address of the count-byte of the most recently inserted `exactn'
|
|
|
|
|
command. This makes it possible to tell whether a new exact-match
|
|
|
|
|
character can be added to that command or requires a new `exactn'
|
|
|
|
|
command. */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
char *pending_exact = 0;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Address of the place where a forward-jump should go to the end of
|
|
|
|
|
the containing expression. Each alternative of an `or', except the
|
|
|
|
|
last, ends with a forward-jump of this sort. */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
char *fixup_alt_jump = 0;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Address of start of the most recently finished expression.
|
|
|
|
|
This tells postfix * where to find the start of its operand. */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
char *laststart = 0;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* In processing a repeat, 1 means zero matches is allowed. */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
char zero_times_ok;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* In processing a repeat, 1 means many matches is allowed. */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
char many_times_ok;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* In processing a repeat, 1 means non-greedy matches. */
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
char greedy;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Address of beginning of regexp, or inside of last (. */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
char *begalt = b;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Place in the uncompiled pattern (i.e., the {) to
|
|
|
|
|
which to go back if the interval is invalid. */
|
|
|
|
|
const char *beg_interval;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* In processing an interval, at least this many matches must be made. */
|
|
|
|
|
int lower_bound;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* In processing an interval, at most this many matches can be made. */
|
|
|
|
|
int upper_bound;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Stack of information saved by ( and restored by ).
|
|
|
|
|
Five stack elements are pushed by each (:
|
|
|
|
|
First, the value of b.
|
|
|
|
|
Second, the value of fixup_alt_jump.
|
|
|
|
|
Third, the value of begalt.
|
|
|
|
|
Fourth, the value of regnum.
|
|
|
|
|
Fifth, the type of the paren. */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
2000-09-25 21:51:29 +04:00
|
|
|
|
int stacka[40];
|
|
|
|
|
int *stackb = stacka;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
int *stackp = stackb;
|
|
|
|
|
int *stacke = stackb + 40;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Counts ('s as they are encountered. Remembered for the matching ),
|
|
|
|
|
where it becomes the register number to put in the stop_memory
|
|
|
|
|
command. */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
int regnum = 1;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
int range = 0;
|
|
|
|
|
int had_mbchar = 0;
|
1999-09-01 13:48:03 +04:00
|
|
|
|
int had_num_literal = 0;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
int had_char_class = 0;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
int options = bufp->options;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
bufp->fastmap_accurate = 0;
|
|
|
|
|
bufp->must = 0;
|
|
|
|
|
bufp->must_skip = 0;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Initialize the syntax table. */
|
|
|
|
|
init_syntax_once();
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (bufp->allocated == 0) {
|
|
|
|
|
bufp->allocated = INIT_BUF_SIZE;
|
2001-09-19 10:54:11 +04:00
|
|
|
|
/* EXTEND_BUFFER loses when bufp->allocated is 0. */
|
|
|
|
|
bufp->buffer = (char*)xrealloc(bufp->buffer, INIT_BUF_SIZE);
|
|
|
|
|
if (!bufp->buffer) goto memory_exhausted; /* this not happen */
|
1999-08-13 09:45:20 +04:00
|
|
|
|
begalt = b = bufp->buffer;
|
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
while (p != pend) {
|
|
|
|
|
PATFETCH(c);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
switch (c) {
|
|
|
|
|
case '$':
|
2000-05-24 08:34:26 +04:00
|
|
|
|
if (bufp->options & RE_OPTION_SINGLELINE) {
|
2000-05-22 11:09:55 +04:00
|
|
|
|
BUFPUSH(endbuf);
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
p0 = p;
|
|
|
|
|
/* When testing what follows the $,
|
|
|
|
|
look past the \-constructs that don't consume anything. */
|
|
|
|
|
|
|
|
|
|
while (p0 != pend) {
|
|
|
|
|
if (*p0 == '\\' && p0 + 1 != pend
|
|
|
|
|
&& (p0[1] == 'b' || p0[1] == 'B'))
|
|
|
|
|
p0 += 2;
|
|
|
|
|
else
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
BUFPUSH(endline);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
break;
|
2000-05-22 11:09:55 +04:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case '^':
|
2000-05-24 08:34:26 +04:00
|
|
|
|
if (bufp->options & RE_OPTION_SINGLELINE)
|
2000-05-22 11:09:55 +04:00
|
|
|
|
BUFPUSH(begbuf);
|
|
|
|
|
else
|
|
|
|
|
BUFPUSH(begline);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case '+':
|
|
|
|
|
case '?':
|
|
|
|
|
case '*':
|
|
|
|
|
/* If there is no previous pattern, char not special. */
|
|
|
|
|
if (!laststart) {
|
2000-09-25 13:15:08 +04:00
|
|
|
|
snprintf(error_msg, ERROR_MSG_MAX_SIZE,
|
|
|
|
|
"invalid regular expression; there's no previous pattern, to which '%c' would define cardinality at %d",
|
|
|
|
|
c, p-pattern);
|
|
|
|
|
FREE_AND_RETURN(stackb, error_msg);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
/* If there is a sequence of repetition chars,
|
|
|
|
|
collapse it down to just one. */
|
|
|
|
|
zero_times_ok = c != '+';
|
|
|
|
|
many_times_ok = c != '?';
|
|
|
|
|
greedy = 1;
|
|
|
|
|
if (p != pend) {
|
|
|
|
|
PATFETCH(c);
|
|
|
|
|
switch (c) {
|
1998-01-16 15:13:05 +03:00
|
|
|
|
case '?':
|
1999-08-13 09:45:20 +04:00
|
|
|
|
greedy = 0;
|
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
case '*':
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case '+':
|
|
|
|
|
goto nested_meta;
|
|
|
|
|
default:
|
|
|
|
|
PATUNFETCH;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
repeat:
|
|
|
|
|
/* Star, etc. applied to an empty pattern is equivalent
|
|
|
|
|
to an empty pattern. */
|
|
|
|
|
if (!laststart)
|
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (greedy && many_times_ok && *laststart == anychar && b - laststart <= 2) {
|
|
|
|
|
if (b[-1] == stop_paren)
|
|
|
|
|
b--;
|
|
|
|
|
if (zero_times_ok)
|
|
|
|
|
*laststart = anychar_repeat;
|
|
|
|
|
else {
|
|
|
|
|
BUFPUSH(anychar_repeat);
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
/* Now we know whether or not zero matches is allowed
|
|
|
|
|
and also whether or not two or more matches is allowed. */
|
|
|
|
|
if (many_times_ok) {
|
|
|
|
|
/* If more than one repetition is allowed, put in at the
|
|
|
|
|
end a backward relative jump from b to before the next
|
|
|
|
|
jump we're going to put in below (which jumps from
|
|
|
|
|
laststart to after this jump). */
|
|
|
|
|
GET_BUFFER_SPACE(3);
|
|
|
|
|
store_jump(b,greedy?maybe_finalize_jump:finalize_push,laststart-3);
|
|
|
|
|
b += 3; /* Because store_jump put stuff here. */
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* On failure, jump from laststart to next pattern, which will be the
|
|
|
|
|
end of the buffer after this jump is inserted. */
|
|
|
|
|
GET_BUFFER_SPACE(3);
|
|
|
|
|
insert_jump(on_failure_jump, laststart, b + 3, b);
|
|
|
|
|
b += 3;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (zero_times_ok) {
|
|
|
|
|
if (greedy == 0) {
|
1998-01-16 15:19:22 +03:00
|
|
|
|
GET_BUFFER_SPACE(3);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
insert_jump(try_next, laststart, b + 3, b);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
b += 3;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
/* At least one repetition is required, so insert a
|
|
|
|
|
`dummy_failure_jump' before the initial
|
|
|
|
|
`on_failure_jump' instruction of the loop. This
|
|
|
|
|
effects a skip over that instruction the first time
|
|
|
|
|
we hit that loop. */
|
|
|
|
|
GET_BUFFER_SPACE(3);
|
|
|
|
|
insert_jump(dummy_failure_jump, laststart, laststart + 6, b);
|
|
|
|
|
b += 3;
|
|
|
|
|
}
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case '.':
|
|
|
|
|
laststart = b;
|
|
|
|
|
BUFPUSH(anychar);
|
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case '[':
|
|
|
|
|
if (p == pend)
|
2000-09-25 13:15:08 +04:00
|
|
|
|
FREE_AND_RETURN(stackb, "invalid regular expression; '[' can't be the last character ie. can't start range at the end of pattern");
|
1999-08-13 09:45:20 +04:00
|
|
|
|
while ((b - bufp->buffer + 9 + (1 << BYTEWIDTH) / BYTEWIDTH)
|
|
|
|
|
> bufp->allocated)
|
|
|
|
|
EXTEND_BUFFER;
|
|
|
|
|
|
|
|
|
|
laststart = b;
|
|
|
|
|
if (*p == '^') {
|
|
|
|
|
BUFPUSH(charset_not);
|
|
|
|
|
p++;
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
BUFPUSH(charset);
|
|
|
|
|
p0 = p;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
BUFPUSH((1 << BYTEWIDTH) / BYTEWIDTH);
|
|
|
|
|
/* Clear the whole map */
|
|
|
|
|
memset(b, 0, (1 << BYTEWIDTH) / BYTEWIDTH + 2);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
had_mbchar = 0;
|
1999-09-01 13:48:03 +04:00
|
|
|
|
had_num_literal = 0;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
had_char_class = 0;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Read in characters and ranges, setting map bits. */
|
|
|
|
|
for (;;) {
|
|
|
|
|
int size;
|
|
|
|
|
unsigned last = (unsigned)-1;
|
|
|
|
|
|
2003-01-24 12:18:04 +03:00
|
|
|
|
if ((size = EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH])) || current_mbctype) {
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Ensure the space is enough to hold another interval
|
|
|
|
|
of multi-byte chars in charset(_not)?. */
|
|
|
|
|
size = (1 << BYTEWIDTH) / BYTEWIDTH + 2 + size*8 + 8;
|
|
|
|
|
while (b + size + 1 > bufp->buffer + bufp->allocated)
|
|
|
|
|
EXTEND_BUFFER;
|
|
|
|
|
}
|
|
|
|
|
range_retry:
|
2001-05-02 08:22:21 +04:00
|
|
|
|
if (range && had_char_class) {
|
|
|
|
|
FREE_AND_RETURN(stackb, "invalid regular expression; can't use character class as an end value of range");
|
|
|
|
|
}
|
2003-09-16 21:37:34 +04:00
|
|
|
|
PATFETCH_RAW(c);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (c == ']') {
|
|
|
|
|
if (p == p0 + 1) {
|
|
|
|
|
if (p == pend)
|
2000-09-25 13:15:08 +04:00
|
|
|
|
FREE_AND_RETURN(stackb, "invalid regular expression; empty character class");
|
2003-03-21 17:37:32 +03:00
|
|
|
|
re_warning("character class has `]' without escape");
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
/* Stop if this isn't merely a ] inside a bracket
|
|
|
|
|
expression, but rather the end of a bracket
|
|
|
|
|
expression. */
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
/* Look ahead to see if it's a range when the last thing
|
|
|
|
|
was a character class. */
|
|
|
|
|
if (had_char_class && c == '-' && *p != ']')
|
2000-09-25 13:15:08 +04:00
|
|
|
|
FREE_AND_RETURN(stackb, "invalid regular expression; can't use character class as a start value of range");
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (ismbchar(c)) {
|
|
|
|
|
PATFETCH_MBC(c);
|
|
|
|
|
had_mbchar++;
|
|
|
|
|
}
|
2001-05-02 08:22:21 +04:00
|
|
|
|
had_char_class = 0;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
2003-04-09 19:45:28 +04:00
|
|
|
|
if (c == '-' && ((p != p0 + 1 && *p != ']') ||
|
|
|
|
|
(p[0] == '-' && p[1] != ']') ||
|
|
|
|
|
range))
|
2003-03-21 17:37:32 +03:00
|
|
|
|
re_warning("character class has `-' without escape");
|
* regex.c (re_compile_pattern): fix previous change.
* instruby.rb, ext/extmk.rb, ext/tk/lib/tk.rb, lib/benchmark.rb,
lib/cgi.rb, lib/debug.rb, lib/getoptlong.rb, lib/jcode.rb,
lib/optparse.rb, lib/time.rb, lib/date/format.rb,
lib/irb/ruby-lex.rb: escape `[', `]', `-' in character class in
regexp to avoid warning.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@3595 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2003-03-21 18:13:23 +03:00
|
|
|
|
if (c == '[' && *p != ':')
|
|
|
|
|
re_warning("character class has `[' without escape");
|
2003-03-21 17:37:32 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* \ escapes characters when inside [...]. */
|
|
|
|
|
if (c == '\\') {
|
|
|
|
|
PATFETCH_RAW(c);
|
|
|
|
|
switch (c) {
|
|
|
|
|
case 'w':
|
|
|
|
|
for (c = 0; c < (1 << BYTEWIDTH); c++) {
|
|
|
|
|
if (SYNTAX(c) == Sword ||
|
|
|
|
|
(!current_mbctype && SYNTAX(c) == Sword2))
|
|
|
|
|
SET_LIST_BIT(c);
|
|
|
|
|
}
|
|
|
|
|
if (current_mbctype) {
|
|
|
|
|
set_list_bits(0x80, 0xffffffff, b);
|
|
|
|
|
}
|
2001-05-02 08:22:21 +04:00
|
|
|
|
had_char_class = 1;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
last = -1;
|
|
|
|
|
continue;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case 'W':
|
|
|
|
|
for (c = 0; c < (1 << BYTEWIDTH); c++) {
|
|
|
|
|
if (SYNTAX(c) != Sword &&
|
2002-01-23 10:30:43 +03:00
|
|
|
|
((current_mbctype && !re_mbctab[c]) ||
|
|
|
|
|
(!current_mbctype && SYNTAX(c) != Sword2)))
|
1998-01-16 15:13:05 +03:00
|
|
|
|
SET_LIST_BIT(c);
|
|
|
|
|
}
|
2001-05-02 08:22:21 +04:00
|
|
|
|
had_char_class = 1;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
last = -1;
|
|
|
|
|
continue;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case 's':
|
|
|
|
|
for (c = 0; c < 256; c++)
|
|
|
|
|
if (ISSPACE(c))
|
|
|
|
|
SET_LIST_BIT(c);
|
2001-05-02 08:22:21 +04:00
|
|
|
|
had_char_class = 1;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
last = -1;
|
|
|
|
|
continue;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case 'S':
|
|
|
|
|
for (c = 0; c < 256; c++)
|
|
|
|
|
if (!ISSPACE(c))
|
|
|
|
|
SET_LIST_BIT(c);
|
|
|
|
|
if (current_mbctype)
|
|
|
|
|
set_list_bits(0x80, 0xffffffff, b);
|
2001-05-02 08:22:21 +04:00
|
|
|
|
had_char_class = 1;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
last = -1;
|
|
|
|
|
continue;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case 'd':
|
|
|
|
|
for (c = '0'; c <= '9'; c++)
|
|
|
|
|
SET_LIST_BIT(c);
|
2001-05-02 08:22:21 +04:00
|
|
|
|
had_char_class = 1;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
last = -1;
|
|
|
|
|
continue;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case 'D':
|
|
|
|
|
for (c = 0; c < 256; c++)
|
|
|
|
|
if (!ISDIGIT(c))
|
|
|
|
|
SET_LIST_BIT(c);
|
|
|
|
|
if (current_mbctype)
|
|
|
|
|
set_list_bits(0x80, 0xffffffff, b);
|
2001-05-02 08:22:21 +04:00
|
|
|
|
had_char_class = 1;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
last = -1;
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
case 'x':
|
|
|
|
|
c = scan_hex(p, 2, &numlen);
|
2002-06-14 10:27:18 +04:00
|
|
|
|
if (numlen == 0) goto invalid_escape;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
p += numlen;
|
1999-09-01 13:48:03 +04:00
|
|
|
|
had_num_literal = 1;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case '0': case '1': case '2': case '3': case '4':
|
|
|
|
|
case '5': case '6': case '7': case '8': case '9':
|
|
|
|
|
PATUNFETCH;
|
|
|
|
|
c = scan_oct(p, 3, &numlen);
|
|
|
|
|
p += numlen;
|
1999-09-01 13:48:03 +04:00
|
|
|
|
had_num_literal = 1;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
break;
|
|
|
|
|
|
2000-03-08 09:25:19 +03:00
|
|
|
|
case 'M':
|
|
|
|
|
case 'C':
|
|
|
|
|
case 'c':
|
2001-10-30 11:43:28 +03:00
|
|
|
|
{
|
|
|
|
|
char *pp;
|
|
|
|
|
|
|
|
|
|
--p;
|
|
|
|
|
c = read_special(p, pend, &pp);
|
|
|
|
|
if (c > 255) goto invalid_escape;
|
|
|
|
|
p = pp;
|
|
|
|
|
had_num_literal = 1;
|
|
|
|
|
}
|
2000-03-08 09:25:19 +03:00
|
|
|
|
break;
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
default:
|
|
|
|
|
c = read_backslash(c);
|
|
|
|
|
if (ismbchar(c)) {
|
|
|
|
|
PATFETCH_MBC(c);
|
|
|
|
|
had_mbchar++;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
2003-03-27 06:16:23 +03:00
|
|
|
|
else if (c == '[' && *p == ':') { /* [:...:] */
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Leave room for the null. */
|
|
|
|
|
char str[CHAR_CLASS_MAX_LENGTH + 1];
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
2000-05-24 08:34:26 +04:00
|
|
|
|
PATFETCH_RAW(c);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
c1 = 0;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* If pattern is `[[:'. */
|
|
|
|
|
if (p == pend)
|
2000-09-25 13:15:08 +04:00
|
|
|
|
FREE_AND_RETURN(stackb, "invalid regular expression; re can't end '[[:'");
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
for (;;) {
|
2003-09-16 21:37:34 +04:00
|
|
|
|
PATFETCH_RAW(c);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (c == ':' || c == ']' || p == pend
|
|
|
|
|
|| c1 == CHAR_CLASS_MAX_LENGTH)
|
1998-01-16 15:19:22 +03:00
|
|
|
|
break;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
str[c1++] = c;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
str[c1] = '\0';
|
|
|
|
|
|
2003-03-27 06:16:23 +03:00
|
|
|
|
/* If it isn't a word bracketed by `[:' and `:]':
|
|
|
|
|
undo the ending character, the letters, and
|
|
|
|
|
the leading `:' and `['. */
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (c == ':' && *p == ']') {
|
|
|
|
|
int ch;
|
|
|
|
|
char is_alnum = STREQ(str, "alnum");
|
|
|
|
|
char is_alpha = STREQ(str, "alpha");
|
|
|
|
|
char is_blank = STREQ(str, "blank");
|
|
|
|
|
char is_cntrl = STREQ(str, "cntrl");
|
|
|
|
|
char is_digit = STREQ(str, "digit");
|
|
|
|
|
char is_graph = STREQ(str, "graph");
|
|
|
|
|
char is_lower = STREQ(str, "lower");
|
|
|
|
|
char is_print = STREQ(str, "print");
|
|
|
|
|
char is_punct = STREQ(str, "punct");
|
|
|
|
|
char is_space = STREQ(str, "space");
|
|
|
|
|
char is_upper = STREQ(str, "upper");
|
|
|
|
|
char is_xdigit = STREQ(str, "xdigit");
|
|
|
|
|
|
2000-09-25 13:15:08 +04:00
|
|
|
|
if (!IS_CHAR_CLASS(str)){
|
|
|
|
|
snprintf(error_msg, ERROR_MSG_MAX_SIZE,
|
|
|
|
|
"invalid regular expression; [:%s:] is not a character class", str);
|
|
|
|
|
FREE_AND_RETURN(stackb, error_msg);
|
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
|
|
|
|
|
/* Throw away the ] at the end of the character class. */
|
2000-05-24 08:34:26 +04:00
|
|
|
|
PATFETCH(c);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
|
|
|
|
|
if (p == pend)
|
2000-09-25 13:15:08 +04:00
|
|
|
|
FREE_AND_RETURN(stackb, "invalid regular expression; range doesn't have ending ']' after a character class");
|
1999-08-13 09:45:20 +04:00
|
|
|
|
|
|
|
|
|
for (ch = 0; ch < 1 << BYTEWIDTH; ch++) {
|
|
|
|
|
if ( (is_alnum && ISALNUM(ch))
|
|
|
|
|
|| (is_alpha && ISALPHA(ch))
|
|
|
|
|
|| (is_blank && ISBLANK(ch))
|
|
|
|
|
|| (is_cntrl && ISCNTRL(ch))
|
|
|
|
|
|| (is_digit && ISDIGIT(ch))
|
|
|
|
|
|| (is_graph && ISGRAPH(ch))
|
|
|
|
|
|| (is_lower && ISLOWER(ch))
|
|
|
|
|
|| (is_print && ISPRINT(ch))
|
|
|
|
|
|| (is_punct && ISPUNCT(ch))
|
|
|
|
|
|| (is_space && ISSPACE(ch))
|
|
|
|
|
|| (is_upper && ISUPPER(ch))
|
|
|
|
|
|| (is_xdigit && ISXDIGIT(ch)))
|
2000-05-24 08:34:26 +04:00
|
|
|
|
SET_LIST_BIT(ch);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
had_char_class = 1;
|
2003-03-27 06:16:23 +03:00
|
|
|
|
continue;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
else {
|
2003-03-27 06:16:23 +03:00
|
|
|
|
c1 += 2;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
while (c1--)
|
|
|
|
|
PATUNFETCH;
|
2003-03-21 17:37:32 +03:00
|
|
|
|
re_warning("character class has `[' without escape");
|
2003-03-27 06:16:23 +03:00
|
|
|
|
c = '[';
|
1999-01-20 07:59:39 +03:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
2003-03-27 06:16:23 +03:00
|
|
|
|
|
|
|
|
|
/* Get a range. */
|
|
|
|
|
if (range) {
|
|
|
|
|
if (last > c)
|
|
|
|
|
goto invalid_pattern;
|
|
|
|
|
|
|
|
|
|
range = 0;
|
|
|
|
|
if (had_mbchar == 0) {
|
2003-09-16 21:37:34 +04:00
|
|
|
|
if (TRANSLATE_P()) {
|
|
|
|
|
for (;last<=c;last++)
|
|
|
|
|
SET_LIST_BIT(translate[last]);
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
for (;last<=c;last++)
|
|
|
|
|
SET_LIST_BIT(last);
|
|
|
|
|
}
|
2003-03-27 06:16:23 +03:00
|
|
|
|
}
|
|
|
|
|
else if (had_mbchar == 2) {
|
|
|
|
|
set_list_bits(last, c, b);
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
/* restriction: range between sbc and mbc */
|
|
|
|
|
goto invalid_pattern;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (p[0] == '-' && p[1] != ']') {
|
|
|
|
|
last = c;
|
2003-09-16 21:37:34 +04:00
|
|
|
|
PATFETCH_RAW(c1);
|
2003-03-27 06:16:23 +03:00
|
|
|
|
range = 1;
|
|
|
|
|
goto range_retry;
|
|
|
|
|
}
|
2003-09-16 21:37:34 +04:00
|
|
|
|
else {
|
|
|
|
|
if (TRANSLATE_P()) c = (unsigned char)translate[c];
|
|
|
|
|
if (had_mbchar == 0 && (!current_mbctype || !had_num_literal)) {
|
|
|
|
|
SET_LIST_BIT(c);
|
|
|
|
|
had_num_literal = 0;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
set_list_bits(c, c, b);
|
|
|
|
|
}
|
1999-09-01 13:48:03 +04:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
had_mbchar = 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Discard any character set/class bitmap bytes that are all
|
|
|
|
|
0 at the end of the map. Decrement the map-length byte too. */
|
|
|
|
|
while ((int)b[-1] > 0 && b[b[-1] - 1] == 0)
|
|
|
|
|
b[-1]--;
|
|
|
|
|
if (b[-1] != (1 << BYTEWIDTH) / BYTEWIDTH)
|
2002-09-25 18:52:37 +04:00
|
|
|
|
memmove(&b[(unsigned char)b[-1]], &b[(1 << BYTEWIDTH) / BYTEWIDTH],
|
2000-05-24 08:34:26 +04:00
|
|
|
|
2 + EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH])*8);
|
2002-09-25 18:52:37 +04:00
|
|
|
|
b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[(unsigned char)b[-1]])*8;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case '(':
|
2001-06-05 11:19:39 +04:00
|
|
|
|
{
|
|
|
|
|
int old_options = options;
|
2000-05-24 08:34:26 +04:00
|
|
|
|
int push_option = 0;
|
2001-06-05 11:19:39 +04:00
|
|
|
|
int casefold = 0;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
|
2001-06-05 11:19:39 +04:00
|
|
|
|
PATFETCH(c);
|
|
|
|
|
if (c == '?') {
|
|
|
|
|
int negative = 0;
|
2000-05-18 08:32:13 +04:00
|
|
|
|
|
2001-06-05 11:19:39 +04:00
|
|
|
|
PATFETCH_RAW(c);
|
|
|
|
|
switch (c) {
|
2002-04-19 08:14:07 +04:00
|
|
|
|
case 'x': case 'm': case 'i': case '-':
|
2001-06-05 11:19:39 +04:00
|
|
|
|
for (;;) {
|
|
|
|
|
switch (c) {
|
|
|
|
|
case '-':
|
|
|
|
|
negative = 1;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case ':':
|
|
|
|
|
case ')':
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 'x':
|
|
|
|
|
if (negative)
|
|
|
|
|
options &= ~RE_OPTION_EXTENDED;
|
|
|
|
|
else
|
|
|
|
|
options |= RE_OPTION_EXTENDED;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 'm':
|
|
|
|
|
if (negative) {
|
|
|
|
|
if (options&RE_OPTION_MULTILINE) {
|
|
|
|
|
options &= ~RE_OPTION_MULTILINE;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (!(options&RE_OPTION_MULTILINE)) {
|
|
|
|
|
options |= RE_OPTION_MULTILINE;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
2001-06-05 11:19:39 +04:00
|
|
|
|
push_option = 1;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 'i':
|
|
|
|
|
if (negative) {
|
|
|
|
|
if (options&RE_OPTION_IGNORECASE) {
|
|
|
|
|
options &= ~RE_OPTION_IGNORECASE;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else if (!(options&RE_OPTION_IGNORECASE)) {
|
|
|
|
|
options |= RE_OPTION_IGNORECASE;
|
|
|
|
|
}
|
|
|
|
|
casefold = 1;
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
FREE_AND_RETURN(stackb, "undefined (?...) inline option");
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
2001-06-05 11:19:39 +04:00
|
|
|
|
if (c == ')') {
|
|
|
|
|
c = '#'; /* read whole in-line options */
|
|
|
|
|
break;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
2001-06-05 11:19:39 +04:00
|
|
|
|
if (c == ':') break;
|
|
|
|
|
PATFETCH_RAW(c);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
2001-06-05 11:19:39 +04:00
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case '#':
|
|
|
|
|
for (;;) {
|
|
|
|
|
PATFETCH(c);
|
|
|
|
|
if (c == ')') break;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
2001-06-05 11:19:39 +04:00
|
|
|
|
c = '#';
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case ':':
|
|
|
|
|
case '=':
|
|
|
|
|
case '!':
|
|
|
|
|
case '>':
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
|
|
|
|
FREE_AND_RETURN(stackb, "undefined (?...) sequence");
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
PATUNFETCH;
|
|
|
|
|
c = '(';
|
|
|
|
|
}
|
|
|
|
|
if (c == '#') {
|
|
|
|
|
if (push_option) {
|
|
|
|
|
BUFPUSH(option_set);
|
|
|
|
|
BUFPUSH(options);
|
|
|
|
|
}
|
|
|
|
|
if (casefold) {
|
|
|
|
|
if (options & RE_OPTION_IGNORECASE)
|
|
|
|
|
BUFPUSH(casefold_on);
|
|
|
|
|
else
|
|
|
|
|
BUFPUSH(casefold_off);
|
1998-01-16 15:19:22 +03:00
|
|
|
|
}
|
|
|
|
|
break;
|
2001-06-05 11:19:39 +04:00
|
|
|
|
}
|
|
|
|
|
if (stackp+8 >= stacke) {
|
|
|
|
|
DOUBLE_STACK(int);
|
|
|
|
|
}
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
2001-06-05 11:19:39 +04:00
|
|
|
|
/* Laststart should point to the start_memory that we are about
|
|
|
|
|
to push (unless the pattern has RE_NREGS or more ('s). */
|
|
|
|
|
/* obsolete: now RE_NREGS is just a default register size. */
|
|
|
|
|
*stackp++ = b - bufp->buffer;
|
|
|
|
|
*stackp++ = fixup_alt_jump ? fixup_alt_jump - bufp->buffer + 1 : 0;
|
|
|
|
|
*stackp++ = begalt - bufp->buffer;
|
|
|
|
|
switch (c) {
|
|
|
|
|
case '(':
|
|
|
|
|
BUFPUSH(start_memory);
|
|
|
|
|
BUFPUSH(regnum);
|
|
|
|
|
*stackp++ = regnum++;
|
|
|
|
|
*stackp++ = b - bufp->buffer;
|
|
|
|
|
BUFPUSH(0);
|
|
|
|
|
/* too many ()'s to fit in a byte. (max 254) */
|
|
|
|
|
if (regnum >= RE_REG_MAX) goto too_big;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case '=':
|
|
|
|
|
case '!':
|
2000-02-08 11:54:01 +03:00
|
|
|
|
case '>':
|
2001-06-05 11:19:39 +04:00
|
|
|
|
BUFPUSH(start_nowidth);
|
|
|
|
|
*stackp++ = b - bufp->buffer;
|
|
|
|
|
BUFPUSH(0); /* temporary value */
|
|
|
|
|
BUFPUSH(0);
|
|
|
|
|
if (c != '!') break;
|
|
|
|
|
|
|
|
|
|
BUFPUSH(on_failure_jump);
|
|
|
|
|
*stackp++ = b - bufp->buffer;
|
|
|
|
|
BUFPUSH(0); /* temporary value */
|
|
|
|
|
BUFPUSH(0);
|
1998-01-16 15:19:22 +03:00
|
|
|
|
break;
|
|
|
|
|
|
2001-06-05 11:19:39 +04:00
|
|
|
|
case ':':
|
|
|
|
|
BUFPUSH(start_paren);
|
|
|
|
|
pending_exact = 0;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
default:
|
2001-06-05 11:19:39 +04:00
|
|
|
|
break;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
2000-05-24 08:34:26 +04:00
|
|
|
|
if (push_option) {
|
|
|
|
|
BUFPUSH(option_set);
|
|
|
|
|
BUFPUSH(options);
|
|
|
|
|
}
|
2001-06-05 11:19:39 +04:00
|
|
|
|
if (casefold) {
|
|
|
|
|
if (options & RE_OPTION_IGNORECASE)
|
|
|
|
|
BUFPUSH(casefold_on);
|
|
|
|
|
else
|
|
|
|
|
BUFPUSH(casefold_off);
|
|
|
|
|
}
|
|
|
|
|
*stackp++ = c;
|
|
|
|
|
*stackp++ = old_options;
|
|
|
|
|
fixup_alt_jump = 0;
|
|
|
|
|
laststart = 0;
|
|
|
|
|
begalt = b;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
break;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case ')':
|
|
|
|
|
if (stackp == stackb)
|
|
|
|
|
FREE_AND_RETURN(stackb, "unmatched )");
|
2000-05-24 08:34:26 +04:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
pending_exact = 0;
|
|
|
|
|
if (fixup_alt_jump) {
|
|
|
|
|
/* Push a dummy failure point at the end of the
|
|
|
|
|
alternative for a possible future
|
|
|
|
|
`finalize_jump' to pop. See comments at
|
|
|
|
|
`push_dummy_failure' in `re_match'. */
|
|
|
|
|
BUFPUSH(push_dummy_failure);
|
|
|
|
|
|
|
|
|
|
/* We allocated space for this jump when we assigned
|
|
|
|
|
to `fixup_alt_jump', in the `handle_alt' case below. */
|
|
|
|
|
store_jump(fixup_alt_jump, jump, b);
|
|
|
|
|
}
|
2001-06-05 11:19:39 +04:00
|
|
|
|
if (options != stackp[-1]) {
|
|
|
|
|
if ((options ^ stackp[-1]) & RE_OPTION_IGNORECASE) {
|
|
|
|
|
BUFPUSH((options&RE_OPTION_IGNORECASE)?casefold_off:casefold_on);
|
|
|
|
|
}
|
2001-06-19 08:35:17 +04:00
|
|
|
|
if ((options ^ stackp[-1]) != RE_OPTION_IGNORECASE) {
|
|
|
|
|
BUFPUSH(option_set);
|
|
|
|
|
BUFPUSH(stackp[-1]);
|
|
|
|
|
}
|
2001-06-05 11:19:39 +04:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
p0 = b;
|
|
|
|
|
options = *--stackp;
|
|
|
|
|
switch (c = *--stackp) {
|
|
|
|
|
case '(':
|
|
|
|
|
{
|
|
|
|
|
char *loc = bufp->buffer + *--stackp;
|
|
|
|
|
*loc = regnum - stackp[-1];
|
|
|
|
|
BUFPUSH(stop_memory);
|
|
|
|
|
BUFPUSH(stackp[-1]);
|
|
|
|
|
BUFPUSH(regnum - stackp[-1]);
|
|
|
|
|
stackp--;
|
|
|
|
|
}
|
|
|
|
|
break;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case '!':
|
|
|
|
|
BUFPUSH(pop_and_fail);
|
|
|
|
|
/* back patch */
|
|
|
|
|
STORE_NUMBER(bufp->buffer+stackp[-1], b - bufp->buffer - stackp[-1] - 2);
|
|
|
|
|
stackp--;
|
|
|
|
|
/* fall through */
|
|
|
|
|
case '=':
|
|
|
|
|
BUFPUSH(stop_nowidth);
|
|
|
|
|
/* tell stack-pos place to start_nowidth */
|
|
|
|
|
STORE_NUMBER(bufp->buffer+stackp[-1], b - bufp->buffer - stackp[-1] - 2);
|
|
|
|
|
BUFPUSH(0); /* space to hold stack pos */
|
|
|
|
|
BUFPUSH(0);
|
|
|
|
|
stackp--;
|
|
|
|
|
break;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
2000-02-08 11:54:01 +03:00
|
|
|
|
case '>':
|
|
|
|
|
BUFPUSH(stop_backtrack);
|
|
|
|
|
/* tell stack-pos place to start_nowidth */
|
|
|
|
|
STORE_NUMBER(bufp->buffer+stackp[-1], b - bufp->buffer - stackp[-1] - 2);
|
|
|
|
|
BUFPUSH(0); /* space to hold stack pos */
|
|
|
|
|
BUFPUSH(0);
|
|
|
|
|
stackp--;
|
|
|
|
|
break;
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case ':':
|
|
|
|
|
BUFPUSH(stop_paren);
|
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
default:
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
begalt = *--stackp + bufp->buffer;
|
|
|
|
|
stackp--;
|
|
|
|
|
fixup_alt_jump = *stackp ? *stackp + bufp->buffer - 1 : 0;
|
|
|
|
|
laststart = *--stackp + bufp->buffer;
|
|
|
|
|
if (c == '!' || c == '=') laststart = b;
|
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case '|':
|
|
|
|
|
/* Insert before the previous alternative a jump which
|
|
|
|
|
jumps to this alternative if the former fails. */
|
|
|
|
|
GET_BUFFER_SPACE(3);
|
|
|
|
|
insert_jump(on_failure_jump, begalt, b + 6, b);
|
|
|
|
|
pending_exact = 0;
|
|
|
|
|
b += 3;
|
|
|
|
|
/* The alternative before this one has a jump after it
|
|
|
|
|
which gets executed if it gets matched. Adjust that
|
|
|
|
|
jump so it will jump to this alternative's analogous
|
|
|
|
|
jump (put in below, which in turn will jump to the next
|
|
|
|
|
(if any) alternative's such jump, etc.). The last such
|
|
|
|
|
jump jumps to the correct final destination. A picture:
|
|
|
|
|
_____ _____
|
|
|
|
|
| | | |
|
|
|
|
|
| v | v
|
|
|
|
|
a | b | c
|
|
|
|
|
|
|
|
|
|
If we are at `b', then fixup_alt_jump right now points to a
|
|
|
|
|
three-byte space after `a'. We'll put in the jump, set
|
|
|
|
|
fixup_alt_jump to right after `b', and leave behind three
|
|
|
|
|
bytes which we'll fill in when we get to after `c'. */
|
|
|
|
|
|
|
|
|
|
if (fixup_alt_jump)
|
|
|
|
|
store_jump(fixup_alt_jump, jump_past_alt, b);
|
|
|
|
|
|
|
|
|
|
/* Mark and leave space for a jump after this alternative,
|
|
|
|
|
to be filled in later either by next alternative or
|
|
|
|
|
when we know we're at the end of a series of alternatives. */
|
|
|
|
|
fixup_alt_jump = b;
|
|
|
|
|
GET_BUFFER_SPACE(3);
|
|
|
|
|
b += 3;
|
|
|
|
|
|
|
|
|
|
laststart = 0;
|
|
|
|
|
begalt = b;
|
|
|
|
|
break;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case '{':
|
|
|
|
|
/* If there is no previous pattern, this is an invalid pattern. */
|
2000-09-25 13:15:08 +04:00
|
|
|
|
if (!laststart) {
|
|
|
|
|
snprintf(error_msg, ERROR_MSG_MAX_SIZE,
|
|
|
|
|
"invalid regular expression; there's no previous pattern, to which '{' would define cardinality at %d",
|
|
|
|
|
p-pattern);
|
|
|
|
|
FREE_AND_RETURN(stackb, error_msg);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
2000-09-25 13:15:08 +04:00
|
|
|
|
if( p == pend)
|
|
|
|
|
FREE_AND_RETURN(stackb, "invalid regular expression; '{' can't be last character" );
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
beg_interval = p - 1;
|
|
|
|
|
|
|
|
|
|
lower_bound = -1; /* So can see if are set. */
|
|
|
|
|
upper_bound = -1;
|
|
|
|
|
GET_UNSIGNED_NUMBER(lower_bound);
|
|
|
|
|
if (c == ',') {
|
|
|
|
|
GET_UNSIGNED_NUMBER(upper_bound);
|
|
|
|
|
}
|
|
|
|
|
else
|
|
|
|
|
/* Interval such as `{1}' => match exactly once. */
|
|
|
|
|
upper_bound = lower_bound;
|
|
|
|
|
|
|
|
|
|
if (lower_bound < 0 || c != '}')
|
|
|
|
|
goto unfetch_interval;
|
|
|
|
|
|
|
|
|
|
if (lower_bound >= RE_DUP_MAX || upper_bound >= RE_DUP_MAX)
|
|
|
|
|
FREE_AND_RETURN(stackb, "too big quantifier in {,}");
|
|
|
|
|
if (upper_bound < 0) upper_bound = RE_DUP_MAX;
|
|
|
|
|
if (lower_bound > upper_bound)
|
|
|
|
|
FREE_AND_RETURN(stackb, "can't do {n,m} with n > m");
|
|
|
|
|
|
|
|
|
|
beg_interval = 0;
|
|
|
|
|
pending_exact = 0;
|
|
|
|
|
|
|
|
|
|
greedy = 1;
|
|
|
|
|
if (p != pend) {
|
|
|
|
|
PATFETCH(c);
|
|
|
|
|
if (c == '?') greedy = 0;
|
|
|
|
|
else PATUNFETCH;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
if (lower_bound == 0) {
|
|
|
|
|
zero_times_ok = 1;
|
|
|
|
|
if (upper_bound == RE_DUP_MAX) {
|
|
|
|
|
many_times_ok = 1;
|
|
|
|
|
goto repeat;
|
|
|
|
|
}
|
|
|
|
|
if (upper_bound == 1) {
|
|
|
|
|
many_times_ok = 0;
|
|
|
|
|
goto repeat;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (lower_bound == 1) {
|
|
|
|
|
if (upper_bound == 1) {
|
|
|
|
|
/* No need to repeat */
|
1998-01-16 15:19:22 +03:00
|
|
|
|
break;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
if (upper_bound == RE_DUP_MAX) {
|
|
|
|
|
many_times_ok = 1;
|
|
|
|
|
zero_times_ok = 0;
|
|
|
|
|
goto repeat;
|
|
|
|
|
}
|
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* If upper_bound is zero, don't want to succeed at all;
|
|
|
|
|
jump from laststart to b + 3, which will be the end of
|
|
|
|
|
the buffer after this jump is inserted. */
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (upper_bound == 0) {
|
|
|
|
|
GET_BUFFER_SPACE(3);
|
|
|
|
|
insert_jump(jump, laststart, b + 3, b);
|
|
|
|
|
b += 3;
|
|
|
|
|
break;
|
|
|
|
|
}
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
2000-08-24 10:21:43 +04:00
|
|
|
|
/* If lower_bound == upper_bound, repeat count can be removed */
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (lower_bound == upper_bound) {
|
|
|
|
|
int mcnt;
|
|
|
|
|
int skip_stop_paren = 0;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (b[-1] == stop_paren) {
|
|
|
|
|
skip_stop_paren = 1;
|
|
|
|
|
b--;
|
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (*laststart == exactn && laststart[1]+2 == b - laststart
|
|
|
|
|
&& laststart[1]*lower_bound < 256) {
|
|
|
|
|
mcnt = laststart[1];
|
|
|
|
|
GET_BUFFER_SPACE((lower_bound-1)*mcnt);
|
|
|
|
|
laststart[1] = lower_bound*mcnt;
|
|
|
|
|
while (--lower_bound) {
|
|
|
|
|
memcpy(b, laststart+2, mcnt);
|
|
|
|
|
b += mcnt;
|
|
|
|
|
}
|
|
|
|
|
if (skip_stop_paren) BUFPUSH(stop_paren);
|
|
|
|
|
break;
|
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (lower_bound < 5 && b - laststart < 10) {
|
|
|
|
|
/* 5 and 10 are the magic numbers */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
mcnt = b - laststart;
|
|
|
|
|
GET_BUFFER_SPACE((lower_bound-1)*mcnt);
|
|
|
|
|
while (--lower_bound) {
|
|
|
|
|
memcpy(b, laststart, mcnt);
|
|
|
|
|
b += mcnt;
|
|
|
|
|
}
|
|
|
|
|
if (skip_stop_paren) BUFPUSH(stop_paren);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
if (skip_stop_paren) b++; /* push back stop_paren */
|
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Otherwise, we have a nontrivial interval. When
|
|
|
|
|
we're all done, the pattern will look like:
|
|
|
|
|
set_number_at <jump count> <upper bound>
|
|
|
|
|
set_number_at <succeed_n count> <lower bound>
|
|
|
|
|
succeed_n <after jump addr> <succeed_n count>
|
|
|
|
|
<body of loop>
|
|
|
|
|
jump_n <succeed_n addr> <jump count>
|
|
|
|
|
(The upper bound and `jump_n' are omitted if
|
|
|
|
|
`upper_bound' is 1, though.) */
|
|
|
|
|
{ /* If the upper bound is > 1, we need to insert
|
|
|
|
|
more at the end of the loop. */
|
|
|
|
|
unsigned nbytes = upper_bound == 1 ? 10 : 20;
|
|
|
|
|
|
|
|
|
|
GET_BUFFER_SPACE(nbytes);
|
|
|
|
|
/* Initialize lower bound of the `succeed_n', even
|
|
|
|
|
though it will be set during matching by its
|
|
|
|
|
attendant `set_number_at' (inserted next),
|
|
|
|
|
because `re_compile_fastmap' needs to know.
|
|
|
|
|
Jump to the `jump_n' we might insert below. */
|
|
|
|
|
insert_jump_n(succeed_n, laststart, b + (nbytes/2),
|
|
|
|
|
b, lower_bound);
|
|
|
|
|
b += 5; /* Just increment for the succeed_n here. */
|
|
|
|
|
|
|
|
|
|
/* Code to initialize the lower bound. Insert
|
|
|
|
|
before the `succeed_n'. The `5' is the last two
|
|
|
|
|
bytes of this `set_number_at', plus 3 bytes of
|
|
|
|
|
the following `succeed_n'. */
|
|
|
|
|
insert_op_2(set_number_at, laststart, b, 5, lower_bound);
|
|
|
|
|
b += 5;
|
|
|
|
|
|
|
|
|
|
if (upper_bound > 1) {
|
|
|
|
|
/* More than one repetition is allowed, so
|
|
|
|
|
append a backward jump to the `succeed_n'
|
|
|
|
|
that starts this interval.
|
|
|
|
|
|
|
|
|
|
When we've reached this during matching,
|
|
|
|
|
we'll have matched the interval once, so
|
|
|
|
|
jump back only `upper_bound - 1' times. */
|
|
|
|
|
GET_BUFFER_SPACE(5);
|
|
|
|
|
store_jump_n(b, greedy?jump_n:finalize_push_n, laststart + 5,
|
2000-08-28 13:53:42 +04:00
|
|
|
|
upper_bound - 1);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
b += 5;
|
|
|
|
|
|
|
|
|
|
/* The location we want to set is the second
|
|
|
|
|
parameter of the `jump_n'; that is `b-2' as
|
|
|
|
|
an absolute address. `laststart' will be
|
|
|
|
|
the `set_number_at' we're about to insert;
|
|
|
|
|
`laststart+3' the number to set, the source
|
|
|
|
|
for the relative address. But we are
|
|
|
|
|
inserting into the middle of the pattern --
|
|
|
|
|
so everything is getting moved up by 5.
|
|
|
|
|
Conclusion: (b - 2) - (laststart + 3) + 5,
|
|
|
|
|
i.e., b - laststart.
|
|
|
|
|
|
|
|
|
|
We insert this at the beginning of the loop
|
|
|
|
|
so that if we fail during matching, we'll
|
|
|
|
|
reinitialize the bounds. */
|
|
|
|
|
insert_op_2(set_number_at, laststart, b, b - laststart,
|
2000-08-28 13:53:42 +04:00
|
|
|
|
upper_bound - 1);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
b += 5;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
unfetch_interval:
|
|
|
|
|
/* If an invalid interval, match the characters as literals. */
|
|
|
|
|
p = beg_interval;
|
|
|
|
|
beg_interval = 0;
|
|
|
|
|
|
|
|
|
|
/* normal_char and normal_backslash need `c'. */
|
2000-05-24 08:34:26 +04:00
|
|
|
|
PATFETCH(c);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
goto normal_char;
|
|
|
|
|
|
|
|
|
|
case '\\':
|
2000-09-25 13:15:08 +04:00
|
|
|
|
if (p == pend)
|
|
|
|
|
FREE_AND_RETURN(stackb, "invalid regular expression; '\\' can't be last character");
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Do not translate the character after the \, so that we can
|
|
|
|
|
distinguish, e.g., \B from \b, even if we normally would
|
|
|
|
|
translate, e.g., B to b. */
|
|
|
|
|
PATFETCH_RAW(c);
|
|
|
|
|
switch (c) {
|
|
|
|
|
case 's':
|
|
|
|
|
case 'S':
|
|
|
|
|
case 'd':
|
|
|
|
|
case 'D':
|
|
|
|
|
while (b - bufp->buffer + 9 + (1 << BYTEWIDTH) / BYTEWIDTH
|
|
|
|
|
> bufp->allocated)
|
|
|
|
|
EXTEND_BUFFER;
|
|
|
|
|
|
|
|
|
|
laststart = b;
|
|
|
|
|
if (c == 's' || c == 'd') {
|
|
|
|
|
BUFPUSH(charset);
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
BUFPUSH(charset_not);
|
|
|
|
|
}
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
BUFPUSH((1 << BYTEWIDTH) / BYTEWIDTH);
|
|
|
|
|
memset(b, 0, (1 << BYTEWIDTH) / BYTEWIDTH + 2);
|
|
|
|
|
if (c == 's' || c == 'S') {
|
|
|
|
|
SET_LIST_BIT(' ');
|
|
|
|
|
SET_LIST_BIT('\t');
|
|
|
|
|
SET_LIST_BIT('\n');
|
|
|
|
|
SET_LIST_BIT('\r');
|
|
|
|
|
SET_LIST_BIT('\f');
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
char cc;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
for (cc = '0'; cc <= '9'; cc++) {
|
|
|
|
|
SET_LIST_BIT(cc);
|
|
|
|
|
}
|
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
while ((int)b[-1] > 0 && b[b[-1] - 1] == 0)
|
|
|
|
|
b[-1]--;
|
|
|
|
|
if (b[-1] != (1 << BYTEWIDTH) / BYTEWIDTH)
|
2002-09-25 18:52:37 +04:00
|
|
|
|
memmove(&b[(unsigned char)b[-1]], &b[(1 << BYTEWIDTH) / BYTEWIDTH],
|
1999-08-13 09:45:20 +04:00
|
|
|
|
2 + EXTRACT_UNSIGNED(&b[(1 << BYTEWIDTH) / BYTEWIDTH])*8);
|
2002-09-25 18:52:37 +04:00
|
|
|
|
b += b[-1] + 2 + EXTRACT_UNSIGNED(&b[(unsigned char)b[-1]])*8;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case 'w':
|
|
|
|
|
laststart = b;
|
|
|
|
|
BUFPUSH(wordchar);
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case 'W':
|
|
|
|
|
laststart = b;
|
|
|
|
|
BUFPUSH(notwordchar);
|
|
|
|
|
break;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1999-11-08 06:57:01 +03:00
|
|
|
|
#ifndef RUBY
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case '<':
|
|
|
|
|
BUFPUSH(wordbeg);
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case '>':
|
|
|
|
|
BUFPUSH(wordend);
|
|
|
|
|
break;
|
1999-11-08 06:57:01 +03:00
|
|
|
|
#endif
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case 'b':
|
|
|
|
|
BUFPUSH(wordbound);
|
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case 'B':
|
|
|
|
|
BUFPUSH(notwordbound);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 'A':
|
|
|
|
|
BUFPUSH(begbuf);
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case 'Z':
|
2000-05-24 08:34:26 +04:00
|
|
|
|
if ((bufp->options & RE_OPTION_SINGLELINE) == 0) {
|
2000-05-22 11:09:55 +04:00
|
|
|
|
BUFPUSH(endbuf2);
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
/* fall through */
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case 'z':
|
|
|
|
|
BUFPUSH(endbuf);
|
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
2000-02-08 11:54:01 +03:00
|
|
|
|
case 'G':
|
|
|
|
|
BUFPUSH(begpos);
|
|
|
|
|
break;
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* hex */
|
|
|
|
|
case 'x':
|
|
|
|
|
had_mbchar = 0;
|
|
|
|
|
c = scan_hex(p, 2, &numlen);
|
2002-06-14 10:27:18 +04:00
|
|
|
|
if (numlen == 0) goto invalid_escape;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
p += numlen;
|
1999-09-01 13:48:03 +04:00
|
|
|
|
had_num_literal = 1;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
goto numeric_char;
|
|
|
|
|
|
|
|
|
|
/* octal */
|
|
|
|
|
case '0':
|
|
|
|
|
had_mbchar = 0;
|
2002-03-12 12:28:50 +03:00
|
|
|
|
c = scan_oct(p, 2, &numlen);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
p += numlen;
|
1999-09-01 13:48:03 +04:00
|
|
|
|
had_num_literal = 1;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
goto numeric_char;
|
|
|
|
|
|
|
|
|
|
/* back-ref or octal */
|
|
|
|
|
case '1': case '2': case '3':
|
|
|
|
|
case '4': case '5': case '6':
|
|
|
|
|
case '7': case '8': case '9':
|
2001-05-30 13:12:34 +04:00
|
|
|
|
PATUNFETCH;
|
|
|
|
|
p0 = p;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
2001-05-30 13:12:34 +04:00
|
|
|
|
had_mbchar = 0;
|
|
|
|
|
c1 = 0;
|
|
|
|
|
GET_UNSIGNED_NUMBER(c1);
|
|
|
|
|
if (!ISDIGIT(c)) PATUNFETCH;
|
|
|
|
|
|
|
|
|
|
if (9 < c1 && c1 >= regnum) {
|
|
|
|
|
/* need to get octal */
|
|
|
|
|
c = scan_oct(p0, 3, &numlen) & 0xff;
|
|
|
|
|
p = p0 + numlen;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
c1 = 0;
|
2001-05-30 13:12:34 +04:00
|
|
|
|
had_num_literal = 1;
|
|
|
|
|
goto numeric_char;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
laststart = b;
|
|
|
|
|
BUFPUSH(duplicate);
|
|
|
|
|
BUFPUSH(c1);
|
|
|
|
|
break;
|
|
|
|
|
|
2000-03-08 09:25:19 +03:00
|
|
|
|
case 'M':
|
|
|
|
|
case 'C':
|
|
|
|
|
case 'c':
|
|
|
|
|
p0 = --p;
|
|
|
|
|
c = read_special(p, pend, &p0);
|
|
|
|
|
if (c > 255) goto invalid_escape;
|
|
|
|
|
p = p0;
|
|
|
|
|
had_num_literal = 1;
|
|
|
|
|
goto numeric_char;
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
default:
|
|
|
|
|
c = read_backslash(c);
|
|
|
|
|
goto normal_char;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case '#':
|
|
|
|
|
if (options & RE_OPTION_EXTENDED) {
|
|
|
|
|
while (p != pend) {
|
|
|
|
|
PATFETCH(c);
|
|
|
|
|
if (c == '\n') break;
|
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
goto normal_char;
|
|
|
|
|
|
|
|
|
|
case ' ':
|
|
|
|
|
case '\t':
|
|
|
|
|
case '\f':
|
|
|
|
|
case '\r':
|
|
|
|
|
case '\n':
|
|
|
|
|
if (options & RE_OPTION_EXTENDED)
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
default:
|
2003-04-09 19:45:28 +04:00
|
|
|
|
if (c == ']')
|
|
|
|
|
re_warning("regexp has `]' without escape");
|
1999-08-13 09:45:20 +04:00
|
|
|
|
normal_char: /* Expects the character in `c'. */
|
|
|
|
|
had_mbchar = 0;
|
|
|
|
|
if (ismbchar(c)) {
|
|
|
|
|
had_mbchar = 1;
|
|
|
|
|
c1 = p - pattern;
|
|
|
|
|
}
|
|
|
|
|
numeric_char:
|
|
|
|
|
nextp = p + mbclen(c) - 1;
|
|
|
|
|
if (!pending_exact || pending_exact + *pending_exact + 1 != b
|
|
|
|
|
|| *pending_exact >= (c1 ? 0176 : 0177)
|
|
|
|
|
|| *nextp == '+' || *nextp == '?'
|
|
|
|
|
|| *nextp == '*' || *nextp == '^'
|
|
|
|
|
|| *nextp == '{') {
|
|
|
|
|
laststart = b;
|
|
|
|
|
BUFPUSH(exactn);
|
|
|
|
|
pending_exact = b;
|
|
|
|
|
BUFPUSH(0);
|
|
|
|
|
}
|
1999-09-18 08:48:51 +04:00
|
|
|
|
if (had_num_literal || c == 0xff) {
|
1999-08-13 09:45:20 +04:00
|
|
|
|
BUFPUSH(0xff);
|
|
|
|
|
(*pending_exact)++;
|
1999-09-01 13:48:03 +04:00
|
|
|
|
had_num_literal = 0;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
BUFPUSH(c);
|
|
|
|
|
(*pending_exact)++;
|
|
|
|
|
if (had_mbchar) {
|
|
|
|
|
int len = mbclen(c) - 1;
|
|
|
|
|
while (len--) {
|
|
|
|
|
PATFETCH_RAW(c);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
BUFPUSH(c);
|
|
|
|
|
(*pending_exact)++;
|
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
|
if (fixup_alt_jump)
|
|
|
|
|
store_jump(fixup_alt_jump, jump, b);
|
|
|
|
|
|
|
|
|
|
if (stackp != stackb)
|
|
|
|
|
FREE_AND_RETURN(stackb, "unmatched (");
|
|
|
|
|
|
|
|
|
|
/* set optimize flags */
|
|
|
|
|
laststart = bufp->buffer;
|
|
|
|
|
if (laststart != b) {
|
|
|
|
|
if (*laststart == dummy_failure_jump) laststart += 3;
|
|
|
|
|
else if (*laststart == try_next) laststart += 3;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (*laststart == anychar_repeat) {
|
|
|
|
|
bufp->options |= RE_OPTIMIZE_ANCHOR;
|
|
|
|
|
}
|
1999-01-20 07:59:39 +03:00
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
bufp->used = b - bufp->buffer;
|
|
|
|
|
bufp->re_nsub = regnum;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
laststart = bufp->buffer;
|
|
|
|
|
if (laststart != b) {
|
|
|
|
|
if (*laststart == start_memory) laststart += 3;
|
|
|
|
|
if (*laststart == exactn) {
|
|
|
|
|
bufp->options |= RE_OPTIMIZE_EXACTN;
|
|
|
|
|
bufp->must = laststart+1;
|
|
|
|
|
}
|
|
|
|
|
}
|
2000-10-02 11:48:42 +04:00
|
|
|
|
if (!bufp->must) {
|
1999-01-20 07:59:39 +03:00
|
|
|
|
bufp->must = calculate_must_string(bufp->buffer, b);
|
|
|
|
|
}
|
|
|
|
|
if (current_mbctype == MBCTYPE_SJIS) bufp->options |= RE_OPTIMIZE_NO_BM;
|
|
|
|
|
else if (bufp->must) {
|
|
|
|
|
int i;
|
|
|
|
|
int len = (unsigned char)bufp->must[0];
|
|
|
|
|
|
|
|
|
|
for (i=1; i<len; i++) {
|
|
|
|
|
if ((unsigned char)bufp->must[i] == 0xff ||
|
|
|
|
|
(current_mbctype && ismbchar(bufp->must[i]))) {
|
|
|
|
|
bufp->options |= RE_OPTIMIZE_NO_BM;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
if (!(bufp->options & RE_OPTIMIZE_NO_BM)) {
|
|
|
|
|
bufp->must_skip = (int *) xmalloc((1 << BYTEWIDTH)*sizeof(int));
|
1999-12-06 12:04:03 +03:00
|
|
|
|
bm_init_skip(bufp->must_skip, (unsigned char*)bufp->must+1,
|
1999-01-20 07:59:39 +03:00
|
|
|
|
(unsigned char)bufp->must[0],
|
1999-12-06 12:04:03 +03:00
|
|
|
|
(unsigned char*)(MAY_TRANSLATE()?translate:0));
|
1999-01-20 07:59:39 +03:00
|
|
|
|
}
|
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
bufp->regstart = TMALLOC(regnum, unsigned char*);
|
|
|
|
|
bufp->regend = TMALLOC(regnum, unsigned char*);
|
|
|
|
|
bufp->old_regstart = TMALLOC(regnum, unsigned char*);
|
|
|
|
|
bufp->old_regend = TMALLOC(regnum, unsigned char*);
|
|
|
|
|
bufp->reg_info = TMALLOC(regnum, register_info_type);
|
|
|
|
|
bufp->best_regstart = TMALLOC(regnum, unsigned char*);
|
|
|
|
|
bufp->best_regend = TMALLOC(regnum, unsigned char*);
|
1999-01-20 07:59:39 +03:00
|
|
|
|
FREE_AND_RETURN(stackb, 0);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
invalid_pattern:
|
1998-01-16 15:19:22 +03:00
|
|
|
|
FREE_AND_RETURN(stackb, "invalid regular expression");
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
end_of_pattern:
|
1998-01-16 15:19:22 +03:00
|
|
|
|
FREE_AND_RETURN(stackb, "premature end of regular expression");
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
too_big:
|
1998-01-16 15:19:22 +03:00
|
|
|
|
FREE_AND_RETURN(stackb, "regular expression too big");
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
memory_exhausted:
|
1998-01-16 15:19:22 +03:00
|
|
|
|
FREE_AND_RETURN(stackb, "memory exhausted");
|
|
|
|
|
|
|
|
|
|
nested_meta:
|
|
|
|
|
FREE_AND_RETURN(stackb, "nested *?+ in regexp");
|
2000-03-08 09:25:19 +03:00
|
|
|
|
|
|
|
|
|
invalid_escape:
|
|
|
|
|
FREE_AND_RETURN(stackb, "Invalid escape character syntax");
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
|
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
|
void
|
|
|
|
|
re_free_pattern(bufp)
|
1999-08-13 09:45:20 +04:00
|
|
|
|
struct re_pattern_buffer *bufp;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
{
|
2000-05-16 06:46:57 +04:00
|
|
|
|
xfree(bufp->buffer);
|
|
|
|
|
xfree(bufp->fastmap);
|
|
|
|
|
if (bufp->must_skip) xfree(bufp->must_skip);
|
|
|
|
|
|
|
|
|
|
xfree(bufp->regstart);
|
|
|
|
|
xfree(bufp->regend);
|
|
|
|
|
xfree(bufp->old_regstart);
|
|
|
|
|
xfree(bufp->old_regend);
|
|
|
|
|
xfree(bufp->best_regstart);
|
|
|
|
|
xfree(bufp->best_regend);
|
|
|
|
|
xfree(bufp->reg_info);
|
|
|
|
|
xfree(bufp);
|
1999-01-20 07:59:39 +03:00
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
/* Store a jump of the form <OPCODE> <relative address>.

   Writes OPCODE at FROM[0] and, starting at FROM + 1, the displacement
   TO - (FROM + 3) encoded by STORE_NUMBER.  The displacement is thus
   relative to the address just past the jump instruction (insert_jump
   opens exactly three bytes for it).  */
static void
store_jump(from, opcode, to)
    char *from, *to;
    int opcode;
{
  from[0] = (char)opcode;
  STORE_NUMBER(from + 1, to - (from + 3));
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Open up three bytes of space before the byte at FROM and store there
   a jump (opcode OP) to TO.

   CURRENT_END points just past the last byte currently in use, so the
   bytes in [FROM, CURRENT_END) are moved up by three.  No bounds check
   is done here: the caller must guarantee that the buffer has room for
   three more bytes.

   If you call this function, you must zero out pending_exact.  */
static void
insert_jump(op, from, to, current_end)
    int op;
    char *from, *to, *current_end;
{
  /* Source and destination overlap, hence memmove rather than memcpy. */
  memmove(from + 3, from, (size_t)(current_end - from));
  store_jump(from, op, to);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Store a jump of the form <opcode> <relative address> <n>.

   Writes OPCODE at FROM[0], the displacement TO - (FROM + 3) at
   FROM + 1, and the number N at FROM + 3, the latter two encoded by
   STORE_NUMBER.  N is a number the jump uses, say, to decide how many
   times to jump.

   If you call this function, you must zero out pending_exact.  */
static void
store_jump_n(from, opcode, to, n)
    char *from, *to;
    int opcode;
    unsigned n;
{
  from[0] = (char)opcode;
  STORE_NUMBER(from + 1, to - (from + 3));
  STORE_NUMBER(from + 3, n);
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Similar to insert_jump, but for a jump that carries an extra number
   N (used for minimum and maximum repeat counts).

   Opens up five bytes of space at FROM and stores there a jump
   (opcode OP) to TO followed by N.  CURRENT_END points just past the
   last byte currently in use, so the bytes in [FROM, CURRENT_END) are
   moved up by five.  The caller must guarantee that the buffer has
   room for five more bytes.

   If you call this function, you must zero out pending_exact.  */
static void
insert_jump_n(op, from, to, current_end, n)
    int op;
    char *from, *to, *current_end;
    unsigned n;
{
  /* Source and destination overlap, hence memmove rather than memcpy. */
  memmove(from + 5, from, (size_t)(current_end - from));
  store_jump_n(from, op, to, n);
}
|
|
|
|
|
|
|
|
|
|
|
1998-01-16 15:19:22 +03:00
|
|
|
|
/* Open up one byte of space at location THERE and store operation OP
   in it.

   CURRENT_END points just past the last byte currently in use, so the
   bytes in [THERE, CURRENT_END) are moved up by one.  The caller must
   guarantee that the buffer has room for one more byte.

   If you call this function, you must zero out pending_exact.  */
static void
insert_op(op, there, current_end)
    int op;
    char *there, *current_end;
{
  /* Source and destination overlap, hence memmove rather than memcpy. */
  memmove(there + 1, there, (size_t)(current_end - there));
  there[0] = (char)op;
}
|
|
|
|
|
|
|
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
|
/* Open up five bytes of space at location THERE and store operation OP
   followed by NUM_1 and NUM_2 (each encoded by STORE_NUMBER, at
   THERE + 1 and THERE + 3 respectively).

   CURRENT_END points just past the last byte currently in use, so the
   bytes in [THERE, CURRENT_END) are moved up by five.  The caller must
   guarantee that the buffer has room for five more bytes.

   If you call this function, you must zero out pending_exact.  */
static void
insert_op_2(op, there, current_end, num_1, num_2)
    int op;
    char *there, *current_end;
    int num_1, num_2;
{
  /* Source and destination overlap, hence memmove rather than memcpy. */
  memmove(there + 5, there, (size_t)(current_end - there));

  there[0] = (char)op;
  STORE_NUMBER(there + 1, num_1);
  STORE_NUMBER(there + 3, num_2);
}
|
|
|
|
|
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
|
|
|
|
/* Compare two bytes, through the TRANSLATE table when one is given.
   Arguments may be evaluated more than once in the expansion text, so
   do not pass expressions with side effects.  */
#define trans_eq(c1, c2, translate) \
  ((translate) ? ((translate)[c1] == (translate)[c2]) : ((c1) == (c2)))

/* Return 1 if the literal [LITTLE, LEND) matches the text starting at
   BIG (which ends at BEND), 0 otherwise.

   In LITTLE a 0xff byte is an escape: it is skipped and the byte that
   follows it is the one compared.  When TRANSLATE is non-null both the
   text byte and the literal byte are mapped through it before being
   compared.  An empty literal matches.

   A mismatch always yields 0, including a mismatch on the very last
   byte of the literal, and a 0xff escape with no byte after it is
   treated as a non-match instead of reading past LEND.  */
static int
slow_match(little, lend, big, bend, translate)
    unsigned char *little, *lend;
    unsigned char *big, *bend;
    unsigned char *translate;
{
  int c;
  int b;

  while (little < lend && big < bend) {
    c = *little++;
    if (c == 0xff) {
      /* Escape with nothing after it: malformed literal, no match. */
      if (little == lend) return 0;
      c = *little++;
    }
    /* Fetch the text byte first so the macro sees no side effects. */
    b = *big++;
    if (!trans_eq(b, c, translate)) return 0;
  }
  /* Either the whole literal was consumed (match) or the text ran out
     first (no match).  */
  return little == lend;
}
|
|
|
|
|
|
|
|
|
|
/* Search the text BIG (BLEN bytes) for the literal LITTLE (LLEN bytes)
   and return the offset of the first position accepted by slow_match,
   or -1 if there is none.

   LITTLE uses the same encoding slow_match expects: a 0xff byte
   escapes the byte that follows it.  When TRANSLATE is non-null and
   the first literal byte is not the start of a multi-byte character,
   candidate positions are found by comparing translated bytes.
   Trailing bytes of multi-byte characters in BIG are stepped over so
   they are never taken as candidate positions (except when the first
   literal byte is escaped, where every byte is a candidate).  */
static int
slow_search(little, llen, big, blen, translate)
    unsigned char *little;
    int llen;
    unsigned char *big;
    int blen;
    char *translate;
{
  unsigned char *bsave = big;
  unsigned char *bend = big + blen;
  register int c;
  int fescape = 0;

  /* Work out the first byte to scan for.  */
  c = *little;
  if (c == 0xff) {
    c = little[1];
    fescape = 1;
  }
  else if (translate && !ismbchar(c)) {
    c = translate[c];
  }

  while (big < bend) {
    /* look for first character */
    if (fescape) {
      while (big < bend) {
	if (*big == c) break;
	big++;
      }
    }
    else if (translate && !ismbchar(c)) {
      while (big < bend) {
	if (ismbchar(*big)) big += mbclen(*big) - 1;
	else if (translate[*big] == c) break;
	big++;
      }
    }
    else {
      while (big < bend) {
	if (*big == c) break;
	if (ismbchar(*big)) big += mbclen(*big) - 1;
	big++;
      }
    }

    /* slow_match takes an unsigned table; make the conversion explicit. */
    if (slow_match(little, little + llen, big, bend,
		   (unsigned char *)translate))
      return big - bsave;

    /* The scan above may have run to (or past) the end of the text;
       *big must not be read in that case.  The outer loop would stop
       anyway, so leaving here does not change the result.  */
    if (big >= bend) break;

    big += mbclen(*big);
  }
  return -1;
}
|
|
|
|
|
|
|
|
|
|
/* Fill the 256-entry table SKIP with bad-character shift distances for
   the M-byte pattern PAT, for use by bm_search.

   Every entry starts out as M.  Then, for each pattern byte except the
   last one, the entry for that byte (mapped through TRANSLATE when it
   is non-null) is set to its distance from the last pattern byte;
   later occurrences overwrite earlier ones.  */
static void
bm_init_skip(skip, pat, m, translate)
    int *skip;
    unsigned char *pat;
    int m;
    const unsigned char *translate;
{
  int j, c;

  for (c = 0; c < 256; c++) {
    skip[c] = m;
  }
  for (j = 0; j < m - 1; j++) {
    c = translate ? translate[pat[j]] : pat[j];
    skip[c] = m - 1 - j;
  }
}
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
|
/* Search the text BIG (BLEN bytes) for the pattern LITTLE (LLEN bytes)
   using the shift table SKIP built by bm_init_skip.  Returns the
   offset of the first match, or -1 if there is none.

   The pattern is compared right to left at each alignment; on failure
   the alignment advances by the SKIP entry of the text byte under the
   pattern's last position.  When TRANSLATE is non-null, text and
   pattern bytes are mapped through it both for the comparison and for
   the SKIP lookup, so SKIP must have been built with the same table.  */
static int
bm_search(little, llen, big, blen, skip, translate)
    unsigned char *little;
    int llen;
    unsigned char *big;
    int blen;
    int *skip;
    unsigned char *translate;
{
  int i, j, k;
  int tc, pc;

  i = llen - 1;			/* text index under the pattern's last byte */
  while (i < blen) {
    k = i;
    j = llen - 1;
    while (j >= 0) {
      tc = big[k];
      pc = little[j];
      if (translate) {
	tc = translate[tc];
	pc = translate[pc];
      }
      if (tc != pc) break;
      k--;
      j--;
    }
    /* The whole pattern was consumed: K is one before the match.  */
    if (j < 0) return k + 1;

    i += skip[translate ? translate[big[i]] : big[i]];
  }
  return -1;
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
/* Given a pattern, compute a fastmap from it. The fastmap records
|
|
|
|
|
which of the (1 << BYTEWIDTH) possible characters can start a string
|
|
|
|
|
that matches the pattern. This fastmap is used by re_search to skip
|
|
|
|
|
quickly over totally implausible text.
|
|
|
|
|
|
|
|
|
|
The caller must supply the address of a (1 << BYTEWIDTH)-byte data
|
|
|
|
|
area as bufp->fastmap.
|
|
|
|
|
The other components of bufp describe the pattern to be used. */
|
|
|
|
|
void
|
|
|
|
|
re_compile_fastmap(bufp)
|
|
|
|
|
struct re_pattern_buffer *bufp;
|
|
|
|
|
{
|
1999-01-20 07:59:39 +03:00
|
|
|
|
unsigned char *pattern = (unsigned char*)bufp->buffer;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
int size = bufp->used;
|
|
|
|
|
register char *fastmap = bufp->fastmap;
|
|
|
|
|
register unsigned char *p = pattern;
|
|
|
|
|
register unsigned char *pend = pattern + size;
|
|
|
|
|
register int j, k;
|
|
|
|
|
unsigned is_a_succeed_n;
|
|
|
|
|
|
2000-09-25 21:51:29 +04:00
|
|
|
|
|
|
|
|
|
unsigned char *stacka[NFAILURES];
|
|
|
|
|
unsigned char **stackb = stacka;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
unsigned char **stackp = stackb;
|
|
|
|
|
unsigned char **stacke = stackb + NFAILURES;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
int options = bufp->options;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
memset(fastmap, 0, (1 << BYTEWIDTH));
|
|
|
|
|
bufp->fastmap_accurate = 1;
|
|
|
|
|
bufp->can_be_null = 0;
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
while (p) {
|
|
|
|
|
is_a_succeed_n = 0;
|
|
|
|
|
if (p == pend) {
|
|
|
|
|
bufp->can_be_null = 1;
|
|
|
|
|
break;
|
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
#ifdef SWITCH_ENUM_BUG
|
1999-08-13 09:45:20 +04:00
|
|
|
|
switch ((int)((enum regexpcode)*p++))
|
1998-01-16 15:13:05 +03:00
|
|
|
|
#else
|
1999-08-13 09:45:20 +04:00
|
|
|
|
switch ((enum regexpcode)*p++)
|
1998-01-16 15:13:05 +03:00
|
|
|
|
#endif
|
1999-08-13 09:45:20 +04:00
|
|
|
|
{
|
|
|
|
|
case exactn:
|
|
|
|
|
if (p[1] == 0xff) {
|
1999-01-20 07:59:39 +03:00
|
|
|
|
if (TRANSLATE_P())
|
1999-08-13 09:45:20 +04:00
|
|
|
|
fastmap[translate[p[2]]] = 2;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
else
|
1999-08-13 09:45:20 +04:00
|
|
|
|
fastmap[p[2]] = 2;
|
1999-12-06 12:04:03 +03:00
|
|
|
|
bufp->options |= RE_OPTIMIZE_BMATCH;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
else if (TRANSLATE_P())
|
|
|
|
|
fastmap[translate[p[1]]] = 1;
|
|
|
|
|
else
|
|
|
|
|
fastmap[p[1]] = 1;
|
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case begline:
|
|
|
|
|
case begbuf:
|
2002-02-16 03:58:51 +03:00
|
|
|
|
case begpos:
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case endbuf:
|
|
|
|
|
case endbuf2:
|
|
|
|
|
case wordbound:
|
|
|
|
|
case notwordbound:
|
|
|
|
|
case wordbeg:
|
|
|
|
|
case wordend:
|
|
|
|
|
case pop_and_fail:
|
|
|
|
|
case push_dummy_failure:
|
|
|
|
|
case start_paren:
|
|
|
|
|
case stop_paren:
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
case casefold_on:
|
|
|
|
|
bufp->options |= RE_MAY_IGNORECASE;
|
2003-06-16 08:49:25 +04:00
|
|
|
|
options |= RE_OPTION_IGNORECASE;
|
|
|
|
|
continue;
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case casefold_off:
|
2003-06-16 08:49:25 +04:00
|
|
|
|
options &= ~RE_OPTION_IGNORECASE;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
continue;
|
|
|
|
|
|
2000-05-24 08:34:26 +04:00
|
|
|
|
case option_set:
|
|
|
|
|
options = *p++;
|
2000-05-22 11:09:55 +04:00
|
|
|
|
continue;
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case endline:
|
|
|
|
|
if (TRANSLATE_P())
|
|
|
|
|
fastmap[translate['\n']] = 1;
|
|
|
|
|
else
|
|
|
|
|
fastmap['\n'] = 1;
|
2000-05-24 08:34:26 +04:00
|
|
|
|
if ((options & RE_OPTION_SINGLELINE) == 0 && bufp->can_be_null == 0)
|
1999-08-13 09:45:20 +04:00
|
|
|
|
bufp->can_be_null = 2;
|
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case jump_n:
|
|
|
|
|
case finalize_jump:
|
|
|
|
|
case maybe_finalize_jump:
|
|
|
|
|
case jump:
|
|
|
|
|
case jump_past_alt:
|
|
|
|
|
case dummy_failure_jump:
|
|
|
|
|
case finalize_push:
|
|
|
|
|
case finalize_push_n:
|
|
|
|
|
EXTRACT_NUMBER_AND_INCR(j, p);
|
|
|
|
|
p += j;
|
|
|
|
|
if (j > 0)
|
1998-01-16 15:19:22 +03:00
|
|
|
|
continue;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Jump backward reached implies we just went through
|
|
|
|
|
the body of a loop and matched nothing.
|
|
|
|
|
Opcode jumped to should be an on_failure_jump.
|
|
|
|
|
Just treat it like an ordinary jump.
|
|
|
|
|
For a * loop, it has pushed its failure point already;
|
|
|
|
|
If so, discard that as redundant. */
|
|
|
|
|
|
|
|
|
|
if ((enum regexpcode)*p != on_failure_jump
|
|
|
|
|
&& (enum regexpcode)*p != try_next
|
|
|
|
|
&& (enum regexpcode)*p != succeed_n)
|
|
|
|
|
continue;
|
|
|
|
|
p++;
|
|
|
|
|
EXTRACT_NUMBER_AND_INCR(j, p);
|
|
|
|
|
p += j;
|
|
|
|
|
if (stackp != stackb && *stackp == p)
|
|
|
|
|
stackp--; /* pop */
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
case try_next:
|
|
|
|
|
case start_nowidth:
|
|
|
|
|
case stop_nowidth:
|
2000-02-08 11:54:01 +03:00
|
|
|
|
case stop_backtrack:
|
1999-08-13 09:45:20 +04:00
|
|
|
|
p += 2;
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
case succeed_n:
|
|
|
|
|
is_a_succeed_n = 1;
|
|
|
|
|
/* Get to the number of times to succeed. */
|
|
|
|
|
EXTRACT_NUMBER(k, p + 2);
|
|
|
|
|
/* Increment p past the n for when k != 0. */
|
|
|
|
|
if (k != 0) {
|
1998-01-16 15:19:22 +03:00
|
|
|
|
p += 4;
|
|
|
|
|
continue;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
/* fall through */
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case on_failure_jump:
|
|
|
|
|
EXTRACT_NUMBER_AND_INCR(j, p);
|
|
|
|
|
if (p + j < pend) {
|
|
|
|
|
if (stackp == stacke) {
|
2000-09-26 11:07:13 +04:00
|
|
|
|
EXPAND_FAIL_STACK();
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
*++stackp = p + j; /* push */
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
bufp->can_be_null = 1;
|
|
|
|
|
}
|
|
|
|
|
if (is_a_succeed_n)
|
|
|
|
|
EXTRACT_NUMBER_AND_INCR(k, p); /* Skip the n. */
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
case set_number_at:
|
|
|
|
|
p += 4;
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
case start_memory:
|
|
|
|
|
case stop_memory:
|
|
|
|
|
p += 2;
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
case duplicate:
|
|
|
|
|
bufp->can_be_null = 1;
|
2001-08-20 08:29:58 +04:00
|
|
|
|
if (*p >= bufp->re_nsub) break;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
fastmap['\n'] = 1;
|
|
|
|
|
case anychar_repeat:
|
|
|
|
|
case anychar:
|
|
|
|
|
for (j = 0; j < (1 << BYTEWIDTH); j++) {
|
2000-05-24 08:34:26 +04:00
|
|
|
|
if (j != '\n' || (options & RE_OPTION_MULTILINE))
|
1999-08-13 09:45:20 +04:00
|
|
|
|
fastmap[j] = 1;
|
|
|
|
|
}
|
|
|
|
|
if (bufp->can_be_null) {
|
|
|
|
|
FREE_AND_RETURN_VOID(stackb);
|
|
|
|
|
}
|
|
|
|
|
/* Don't return; check the alternative paths
|
|
|
|
|
so we can set can_be_null if appropriate. */
|
|
|
|
|
if ((enum regexpcode)p[-1] == anychar_repeat) {
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case wordchar:
|
|
|
|
|
for (j = 0; j < 0x80; j++) {
|
|
|
|
|
if (SYNTAX(j) == Sword)
|
|
|
|
|
fastmap[j] = 1;
|
|
|
|
|
}
|
|
|
|
|
switch (current_mbctype) {
|
|
|
|
|
case MBCTYPE_ASCII:
|
|
|
|
|
for (j = 0x80; j < (1 << BYTEWIDTH); j++) {
|
|
|
|
|
if (SYNTAX(j) == Sword2)
|
|
|
|
|
fastmap[j] = 1;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
break;
|
|
|
|
|
case MBCTYPE_EUC:
|
|
|
|
|
case MBCTYPE_SJIS:
|
|
|
|
|
case MBCTYPE_UTF8:
|
|
|
|
|
for (j = 0x80; j < (1 << BYTEWIDTH); j++) {
|
|
|
|
|
if (re_mbctab[j])
|
1998-01-16 15:13:05 +03:00
|
|
|
|
fastmap[j] = 1;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
break;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case notwordchar:
|
|
|
|
|
for (j = 0; j < 0x80; j++)
|
|
|
|
|
if (SYNTAX(j) != Sword)
|
|
|
|
|
fastmap[j] = 1;
|
|
|
|
|
switch (current_mbctype) {
|
|
|
|
|
case MBCTYPE_ASCII:
|
|
|
|
|
for (j = 0x80; j < (1 << BYTEWIDTH); j++) {
|
|
|
|
|
if (SYNTAX(j) != Sword2)
|
1998-01-16 15:13:05 +03:00
|
|
|
|
fastmap[j] = 1;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
break;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case MBCTYPE_EUC:
|
|
|
|
|
case MBCTYPE_SJIS:
|
|
|
|
|
case MBCTYPE_UTF8:
|
|
|
|
|
for (j = 0x80; j < (1 << BYTEWIDTH); j++) {
|
|
|
|
|
if (!re_mbctab[j])
|
1998-01-16 15:13:05 +03:00
|
|
|
|
fastmap[j] = 1;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
break;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case charset:
|
|
|
|
|
/* NOTE: Charset for single-byte chars never contain
|
|
|
|
|
multi-byte char. See set_list_bits(). */
|
|
|
|
|
for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
|
|
|
|
|
if (p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH))) {
|
|
|
|
|
int tmp = TRANSLATE_P()?translate[j]:j;
|
1999-09-01 13:48:03 +04:00
|
|
|
|
fastmap[tmp] = 1;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
{
|
|
|
|
|
unsigned short size;
|
|
|
|
|
unsigned long c, beg, end;
|
|
|
|
|
|
|
|
|
|
p += p[-1] + 2;
|
|
|
|
|
size = EXTRACT_UNSIGNED(&p[-2]);
|
|
|
|
|
for (j = 0; j < (int)size; j++) {
|
|
|
|
|
c = EXTRACT_MBC(&p[j*8]);
|
|
|
|
|
beg = WC2MBC1ST(c);
|
|
|
|
|
c = EXTRACT_MBC(&p[j*8+4]);
|
|
|
|
|
end = WC2MBC1ST(c);
|
|
|
|
|
/* set bits for 1st bytes of multi-byte chars. */
|
|
|
|
|
while (beg <= end) {
|
|
|
|
|
/* NOTE: Charset for multi-byte chars might contain
|
|
|
|
|
single-byte chars. We must reject them. */
|
1999-12-06 12:04:03 +03:00
|
|
|
|
if (c < 0x100) {
|
1999-09-01 13:48:03 +04:00
|
|
|
|
fastmap[beg] = 2;
|
1999-12-06 12:04:03 +03:00
|
|
|
|
bufp->options |= RE_OPTIMIZE_BMATCH;
|
|
|
|
|
}
|
1999-09-01 13:48:03 +04:00
|
|
|
|
else if (ismbchar(beg))
|
1999-08-13 09:45:20 +04:00
|
|
|
|
fastmap[beg] = 1;
|
|
|
|
|
beg++;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case charset_not:
|
|
|
|
|
/* S: set of all single-byte chars.
|
|
|
|
|
M: set of all first bytes that can start multi-byte chars.
|
|
|
|
|
s: any set of single-byte chars.
|
|
|
|
|
m: any set of first bytes that can start multi-byte chars.
|
|
|
|
|
|
|
|
|
|
We assume S+M = U.
|
|
|
|
|
___ _ _
|
|
|
|
|
s+m = (S*s+M*m). */
|
|
|
|
|
/* Chars beyond end of map must be allowed */
|
|
|
|
|
/* NOTE: Charset_not for single-byte chars might contain
|
|
|
|
|
multi-byte chars. See set_list_bits(). */
|
|
|
|
|
for (j = *p * BYTEWIDTH; j < (1 << BYTEWIDTH); j++)
|
|
|
|
|
if (!ismbchar(j))
|
|
|
|
|
fastmap[j] = 1;
|
|
|
|
|
|
|
|
|
|
for (j = *p++ * BYTEWIDTH - 1; j >= 0; j--)
|
|
|
|
|
if (!(p[j / BYTEWIDTH] & (1 << (j % BYTEWIDTH)))) {
|
1998-01-16 15:13:05 +03:00
|
|
|
|
if (!ismbchar(j))
|
|
|
|
|
fastmap[j] = 1;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
{
|
|
|
|
|
unsigned short size;
|
|
|
|
|
unsigned long c, beg;
|
1999-09-01 13:48:03 +04:00
|
|
|
|
int num_literal = 0;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
|
|
|
|
|
p += p[-1] + 2;
|
|
|
|
|
size = EXTRACT_UNSIGNED(&p[-2]);
|
|
|
|
|
if (size == 0) {
|
1999-01-20 07:59:39 +03:00
|
|
|
|
for (j = 0x80; j < (1 << BYTEWIDTH); j++)
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (ismbchar(j))
|
|
|
|
|
fastmap[j] = 1;
|
|
|
|
|
break;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
}
|
1999-09-01 13:48:03 +04:00
|
|
|
|
for (j = 0,c = 0;j < (int)size; j++) {
|
1999-10-04 08:51:08 +04:00
|
|
|
|
unsigned int cc = EXTRACT_MBC(&p[j*8]);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
beg = WC2MBC1ST(cc);
|
2000-05-09 08:53:16 +04:00
|
|
|
|
while (c <= beg) {
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (ismbchar(c))
|
|
|
|
|
fastmap[c] = 1;
|
|
|
|
|
c++;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
cc = EXTRACT_MBC(&p[j*8+4]);
|
1999-09-01 13:48:03 +04:00
|
|
|
|
if (cc < 0xff) {
|
|
|
|
|
num_literal = 1;
|
2000-05-09 08:53:16 +04:00
|
|
|
|
while (c <= cc) {
|
1999-09-01 13:48:03 +04:00
|
|
|
|
if (ismbchar(c))
|
|
|
|
|
fastmap[c] = 1;
|
|
|
|
|
c++;
|
|
|
|
|
}
|
|
|
|
|
}
|
2000-05-09 08:53:16 +04:00
|
|
|
|
c = WC2MBC1ST(cc);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
|
|
|
|
|
2000-03-09 12:04:36 +03:00
|
|
|
|
for (j = c; j < (1 << BYTEWIDTH); j++) {
|
1999-09-01 13:48:03 +04:00
|
|
|
|
if (num_literal)
|
|
|
|
|
fastmap[j] = 1;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (ismbchar(j))
|
|
|
|
|
fastmap[j] = 1;
|
2000-03-09 12:04:36 +03:00
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case unused: /* pacify gcc -Wall */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
break;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Get here means we have successfully found the possible starting
|
|
|
|
|
characters of one path of the pattern. We need not follow this
|
|
|
|
|
path any farther. Instead, look at the next alternative
|
|
|
|
|
remembered in the stack. */
|
|
|
|
|
if (stackp != stackb)
|
|
|
|
|
p = *stackp--; /* pop */
|
|
|
|
|
else
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
FREE_AND_RETURN_VOID(stackb);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
|
|
|
|
|
1999-12-14 09:50:43 +03:00
|
|
|
|
/* adjust startpos value to the position between characters. */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
int
|
1999-12-14 09:50:43 +03:00
|
|
|
|
re_adjust_startpos(bufp, string, size, startpos, range)
|
1998-01-16 15:13:05 +03:00
|
|
|
|
struct re_pattern_buffer *bufp;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
const char *string;
|
|
|
|
|
int size, startpos, range;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
{
|
1999-01-20 07:59:39 +03:00
|
|
|
|
/* Update the fastmap now if not correct already. */
|
1999-12-14 09:50:43 +03:00
|
|
|
|
if (!bufp->fastmap_accurate) {
|
1999-08-13 09:45:20 +04:00
|
|
|
|
re_compile_fastmap(bufp);
|
1999-01-20 07:59:39 +03:00
|
|
|
|
}
|
|
|
|
|
|
1999-12-06 12:04:03 +03:00
|
|
|
|
/* Adjust startpos for mbc string */
|
|
|
|
|
if (current_mbctype && startpos>0 && !(bufp->options&RE_OPTIMIZE_BMATCH)) {
|
2002-01-29 22:33:11 +03:00
|
|
|
|
int i = mbc_startpos(string, startpos);
|
1999-12-06 12:04:03 +03:00
|
|
|
|
|
2002-01-30 10:00:58 +03:00
|
|
|
|
if (i < startpos) {
|
|
|
|
|
if (range > 0) {
|
|
|
|
|
startpos = i + mbclen(string[i]);
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
int len = mbclen(string[i]);
|
|
|
|
|
if (i + len <= startpos)
|
|
|
|
|
startpos = i + len;
|
|
|
|
|
else
|
|
|
|
|
startpos = i;
|
|
|
|
|
}
|
1999-12-06 12:04:03 +03:00
|
|
|
|
}
|
|
|
|
|
}
|
1999-12-14 09:50:43 +03:00
|
|
|
|
return startpos;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
2002-12-25 10:44:33 +03:00
|
|
|
|
/* Forward declaration of the matcher proper, which re_search() and
   re_match() both call.  Arguments: pattern buffer, string, size,
   pos, beg, registers.  */
static int re_match_exec _((struct re_pattern_buffer *, const char *,
			    int, int, int, struct re_registers *));
|
|
|
|
|
|
1999-12-14 09:50:43 +03:00
|
|
|
|
/* Using the compiled pattern in BUFP->buffer, first tries to match
|
|
|
|
|
STRING, starting first at index STARTPOS, then at STARTPOS + 1, and
|
|
|
|
|
so on. RANGE is the number of places to try before giving up. If
|
|
|
|
|
RANGE is negative, it searches backwards, i.e., the starting
|
|
|
|
|
positions tried are STARTPOS, STARTPOS - 1, etc. STRING is of SIZE.
|
|
|
|
|
In REGS, return the indices of STRING that matched the entire
|
|
|
|
|
BUFP->buffer and its contained subexpressions.
|
|
|
|
|
|
|
|
|
|
The value returned is the position in the strings at which the match
|
|
|
|
|
was found, or -1 if no match was found, or -2 if error (such as
|
|
|
|
|
failure stack overflow). */
|
|
|
|
|
|
|
|
|
|
int
|
|
|
|
|
re_search(bufp, string, size, startpos, range, regs)
|
|
|
|
|
struct re_pattern_buffer *bufp;
|
|
|
|
|
const char *string;
|
|
|
|
|
int size, startpos, range;
|
|
|
|
|
struct re_registers *regs;
|
|
|
|
|
{
|
|
|
|
|
register char *fastmap = bufp->fastmap;
|
2002-12-25 10:44:33 +03:00
|
|
|
|
int val, anchor = 0, initpos = startpos;
|
1999-12-14 09:50:43 +03:00
|
|
|
|
|
|
|
|
|
/* Check for out-of-range starting position. */
|
|
|
|
|
if (startpos < 0 || startpos > size)
|
|
|
|
|
return -1;
|
|
|
|
|
|
|
|
|
|
/* Update the fastmap now if not correct already. */
|
|
|
|
|
if (fastmap && !bufp->fastmap_accurate) {
|
|
|
|
|
re_compile_fastmap(bufp);
|
|
|
|
|
}
|
|
|
|
|
|
1999-12-06 12:04:03 +03:00
|
|
|
|
|
1998-01-16 15:19:22 +03:00
|
|
|
|
/* If the search isn't to be a backwards one, don't waste time in a
|
|
|
|
|
search for a pattern that must be anchored. */
|
2000-09-07 10:59:46 +04:00
|
|
|
|
if (bufp->used > 0) {
|
1998-01-16 15:19:22 +03:00
|
|
|
|
switch ((enum regexpcode)bufp->buffer[0]) {
|
|
|
|
|
case begbuf:
|
1999-08-13 09:45:20 +04:00
|
|
|
|
begbuf_match:
|
1998-01-16 15:19:22 +03:00
|
|
|
|
if (range > 0) {
|
2000-02-08 11:54:01 +03:00
|
|
|
|
if (startpos > 0) return -1;
|
|
|
|
|
else {
|
|
|
|
|
val = re_match(bufp, string, size, 0, regs);
|
|
|
|
|
if (val >= 0) return 0;
|
|
|
|
|
return val;
|
|
|
|
|
}
|
1998-01-16 15:19:22 +03:00
|
|
|
|
}
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
case begline:
|
|
|
|
|
anchor = 1;
|
|
|
|
|
break;
|
|
|
|
|
|
2000-02-08 11:54:01 +03:00
|
|
|
|
case begpos:
|
|
|
|
|
val = re_match(bufp, string, size, startpos, regs);
|
|
|
|
|
if (val >= 0) return startpos;
|
|
|
|
|
return val;
|
|
|
|
|
|
1998-01-16 15:19:22 +03:00
|
|
|
|
default:
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
1999-01-20 07:59:39 +03:00
|
|
|
|
if (bufp->options & RE_OPTIMIZE_ANCHOR) {
|
2000-05-24 08:34:26 +04:00
|
|
|
|
if (bufp->options&RE_OPTION_SINGLELINE) {
|
2000-05-22 11:09:55 +04:00
|
|
|
|
goto begbuf_match;
|
|
|
|
|
}
|
1999-01-20 07:59:39 +03:00
|
|
|
|
anchor = 1;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
}
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
|
|
|
|
if (bufp->must) {
|
|
|
|
|
int len = ((unsigned char*)bufp->must)[0];
|
|
|
|
|
int pos, pbeg, pend;
|
|
|
|
|
|
|
|
|
|
pbeg = startpos;
|
|
|
|
|
pend = startpos + range;
|
|
|
|
|
if (pbeg > pend) { /* swap pbeg,pend */
|
|
|
|
|
pos = pend; pend = pbeg; pbeg = pos;
|
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
pend = size;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
if (bufp->options & RE_OPTIMIZE_NO_BM) {
|
|
|
|
|
pos = slow_search(bufp->must+1, len,
|
|
|
|
|
string+pbeg, pend-pbeg,
|
|
|
|
|
MAY_TRANSLATE()?translate:0);
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
pos = bm_search(bufp->must+1, len,
|
|
|
|
|
string+pbeg, pend-pbeg,
|
|
|
|
|
bufp->must_skip,
|
|
|
|
|
MAY_TRANSLATE()?translate:0);
|
|
|
|
|
}
|
|
|
|
|
if (pos == -1) return -1;
|
|
|
|
|
if (range > 0 && (bufp->options & RE_OPTIMIZE_EXACTN)) {
|
|
|
|
|
startpos += pos;
|
|
|
|
|
range -= pos;
|
2000-09-07 10:59:46 +04:00
|
|
|
|
if (range < 0) return -1;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
for (;;) {
|
|
|
|
|
/* If a fastmap is supplied, skip quickly over characters that
|
|
|
|
|
cannot possibly be the start of a match. Note, however, that
|
|
|
|
|
if the pattern can possibly match the null string, we must
|
|
|
|
|
test it at each starting point so that we take the first null
|
|
|
|
|
string we get. */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (fastmap && startpos < size
|
|
|
|
|
&& bufp->can_be_null != 1 && !(anchor && startpos == 0)) {
|
|
|
|
|
if (range > 0) { /* Searching forwards. */
|
|
|
|
|
register unsigned char *p, c;
|
|
|
|
|
int irange = range;
|
|
|
|
|
|
|
|
|
|
p = (unsigned char*)string+startpos;
|
|
|
|
|
|
|
|
|
|
while (range > 0) {
|
|
|
|
|
c = *p++;
|
|
|
|
|
if (ismbchar(c)) {
|
|
|
|
|
int len;
|
|
|
|
|
|
|
|
|
|
if (fastmap[c])
|
|
|
|
|
break;
|
|
|
|
|
len = mbclen(c) - 1;
|
|
|
|
|
while (len--) {
|
|
|
|
|
c = *p++;
|
|
|
|
|
range--;
|
|
|
|
|
if (fastmap[c] == 2)
|
|
|
|
|
goto startpos_adjust;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
if (fastmap[MAY_TRANSLATE() ? translate[c] : c])
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
range--;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
startpos_adjust:
|
|
|
|
|
startpos += irange - range;
|
|
|
|
|
}
|
|
|
|
|
else { /* Searching backwards. */
|
|
|
|
|
register unsigned char c;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
c = string[startpos];
|
|
|
|
|
c &= 0xff;
|
|
|
|
|
if (MAY_TRANSLATE() ? !fastmap[translate[c]] : !fastmap[c])
|
|
|
|
|
goto advance;
|
|
|
|
|
}
|
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (startpos > size) return -1;
|
2001-07-14 19:17:19 +04:00
|
|
|
|
if ((anchor || !bufp->can_be_null) && range > 0 && size > 0 && startpos == size)
|
2001-06-19 08:35:17 +04:00
|
|
|
|
return -1;
|
2002-12-25 10:44:33 +03:00
|
|
|
|
val = re_match_exec(bufp, string, size, startpos, initpos, regs);
|
2000-02-08 11:54:01 +03:00
|
|
|
|
if (val >= 0) return startpos;
|
|
|
|
|
if (val == -2) return -2;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
#ifndef NO_ALLOCA
|
1999-01-20 07:59:39 +03:00
|
|
|
|
#ifdef C_ALLOCA
|
1999-08-13 09:45:20 +04:00
|
|
|
|
alloca(0);
|
1999-01-20 07:59:39 +03:00
|
|
|
|
#endif /* C_ALLOCA */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
#endif /* NO_ALLOCA */
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (range > 0) {
|
|
|
|
|
if (anchor && startpos < size &&
|
|
|
|
|
(startpos < 1 || string[startpos-1] != '\n')) {
|
|
|
|
|
while (range > 0 && string[startpos] != '\n') {
|
|
|
|
|
range--;
|
|
|
|
|
startpos++;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
}
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
advance:
|
|
|
|
|
if (!range)
|
|
|
|
|
break;
|
|
|
|
|
else if (range > 0) {
|
|
|
|
|
const char *d = string + startpos;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (ismbchar(*d)) {
|
|
|
|
|
int len = mbclen(*d) - 1;
|
|
|
|
|
range-=len, startpos+=len;
|
|
|
|
|
if (!range)
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
range--, startpos++;
|
|
|
|
|
}
|
|
|
|
|
else {
|
|
|
|
|
range++, startpos--;
|
|
|
|
|
{
|
|
|
|
|
const char *s, *d, *p;
|
|
|
|
|
|
|
|
|
|
s = string; d = string + startpos;
|
|
|
|
|
for (p = d; p-- > s && ismbchar(*p); )
|
|
|
|
|
/* --p >= s would not work on 80[12]?86.
|
|
|
|
|
(when the offset of s equals 0 other than huge model.) */
|
|
|
|
|
;
|
|
|
|
|
if (!((d - p) & 1)) {
|
1998-01-16 15:13:05 +03:00
|
|
|
|
if (!range)
|
|
|
|
|
break;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
range++, startpos--;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
return -1;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* Helper macros for re_match() and re_match_exec(), defined below.  */

/* Field accessors for one register_info_type entry R.  */
#define IS_ACTIVE(R)		((R).bits.is_active)
#define MATCHED_SOMETHING(R)	((R).bits.matched_something)


/* Failure-stack slot accounting.  */

/* Slots saved per register: regstart, regend and reg_info.  */
#define NUM_REG_ITEMS		3

/* Slots saved per repeat counter: the count and the pointer to it.  */
#define NUM_COUNT_ITEMS		2

/* Slots at the top of every failure point that are not register
   data: the pattern place, the string place, the option status and
   the non-greedy flag.  */
#define NUM_NONREG_ITEMS	4

/* Per-failure-point slot budget used when the failure stack is first
   allocated: every register saved, plus the non-register slots.
   Expands in a scope where `num_regs' is visible.  */
#define MAX_NUM_FAILURE_ITEMS	(num_regs * NUM_REG_ITEMS + NUM_NONREG_ITEMS)

/* Room requested from ENSURE_FAIL_STACK before one failure point is
   pushed; `last_used_reg' is the local of PUSH_FAILURE_POINT.
   NOTE(review): PUSH_FAILURE_POINT stores
   last_used_reg * NUM_REG_ITEMS + NUM_NONREG_ITEMS + 2 slots, one
   more than this, so it presumably relies on ENSURE_FAIL_STACK
   leaving strictly more room than asked for -- confirm.  */
#define NUM_FAILURE_ITEMS	(last_used_reg * NUM_REG_ITEMS + NUM_NONREG_ITEMS + 1)
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
2000-08-28 13:53:42 +04:00
|
|
|
|
/* Save the counter stored at PTR (as used by succeed_n and jump_n) on
   the failure stack: first its current value, then PTR itself, and
   note one more saved counter in num_failure_counts.  */
#define PUSH_FAILURE_COUNT(ptr)						\
  do {									\
    int c;								\
    EXTRACT_NUMBER(c, ptr);						\
    ENSURE_FAIL_STACK(NUM_COUNT_ITEMS);					\
    *stackp++ = (unsigned char*)(long)c;				\
    *stackp++ = (ptr);							\
    num_failure_counts++;						\
  } while (0)

/* Push a failure point: most of the information about the current
   state that we will want if we ever fail back to it.  From bottom to
   top the record holds the number of counters saved since the last
   failure point, three slots for each register 1..last_used_reg, the
   value of last_used_reg, PATTERN_PLACE, STRING_PLACE, the current
   option status and a zero non-greedy flag.  */
#define PUSH_FAILURE_POINT(pattern_place, string_place)			\
  do {									\
    long last_used_reg, this_reg;					\
									\
    /* Find the highest register that has been set.  (Register zero	\
       is only set at the end, so it is never saved.)  */		\
    last_used_reg = num_regs-1;						\
    while (last_used_reg > 0 && REG_UNSET(regstart[last_used_reg]))	\
      last_used_reg--;							\
									\
    ENSURE_FAIL_STACK(NUM_FAILURE_ITEMS);				\
    *stackp++ = (unsigned char*)(long)num_failure_counts;		\
    num_failure_counts = 0;						\
									\
    /* Now push the info for each of those registers.  */		\
    for (this_reg = 1; this_reg <= last_used_reg; this_reg++) {	\
      *stackp++ = regstart[this_reg];					\
      *stackp++ = regend[this_reg];					\
      *stackp++ = reg_info[this_reg].word;				\
    }									\
									\
    /* Push how many registers we saved.  */				\
    *stackp++ = (unsigned char*)last_used_reg;				\
									\
    *stackp++ = pattern_place;						\
    *stackp++ = string_place;						\
    *stackp++ = (unsigned char*)(long)options; /* current option status */ \
    *stackp++ = (unsigned char*)0; /* non-greedy flag */		\
  } while(0)

/* Value of the top (flag) slot of a failure point that marks it as
   non-greedy.  */
#define NON_GREEDY ((unsigned char*)1)

/* Undo one PUSH_FAILURE_COUNT: pop the pointer and the saved value,
   and store the value back through the pointer.  */
#define POP_FAILURE_COUNT()						\
  do {									\
    unsigned char *ptr = *--stackp;					\
    int count = (long)*--stackp;					\
    STORE_NUMBER(ptr, count);						\
  } while (0)

/* This pops what PUSH_FAILURE_POINT pushes, discarding the saved
   state but restoring every counter saved beneath the failure
   point.  */
#define POP_FAILURE_POINT()						\
  do {									\
    long temp;								\
    stackp -= NUM_NONREG_ITEMS;	/* Remove failure points (and flag). */	\
    temp = (long)*--stackp;	/* How many regs pushed.  */		\
    stackp -= temp * NUM_REG_ITEMS; /* Remove the register info.  */	\
    temp = (long)*--stackp;	/* How many counters pushed.  */	\
    while (temp--) {							\
      POP_FAILURE_COUNT();	/* Remove the counter info.  */		\
    }									\
    num_failure_counts = 0;	/* Reset num_failure_counts.  */	\
  } while(0)
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Sentinel stored in a register slot that has not matched yet, and
   the test for it.  */
#define REG_UNSET_VALUE		((unsigned char*)-1)
#define REG_UNSET(e)		((e) == REG_UNSET_VALUE)

/* Fail the current match attempt when no input is left at `d'.
   NOTE(review): expands to a bare `if', so it must be used as a
   complete statement and never directly in front of an `else'.  */
#define PREFETCH if (d == dend) goto fail

/* Call this when something has been matched: every register records
   whether it is currently active, i.e. whether the match happened
   inside the corresponding subexpression.  */
#define SET_REGS_MATCHED						\
  do { unsigned this_reg;						\
    for (this_reg = 0; this_reg < num_regs; this_reg++) {		\
      MATCHED_SOMETHING(reg_info[this_reg])				\
	= IS_ACTIVE(reg_info[this_reg]) ? 1 : 0;			\
    }									\
  } while(0)

/* Is D at the very start / the very end of the subject string?  */
#define AT_STRINGS_BEG(d)	((d) == string)
#define AT_STRINGS_END(d)	((d) == dend)

/* Does a word character start at D?  True when the syntax class of
   *D is Sword; otherwise, in multibyte mode, when *D is marked in
   re_mbctab and the whole character fits before dend, and in
   single-byte mode when the syntax class is Sword2.  */
#define IS_A_LETTER(d) (SYNTAX(*(d)) == Sword ||			\
			(current_mbctype ?				\
			 (re_mbctab[*(d)] && ((d)+mbclen(*(d)))<=dend):	\
			 SYNTAX(*(d)) == Sword2))

/* Does a word character end just before D?  Under MBCTYPE_SJIS the
   test looks two bytes back when D-1 is not the string start and
   (D)[-2] is a multibyte byte, otherwise one byte back.  Under the
   other multibyte types any preceding byte >= 0x80 counts as a
   letter; failing that, the preceding byte is tested with
   IS_A_LETTER.  */
#define PREV_IS_A_LETTER(d) ((current_mbctype == MBCTYPE_SJIS)?		\
			     IS_A_LETTER((d)-(!AT_STRINGS_BEG((d)-1)&&	\
					      ismbchar((d)[-2])?2:1)):	\
			     ((current_mbctype && ((d)[-1] >= 0x80)) ||	\
			      IS_A_LETTER((d)-1)))
|
1999-08-13 09:45:20 +04:00
|
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
|
static void
|
|
|
|
|
init_regs(regs, num_regs)
|
1999-08-13 09:45:20 +04:00
|
|
|
|
struct re_registers *regs;
|
|
|
|
|
unsigned int num_regs;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
{
|
1999-08-13 09:45:20 +04:00
|
|
|
|
int i;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
regs->num_regs = num_regs;
|
|
|
|
|
if (num_regs < RE_NREGS)
|
|
|
|
|
num_regs = RE_NREGS;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (regs->allocated == 0) {
|
|
|
|
|
regs->beg = TMALLOC(num_regs, int);
|
|
|
|
|
regs->end = TMALLOC(num_regs, int);
|
|
|
|
|
regs->allocated = num_regs;
|
|
|
|
|
}
|
|
|
|
|
else if (regs->allocated < num_regs) {
|
|
|
|
|
TREALLOC(regs->beg, num_regs, int);
|
|
|
|
|
TREALLOC(regs->end, num_regs, int);
|
2001-07-02 12:46:28 +04:00
|
|
|
|
regs->allocated = num_regs;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
for (i=0; i<num_regs; i++) {
|
|
|
|
|
regs->beg[i] = regs->end[i] = -1;
|
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Match the pattern described by BUFP against STRING_ARG, which is of
   SIZE.  Start the match at index POS.  In REGS, return the indices
   of the string that matched the entire BUFP->buffer and its
   contained subexpressions.

   If bufp->fastmap is nonzero, then it had better be up to date.

   (Earlier versions of this comment explained why the data to match
   are passed as two components; this function takes a single
   string.)

   -1 is returned if there is no match.  -2 is returned if there is an
   error (such as match stack overflow).  Otherwise the value is the
   length of the substring which was matched.  */
int
re_match(bufp, string_arg, size, pos, regs)
    struct re_pattern_buffer *bufp;
    const char *string_arg;
    int size, pos;
    struct re_registers *regs;
{
  /* POS is handed to re_match_exec() as both its `pos' and its `beg'
     argument.  */
  int matched = re_match_exec(bufp, string_arg, size, pos, pos, regs);

  return matched;
}
|
|
|
|
|
|
|
|
|
|
static int
|
|
|
|
|
re_match_exec(bufp, string_arg, size, pos, beg, regs)
|
|
|
|
|
struct re_pattern_buffer *bufp;
|
|
|
|
|
const char *string_arg;
|
|
|
|
|
int size, pos, beg;
|
|
|
|
|
struct re_registers *regs;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
{
|
1999-01-20 07:59:39 +03:00
|
|
|
|
register unsigned char *p = (unsigned char*)bufp->buffer;
|
|
|
|
|
unsigned char *p1;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
/* Pointer to beyond end of buffer. */
|
|
|
|
|
register unsigned char *pend = p + bufp->used;
|
|
|
|
|
|
|
|
|
|
unsigned num_regs = bufp->re_nsub;
|
|
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
|
unsigned char *string = (unsigned char*)string_arg;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
register unsigned char *d, *dend;
|
|
|
|
|
register int mcnt; /* Multipurpose. */
|
1999-01-20 07:59:39 +03:00
|
|
|
|
int options = bufp->options;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Failure point stack. Each place that can handle a failure further
|
|
|
|
|
down the line pushes a failure point on this stack. It consists of
|
|
|
|
|
restart, regend, and reg_info for all registers corresponding to the
|
|
|
|
|
subexpressions we're currently inside, plus the number of such
|
|
|
|
|
registers, and, finally, two char *'s. The first char * is where to
|
|
|
|
|
resume scanning the pattern; the second one is where to resume
|
|
|
|
|
scanning the strings. If the latter is zero, the failure point is a
|
|
|
|
|
``dummy''; if a failure happens and the failure point is a dummy, it
|
|
|
|
|
gets discarded and the next next one is tried. */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
2000-09-26 11:07:13 +04:00
|
|
|
|
unsigned char **stacka;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
unsigned char **stackb;
|
|
|
|
|
unsigned char **stackp;
|
|
|
|
|
unsigned char **stacke;
|
|
|
|
|
|
|
|
|
|
/* Information on the contents of registers. These are pointers into
|
|
|
|
|
the input strings; they record just what was matched (on this
|
|
|
|
|
attempt) by a subexpression part of the pattern, that is, the
|
|
|
|
|
regnum-th regstart pointer points to where in the pattern we began
|
|
|
|
|
matching and the regnum-th regend points to right after where we
|
|
|
|
|
stopped matching the regnum-th subexpression. (The zeroth register
|
|
|
|
|
keeps track of what the whole pattern matches.) */
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
unsigned char **regstart = bufp->regstart;
|
|
|
|
|
unsigned char **regend = bufp->regend;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
|
/* If a group that's operated upon by a repetition operator fails to
|
|
|
|
|
match anything, then the register for its start will need to be
|
|
|
|
|
restored because it will have been set to wherever in the string we
|
|
|
|
|
are when we last see its open-group operator. Similarly for a
|
|
|
|
|
register's end. */
|
1999-08-13 09:45:20 +04:00
|
|
|
|
unsigned char **old_regstart = bufp->old_regstart;
|
|
|
|
|
unsigned char **old_regend = bufp->old_regend;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
|
/* The is_active field of reg_info helps us keep track of which (possibly
|
|
|
|
|
nested) subexpressions we are currently in. The matched_something
|
|
|
|
|
field of reg_info[reg_num] helps us tell whether or not we have
|
|
|
|
|
matched any of the pattern so far this time through the reg_num-th
|
|
|
|
|
subexpression. These two fields get reset each time through any
|
|
|
|
|
loop their register is in. */
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
register_info_type *reg_info = bufp->reg_info;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
/* The following record the register info as found in the above
|
|
|
|
|
variables when we find a match better than any we've seen before.
|
|
|
|
|
This happens as we backtrack through the failure points, which in
|
|
|
|
|
turn happens only if we have not yet matched the entire string. */
|
|
|
|
|
|
|
|
|
|
unsigned best_regs_set = 0;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
unsigned char **best_regstart = bufp->best_regstart;
|
|
|
|
|
unsigned char **best_regend = bufp->best_regend;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
2000-08-28 13:53:42 +04:00
|
|
|
|
int num_failure_counts = 0;
|
|
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
|
if (regs) {
|
1998-01-16 15:19:22 +03:00
|
|
|
|
init_regs(regs, num_regs);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Initialize the stack. */
|
2000-09-26 11:07:13 +04:00
|
|
|
|
stacka = RE_TALLOC(MAX_NUM_FAILURE_ITEMS * NFAILURES, unsigned char*);
|
2000-09-25 21:51:29 +04:00
|
|
|
|
stackb = stacka;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
stackp = stackb;
|
|
|
|
|
stacke = &stackb[MAX_NUM_FAILURE_ITEMS * NFAILURES];
|
|
|
|
|
|
|
|
|
|
#ifdef DEBUG_REGEX
|
2000-08-25 12:26:06 +04:00
|
|
|
|
fprintf(stderr, "Entering re_match(%s)\n", string_arg);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
#endif
|
|
|
|
|
|
|
|
|
|
/* Initialize subexpression text positions to -1 to mark ones that no
|
1999-01-20 07:59:39 +03:00
|
|
|
|
( or ( and ) or ) has been seen for. Also set all registers to
|
1998-01-16 15:13:05 +03:00
|
|
|
|
inactive and mark them as not having matched anything or ever
|
|
|
|
|
failed. */
|
|
|
|
|
for (mcnt = 0; mcnt < num_regs; mcnt++) {
|
1999-01-20 07:59:39 +03:00
|
|
|
|
regstart[mcnt] = regend[mcnt]
|
|
|
|
|
= old_regstart[mcnt] = old_regend[mcnt]
|
|
|
|
|
= best_regstart[mcnt] = best_regend[mcnt] = REG_UNSET_VALUE;
|
|
|
|
|
#ifdef __CHECKER__
|
|
|
|
|
reg_info[mcnt].word = 0;
|
|
|
|
|
#endif
|
|
|
|
|
IS_ACTIVE (reg_info[mcnt]) = 0;
|
|
|
|
|
MATCHED_SOMETHING (reg_info[mcnt]) = 0;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* Set up pointers to ends of strings.
|
|
|
|
|
Don't allow the second string to be empty unless both are empty. */
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
/* `p' scans through the pattern as `d' scans through the data. `dend'
|
|
|
|
|
is the end of the input string that `d' points within. `d' is
|
|
|
|
|
advanced into the following input string whenever necessary, but
|
|
|
|
|
this happens before fetching; therefore, at the beginning of the
|
|
|
|
|
loop, `d' can be pointing at the end of a string, but it cannot
|
|
|
|
|
equal string2. */
|
|
|
|
|
|
|
|
|
|
d = string + pos, dend = string + size;
|
|
|
|
|
|
|
|
|
|
/* This loops over pattern commands. It exits by returning from the
|
|
|
|
|
function if match is complete, or it drops through if match fails
|
|
|
|
|
at this starting point in the input data. */
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
for (;;) {
|
1998-01-16 15:13:05 +03:00
|
|
|
|
#ifdef DEBUG_REGEX
|
1999-08-13 09:45:20 +04:00
|
|
|
|
fprintf(stderr,
|
|
|
|
|
"regex loop(%d): matching 0x%02d\n",
|
|
|
|
|
p - (unsigned char*)bufp->buffer,
|
|
|
|
|
*p);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
#endif
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* End of pattern means we might have succeeded. */
|
|
|
|
|
if (p == pend) {
|
|
|
|
|
/* If not end of string, try backtracking. Otherwise done. */
|
|
|
|
|
if ((bufp->options & RE_OPTION_LONGEST) && d != dend) {
|
|
|
|
|
if (best_regs_set) /* non-greedy, no need to backtrack */
|
|
|
|
|
goto restore_best_regs;
|
|
|
|
|
while (stackp != stackb && stackp[-1] == NON_GREEDY) {
|
|
|
|
|
if (best_regs_set) /* non-greedy, no need to backtrack */
|
|
|
|
|
goto restore_best_regs;
|
|
|
|
|
POP_FAILURE_POINT();
|
|
|
|
|
}
|
|
|
|
|
if (stackp != stackb) {
|
|
|
|
|
/* More failure points to try. */
|
|
|
|
|
|
|
|
|
|
/* If exceeds best match so far, save it. */
|
|
|
|
|
if (! best_regs_set || (d > best_regend[0])) {
|
|
|
|
|
best_regs_set = 1;
|
|
|
|
|
best_regend[0] = d; /* Never use regstart[0]. */
|
|
|
|
|
|
|
|
|
|
for (mcnt = 1; mcnt < num_regs; mcnt++) {
|
|
|
|
|
best_regstart[mcnt] = regstart[mcnt];
|
|
|
|
|
best_regend[mcnt] = regend[mcnt];
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
goto fail;
|
|
|
|
|
}
|
|
|
|
|
/* If no failure points, don't restore garbage. */
|
|
|
|
|
else if (best_regs_set) {
|
|
|
|
|
restore_best_regs:
|
|
|
|
|
/* Restore best match. */
|
|
|
|
|
d = best_regend[0];
|
|
|
|
|
|
|
|
|
|
for (mcnt = 0; mcnt < num_regs; mcnt++) {
|
|
|
|
|
regstart[mcnt] = best_regstart[mcnt];
|
|
|
|
|
regend[mcnt] = best_regend[mcnt];
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/* If caller wants register contents data back, convert it
|
|
|
|
|
to indices. */
|
|
|
|
|
if (regs) {
|
|
|
|
|
regs->beg[0] = pos;
|
|
|
|
|
regs->end[0] = d - string;
|
|
|
|
|
for (mcnt = 1; mcnt < num_regs; mcnt++) {
|
|
|
|
|
if (REG_UNSET(regend[mcnt])) {
|
|
|
|
|
regs->beg[mcnt] = -1;
|
|
|
|
|
regs->end[mcnt] = -1;
|
|
|
|
|
continue;
|
|
|
|
|
}
|
|
|
|
|
regs->beg[mcnt] = regstart[mcnt] - string;
|
|
|
|
|
regs->end[mcnt] = regend[mcnt] - string;
|
|
|
|
|
}
|
|
|
|
|
}
|
|
|
|
|
FREE_AND_RETURN(stackb, (d - pos - string));
|
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Otherwise match next pattern command. */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
#ifdef SWITCH_ENUM_BUG
|
1999-08-13 09:45:20 +04:00
|
|
|
|
switch ((int)((enum regexpcode)*p++))
|
1998-01-16 15:13:05 +03:00
|
|
|
|
#else
|
1999-08-13 09:45:20 +04:00
|
|
|
|
switch ((enum regexpcode)*p++)
|
1998-01-16 15:13:05 +03:00
|
|
|
|
#endif
|
1999-08-13 09:45:20 +04:00
|
|
|
|
{
|
|
|
|
|
/* ( [or `(', as appropriate] is represented by start_memory,
|
|
|
|
|
) by stop_memory. Both of those commands are followed by
|
|
|
|
|
a register number in the next byte. The text matched
|
|
|
|
|
within the ( and ) is recorded under that number. */
|
|
|
|
|
case start_memory:
|
|
|
|
|
old_regstart[*p] = regstart[*p];
|
|
|
|
|
regstart[*p] = d;
|
|
|
|
|
IS_ACTIVE(reg_info[*p]) = 1;
|
|
|
|
|
MATCHED_SOMETHING(reg_info[*p]) = 0;
|
|
|
|
|
p += 2;
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
case stop_memory:
|
|
|
|
|
old_regend[*p] = regend[*p];
|
|
|
|
|
regend[*p] = d;
|
|
|
|
|
IS_ACTIVE(reg_info[*p]) = 0;
|
|
|
|
|
p += 2;
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
case start_paren:
|
|
|
|
|
case stop_paren:
|
|
|
|
|
break;
|
|
|
|
|
|
|
|
|
|
/* \<digit> has been turned into a `duplicate' command which is
|
|
|
|
|
followed by the numeric value of <digit> as the register number. */
|
|
|
|
|
case duplicate:
|
1998-01-16 15:13:05 +03:00
|
|
|
|
{
|
1999-08-13 09:45:20 +04:00
|
|
|
|
int regno = *p++; /* Get which register to match against */
|
|
|
|
|
register unsigned char *d2, *dend2;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
2001-08-20 08:29:58 +04:00
|
|
|
|
/* Check if there's corresponding group */
|
|
|
|
|
if (regno >= num_regs) goto fail;
|
2001-05-30 13:12:34 +04:00
|
|
|
|
/* Check if corresponding group is still open */
|
|
|
|
|
if (IS_ACTIVE(reg_info[regno])) goto fail;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Where in input to try to start matching. */
|
|
|
|
|
d2 = regstart[regno];
|
2001-05-30 13:12:34 +04:00
|
|
|
|
if (REG_UNSET(d2)) goto fail;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Where to stop matching; if both the place to start and
|
|
|
|
|
the place to stop matching are in the same string, then
|
|
|
|
|
set to the place to stop, otherwise, for now have to use
|
|
|
|
|
the end of the first string. */
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
dend2 = regend[regno];
|
2001-05-30 13:12:34 +04:00
|
|
|
|
if (REG_UNSET(dend2)) goto fail;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
for (;;) {
|
|
|
|
|
/* At end of register contents => success */
|
|
|
|
|
if (d2 == dend2) break;
|
|
|
|
|
|
|
|
|
|
/* If necessary, advance to next segment in data. */
|
|
|
|
|
PREFETCH;
|
|
|
|
|
|
|
|
|
|
/* How many characters left in this segment to match. */
|
|
|
|
|
mcnt = dend - d;
|
|
|
|
|
|
|
|
|
|
/* Want how many consecutive characters we can match in
|
|
|
|
|
one shot, so, if necessary, adjust the count. */
|
|
|
|
|
if (mcnt > dend2 - d2)
|
|
|
|
|
mcnt = dend2 - d2;
|
|
|
|
|
|
|
|
|
|
/* Compare that many; failure if mismatch, else move
|
|
|
|
|
past them. */
|
|
|
|
|
if ((options & RE_OPTION_IGNORECASE)
|
|
|
|
|
? memcmp_translate(d, d2, mcnt)
|
|
|
|
|
: memcmp((char*)d, (char*)d2, mcnt))
|
|
|
|
|
goto fail;
|
|
|
|
|
d += mcnt, d2 += mcnt;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case start_nowidth:
|
|
|
|
|
PUSH_FAILURE_POINT(0, d);
|
|
|
|
|
if (stackp - stackb > RE_DUP_MAX) {
|
|
|
|
|
FREE_AND_RETURN(stackb,(-2));
|
|
|
|
|
}
|
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
|
|
STORE_NUMBER(p+mcnt, stackp - stackb);
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
case stop_nowidth:
|
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
|
|
stackp = stackb + mcnt;
|
2001-06-19 08:35:17 +04:00
|
|
|
|
d = stackp[-3];
|
1999-08-13 09:45:20 +04:00
|
|
|
|
POP_FAILURE_POINT();
|
|
|
|
|
continue;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
2000-02-08 11:54:01 +03:00
|
|
|
|
case stop_backtrack:
|
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
|
|
stackp = stackb + mcnt;
|
|
|
|
|
POP_FAILURE_POINT();
|
|
|
|
|
continue;
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case pop_and_fail:
|
|
|
|
|
EXTRACT_NUMBER(mcnt, p+1);
|
|
|
|
|
stackp = stackb + mcnt;
|
|
|
|
|
POP_FAILURE_POINT();
|
|
|
|
|
goto fail;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case anychar:
|
|
|
|
|
PREFETCH;
|
|
|
|
|
if (ismbchar(*d)) {
|
|
|
|
|
if (d + mbclen(*d) > dend)
|
|
|
|
|
goto fail;
|
|
|
|
|
SET_REGS_MATCHED;
|
|
|
|
|
d += mbclen(*d);
|
|
|
|
|
break;
|
|
|
|
|
}
|
2000-05-24 08:34:26 +04:00
|
|
|
|
if (!(options&RE_OPTION_MULTILINE)
|
|
|
|
|
&& (TRANSLATE_P() ? translate[*d] : *d) == '\n')
|
1998-01-16 15:19:22 +03:00
|
|
|
|
goto fail;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
SET_REGS_MATCHED;
|
|
|
|
|
d++;
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case anychar_repeat:
|
|
|
|
|
for (;;) {
|
|
|
|
|
PUSH_FAILURE_POINT(p, d);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
PREFETCH;
|
|
|
|
|
if (ismbchar(*d)) {
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (d + mbclen(*d) > dend)
|
1998-01-16 15:13:05 +03:00
|
|
|
|
goto fail;
|
|
|
|
|
SET_REGS_MATCHED;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
d += mbclen(*d);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
continue;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
2000-05-18 08:32:13 +04:00
|
|
|
|
if (!(options&RE_OPTION_MULTILINE) &&
|
1999-08-13 09:45:20 +04:00
|
|
|
|
(TRANSLATE_P() ? translate[*d] : *d) == '\n')
|
1998-01-16 15:13:05 +03:00
|
|
|
|
goto fail;
|
|
|
|
|
SET_REGS_MATCHED;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
d++;
|
|
|
|
|
}
|
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case charset:
|
|
|
|
|
case charset_not:
|
|
|
|
|
{
|
|
|
|
|
int not; /* Nonzero for charset_not. */
|
|
|
|
|
int part = 0; /* true if matched part of mbc */
|
|
|
|
|
unsigned char *dsave = d + 1;
|
|
|
|
|
int cc, c;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
PREFETCH;
|
2003-01-20 11:29:24 +03:00
|
|
|
|
c = (unsigned char)*d++;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (ismbchar(c)) {
|
|
|
|
|
if (d + mbclen(c) - 1 <= dend) {
|
2003-01-20 11:29:24 +03:00
|
|
|
|
cc = c;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
MBC2WC(c, d);
|
2003-01-20 11:29:24 +03:00
|
|
|
|
not = is_in_list_mbc(c, p);
|
|
|
|
|
if (!not) {
|
2003-07-29 22:26:55 +04:00
|
|
|
|
part = not = is_in_list_sbc(cc, p);
|
2003-01-20 11:29:24 +03:00
|
|
|
|
}
|
|
|
|
|
} else {
|
2003-01-23 06:39:25 +03:00
|
|
|
|
not = is_in_list(c, p);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
2003-01-20 11:29:24 +03:00
|
|
|
|
else {
|
|
|
|
|
if (TRANSLATE_P())
|
|
|
|
|
c = (unsigned char)translate[c];
|
2003-01-23 06:39:25 +03:00
|
|
|
|
not = is_in_list(c, p);
|
2000-02-17 10:11:22 +03:00
|
|
|
|
}
|
2003-01-20 11:29:24 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (*(p - 1) == (unsigned char)charset_not) {
|
|
|
|
|
not = !not;
|
|
|
|
|
}
|
|
|
|
|
if (!not) goto fail;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
p += 1 + *p + 2 + EXTRACT_UNSIGNED(&p[1 + *p])*8;
|
|
|
|
|
SET_REGS_MATCHED;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (part) d = dsave;
|
|
|
|
|
break;
|
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case begline:
|
|
|
|
|
if (size == 0 || AT_STRINGS_BEG(d))
|
|
|
|
|
break;
|
|
|
|
|
if (d[-1] == '\n' && !AT_STRINGS_END(d))
|
|
|
|
|
break;
|
|
|
|
|
goto fail;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case endline:
|
|
|
|
|
if (AT_STRINGS_END(d)) {
|
2003-05-01 04:00:37 +04:00
|
|
|
|
break;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
else if (*d == '\n')
|
|
|
|
|
break;
|
|
|
|
|
goto fail;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
|
/* Match at the very beginning of the string. */
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case begbuf:
|
|
|
|
|
if (AT_STRINGS_BEG(d))
|
|
|
|
|
break;
|
|
|
|
|
goto fail;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
|
/* Match at the very end of the data. */
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case endbuf:
|
|
|
|
|
if (AT_STRINGS_END(d))
|
|
|
|
|
break;
|
|
|
|
|
goto fail;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
|
/* Match at the very end of the data. */
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case endbuf2:
|
|
|
|
|
if (AT_STRINGS_END(d)) {
|
2003-05-14 13:44:14 +04:00
|
|
|
|
break;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
/* .. or newline just before the end of the data. */
|
|
|
|
|
if (*d == '\n' && AT_STRINGS_END(d+1))
|
|
|
|
|
break;
|
|
|
|
|
goto fail;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
|
/* `or' constructs are handled by starting each alternative with
|
1999-08-13 09:45:20 +04:00
|
|
|
|
an on_failure_jump that points to the start of the next
|
|
|
|
|
alternative. Each alternative except the last ends with a
|
|
|
|
|
jump to the joining point. (Actually, each jump except for
|
|
|
|
|
the last one really jumps to the following jump, because
|
|
|
|
|
tensioning the jumps is a hassle.) */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
/* The start of a stupid repeat has an on_failure_jump that points
|
|
|
|
|
past the end of the repeat text. This makes a failure point so
|
1999-08-13 09:45:20 +04:00
|
|
|
|
that on failure to match a repetition, matching restarts past
|
|
|
|
|
as many repetitions have been found with no way to fail and
|
|
|
|
|
look for another one. */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
/* A smart repeat is similar but loops back to the on_failure_jump
|
|
|
|
|
so that each repetition makes another failure point. */
|
|
|
|
|
|
2000-02-08 11:54:01 +03:00
|
|
|
|
/* Match at the starting position. */
|
|
|
|
|
case begpos:
|
2002-12-25 10:44:33 +03:00
|
|
|
|
if (d - string == beg)
|
2000-02-08 11:54:01 +03:00
|
|
|
|
break;
|
|
|
|
|
goto fail;
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case on_failure_jump:
|
|
|
|
|
on_failure:
|
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
|
|
PUSH_FAILURE_POINT(p + mcnt, d);
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
/* The end of a smart repeat has a maybe_finalize_jump back.
|
|
|
|
|
Change it either to a finalize_jump or an ordinary jump. */
|
|
|
|
|
case maybe_finalize_jump:
|
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
|
|
p1 = p;
|
|
|
|
|
|
|
|
|
|
/* Compare the beginning of the repeat with what in the
|
|
|
|
|
pattern follows its end. If we can establish that there
|
|
|
|
|
is nothing that they would both match, i.e., that we
|
|
|
|
|
would have to backtrack because of (as in, e.g., `a*a')
|
2000-08-28 13:53:42 +04:00
|
|
|
|
then we can change to finalize_jump, because we'll
|
1999-08-13 09:45:20 +04:00
|
|
|
|
never have to backtrack.
|
|
|
|
|
|
|
|
|
|
This is not true in the case of alternatives: in
|
|
|
|
|
`(a|ab)*' we do need to backtrack to the `ab' alternative
|
|
|
|
|
(e.g., if the string was `ab'). But instead of trying to
|
|
|
|
|
detect that here, the alternative has put on a dummy
|
|
|
|
|
failure point which is what we will end up popping. */
|
|
|
|
|
|
|
|
|
|
/* Skip over open/close-group commands. */
|
|
|
|
|
while (p1 + 2 < pend) {
|
|
|
|
|
if ((enum regexpcode)*p1 == stop_memory ||
|
|
|
|
|
(enum regexpcode)*p1 == start_memory)
|
|
|
|
|
p1 += 3; /* Skip over args, too. */
|
|
|
|
|
else if (/*(enum regexpcode)*p1 == start_paren ||*/
|
|
|
|
|
(enum regexpcode)*p1 == stop_paren)
|
|
|
|
|
p1 += 1;
|
|
|
|
|
else
|
|
|
|
|
break;
|
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (p1 == pend)
|
|
|
|
|
p[-3] = (unsigned char)finalize_jump;
|
|
|
|
|
else if (*p1 == (unsigned char)exactn ||
|
|
|
|
|
*p1 == (unsigned char)endline) {
|
|
|
|
|
register int c = *p1 == (unsigned char)endline ? '\n' : p1[2];
|
|
|
|
|
register unsigned char *p2 = p + mcnt;
|
|
|
|
|
/* p2[0] ... p2[2] are an on_failure_jump.
|
|
|
|
|
Examine what follows that. */
|
|
|
|
|
if (p2[3] == (unsigned char)exactn && p2[5] != c)
|
|
|
|
|
p[-3] = (unsigned char)finalize_jump;
|
|
|
|
|
else if (p2[3] == (unsigned char)charset ||
|
|
|
|
|
p2[3] == (unsigned char)charset_not) {
|
|
|
|
|
int not;
|
|
|
|
|
if (ismbchar(c)) {
|
|
|
|
|
unsigned char *pp = p1+3;
|
|
|
|
|
MBC2WC(c, pp);
|
1999-01-20 07:59:39 +03:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* `is_in_list()' is TRUE if c would match */
|
|
|
|
|
/* That means it is not safe to finalize. */
|
|
|
|
|
not = is_in_list(c, p2 + 4);
|
|
|
|
|
if (p2[3] == (unsigned char)charset_not)
|
|
|
|
|
not = !not;
|
|
|
|
|
if (!not)
|
1998-01-16 15:13:05 +03:00
|
|
|
|
p[-3] = (unsigned char)finalize_jump;
|
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
p -= 2; /* Point at relative address again. */
|
|
|
|
|
if (p[-1] != (unsigned char)finalize_jump) {
|
|
|
|
|
p[-1] = (unsigned char)jump;
|
|
|
|
|
goto nofinalize;
|
|
|
|
|
}
|
|
|
|
|
/* Note fall through. */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
/* The end of a stupid repeat has a finalize_jump back to the
|
1999-08-13 09:45:20 +04:00
|
|
|
|
start, where another failure point will be made which will
|
|
|
|
|
point to after all the repetitions found so far. */
|
|
|
|
|
|
|
|
|
|
/* Take off failure points put on by matching on_failure_jump
|
|
|
|
|
because didn't fail. Also remove the register information
|
|
|
|
|
put on by the on_failure_jump. */
|
|
|
|
|
case finalize_jump:
|
2001-06-19 08:35:17 +04:00
|
|
|
|
if (stackp > stackb && stackp[-3] == d) {
|
|
|
|
|
p = stackp[-4];
|
1999-08-13 09:45:20 +04:00
|
|
|
|
POP_FAILURE_POINT();
|
1998-01-16 15:19:22 +03:00
|
|
|
|
continue;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
}
|
|
|
|
|
POP_FAILURE_POINT();
|
|
|
|
|
/* Note fall through. */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* We need this opcode so we can detect where alternatives end
|
|
|
|
|
in `group_match_null_string_p' et al. */
|
|
|
|
|
case jump_past_alt:
|
|
|
|
|
/* fall through */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Jump without taking off any failure points. */
|
|
|
|
|
case jump:
|
|
|
|
|
nofinalize:
|
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
2001-06-19 08:35:17 +04:00
|
|
|
|
if (mcnt < 0 && stackp > stackb && stackp[-3] == d) /* avoid infinite loop */
|
1999-08-13 09:45:20 +04:00
|
|
|
|
goto fail;
|
|
|
|
|
p += mcnt;
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
case dummy_failure_jump:
|
|
|
|
|
/* Normally, the on_failure_jump pushes a failure point, which
|
|
|
|
|
then gets popped at finalize_jump. We will end up at
|
|
|
|
|
finalize_jump, also, and with a pattern of, say, `a+', we
|
|
|
|
|
are skipping over the on_failure_jump, so we have to push
|
|
|
|
|
something meaningless for finalize_jump to pop. */
|
|
|
|
|
PUSH_FAILURE_POINT(0, 0);
|
|
|
|
|
goto nofinalize;
|
|
|
|
|
|
|
|
|
|
/* At the end of an alternative, we need to push a dummy failure
|
|
|
|
|
point in case we are followed by a `finalize_jump', because
|
|
|
|
|
we don't want the failure point for the alternative to be
|
|
|
|
|
popped. For example, matching `(a|ab)*' against `aab'
|
|
|
|
|
requires that we match the `ab' alternative. */
|
|
|
|
|
case push_dummy_failure:
|
|
|
|
|
/* See comments just above at `dummy_failure_jump' about the
|
|
|
|
|
two zeroes. */
|
|
|
|
|
p1 = p;
|
|
|
|
|
/* Skip over open/close-group commands. */
|
|
|
|
|
while (p1 + 2 < pend) {
|
|
|
|
|
if ((enum regexpcode)*p1 == stop_memory ||
|
|
|
|
|
(enum regexpcode)*p1 == start_memory)
|
|
|
|
|
p1 += 3; /* Skip over args, too. */
|
|
|
|
|
else if (/*(enum regexpcode)*p1 == start_paren ||*/
|
|
|
|
|
(enum regexpcode)*p1 == stop_paren)
|
|
|
|
|
p1 += 1;
|
|
|
|
|
else
|
|
|
|
|
break;
|
|
|
|
|
}
|
2002-09-26 04:48:33 +04:00
|
|
|
|
if (p1 < pend && (enum regexpcode)*p1 == jump)
|
1999-08-13 09:45:20 +04:00
|
|
|
|
p[-1] = unused;
|
|
|
|
|
else
|
|
|
|
|
PUSH_FAILURE_POINT(0, 0);
|
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* Have to succeed matching what follows at least n times. Then
|
|
|
|
|
just handle like an on_failure_jump. */
|
|
|
|
|
case succeed_n:
|
|
|
|
|
EXTRACT_NUMBER(mcnt, p + 2);
|
|
|
|
|
/* Originally, this is how many times we HAVE to succeed. */
|
2000-08-28 13:53:42 +04:00
|
|
|
|
if (mcnt != 0) {
|
1999-08-13 09:45:20 +04:00
|
|
|
|
mcnt--;
|
|
|
|
|
p += 2;
|
2000-08-28 13:53:42 +04:00
|
|
|
|
PUSH_FAILURE_COUNT(p);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
STORE_NUMBER_AND_INCR(p, mcnt);
|
|
|
|
|
PUSH_FAILURE_POINT(0, 0);
|
|
|
|
|
}
|
2000-08-28 13:53:42 +04:00
|
|
|
|
else {
|
1999-08-13 09:45:20 +04:00
|
|
|
|
goto on_failure;
|
|
|
|
|
}
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
case jump_n:
|
|
|
|
|
EXTRACT_NUMBER(mcnt, p + 2);
|
|
|
|
|
/* Originally, this is how many times we CAN jump. */
|
|
|
|
|
if (mcnt) {
|
|
|
|
|
mcnt--;
|
2000-08-28 13:53:42 +04:00
|
|
|
|
PUSH_FAILURE_COUNT(p + 2);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
STORE_NUMBER(p + 2, mcnt);
|
|
|
|
|
goto nofinalize; /* Do the jump without taking off
|
|
|
|
|
any failure points. */
|
|
|
|
|
}
|
|
|
|
|
/* If don't have to jump any more, skip over the rest of command. */
|
|
|
|
|
else
|
|
|
|
|
p += 4;
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
case set_number_at:
|
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
|
|
p1 = p + mcnt;
|
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
|
|
STORE_NUMBER(p1, mcnt);
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
case try_next:
|
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
|
|
|
|
if (p + mcnt < pend) {
|
|
|
|
|
PUSH_FAILURE_POINT(p, d);
|
|
|
|
|
stackp[-1] = NON_GREEDY;
|
|
|
|
|
}
|
|
|
|
|
p += mcnt;
|
|
|
|
|
continue;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case finalize_push:
|
|
|
|
|
POP_FAILURE_POINT();
|
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
2001-06-19 08:35:17 +04:00
|
|
|
|
if (mcnt < 0 && stackp > stackb && stackp[-3] == d) /* avoid infinite loop */
|
1999-08-13 09:45:20 +04:00
|
|
|
|
goto fail;
|
|
|
|
|
PUSH_FAILURE_POINT(p + mcnt, d);
|
|
|
|
|
stackp[-1] = NON_GREEDY;
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
case finalize_push_n:
|
|
|
|
|
EXTRACT_NUMBER(mcnt, p + 2);
|
|
|
|
|
/* Originally, this is how many times we CAN jump. */
|
|
|
|
|
if (mcnt) {
|
|
|
|
|
int pos, i;
|
|
|
|
|
|
|
|
|
|
mcnt--;
|
|
|
|
|
STORE_NUMBER(p + 2, mcnt);
|
|
|
|
|
EXTRACT_NUMBER(pos, p);
|
|
|
|
|
EXTRACT_NUMBER(i, p+pos+5);
|
|
|
|
|
if (i > 0) goto nofinalize;
|
|
|
|
|
POP_FAILURE_POINT();
|
1998-01-16 15:19:22 +03:00
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
PUSH_FAILURE_POINT(p + mcnt, d);
|
|
|
|
|
stackp[-1] = NON_GREEDY;
|
|
|
|
|
p += 2; /* skip n */
|
|
|
|
|
}
|
|
|
|
|
/* If don't have to push any more, skip over the rest of command. */
|
|
|
|
|
else
|
|
|
|
|
p += 4;
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
/* Ignore these. Used to ignore the n of succeed_n's which
|
|
|
|
|
currently have n == 0. */
|
|
|
|
|
case unused:
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
case casefold_on:
|
|
|
|
|
options |= RE_OPTION_IGNORECASE;
|
|
|
|
|
continue;
|
|
|
|
|
|
|
|
|
|
case casefold_off:
|
|
|
|
|
options &= ~RE_OPTION_IGNORECASE;
|
|
|
|
|
continue;
|
|
|
|
|
|
2000-05-24 08:34:26 +04:00
|
|
|
|
case option_set:
|
|
|
|
|
options = *p++;
|
2000-05-22 11:09:55 +04:00
|
|
|
|
continue;
|
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case wordbound:
|
|
|
|
|
if (AT_STRINGS_BEG(d)) {
|
2002-11-22 12:14:24 +03:00
|
|
|
|
if (AT_STRINGS_END(d)) goto fail;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (IS_A_LETTER(d)) break;
|
|
|
|
|
else goto fail;
|
|
|
|
|
}
|
2001-09-03 09:37:42 +04:00
|
|
|
|
if (AT_STRINGS_END(d)) {
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (PREV_IS_A_LETTER(d)) break;
|
|
|
|
|
else goto fail;
|
|
|
|
|
}
|
|
|
|
|
if (PREV_IS_A_LETTER(d) != IS_A_LETTER(d))
|
|
|
|
|
break;
|
|
|
|
|
goto fail;
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case notwordbound:
|
|
|
|
|
if (AT_STRINGS_BEG(d)) {
|
|
|
|
|
if (IS_A_LETTER(d)) goto fail;
|
|
|
|
|
else break;
|
|
|
|
|
}
|
|
|
|
|
if (AT_STRINGS_END(d)) {
|
|
|
|
|
if (PREV_IS_A_LETTER(d)) goto fail;
|
|
|
|
|
else break;
|
|
|
|
|
}
|
|
|
|
|
if (PREV_IS_A_LETTER(d) != IS_A_LETTER(d))
|
1998-01-16 15:13:05 +03:00
|
|
|
|
goto fail;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case wordbeg:
|
|
|
|
|
if (IS_A_LETTER(d) && (AT_STRINGS_BEG(d) || !PREV_IS_A_LETTER(d)))
|
1998-01-16 15:13:05 +03:00
|
|
|
|
break;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
goto fail;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case wordend:
|
|
|
|
|
if (!AT_STRINGS_BEG(d) && PREV_IS_A_LETTER(d)
|
|
|
|
|
&& (!IS_A_LETTER(d) || AT_STRINGS_END(d)))
|
|
|
|
|
break;
|
|
|
|
|
goto fail;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case wordchar:
|
|
|
|
|
PREFETCH;
|
|
|
|
|
if (!IS_A_LETTER(d))
|
|
|
|
|
goto fail;
|
|
|
|
|
if (ismbchar(*d) && d + mbclen(*d) - 1 < dend)
|
|
|
|
|
d += mbclen(*d) - 1;
|
|
|
|
|
d++;
|
|
|
|
|
SET_REGS_MATCHED;
|
|
|
|
|
break;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case notwordchar:
|
|
|
|
|
PREFETCH;
|
|
|
|
|
if (IS_A_LETTER(d))
|
|
|
|
|
goto fail;
|
|
|
|
|
if (ismbchar(*d) && d + mbclen(*d) - 1 < dend)
|
|
|
|
|
d += mbclen(*d) - 1;
|
|
|
|
|
d++;
|
|
|
|
|
SET_REGS_MATCHED;
|
|
|
|
|
break;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
case exactn:
|
|
|
|
|
/* Match the next few pattern characters exactly.
|
|
|
|
|
mcnt is how many characters to match. */
|
|
|
|
|
mcnt = *p++;
|
|
|
|
|
/* This is written out as an if-else so we don't waste time
|
|
|
|
|
testing `translate' inside the loop. */
|
|
|
|
|
if (TRANSLATE_P()) {
|
|
|
|
|
do {
|
|
|
|
|
unsigned char c;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
PREFETCH;
|
|
|
|
|
if (*p == 0xff) {
|
|
|
|
|
p++;
|
|
|
|
|
if (!--mcnt
|
|
|
|
|
|| AT_STRINGS_END(d)
|
|
|
|
|
|| (unsigned char)*d++ != (unsigned char)*p++)
|
|
|
|
|
goto fail;
|
|
|
|
|
continue;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
2001-07-14 19:17:19 +04:00
|
|
|
|
c = *d++;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (ismbchar(c)) {
|
|
|
|
|
int n;
|
|
|
|
|
|
|
|
|
|
if (c != (unsigned char)*p++)
|
|
|
|
|
goto fail;
|
|
|
|
|
for (n = mbclen(c) - 1; n > 0; n--)
|
|
|
|
|
if (!--mcnt /* redundant check if pattern was
|
|
|
|
|
compiled properly. */
|
|
|
|
|
|| AT_STRINGS_END(d)
|
|
|
|
|
|| (unsigned char)*d++ != (unsigned char)*p++)
|
|
|
|
|
goto fail;
|
|
|
|
|
continue;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* compiled code translation needed for ruby */
|
|
|
|
|
if ((unsigned char)translate[c] != (unsigned char)translate[*p++])
|
|
|
|
|
goto fail;
|
|
|
|
|
}
|
|
|
|
|
while (--mcnt);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
else {
|
|
|
|
|
do {
|
|
|
|
|
PREFETCH;
|
|
|
|
|
if (*p == 0xff) {p++; mcnt--;}
|
|
|
|
|
if (*d++ != *p++) goto fail;
|
|
|
|
|
}
|
|
|
|
|
while (--mcnt);
|
|
|
|
|
}
|
|
|
|
|
SET_REGS_MATCHED;
|
|
|
|
|
break;
|
|
|
|
|
}
|
2000-02-08 11:54:01 +03:00
|
|
|
|
#ifdef RUBY
|
|
|
|
|
CHECK_INTS;
|
|
|
|
|
#endif
|
1999-08-13 09:45:20 +04:00
|
|
|
|
continue; /* Successfully executed one pattern command; keep going. */
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
/* Jump here if any matching operation fails. */
|
1999-08-13 09:45:20 +04:00
|
|
|
|
fail:
|
|
|
|
|
if (stackp != stackb) {
|
|
|
|
|
/* A restart point is known. Restart there and pop it. */
|
|
|
|
|
short last_used_reg, this_reg;
|
|
|
|
|
|
|
|
|
|
/* If this failure point is from a dummy_failure_point, just
|
|
|
|
|
skip it. */
|
2001-06-19 08:35:17 +04:00
|
|
|
|
if (stackp[-4] == 0 || (best_regs_set && stackp[-1] == NON_GREEDY)) {
|
1999-08-13 09:45:20 +04:00
|
|
|
|
POP_FAILURE_POINT();
|
|
|
|
|
goto fail;
|
|
|
|
|
}
|
2001-06-19 08:35:17 +04:00
|
|
|
|
stackp--; /* discard greedy flag */
|
2001-10-03 11:19:19 +04:00
|
|
|
|
options = (long)*--stackp;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
d = *--stackp;
|
|
|
|
|
p = *--stackp;
|
|
|
|
|
/* Restore register info. */
|
|
|
|
|
last_used_reg = (long)*--stackp;
|
|
|
|
|
|
|
|
|
|
/* Make the ones that weren't saved -1 or 0 again. */
|
|
|
|
|
for (this_reg = num_regs - 1; this_reg > last_used_reg; this_reg--) {
|
|
|
|
|
regend[this_reg] = REG_UNSET_VALUE;
|
|
|
|
|
regstart[this_reg] = REG_UNSET_VALUE;
|
|
|
|
|
IS_ACTIVE(reg_info[this_reg]) = 0;
|
|
|
|
|
MATCHED_SOMETHING(reg_info[this_reg]) = 0;
|
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
/* And restore the rest from the stack. */
|
|
|
|
|
for ( ; this_reg > 0; this_reg--) {
|
|
|
|
|
reg_info[this_reg].word = *--stackp;
|
|
|
|
|
regend[this_reg] = *--stackp;
|
|
|
|
|
regstart[this_reg] = *--stackp;
|
|
|
|
|
}
|
2000-08-28 13:53:42 +04:00
|
|
|
|
mcnt = (long)*--stackp;
|
|
|
|
|
while (mcnt--) {
|
|
|
|
|
POP_FAILURE_COUNT();
|
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (p < pend) {
|
|
|
|
|
int is_a_jump_n = 0;
|
|
|
|
|
int failed_paren = 0;
|
|
|
|
|
|
|
|
|
|
p1 = p;
|
|
|
|
|
/* If failed to a backwards jump that's part of a repetition
|
|
|
|
|
loop, need to pop this failure point and use the next one. */
|
|
|
|
|
switch ((enum regexpcode)*p1) {
|
|
|
|
|
case jump_n:
|
|
|
|
|
case finalize_push_n:
|
|
|
|
|
is_a_jump_n = 1;
|
|
|
|
|
case maybe_finalize_jump:
|
|
|
|
|
case finalize_jump:
|
|
|
|
|
case finalize_push:
|
|
|
|
|
case jump:
|
|
|
|
|
p1++;
|
2000-05-24 08:34:26 +04:00
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p1);
|
2000-02-23 08:23:12 +03:00
|
|
|
|
|
|
|
|
|
if (mcnt >= 0) break; /* should be backward jump */
|
1999-08-13 09:45:20 +04:00
|
|
|
|
p1 += mcnt;
|
|
|
|
|
|
|
|
|
|
if (( is_a_jump_n && (enum regexpcode)*p1 == succeed_n) ||
|
|
|
|
|
(!is_a_jump_n && (enum regexpcode)*p1 == on_failure_jump)) {
|
|
|
|
|
if (failed_paren) {
|
|
|
|
|
p1++;
|
|
|
|
|
EXTRACT_NUMBER_AND_INCR(mcnt, p1);
|
|
|
|
|
PUSH_FAILURE_POINT(p1 + mcnt, d);
|
|
|
|
|
}
|
1998-01-16 15:19:22 +03:00
|
|
|
|
goto fail;
|
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
break;
|
|
|
|
|
default:
|
|
|
|
|
/* do nothing */;
|
|
|
|
|
}
|
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
else
|
|
|
|
|
break; /* Matching at this starting point really fails. */
|
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
|
|
if (best_regs_set)
|
|
|
|
|
goto restore_best_regs;
|
|
|
|
|
|
|
|
|
|
FREE_AND_RETURN(stackb,(-1)); /* Failure to match. */
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
|
|
|
|
|
static int
|
1999-01-20 07:59:39 +03:00
|
|
|
|
memcmp_translate(s1, s2, len)
|
1998-01-16 15:13:05 +03:00
|
|
|
|
unsigned char *s1, *s2;
|
|
|
|
|
register int len;
|
|
|
|
|
{
|
|
|
|
|
register unsigned char *p1 = s1, *p2 = s2, c;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
while (len) {
|
|
|
|
|
c = *p1++;
|
|
|
|
|
if (ismbchar(c)) {
|
|
|
|
|
int n;
|
|
|
|
|
|
|
|
|
|
if (c != *p2++) return 1;
|
|
|
|
|
for (n = mbclen(c) - 1; n > 0; n--)
|
|
|
|
|
if (!--len || *p1++ != *p2++)
|
1998-01-16 15:13:05 +03:00
|
|
|
|
return 1;
|
|
|
|
|
}
|
1999-08-13 09:45:20 +04:00
|
|
|
|
else
|
|
|
|
|
if (translate[c] != translate[*p2++])
|
|
|
|
|
return 1;
|
|
|
|
|
len--;
|
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
return 0;
|
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
re_copy_registers(regs1, regs2)
|
|
|
|
|
struct re_registers *regs1, *regs2;
|
|
|
|
|
{
|
1999-08-13 09:45:20 +04:00
|
|
|
|
int i;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (regs1 == regs2) return;
|
|
|
|
|
if (regs1->allocated == 0) {
|
|
|
|
|
regs1->beg = TMALLOC(regs2->num_regs, int);
|
|
|
|
|
regs1->end = TMALLOC(regs2->num_regs, int);
|
|
|
|
|
regs1->allocated = regs2->num_regs;
|
|
|
|
|
}
|
|
|
|
|
else if (regs1->allocated < regs2->num_regs) {
|
|
|
|
|
TREALLOC(regs1->beg, regs2->num_regs, int);
|
|
|
|
|
TREALLOC(regs1->end, regs2->num_regs, int);
|
|
|
|
|
regs1->allocated = regs2->num_regs;
|
|
|
|
|
}
|
|
|
|
|
for (i=0; i<regs2->num_regs; i++) {
|
|
|
|
|
regs1->beg[i] = regs2->beg[i];
|
|
|
|
|
regs1->end[i] = regs2->end[i];
|
|
|
|
|
}
|
|
|
|
|
regs1->num_regs = regs2->num_regs;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
re_free_registers(regs)
|
|
|
|
|
struct re_registers *regs;
|
|
|
|
|
{
|
1999-08-13 09:45:20 +04:00
|
|
|
|
if (regs->allocated == 0) return;
|
2000-05-16 06:46:57 +04:00
|
|
|
|
if (regs->beg) xfree(regs->beg);
|
|
|
|
|
if (regs->end) xfree(regs->end);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
}
|
1999-01-20 07:59:39 +03:00
|
|
|
|
|
|
|
|
|
/* Functions for multi-byte support.
|
|
|
|
|
Created for grep multi-byte extension Jul., 1993 by t^2 (Takahiro Tanimoto)
|
|
|
|
|
Last change: Jul. 9, 1993 by t^2 */
|
|
|
|
|
/* Byte-class table for single-byte (ASCII) data, indexed by byte value.
   Every one of the 256 entries is zero, i.e. no byte has trailing
   bytes (mbc_len() yields 1 for all of them).  */
static const unsigned char mbctab_ascii[256] = { 0 };
|
|
|
|
|
|
|
|
|
|
/* Byte-class table for EUC, indexed by byte value.  An entry is the
   number of bytes that follow the indexed byte in one character
   (mbc_len() adds 1 to it): 1 for 0x8E and for 0xA1-0xFE, 2 for 0x8F,
   and 0 for everything else.  */
static const unsigned char mbctab_euc[256] = {
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 2,
  /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xA0 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xB0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xC0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xD0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xE0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xF0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0
};
|
|
|
|
|
|
2003-07-09 18:52:20 +04:00
|
|
|
|
/* Byte-class table for Shift_JIS first bytes, indexed by byte value.
   Entries are 1 for 0x81-0x9F and 0xE0-0xFC (one trailing byte follows)
   and 0 for every other byte.  */
static const unsigned char mbctab_sjis[256] = {
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x80 */ 0, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xC0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xD0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xE0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xF0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
};
|
2002-01-29 22:33:11 +03:00
|
|
|
|
|
2002-02-01 11:49:02 +03:00
|
|
|
|
/* Shift_JIS trailing-byte table, indexed by byte value.  Entries are 1
   for 0x40-0x7E and 0x80-0xFC, the values accepted as the second byte of
   a two-byte character, and 0 for every other byte.  */
static const unsigned char mbctab_sjis_trail[256] = {
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x40 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x50 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x60 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x70 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0,
  /* 0x80 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0x90 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xA0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xB0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xC0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xD0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xE0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xF0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 0, 0, 0
};
|
|
|
|
|
|
|
|
|
|
/* Byte-class table for UTF-8, indexed by byte value.  An entry is the
   number of bytes that follow the indexed byte in one character
   (mbc_len() adds 1 to it): 1 for 0xC0-0xDF, 2 for 0xE0-0xEF, 3 for
   0xF0-0xF7, 4 for 0xF8-0xFB, 5 for 0xFC-0xFD, and 0 for every other
   byte (0x00-0xBF, 0xFE and 0xFF).  */
static const unsigned char mbctab_utf8[256] = {
  /* 0x00 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x10 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x20 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x30 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x40 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x50 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x60 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x70 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x80 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0x90 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xA0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xB0 */ 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
  /* 0xC0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xD0 */ 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1, 1,
  /* 0xE0 */ 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2, 2,
  /* 0xF0 */ 3, 3, 3, 3, 3, 3, 3, 3, 4, 4, 4, 4, 5, 5, 0, 0
};
|
|
|
|
|
|
|
|
|
|
/* Byte-class table currently in effect.  It starts out pointing at the
   all-zero ASCII table and is re-pointed by re_mbcinit().  */
const unsigned char *re_mbctab = mbctab_ascii;
|
|
|
|
|
|
|
|
|
|
void
|
|
|
|
|
re_mbcinit(mbctype)
|
|
|
|
|
int mbctype;
|
|
|
|
|
{
|
|
|
|
|
switch (mbctype) {
|
|
|
|
|
case MBCTYPE_ASCII:
|
|
|
|
|
re_mbctab = mbctab_ascii;
|
|
|
|
|
current_mbctype = MBCTYPE_ASCII;
|
|
|
|
|
break;
|
|
|
|
|
case MBCTYPE_EUC:
|
|
|
|
|
re_mbctab = mbctab_euc;
|
|
|
|
|
current_mbctype = MBCTYPE_EUC;
|
|
|
|
|
break;
|
|
|
|
|
case MBCTYPE_SJIS:
|
|
|
|
|
re_mbctab = mbctab_sjis;
|
|
|
|
|
current_mbctype = MBCTYPE_SJIS;
|
|
|
|
|
break;
|
|
|
|
|
case MBCTYPE_UTF8:
|
|
|
|
|
re_mbctab = mbctab_utf8;
|
|
|
|
|
current_mbctype = MBCTYPE_UTF8;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
}
|
2002-01-29 22:33:11 +03:00
|
|
|
|
|
2002-02-01 11:49:02 +03:00
|
|
|
|
/* Lookups in a 256-entry byte-class table TAB for the byte CH.  CH is
   converted to unsigned char first, so a plain (possibly signed) char
   can be passed directly.
     mbc_isfirst  - the raw table entry (nonzero when trailing bytes follow)
     mbc_len      - the table entry plus one, i.e. the character's length  */
#define mbc_isfirst(tab, ch) ((tab)[(unsigned char)(ch)])
#define mbc_len(tab, ch)     ((tab)[(unsigned char)(ch)] + 1)
|
|
|
|
|
|
|
|
|
|
/* Start-of-character lookup for single-byte data: every byte is its own
   character, so POS is returned unchanged and STRING is never read.  */
static unsigned int
asc_startpos(string, pos)
     const char *string;
     unsigned int pos;
{
  (void)string;			/* unused for single-byte encodings */
  return pos;
}
|
|
|
|
|
|
|
|
|
|
#define euc_islead(c) ((unsigned char)((c) - 0xa1) > 0xfe - 0xa1)
|
|
|
|
|
#define euc_mbclen(c) mbc_len(mbctab_euc, (c))
|
|
|
|
|
static unsigned int
|
|
|
|
|
euc_startpos(string, pos)
|
|
|
|
|
const char *string;
|
|
|
|
|
unsigned int pos;
|
|
|
|
|
{
|
|
|
|
|
unsigned int i = pos, w;
|
2002-01-29 22:33:11 +03:00
|
|
|
|
|
2002-02-01 11:49:02 +03:00
|
|
|
|
while (i > 0 && !euc_islead(string[i])) {
|
2002-01-29 22:33:11 +03:00
|
|
|
|
--i;
|
|
|
|
|
}
|
2002-02-01 11:49:02 +03:00
|
|
|
|
if (i == pos || i + (w = euc_mbclen(string[i])) > pos) {
|
|
|
|
|
return i;
|
|
|
|
|
}
|
2002-01-29 22:33:11 +03:00
|
|
|
|
i += w;
|
2002-02-01 11:49:02 +03:00
|
|
|
|
return i + ((pos - i) & ~1);
|
|
|
|
|
}
|
2002-01-29 22:33:11 +03:00
|
|
|
|
|
2002-02-01 11:49:02 +03:00
|
|
|
|
#define sjis_isfirst(c) mbc_isfirst(mbctab_sjis, (c))
|
|
|
|
|
#define sjis_istrail(c) mbctab_sjis_trail[(unsigned char)(c)]
|
|
|
|
|
#define sjis_mbclen(c) mbc_len(mbctab_sjis, (c))
|
|
|
|
|
static unsigned int
|
|
|
|
|
sjis_startpos(string, pos)
|
|
|
|
|
const char *string;
|
|
|
|
|
unsigned int pos;
|
|
|
|
|
{
|
|
|
|
|
unsigned int i = pos, w;
|
2002-01-30 10:00:58 +03:00
|
|
|
|
|
2002-02-01 11:49:02 +03:00
|
|
|
|
if (i > 0 && sjis_istrail(string[i])) {
|
|
|
|
|
do {
|
|
|
|
|
if (!sjis_isfirst(string[--i])) {
|
|
|
|
|
++i;
|
|
|
|
|
break;
|
|
|
|
|
}
|
|
|
|
|
} while (i > 0);
|
|
|
|
|
}
|
|
|
|
|
if (i == pos || i + (w = sjis_mbclen(string[i])) > pos) {
|
2002-01-30 10:00:58 +03:00
|
|
|
|
return i;
|
2002-02-01 11:49:02 +03:00
|
|
|
|
}
|
|
|
|
|
i += w;
|
|
|
|
|
return i + ((pos - i) & ~1);
|
|
|
|
|
}
|
2002-01-30 10:00:58 +03:00
|
|
|
|
|
2002-02-01 11:49:02 +03:00
|
|
|
|
#define utf8_islead(c) ((unsigned char)((c) & 0xc0) != 0x80)
|
|
|
|
|
#define utf8_mbclen(c) mbc_len(mbctab_utf8, (c))
|
|
|
|
|
static unsigned int
|
|
|
|
|
utf8_startpos(string, pos)
|
|
|
|
|
const char *string;
|
|
|
|
|
unsigned int pos;
|
|
|
|
|
{
|
|
|
|
|
unsigned int i = pos, w;
|
|
|
|
|
|
|
|
|
|
while (i > 0 && !utf8_islead(string[i])) {
|
|
|
|
|
--i;
|
|
|
|
|
}
|
|
|
|
|
if (i == pos || i + (w = utf8_mbclen(string[i])) > pos) {
|
2002-01-30 10:00:58 +03:00
|
|
|
|
return i;
|
2002-01-29 22:33:11 +03:00
|
|
|
|
}
|
2002-02-01 11:49:02 +03:00
|
|
|
|
return i + w;
|
2002-01-29 22:33:11 +03:00
|
|
|
|
}
|
|
|
|
|
|
|
|
|
|
/*
|
|
|
|
|
vi: sw=2 ts=8
|
|
|
|
|
Local variables:
|
|
|
|
|
mode : C
|
|
|
|
|
c-file-style : "gnu"
|
|
|
|
|
tab-width : 8
|
|
|
|
|
End :
|
|
|
|
|
*/
|