2000-05-01 13:42:38 +04:00
|
|
|
/**********************************************************************
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
re.c -
|
|
|
|
|
|
|
|
$Author$
|
|
|
|
created at: Mon Aug 9 18:24:49 JST 1993
|
|
|
|
|
* encoding.c: provide basic features for M17N.
* parse.y: encoding aware parsing.
* parse.y (pragma_encoding): encoding specification pragma.
* parse.y (rb_intern3): encoding specified symbols.
* string.c (rb_str_length): length based on characters.
for older behavior, bytesize method added.
* string.c (rb_str_index_m): index based on characters. rindex as
well.
* string.c (succ_char): encoding aware succeeding string.
* string.c (rb_str_reverse): reverse based on characters.
* string.c (rb_str_inspect): encoding aware string description.
* string.c (rb_str_upcase_bang): encoding aware case conversion.
downcase, capitalize, swapcase as well.
* string.c (rb_str_tr_bang): tr based on characters. delete,
squeeze, tr_s, count as well.
* string.c (rb_str_split_m): split based on characters.
* string.c (rb_str_each_line): encoding aware each_line.
* string.c (rb_str_each_char): added. iteration based on
characters.
* string.c (rb_str_strip_bang): encoding aware whitespace
stripping. lstrip, rstrip as well.
* string.c (rb_str_justify): encoding aware justifying (ljust,
rjust, center).
* string.c (str_encoding): get encoding attribute from a string.
* re.c (rb_reg_initialize): encoding aware regular expression
* sprintf.c (rb_str_format): formatting (i.e. length count) based
on characters.
* io.c (rb_io_getc): getc to return one-character string.
for older behavior, getbyte method added.
* ext/stringio/stringio.c (strio_getc): ditto.
* io.c (rb_io_ungetc): allow pushing arbitrary string at the
current reading point.
* ext/stringio/stringio.c (strio_ungetc): ditto.
* ext/strscan/strscan.c: encoding support.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13261 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-08-25 07:29:39 +04:00
|
|
|
Copyright (C) 1993-2007 Yukihiro Matsumoto
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2000-05-01 13:42:38 +04:00
|
|
|
**********************************************************************/
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2020-05-08 12:31:09 +03:00
|
|
|
#include "ruby/internal/config.h"
|
2019-12-04 11:16:30 +03:00
|
|
|
|
|
|
|
#include <ctype.h>
|
|
|
|
|
|
|
|
#include "encindex.h"
|
2022-03-24 10:59:11 +03:00
|
|
|
#include "hrtime.h"
|
2019-12-04 11:16:30 +03:00
|
|
|
#include "internal.h"
|
2022-11-15 07:21:45 +03:00
|
|
|
#include "internal/encoding.h"
|
2019-12-04 11:16:30 +03:00
|
|
|
#include "internal/hash.h"
|
|
|
|
#include "internal/imemo.h"
|
|
|
|
#include "internal/re.h"
|
2020-04-08 07:28:13 +03:00
|
|
|
#include "internal/string.h"
|
2022-05-24 10:51:15 +03:00
|
|
|
#include "internal/object.h"
|
2022-03-30 08:46:18 +03:00
|
|
|
#include "internal/ractor.h"
|
Some global variables can be accessed from ractors
Some global variables should be used from non-main Ractors.
[Bug #17268]
```ruby
# ractor-local (derived from created ractor): debug
'$DEBUG' => $DEBUG,
'$-d' => $-d,
# ractor-local (derived from created ractor): verbose
'$VERBOSE' => $VERBOSE,
'$-w' => $-w,
'$-W' => $-W,
'$-v' => $-v,
# process-local (readonly): other commandline parameters
'$-p' => $-p,
'$-l' => $-l,
'$-a' => $-a,
# process-local (readonly): getpid
'$$' => $$,
# thread local: process result
'$?' => $?,
# scope local: match
'$~' => $~.inspect,
'$&' => $&,
'$`' => $`,
'$\'' => $',
'$+' => $+,
'$1' => $1,
# scope local: last line
'$_' => $_,
# scope local: last backtrace
'$@' => $@,
'$!' => $!,
# ractor local: stdin, out, err
'$stdin' => $stdin.inspect,
'$stdout' => $stdout.inspect,
'$stderr' => $stderr.inspect,
```
2020-10-20 04:46:43 +03:00
|
|
|
#include "internal/variable.h"
|
2019-12-04 11:16:30 +03:00
|
|
|
#include "regint.h"
|
2018-01-09 09:24:11 +03:00
|
|
|
#include "ruby/encoding.h"
|
2007-06-10 07:06:15 +04:00
|
|
|
#include "ruby/re.h"
|
2007-12-01 19:56:19 +03:00
|
|
|
#include "ruby/util.h"
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2022-03-28 09:03:17 +03:00
|
|
|
/* Global VALUE handles defined in this file.
 * NOTE(review): judging by the names these hold the regexp error and regexp
 * timeout exception classes, assigned elsewhere -- confirm. */
VALUE rb_eRegexpError, rb_eRegexpTimeoutError;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
* include/ruby/{intern,ruby}.h, compile.[ch], error.c, eval.c,
eval_load.c, gc.c, iseq.c, main.c, parse.y, re.c, ruby.c,
yarvcore.[ch] (ruby_eval_tree, ruby_sourcefile, ruby_sourceline,
ruby_nerrs): purge global variables.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12700 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-07-05 12:12:18 +04:00
|
|
|
/* Fixed-size character buffer of ONIG_MAX_ERROR_MESSAGE_LEN bytes, used to
 * carry an error message. */
typedef char onig_errmsg_buffer[ONIG_MAX_ERROR_MESSAGE_LEN];
|
2008-12-16 13:44:36 +03:00
|
|
|
/* Copies the string `msg` into `err` with strlcpy(), bounded by
 * ONIG_MAX_ERROR_MESSAGE_LEN; `err` must therefore be at least that large
 * (e.g. an onig_errmsg_buffer). */
#define errcpy(err, msg) strlcpy((err), (msg), ONIG_MAX_ERROR_MESSAGE_LEN)
|
* include/ruby/{intern,ruby}.h, compile.[ch], error.c, eval.c,
eval_load.c, gc.c, iseq.c, main.c, parse.y, re.c, ruby.c,
yarvcore.[ch] (ruby_eval_tree, ruby_sourcefile, ruby_sourceline,
ruby_nerrs): purge global variables.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12700 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-07-05 12:12:18 +04:00
|
|
|
|
2010-12-27 12:27:43 +03:00
|
|
|
/* Entry `no` of regs->beg.  Expands against a variable named `regs`, which
 * must be in scope wherever the macro is used.
 * NOTE(review): presumably the start offset of match group `no` -- confirm. */
#define BEG(no) (regs->beg[(no)])
|
|
|
|
/* Entry `no` of regs->end.  Expands against a variable named `regs`, which
 * must be in scope wherever the macro is used.
 * NOTE(review): presumably the end offset of match group `no` -- confirm. */
#define END(no) (regs->end[(no)])
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
#if 'a' == 97   /* it's ascii */
/*
 * Byte-to-byte folding table used for case-insensitive comparison.
 * Every index maps to itself except 'A'..'Z' (0x41..0x5a), which map to
 * 'a'..'z'.  Laid out eight entries per row; each group of two rows is
 * labelled with the index of its first entry.
 */
static const char casetable[] = {
    /* 0x00 */
    '\x00', '\x01', '\x02', '\x03', '\x04', '\x05', '\x06', '\x07',
    '\x08', '\x09', '\x0a', '\x0b', '\x0c', '\x0d', '\x0e', '\x0f',
    /* 0x10 */
    '\x10', '\x11', '\x12', '\x13', '\x14', '\x15', '\x16', '\x17',
    '\x18', '\x19', '\x1a', '\x1b', '\x1c', '\x1d', '\x1e', '\x1f',
    /* 0x20 */
    ' ', '!', '"', '#', '$', '%', '&', '\'',
    '(', ')', '*', '+', ',', '-', '.', '/',
    /* 0x30 */
    '0', '1', '2', '3', '4', '5', '6', '7',
    '8', '9', ':', ';', '<', '=', '>', '?',
    /* 0x40: '@', then 'A'..'O' folded to lower case */
    '@', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
    'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    /* 0x50: 'P'..'Z' folded to lower case, then '[' '\' ']' '^' '_' */
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
    'x', 'y', 'z', '[', '\\', ']', '^', '_',
    /* 0x60 */
    '`', 'a', 'b', 'c', 'd', 'e', 'f', 'g',
    'h', 'i', 'j', 'k', 'l', 'm', 'n', 'o',
    /* 0x70 */
    'p', 'q', 'r', 's', 't', 'u', 'v', 'w',
    'x', 'y', 'z', '{', '|', '}', '~', '\x7f',
    /* 0x80 */
    '\x80', '\x81', '\x82', '\x83', '\x84', '\x85', '\x86', '\x87',
    '\x88', '\x89', '\x8a', '\x8b', '\x8c', '\x8d', '\x8e', '\x8f',
    /* 0x90 */
    '\x90', '\x91', '\x92', '\x93', '\x94', '\x95', '\x96', '\x97',
    '\x98', '\x99', '\x9a', '\x9b', '\x9c', '\x9d', '\x9e', '\x9f',
    /* 0xa0 */
    '\xa0', '\xa1', '\xa2', '\xa3', '\xa4', '\xa5', '\xa6', '\xa7',
    '\xa8', '\xa9', '\xaa', '\xab', '\xac', '\xad', '\xae', '\xaf',
    /* 0xb0 */
    '\xb0', '\xb1', '\xb2', '\xb3', '\xb4', '\xb5', '\xb6', '\xb7',
    '\xb8', '\xb9', '\xba', '\xbb', '\xbc', '\xbd', '\xbe', '\xbf',
    /* 0xc0 */
    '\xc0', '\xc1', '\xc2', '\xc3', '\xc4', '\xc5', '\xc6', '\xc7',
    '\xc8', '\xc9', '\xca', '\xcb', '\xcc', '\xcd', '\xce', '\xcf',
    /* 0xd0 */
    '\xd0', '\xd1', '\xd2', '\xd3', '\xd4', '\xd5', '\xd6', '\xd7',
    '\xd8', '\xd9', '\xda', '\xdb', '\xdc', '\xdd', '\xde', '\xdf',
    /* 0xe0 */
    '\xe0', '\xe1', '\xe2', '\xe3', '\xe4', '\xe5', '\xe6', '\xe7',
    '\xe8', '\xe9', '\xea', '\xeb', '\xec', '\xed', '\xee', '\xef',
    /* 0xf0 */
    '\xf0', '\xf1', '\xf2', '\xf3', '\xf4', '\xf5', '\xf6', '\xf7',
    '\xf8', '\xf9', '\xfa', '\xfb', '\xfc', '\xfd', '\xfe', '\xff',
};
#else
# error >>> "You lose. You will need a translation table for your character set." <<<
#endif
|
|
|
|
|
2024-01-30 22:16:51 +03:00
|
|
|
// The process-global timeout for regexp matching
|
|
|
|
/* Stored as an rb_hrtime_t and initialized to 0 here.
 * NOTE(review): 0 presumably means "no limit configured" -- confirm against
 * the code that reads this value. */
rb_hrtime_t rb_reg_match_time_limit = 0;
|
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
int
|
* sprintf.c (rb_str_format): allow %c to print one character
string (e.g. ?x).
* lib/tempfile.rb (Tempfile::make_tmpname): put dot between
basename and pid. [ruby-talk:196272]
* parse.y (do_block): remove -> style block.
* parse.y (parser_yylex): remove tLAMBDA_ARG.
* eval.c (rb_call0): binding for the return event hook should have
consistent scope. [ruby-core:07928]
* eval.c (proc_invoke): return behavior should depend whether it
is surrounded by a lambda or a mere block.
* eval.c (formal_assign): handles post splat arguments.
* eval.c (rb_call0): ditto.
* st.c (strhash): use FNV-1a hash.
* parse.y (parser_yylex): removed experimental ';;' terminator.
* eval.c (rb_node_arity): should be aware of post splat arguments.
* eval.c (rb_proc_arity): ditto.
* parse.y (f_args): syntax rule enhanced to support arguments
after the splat.
* parse.y (block_param): ditto for block parameters.
* parse.y (f_post_arg): mandatory formal arguments after the splat
argument.
* parse.y (new_args_gen): generate nodes for mandatory formal
arguments after the splat argument.
* eval.c (rb_eval): dispatch mandatory formal arguments after the
splat argument.
* parse.y (args): allow more than one splat in the argument list.
* parse.y (method_call): allow aref [] to accept all kind of
method argument, including assocs, splat, and block argument.
* eval.c (SETUP_ARGS0): prepare block argument as well.
* lib/mathn.rb (Integer): remove Integer#gcd2. [ruby-core:07931]
* eval.c (error_line): print receivers true/false/nil specially.
* eval.c (rb_proc_yield): handles parameters in yield semantics.
* eval.c (nil_yield): gives LocalJumpError to denote no block
error.
* io.c (rb_io_getc): now takes one-character string.
* string.c (rb_str_hash): use FNV-1a hash from Fowler/Noll/Vo
hashing algorithm.
* string.c (rb_str_aref): str[0] now returns 1 character string,
instead of a fixnum. [Ruby2]
* parse.y (parser_yylex): ?c now returns 1 character string,
instead of a fixnum. [Ruby2]
* string.c (rb_str_aset): no longer support fixnum insertion.
* eval.c (umethod_bind): should not update original class.
[ruby-dev:28636]
* eval.c (ev_const_get): should support constant access from
within instance_eval(). [ruby-dev:28327]
* time.c (time_timeval): should round for usec floating
number. [ruby-core:07896]
* time.c (time_add): ditto.
* dir.c (sys_warning): should not call a vararg function
rb_sys_warning() indirectly. [ruby-core:07886]
* numeric.c (flo_divmod): the first element of Float#divmod should
be an integer. [ruby-dev:28589]
* test/ruby/test_float.rb: add tests for divmod, div, modulo and remainder.
* re.c (rb_reg_initialize): should not allow modifying literal
regexps. frozen check moved from rb_reg_initialize_m as well.
* re.c (rb_reg_initialize): should not modify untainted objects in
safe levels higher than 3.
* re.c (rb_memcmp): type change from char* to const void*.
* dir.c (dir_close): should not close untainted dir stream.
* dir.c (GetDIR): add tainted/frozen check for each dir operation.
* lib/rdoc/parsers/parse_rb.rb (RDoc::RubyParser::parse_symbol_arg):
typo fixed. a patch from Florian Gross <florg at florg.net>.
* eval.c (EXEC_EVENT_HOOK): trace_func may remove itself from
event_hooks. no guarantee for arbitrary hook deletion.
[ruby-dev:28632]
* util.c (ruby_strtod): differ addition to minimize error.
[ruby-dev:28619]
* util.c (ruby_strtod): should not raise ERANGE when the input
string does not have any digits. [ruby-dev:28629]
* eval.c (proc_invoke): should restore old ruby_frame->block.
thanks to ts <decoux at moulon.inra.fr>. [ruby-core:07833]
also fix [ruby-dev:28614] as well.
* signal.c (trap): sig should be less then NSIG. Coverity found
this bug. a patch from Kevin Tew <tewk at tewk.com>.
[ruby-core:07823]
* math.c (math_log2): add new method inspired by
[ruby-talk:191237].
* math.c (math_log): add optional base argument to Math::log().
[ruby-talk:191308]
* ext/syck/emitter.c (syck_scan_scalar): avoid accessing
uninitialized array element. a patch from Pat Eyler
<rubypate at gmail.com>. [ruby-core:07809]
* array.c (rb_ary_fill): initialize local variables first. a
patch from Pat Eyler <rubypate at gmail.com>. [ruby-core:07810]
* ext/syck/yaml2byte.c (syck_yaml2byte_handler): need to free
type_tag. a patch from Pat Eyler <rubypate at gmail.com>.
[ruby-core:07808]
* ext/socket/socket.c (make_hostent_internal): accept ai_family
check from Sam Roberts <sroberts at uniserve.com>.
[ruby-core:07691]
* util.c (ruby_strtod): should not cut off 18 digits for no
reason. [ruby-core:07796]
* array.c (rb_ary_fill): internalize local variable "beg" to
pacify Coverity. [ruby-core:07770]
* pack.c (pack_unpack): now supports CRLF newlines. a patch from
<tommy at tmtm.org>. [ruby-dev:28601]
* applied code clean-up patch from Stefan Huehner
<stefan at huehner.org>. [ruby-core:07764]
* lib/jcode.rb (String::tr_s): should have translated non
squeezing character sequence (i.e. a character) as well. thanks
to Hiroshi Ichikawa <gimite at gimite.ddo.jp> [ruby-list:42090]
* ext/socket/socket.c: document update patch from Sam Roberts
<sroberts at uniserve.com>. [ruby-core:07701]
* lib/mathn.rb (Integer): need not to remove gcd2. a patch from
NARUSE, Yui <naruse at airemix.com>. [ruby-dev:28570]
* parse.y (arg): too much NEW_LIST()
* eval.c (SETUP_ARGS0): remove unnecessary access to nd_alen.
* eval.c (rb_eval): use ARGSCAT for NODE_OP_ASGN1.
[ruby-dev:28585]
* parse.y (arg): use NODE_ARGSCAT for placeholder.
* lib/getoptlong.rb (GetoptLong::get): RDoc update patch from
mathew <meta at pobox.com>. [ruby-core:07738]
* variable.c (rb_const_set): raise error when no target klass is
supplied. [ruby-dev:28582]
* prec.c (prec_prec_f): documentation patch from
<gerardo.santana at gmail.com>. [ruby-core:07689]
* bignum.c (rb_big_pow): second operand may be too big even if
it's a Fixnum. [ruby-talk:187984]
* README.EXT: update symbol description. [ruby-talk:188104]
* COPYING: explicitly note GPLv2. [ruby-talk:187922]
* parse.y: remove some obsolete syntax rules (unparenthesized
method calls in argument list).
* eval.c (rb_call0): insecure calling should be checked for non
NODE_SCOPE method invocations too.
* eval.c (rb_alias): should preserve the current safe level as
well as method definition.
* process.c (rb_f_sleep): remove RDoc description about SIGALRM
which is not valid on the current implementation. [ruby-dev:28464]
Thu Mar 23 21:40:47 2006 K.Kosako <sndgk393 AT ybb.ne.jp>
* eval.c (method_missing): should support argument splat in
super. a bug in combination of super, splat and
method_missing. [ruby-talk:185438]
* configure.in: Solaris SunPro compiler -rapth patch from
<kuwa at labs.fujitsu.com>. [ruby-dev:28443]
* configure.in: remove enable_rpath=no for Solaris.
[ruby-dev:28440]
* ext/win32ole/win32ole.c (ole_val2olevariantdata): change behavior
of converting OLE Variant object with VT_ARRAY|VT_UI1 and Ruby
String object.
* ruby.1: a clarification patch from David Lutterkort
<dlutter at redhat.com>. [ruby-core:7508]
* lib/rdoc/ri/ri_paths.rb (RI::Paths): adding paths from rubygems
directories. a patch from Eric Hodel <drbrain at segment7.net>.
[ruby-core:07423]
* eval.c (rb_clear_cache_by_class): clearing wrong cache.
* ext/extmk.rb: use :remove_destination to install extension libraries
to avoid SEGV. [ruby-dev:28417]
* eval.c (rb_thread_fd_writable): should not re-schedule output
from KILLED thread (must be error printing).
* array.c (rb_ary_flatten_bang): allow specifying recursion
level. [ruby-talk:182170]
* array.c (rb_ary_flatten): ditto.
* gc.c (add_heap): a heap_slots may overflow. a patch from Stefan
Weil <weil at mail.berlios.de>.
* eval.c (rb_call): use separate cache for fcall/vcall
invocation.
* eval.c (rb_eval): NODE_FCALL, NODE_VCALL can call local
functions.
* eval.c (rb_mod_local): a new method to specify newly added
visibility "local".
* eval.c (search_method): search for local methods which are
visible only from the current class.
* class.c (rb_class_local_methods): a method to list local methods.
* object.c (Init_Object): add BasicObject class as a top level
BlankSlate class.
* ruby.h (SYM2ID): should not cast to signed long.
[ruby-core:07414]
* class.c (rb_include_module): allow module duplication.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10235 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-06-10 01:20:17 +04:00
|
|
|
rb_memcicmp(const void *x, const void *y, long len)
|
2000-06-14 09:30:29 +04:00
|
|
|
{
|
* sprintf.c (rb_str_format): allow %c to print one character
string (e.g. ?x).
* lib/tempfile.rb (Tempfile::make_tmpname): put dot between
basename and pid. [ruby-talk:196272]
* parse.y (do_block): remove -> style block.
* parse.y (parser_yylex): remove tLAMBDA_ARG.
* eval.c (rb_call0): binding for the return event hook should have
consistent scope. [ruby-core:07928]
* eval.c (proc_invoke): return behavior should depend whether it
is surrounded by a lambda or a mere block.
* eval.c (formal_assign): handles post splat arguments.
* eval.c (rb_call0): ditto.
* st.c (strhash): use FNV-1a hash.
* parse.y (parser_yylex): removed experimental ';;' terminator.
* eval.c (rb_node_arity): should be aware of post splat arguments.
* eval.c (rb_proc_arity): ditto.
* parse.y (f_args): syntax rule enhanced to support arguments
after the splat.
* parse.y (block_param): ditto for block parameters.
* parse.y (f_post_arg): mandatory formal arguments after the splat
argument.
* parse.y (new_args_gen): generate nodes for mandatory formal
arguments after the splat argument.
* eval.c (rb_eval): dispatch mandatory formal arguments after the
splat argument.
* parse.y (args): allow more than one splat in the argument list.
* parse.y (method_call): allow aref [] to accept all kind of
method argument, including assocs, splat, and block argument.
* eval.c (SETUP_ARGS0): prepare block argument as well.
* lib/mathn.rb (Integer): remove Integer#gcd2. [ruby-core:07931]
* eval.c (error_line): print receivers true/false/nil specially.
* eval.c (rb_proc_yield): handles parameters in yield semantics.
* eval.c (nil_yield): gives LocalJumpError to denote no block
error.
* io.c (rb_io_getc): now takes one-character string.
* string.c (rb_str_hash): use FNV-1a hash from Fowler/Noll/Vo
hashing algorithm.
* string.c (rb_str_aref): str[0] now returns 1 character string,
instead of a fixnum. [Ruby2]
* parse.y (parser_yylex): ?c now returns 1 character string,
instead of a fixnum. [Ruby2]
* string.c (rb_str_aset): no longer support fixnum insertion.
* eval.c (umethod_bind): should not update original class.
[ruby-dev:28636]
* eval.c (ev_const_get): should support constant access from
within instance_eval(). [ruby-dev:28327]
* time.c (time_timeval): should round for usec floating
number. [ruby-core:07896]
* time.c (time_add): ditto.
* dir.c (sys_warning): should not call a vararg function
rb_sys_warning() indirectly. [ruby-core:07886]
* numeric.c (flo_divmod): the first element of Float#divmod should
be an integer. [ruby-dev:28589]
* test/ruby/test_float.rb: add tests for divmod, div, modulo and remainder.
* re.c (rb_reg_initialize): should not allow modifying literal
regexps. frozen check moved from rb_reg_initialize_m as well.
* re.c (rb_reg_initialize): should not modify untainted objects in
safe levels higher than 3.
* re.c (rb_memcmp): type change from char* to const void*.
* dir.c (dir_close): should not close untainted dir stream.
* dir.c (GetDIR): add tainted/frozen check for each dir operation.
* lib/rdoc/parsers/parse_rb.rb (RDoc::RubyParser::parse_symbol_arg):
typo fixed. a patch from Florian Gross <florg at florg.net>.
* eval.c (EXEC_EVENT_HOOK): trace_func may remove itself from
event_hooks. no guarantee for arbitrary hook deletion.
[ruby-dev:28632]
* util.c (ruby_strtod): differ addition to minimize error.
[ruby-dev:28619]
* util.c (ruby_strtod): should not raise ERANGE when the input
string does not have any digits. [ruby-dev:28629]
* eval.c (proc_invoke): should restore old ruby_frame->block.
thanks to ts <decoux at moulon.inra.fr>. [ruby-core:07833]
also fix [ruby-dev:28614] as well.
* signal.c (trap): sig should be less then NSIG. Coverity found
this bug. a patch from Kevin Tew <tewk at tewk.com>.
[ruby-core:07823]
* math.c (math_log2): add new method inspired by
[ruby-talk:191237].
* math.c (math_log): add optional base argument to Math::log().
[ruby-talk:191308]
* ext/syck/emitter.c (syck_scan_scalar): avoid accessing
uninitialized array element. a patch from Pat Eyler
<rubypate at gmail.com>. [ruby-core:07809]
* array.c (rb_ary_fill): initialize local variables first. a
patch from Pat Eyler <rubypate at gmail.com>. [ruby-core:07810]
* ext/syck/yaml2byte.c (syck_yaml2byte_handler): need to free
type_tag. a patch from Pat Eyler <rubypate at gmail.com>.
[ruby-core:07808]
* ext/socket/socket.c (make_hostent_internal): accept ai_family
check from Sam Roberts <sroberts at uniserve.com>.
[ruby-core:07691]
* util.c (ruby_strtod): should not cut off 18 digits for no
reason. [ruby-core:07796]
* array.c (rb_ary_fill): internalize local variable "beg" to
pacify Coverity. [ruby-core:07770]
* pack.c (pack_unpack): now supports CRLF newlines. a patch from
<tommy at tmtm.org>. [ruby-dev:28601]
* applied code clean-up patch from Stefan Huehner
<stefan at huehner.org>. [ruby-core:07764]
* lib/jcode.rb (String::tr_s): should have translated non
squeezing character sequence (i.e. a character) as well. thanks
to Hiroshi Ichikawa <gimite at gimite.ddo.jp> [ruby-list:42090]
* ext/socket/socket.c: document update patch from Sam Roberts
<sroberts at uniserve.com>. [ruby-core:07701]
* lib/mathn.rb (Integer): need not to remove gcd2. a patch from
NARUSE, Yui <naruse at airemix.com>. [ruby-dev:28570]
* parse.y (arg): too much NEW_LIST()
* eval.c (SETUP_ARGS0): remove unnecessary access to nd_alen.
* eval.c (rb_eval): use ARGSCAT for NODE_OP_ASGN1.
[ruby-dev:28585]
* parse.y (arg): use NODE_ARGSCAT for placeholder.
* lib/getoptlong.rb (GetoptLong::get): RDoc update patch from
mathew <meta at pobox.com>. [ruby-core:07738]
* variable.c (rb_const_set): raise error when no target klass is
supplied. [ruby-dev:28582]
* prec.c (prec_prec_f): documentation patch from
<gerardo.santana at gmail.com>. [ruby-core:07689]
* bignum.c (rb_big_pow): second operand may be too big even if
it's a Fixnum. [ruby-talk:187984]
* README.EXT: update symbol description. [ruby-talk:188104]
* COPYING: explicitly note GPLv2. [ruby-talk:187922]
* parse.y: remove some obsolete syntax rules (unparenthesized
method calls in argument list).
* eval.c (rb_call0): insecure calling should be checked for non
NODE_SCOPE method invocations too.
* eval.c (rb_alias): should preserve the current safe level as
well as method definition.
* process.c (rb_f_sleep): remove RDoc description about SIGALRM
which is not valid on the current implementation. [ruby-dev:28464]
Thu Mar 23 21:40:47 2006 K.Kosako <sndgk393 AT ybb.ne.jp>
* eval.c (method_missing): should support argument splat in
super. a bug in combination of super, splat and
method_missing. [ruby-talk:185438]
* configure.in: Solaris SunPro compiler -rapth patch from
<kuwa at labs.fujitsu.com>. [ruby-dev:28443]
* configure.in: remove enable_rpath=no for Solaris.
[ruby-dev:28440]
* ext/win32ole/win32ole.c (ole_val2olevariantdata): change behavior
of converting OLE Variant object with VT_ARRAY|VT_UI1 and Ruby
String object.
* ruby.1: a clarification patch from David Lutterkort
<dlutter at redhat.com>. [ruby-core:7508]
* lib/rdoc/ri/ri_paths.rb (RI::Paths): adding paths from rubygems
directories. a patch from Eric Hodel <drbrain at segment7.net>.
[ruby-core:07423]
* eval.c (rb_clear_cache_by_class): clearing wrong cache.
* ext/extmk.rb: use :remove_destination to install extension libraries
to avoid SEGV. [ruby-dev:28417]
* eval.c (rb_thread_fd_writable): should not re-schedule output
from KILLED thread (must be error printing).
* array.c (rb_ary_flatten_bang): allow specifying recursion
level. [ruby-talk:182170]
* array.c (rb_ary_flatten): ditto.
* gc.c (add_heap): a heap_slots may overflow. a patch from Stefan
Weil <weil at mail.berlios.de>.
* eval.c (rb_call): use separate cache for fcall/vcall
invocation.
* eval.c (rb_eval): NODE_FCALL, NODE_VCALL can call local
functions.
* eval.c (rb_mod_local): a new method to specify newly added
visibility "local".
* eval.c (search_method): search for local methods which are
visible only from the current class.
* class.c (rb_class_local_methods): a method to list local methods.
* object.c (Init_Object): add BasicObject class as a top level
BlankSlate class.
* ruby.h (SYM2ID): should not cast to signed long.
[ruby-core:07414]
* class.c (rb_include_module): allow module duplication.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10235 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-06-10 01:20:17 +04:00
|
|
|
const unsigned char *p1 = x, *p2 = y;
|
2000-06-14 09:30:29 +04:00
|
|
|
int tmp;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2000-06-14 09:30:29 +04:00
|
|
|
while (len--) {
|
2006-03-26 17:04:13 +04:00
|
|
|
if ((tmp = casetable[(unsigned)*p1++] - casetable[(unsigned)*p2++]))
|
2000-06-14 09:30:29 +04:00
|
|
|
return tmp;
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
2000-06-14 09:30:29 +04:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2012-11-13 06:12:40 +04:00
|
|
|
#ifdef HAVE_MEMMEM
|
|
|
|
static inline long
|
|
|
|
rb_memsearch_ss(const unsigned char *xs, long m, const unsigned char *ys, long n)
|
|
|
|
{
|
|
|
|
const unsigned char *y;
|
|
|
|
|
2018-09-21 13:19:10 +03:00
|
|
|
if ((y = memmem(ys, n, xs, m)) != NULL)
|
2012-11-13 06:12:40 +04:00
|
|
|
return y - ys;
|
|
|
|
else
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
#else
|
2008-03-17 22:04:29 +03:00
|
|
|
static inline long
|
|
|
|
rb_memsearch_ss(const unsigned char *xs, long m, const unsigned char *ys, long n)
|
2003-02-03 08:34:16 +03:00
|
|
|
{
|
2008-03-17 22:04:29 +03:00
|
|
|
const unsigned char *x = xs, *xe = xs + m;
|
|
|
|
const unsigned char *y = ys, *ye = ys + n;
|
2018-01-04 10:51:16 +03:00
|
|
|
#define VALUE_MAX ((VALUE)~(VALUE)0)
|
2008-03-17 22:04:29 +03:00
|
|
|
VALUE hx, hy, mask = VALUE_MAX >> ((SIZEOF_VALUE - m) * CHAR_BIT);
|
2003-02-03 08:34:16 +03:00
|
|
|
|
2008-03-17 22:04:29 +03:00
|
|
|
if (m > SIZEOF_VALUE)
|
|
|
|
rb_bug("!!too long pattern string!!");
|
2003-02-03 08:34:16 +03:00
|
|
|
|
2012-11-22 09:23:12 +04:00
|
|
|
if (!(y = memchr(y, *x, n - m + 1)))
|
|
|
|
return -1;
|
|
|
|
|
2009-11-26 08:25:08 +03:00
|
|
|
/* Prepare hash value */
|
2008-03-17 22:04:29 +03:00
|
|
|
for (hx = *x++, hy = *y++; x < xe; ++x, ++y) {
|
|
|
|
hx <<= CHAR_BIT;
|
|
|
|
hy <<= CHAR_BIT;
|
|
|
|
hx |= *x;
|
|
|
|
hy |= *y;
|
|
|
|
}
|
|
|
|
/* Searching */
|
|
|
|
while (hx != hy) {
|
|
|
|
if (y == ye)
|
|
|
|
return -1;
|
|
|
|
hy <<= CHAR_BIT;
|
|
|
|
hy |= *y;
|
|
|
|
hy &= mask;
|
|
|
|
y++;
|
|
|
|
}
|
|
|
|
return y - ys - m;
|
|
|
|
}
|
2012-11-13 06:12:40 +04:00
|
|
|
#endif
|
2008-03-17 22:04:29 +03:00
|
|
|
|
|
|
|
/*
 * Quick Search (Sunday) substring search over raw bytes.
 *
 * Returns the byte offset of the first occurrence of the m bytes at xs
 * inside the n bytes at ys, or -1 when there is none.
 *
 * A 256-entry shift table is built from the pattern: a byte that does not
 * occur in the pattern shifts the window by m + 1, a byte that does occur
 * shifts it by the distance from its last occurrence to the end of the
 * pattern.  After a failed comparison the byte immediately following the
 * current window selects the shift.
 *
 * When the window already ends at the end of the haystack there is no such
 * following byte and no later window can fit, so the search stops there
 * instead of reading ys[n].
 *
 * Expects m >= 1; rb_memsearch() in this file handles m < 1 and m == 1
 * before dispatching here.
 */
static inline long
rb_memsearch_qs(const unsigned char *xs, long m, const unsigned char *ys, long n)
{
    const unsigned char *x = xs, *xe = xs + m;
    const long last = n - m;    /* start offset of the last window that fits */
    long pos, qstable[256];
    int i;

    /* Preprocessing */
    for (i = 0; i < 256; ++i)
        qstable[i] = m + 1;
    for (; x < xe; ++x)
        qstable[*x] = xe - x;

    /* Searching */
    for (pos = 0; pos <= last; pos += qstable[ys[pos + m]]) {
        if (*xs == ys[pos] && memcmp(xs, ys + pos, m) == 0)
            return pos;
        if (pos == last)
            break;              /* ys[pos + m] would be past the haystack */
    }
    return -1;
}
|
2003-02-03 08:34:16 +03:00
|
|
|
|
2008-03-17 22:04:29 +03:00
|
|
|
/*
 * Maps the byte sequence starting at x to a slot in a 512-entry table.
 *
 * - A first byte below 0xC0, or 0xF5 and above, is handled on its own and
 *   lands in the upper half: first byte + 256.
 * - A first byte in 0xC0..0xDF, 0xE0..0xEF or 0xF0..0xF4 is combined with
 *   the next 1, 2 or 3 bytes respectively (h = h * 8353 + byte for each)
 *   and the result is reduced to its low eight bits, i.e. the lower half.
 *
 * The following bytes are read unconditionally, so the caller must make
 * sure up to three bytes after x are readable.
 */
static inline unsigned int
rb_memsearch_qs_utf8_hash(const unsigned char *x)
{
    const unsigned int mix = 8353;
    unsigned int h = x[0];
    int trailing, k;

    if (h < 0xC0 || h >= 0xF5)
        return h + 256;

    if (h < 0xE0)
        trailing = 1;
    else if (h < 0xF0)
        trailing = 2;
    else
        trailing = 3;

    for (k = 1; k <= trailing; ++k)
        h = h * mix + x[k];

    return (unsigned char)h;
}
|
|
|
|
|
|
|
|
/*
 * Quick Search variant whose shift table is keyed by
 * rb_memsearch_qs_utf8_hash() instead of by a single byte.
 *
 * Returns the byte offset of the first occurrence of the m bytes at xs
 * inside the n bytes at ys, or -1 when there is none.
 *
 * The 512-entry table starts out with the maximal shift m + 1; each pattern
 * position then stores its distance to the end of the pattern under the
 * hash of the bytes starting there.  After a failed comparison the hash of
 * the bytes starting right after the current window selects the shift.
 *
 * When the window already ends at the end of the haystack no later window
 * can fit, so the search stops there instead of hashing the bytes at
 * ys + n.
 *
 * NOTE(review): the hash helper reads up to three bytes after the position
 * it is given when that byte is in 0xC0..0xF4.  For a pattern or haystack
 * that ends in such a byte this still reads beyond the buffer; presumably
 * callers only pass well-formed, terminated strings -- confirm.
 */
static inline long
rb_memsearch_qs_utf8(const unsigned char *xs, long m, const unsigned char *ys, long n)
{
    const unsigned char *x = xs, *xe = xs + m;
    const long last = n - m;    /* start offset of the last window that fits */
    long pos, qstable[512];
    int i;

    /* Preprocessing */
    for (i = 0; i < 512; ++i) {
        qstable[i] = m + 1;
    }
    for (; x < xe; ++x) {
        qstable[rb_memsearch_qs_utf8_hash(x)] = xe - x;
    }

    /* Searching */
    for (pos = 0; pos <= last; pos += qstable[rb_memsearch_qs_utf8_hash(ys + pos + m)]) {
        if (*xs == ys[pos] && memcmp(xs, ys + pos, m) == 0)
            return pos;
        if (pos == last)
            break;              /* nothing after the window left to hash */
    }
    return -1;
}
|
|
|
|
|
2015-08-03 04:08:36 +03:00
|
|
|
/*
 * Searches ys[0, n) for the m bytes at xs, trying only start offsets that
 * are multiples of char_size.
 *
 * Returns the byte offset of the first such match, or -1 when there is
 * none (including when m > n).  The first byte is compared directly and
 * memcmp() checks the remaining m - 1 bytes.
 */
static inline long
rb_memsearch_with_char_size(const unsigned char *xs, long m, const unsigned char *ys, long n, int char_size)
{
    const unsigned char first = xs[0];
    const unsigned char *rest = xs + 1;
    const unsigned char *cur = ys;
    long room = n - m;          /* bytes left after a window starting at cur */

    while (room >= 0) {
        if (*cur == first && memcmp(rest, cur + 1, m - 1) == 0)
            return cur - ys;
        room -= char_size;
        cur += char_size;
    }
    return -1;
}
|
|
|
|
|
|
|
|
/*
 * Search restricted to start offsets that are multiples of two bytes;
 * forwards to rb_memsearch_with_char_size() and returns its result.
 */
static inline long
rb_memsearch_wchar(const unsigned char *xs, long m, const unsigned char *ys, long n)
{
    enum { unit_bytes = 2 };

    return rb_memsearch_with_char_size(xs, m, ys, n, unit_bytes);
}
|
2015-08-03 04:08:36 +03:00
|
|
|
|
2022-10-22 12:26:53 +03:00
|
|
|
/*
 * Search restricted to start offsets that are multiples of four bytes;
 * forwards to rb_memsearch_with_char_size() and returns its result.
 */
static inline long
rb_memsearch_qchar(const unsigned char *xs, long m, const unsigned char *ys, long n)
{
    enum { unit_bytes = 4 };

    return rb_memsearch_with_char_size(xs, m, ys, n, unit_bytes);
}
|
|
|
|
|
2008-03-17 22:04:29 +03:00
|
|
|
long
|
2014-06-03 00:23:47 +04:00
|
|
|
rb_memsearch(const void *x0, long m, const void *y0, long n, rb_encoding *enc)
|
2008-03-17 22:04:29 +03:00
|
|
|
{
|
|
|
|
const unsigned char *x = x0, *y = y0;
|
|
|
|
|
|
|
|
if (m > n) return -1;
|
|
|
|
else if (m == n) {
|
|
|
|
return memcmp(x0, y0, m) == 0 ? 0 : -1;
|
|
|
|
}
|
|
|
|
else if (m < 1) {
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
else if (m == 1) {
|
2013-08-09 17:41:26 +04:00
|
|
|
const unsigned char *ys = memchr(y, *x, n);
|
2012-11-08 17:40:33 +04:00
|
|
|
|
2013-08-09 17:41:26 +04:00
|
|
|
if (ys)
|
2012-11-08 17:40:33 +04:00
|
|
|
return ys - y;
|
|
|
|
else
|
|
|
|
return -1;
|
2008-03-17 22:04:29 +03:00
|
|
|
}
|
2015-08-03 04:08:36 +03:00
|
|
|
else if (LIKELY(rb_enc_mbminlen(enc) == 1)) {
|
|
|
|
if (m <= SIZEOF_VALUE) {
|
|
|
|
return rb_memsearch_ss(x0, m, y0, n);
|
|
|
|
}
|
|
|
|
else if (enc == rb_utf8_encoding()){
|
|
|
|
return rb_memsearch_qs_utf8(x0, m, y0, n);
|
|
|
|
}
|
2008-03-17 22:04:29 +03:00
|
|
|
}
|
2015-08-03 04:08:36 +03:00
|
|
|
else if (LIKELY(rb_enc_mbminlen(enc) == 2)) {
|
|
|
|
return rb_memsearch_wchar(x0, m, y0, n);
|
2008-03-17 22:04:29 +03:00
|
|
|
}
|
2015-08-03 04:08:36 +03:00
|
|
|
else if (LIKELY(rb_enc_mbminlen(enc) == 4)) {
|
|
|
|
return rb_memsearch_qchar(x0, m, y0, n);
|
2003-02-03 08:34:16 +03:00
|
|
|
}
|
2015-08-03 04:08:36 +03:00
|
|
|
return rb_memsearch_qs(x0, m, y0, n);
|
2003-02-03 08:34:16 +03:00
|
|
|
}
|
|
|
|
|
2007-12-21 19:39:36 +03:00
|
|
|
/* Object flag tested on a Regexp's RBASIC flags; rb_reg_desc() appends
 * the "n" option letter to the description when it is set. */
#define REG_ENCODING_NONE FL_USER6
|
* sprintf.c (rb_str_format): allow %c to print one character
string (e.g. ?x).
* lib/tempfile.rb (Tempfile::make_tmpname): put dot between
basename and pid. [ruby-talk:196272]
* parse.y (do_block): remove -> style block.
* parse.y (parser_yylex): remove tLAMBDA_ARG.
* eval.c (rb_call0): binding for the return event hook should have
consistent scope. [ruby-core:07928]
* eval.c (proc_invoke): return behavior should depend whether it
is surrounded by a lambda or a mere block.
* eval.c (formal_assign): handles post splat arguments.
* eval.c (rb_call0): ditto.
* st.c (strhash): use FNV-1a hash.
* parse.y (parser_yylex): removed experimental ';;' terminator.
* eval.c (rb_node_arity): should be aware of post splat arguments.
* eval.c (rb_proc_arity): ditto.
* parse.y (f_args): syntax rule enhanced to support arguments
after the splat.
* parse.y (block_param): ditto for block parameters.
* parse.y (f_post_arg): mandatory formal arguments after the splat
argument.
* parse.y (new_args_gen): generate nodes for mandatory formal
arguments after the splat argument.
* eval.c (rb_eval): dispatch mandatory formal arguments after the
splat argument.
* parse.y (args): allow more than one splat in the argument list.
* parse.y (method_call): allow aref [] to accept all kind of
method argument, including assocs, splat, and block argument.
* eval.c (SETUP_ARGS0): prepare block argument as well.
* lib/mathn.rb (Integer): remove Integer#gcd2. [ruby-core:07931]
* eval.c (error_line): print receivers true/false/nil specially.
* eval.c (rb_proc_yield): handles parameters in yield semantics.
* eval.c (nil_yield): gives LocalJumpError to denote no block
error.
* io.c (rb_io_getc): now takes one-character string.
* string.c (rb_str_hash): use FNV-1a hash from Fowler/Noll/Vo
hashing algorithm.
* string.c (rb_str_aref): str[0] now returns 1 character string,
instead of a fixnum. [Ruby2]
* parse.y (parser_yylex): ?c now returns 1 character string,
instead of a fixnum. [Ruby2]
* string.c (rb_str_aset): no longer support fixnum insertion.
* eval.c (umethod_bind): should not update original class.
[ruby-dev:28636]
* eval.c (ev_const_get): should support constant access from
within instance_eval(). [ruby-dev:28327]
* time.c (time_timeval): should round for usec floating
number. [ruby-core:07896]
* time.c (time_add): ditto.
* dir.c (sys_warning): should not call a vararg function
rb_sys_warning() indirectly. [ruby-core:07886]
* numeric.c (flo_divmod): the first element of Float#divmod should
be an integer. [ruby-dev:28589]
* test/ruby/test_float.rb: add tests for divmod, div, modulo and remainder.
* re.c (rb_reg_initialize): should not allow modifying literal
regexps. frozen check moved from rb_reg_initialize_m as well.
* re.c (rb_reg_initialize): should not modify untainted objects in
safe levels higher than 3.
* re.c (rb_memcmp): type change from char* to const void*.
* dir.c (dir_close): should not close untainted dir stream.
* dir.c (GetDIR): add tainted/frozen check for each dir operation.
* lib/rdoc/parsers/parse_rb.rb (RDoc::RubyParser::parse_symbol_arg):
typo fixed. a patch from Florian Gross <florg at florg.net>.
* eval.c (EXEC_EVENT_HOOK): trace_func may remove itself from
event_hooks. no guarantee for arbitrary hook deletion.
[ruby-dev:28632]
* util.c (ruby_strtod): differ addition to minimize error.
[ruby-dev:28619]
* util.c (ruby_strtod): should not raise ERANGE when the input
string does not have any digits. [ruby-dev:28629]
* eval.c (proc_invoke): should restore old ruby_frame->block.
thanks to ts <decoux at moulon.inra.fr>. [ruby-core:07833]
also fix [ruby-dev:28614] as well.
* signal.c (trap): sig should be less than NSIG. Coverity found
this bug. a patch from Kevin Tew <tewk at tewk.com>.
[ruby-core:07823]
* math.c (math_log2): add new method inspired by
[ruby-talk:191237].
* math.c (math_log): add optional base argument to Math::log().
[ruby-talk:191308]
* ext/syck/emitter.c (syck_scan_scalar): avoid accessing
uninitialized array element. a patch from Pat Eyler
<rubypate at gmail.com>. [ruby-core:07809]
* array.c (rb_ary_fill): initialize local variables first. a
patch from Pat Eyler <rubypate at gmail.com>. [ruby-core:07810]
* ext/syck/yaml2byte.c (syck_yaml2byte_handler): need to free
type_tag. a patch from Pat Eyler <rubypate at gmail.com>.
[ruby-core:07808]
* ext/socket/socket.c (make_hostent_internal): accept ai_family
check from Sam Roberts <sroberts at uniserve.com>.
[ruby-core:07691]
* util.c (ruby_strtod): should not cut off 18 digits for no
reason. [ruby-core:07796]
* array.c (rb_ary_fill): internalize local variable "beg" to
pacify Coverity. [ruby-core:07770]
* pack.c (pack_unpack): now supports CRLF newlines. a patch from
<tommy at tmtm.org>. [ruby-dev:28601]
* applied code clean-up patch from Stefan Huehner
<stefan at huehner.org>. [ruby-core:07764]
* lib/jcode.rb (String::tr_s): should have translated non
squeezing character sequence (i.e. a character) as well. thanks
to Hiroshi Ichikawa <gimite at gimite.ddo.jp> [ruby-list:42090]
* ext/socket/socket.c: document update patch from Sam Roberts
<sroberts at uniserve.com>. [ruby-core:07701]
* lib/mathn.rb (Integer): need not to remove gcd2. a patch from
NARUSE, Yui <naruse at airemix.com>. [ruby-dev:28570]
* parse.y (arg): too much NEW_LIST()
* eval.c (SETUP_ARGS0): remove unnecessary access to nd_alen.
* eval.c (rb_eval): use ARGSCAT for NODE_OP_ASGN1.
[ruby-dev:28585]
* parse.y (arg): use NODE_ARGSCAT for placeholder.
* lib/getoptlong.rb (GetoptLong::get): RDoc update patch from
mathew <meta at pobox.com>. [ruby-core:07738]
* variable.c (rb_const_set): raise error when no target klass is
supplied. [ruby-dev:28582]
* prec.c (prec_prec_f): documentation patch from
<gerardo.santana at gmail.com>. [ruby-core:07689]
* bignum.c (rb_big_pow): second operand may be too big even if
it's a Fixnum. [ruby-talk:187984]
* README.EXT: update symbol description. [ruby-talk:188104]
* COPYING: explicitly note GPLv2. [ruby-talk:187922]
* parse.y: remove some obsolete syntax rules (unparenthesized
method calls in argument list).
* eval.c (rb_call0): insecure calling should be checked for non
NODE_SCOPE method invocations too.
* eval.c (rb_alias): should preserve the current safe level as
well as method definition.
* process.c (rb_f_sleep): remove RDoc description about SIGALRM
which is not valid on the current implementation. [ruby-dev:28464]
Thu Mar 23 21:40:47 2006 K.Kosako <sndgk393 AT ybb.ne.jp>
* eval.c (method_missing): should support argument splat in
super. a bug in combination of super, splat and
method_missing. [ruby-talk:185438]
* configure.in: Solaris SunPro compiler -rpath patch from
<kuwa at labs.fujitsu.com>. [ruby-dev:28443]
* configure.in: remove enable_rpath=no for Solaris.
[ruby-dev:28440]
* ext/win32ole/win32ole.c (ole_val2olevariantdata): change behavior
of converting OLE Variant object with VT_ARRAY|VT_UI1 and Ruby
String object.
* ruby.1: a clarification patch from David Lutterkort
<dlutter at redhat.com>. [ruby-core:7508]
* lib/rdoc/ri/ri_paths.rb (RI::Paths): adding paths from rubygems
directories. a patch from Eric Hodel <drbrain at segment7.net>.
[ruby-core:07423]
* eval.c (rb_clear_cache_by_class): clearing wrong cache.
* ext/extmk.rb: use :remove_destination to install extension libraries
to avoid SEGV. [ruby-dev:28417]
* eval.c (rb_thread_fd_writable): should not re-schedule output
from KILLED thread (must be error printing).
* array.c (rb_ary_flatten_bang): allow specifying recursion
level. [ruby-talk:182170]
* array.c (rb_ary_flatten): ditto.
* gc.c (add_heap): a heap_slots may overflow. a patch from Stefan
Weil <weil at mail.berlios.de>.
* eval.c (rb_call): use separate cache for fcall/vcall
invocation.
* eval.c (rb_eval): NODE_FCALL, NODE_VCALL can call local
functions.
* eval.c (rb_mod_local): a new method to specify newly added
visibility "local".
* eval.c (search_method): search for local methods which are
visible only from the current class.
* class.c (rb_class_local_methods): a method to list local methods.
* object.c (Init_Object): add BasicObject class as a top level
BlankSlate class.
* ruby.h (SYM2ID): should not cast to signed long.
[ruby-core:07414]
* class.c (rb_include_module): allow module duplication.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10235 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-06-10 01:20:17 +04:00
|
|
|
|
2000-05-22 11:09:55 +04:00
|
|
|
/* Object flag bit (FL_USER4).
 * NOTE(review): presumably marks a Regexp whose encoding is fixed; no use
 * of this flag is visible nearby -- confirm against its users. */
#define KCODE_FIXED FL_USER4
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2007-10-19 11:41:03 +04:00
|
|
|
/* Union of the three Onigmo option bits that have an option letter
 * (ignore-case 'i', multiline 'm', extended 'x'). */
#define ARG_REG_OPTION_MASK \
|
|
|
|
(ONIG_OPTION_IGNORECASE|ONIG_OPTION_MULTILINE|ONIG_OPTION_EXTEND)
|
|
|
|
/* Option value stored by rb_char_to_option_kcode() for the encoding
 * letters 'e', 's' and 'u'. */
#define ARG_ENCODING_FIXED 16
|
2007-12-21 19:39:36 +03:00
|
|
|
/* Option value stored (and returned) by rb_char_to_option_kcode() for
 * the encoding letter 'n'. */
#define ARG_ENCODING_NONE 32
|
2007-10-19 11:41:03 +04:00
|
|
|
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
static int
|
|
|
|
char_to_option(int c)
|
|
|
|
{
|
|
|
|
int val;
|
2006-03-26 17:04:13 +04:00
|
|
|
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
switch (c) {
|
|
|
|
case 'i':
|
|
|
|
val = ONIG_OPTION_IGNORECASE;
|
|
|
|
break;
|
|
|
|
case 'x':
|
|
|
|
val = ONIG_OPTION_EXTEND;
|
|
|
|
break;
|
|
|
|
case 'm':
|
|
|
|
val = ONIG_OPTION_MULTILINE;
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
val = 0;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
return val;
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
2020-08-28 16:03:06 +03:00
|
|
|
/* Buffer size needed by option_to_str(): at most three option letters
 * ('m', 'i', 'x') plus the terminating NUL. */
enum { OPTBUF_SIZE = 4 };
|
2020-08-28 13:29:16 +03:00
|
|
|
|
2007-08-02 18:42:59 +04:00
|
|
|
static char *
|
2020-08-28 13:29:16 +03:00
|
|
|
option_to_str(char str[OPTBUF_SIZE], int options)
|
2007-08-02 18:42:59 +04:00
|
|
|
{
|
|
|
|
char *p = str;
|
|
|
|
if (options & ONIG_OPTION_MULTILINE) *p++ = 'm';
|
|
|
|
if (options & ONIG_OPTION_IGNORECASE) *p++ = 'i';
|
|
|
|
if (options & ONIG_OPTION_EXTEND) *p++ = 'x';
|
|
|
|
*p = 0;
|
|
|
|
return str;
|
|
|
|
}
|
|
|
|
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
extern int
|
|
|
|
rb_char_to_option_kcode(int c, int *option, int *kcode)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*option = 0;
|
2006-03-26 17:04:13 +04:00
|
|
|
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
switch (c) {
|
2007-10-19 11:41:03 +04:00
|
|
|
case 'n':
|
2009-09-10 21:07:38 +04:00
|
|
|
*kcode = rb_ascii8bit_encindex();
|
2007-12-21 19:39:36 +03:00
|
|
|
return (*option = ARG_ENCODING_NONE);
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
case 'e':
|
2013-07-02 12:22:47 +04:00
|
|
|
*kcode = ENCINDEX_EUC_JP;
|
2007-10-19 11:41:03 +04:00
|
|
|
break;
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
case 's':
|
2013-07-02 12:22:47 +04:00
|
|
|
*kcode = ENCINDEX_Windows_31J;
|
2007-10-19 11:41:03 +04:00
|
|
|
break;
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
case 'u':
|
2009-09-10 21:07:38 +04:00
|
|
|
*kcode = rb_utf8_encindex();
|
2007-10-19 11:41:03 +04:00
|
|
|
break;
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
default:
|
2007-10-19 11:41:03 +04:00
|
|
|
*kcode = -1;
|
2007-10-16 09:48:40 +04:00
|
|
|
return (*option = char_to_option(c));
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
2007-10-19 11:41:03 +04:00
|
|
|
*option = ARG_ENCODING_FIXED;
|
|
|
|
return 1;
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
2000-05-22 11:09:55 +04:00
|
|
|
static void
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_check(VALUE re)
|
2000-05-22 11:09:55 +04:00
|
|
|
{
|
2016-02-02 07:39:44 +03:00
|
|
|
if (!RREGEXP_PTR(re) || !RREGEXP_SRC(re) || !RREGEXP_SRC_PTR(re)) {
|
2000-05-22 11:09:55 +04:00
|
|
|
rb_raise(rb_eTypeError, "uninitialized Regexp");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
1998-01-16 15:19:22 +03:00
|
|
|
static void
|
2010-06-05 15:32:05 +04:00
|
|
|
rb_reg_expr_str(VALUE str, const char *s, long len,
|
2018-03-16 16:37:44 +03:00
|
|
|
rb_encoding *enc, rb_encoding *resenc, int term)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
1999-08-13 09:45:20 +04:00
|
|
|
const char *p, *pend;
|
2010-12-29 12:24:37 +03:00
|
|
|
int cr = ENC_CODERANGE_UNKNOWN;
|
2000-05-30 08:24:17 +04:00
|
|
|
int need_escape = 0;
|
2007-12-11 06:08:50 +03:00
|
|
|
int c, clen;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
p = s; pend = p + len;
|
2010-12-29 12:24:37 +03:00
|
|
|
rb_str_coderange_scan_restartable(p, pend, enc, &cr);
|
2015-07-17 09:39:29 +03:00
|
|
|
if (rb_enc_asciicompat(enc) && ENC_CODERANGE_CLEAN_P(cr)) {
|
2010-06-05 15:32:05 +04:00
|
|
|
while (p < pend) {
|
|
|
|
c = rb_enc_ascget(p, pend, &clen, enc);
|
|
|
|
if (c == -1) {
|
|
|
|
if (enc == resenc) {
|
|
|
|
p += mbclen(p, pend, enc);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
need_escape = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2018-03-16 16:37:44 +03:00
|
|
|
else if (c != term && rb_enc_isprint(c, enc)) {
|
2010-06-05 15:32:05 +04:00
|
|
|
p += clen;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
need_escape = 1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
need_escape = 1;
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
2010-06-05 15:32:05 +04:00
|
|
|
|
2000-05-30 08:24:17 +04:00
|
|
|
if (!need_escape) {
|
2001-05-30 13:12:34 +04:00
|
|
|
rb_str_buf_cat(str, s, len);
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
else {
|
2010-06-05 15:32:05 +04:00
|
|
|
int unicode_p = rb_enc_unicode_p(enc);
|
2004-03-06 01:37:35 +03:00
|
|
|
p = s;
|
1998-01-16 15:13:05 +03:00
|
|
|
while (p<pend) {
|
2007-12-11 10:39:16 +03:00
|
|
|
c = rb_enc_ascget(p, pend, &clen, enc);
|
2007-12-11 06:08:50 +03:00
|
|
|
if (c == '\\' && p+clen < pend) {
|
|
|
|
int n = clen + mbclen(p+clen, pend, enc);
|
2002-09-02 18:04:19 +04:00
|
|
|
rb_str_buf_cat(str, p, n);
|
|
|
|
p += n;
|
2002-06-28 18:42:46 +04:00
|
|
|
continue;
|
|
|
|
}
|
2007-12-08 05:50:43 +03:00
|
|
|
else if (c == -1) {
|
2010-07-23 04:02:51 +04:00
|
|
|
clen = rb_enc_precise_mbclen(p, pend, enc);
|
|
|
|
if (!MBCLEN_CHARFOUND_P(clen)) {
|
|
|
|
c = (unsigned char)*p;
|
|
|
|
clen = 1;
|
|
|
|
goto hex;
|
|
|
|
}
|
2010-06-05 15:32:05 +04:00
|
|
|
if (resenc) {
|
|
|
|
unsigned int c = rb_enc_mbc_to_codepoint(p, pend, enc);
|
2010-07-22 11:29:32 +04:00
|
|
|
rb_str_buf_cat_escaped_char(str, c, unicode_p);
|
2010-06-05 15:32:05 +04:00
|
|
|
}
|
|
|
|
else {
|
2010-07-23 04:02:51 +04:00
|
|
|
clen = MBCLEN_CHARFOUND_LEN(clen);
|
|
|
|
rb_str_buf_cat(str, p, clen);
|
2010-06-05 15:32:05 +04:00
|
|
|
}
|
2000-05-30 08:24:17 +04:00
|
|
|
}
|
2018-03-16 16:37:44 +03:00
|
|
|
else if (c == term) {
|
|
|
|
char c = '\\';
|
|
|
|
rb_str_buf_cat(str, &c, 1);
|
|
|
|
rb_str_buf_cat(str, p, clen);
|
|
|
|
}
|
2007-12-08 05:50:43 +03:00
|
|
|
else if (rb_enc_isprint(c, enc)) {
|
2007-12-11 06:08:50 +03:00
|
|
|
rb_str_buf_cat(str, p, clen);
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
2007-12-08 05:50:43 +03:00
|
|
|
else if (!rb_enc_isspace(c, enc)) {
|
2000-05-30 08:24:17 +04:00
|
|
|
char b[8];
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2010-07-23 04:02:51 +04:00
|
|
|
hex:
|
* dir.c, dln.c, parse.y, re.c, ruby.c, sprintf.c, strftime.c,
string.c, util.c, variable.c: use strlcpy, memcpy and snprintf
instead of strcpy, strncpy and sprintf.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@22984 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2009-03-17 04:29:17 +03:00
|
|
|
snprintf(b, sizeof(b), "\\x%02X", c);
|
2002-04-24 08:54:16 +04:00
|
|
|
rb_str_buf_cat(str, b, 4);
|
|
|
|
}
|
|
|
|
else {
|
2007-12-11 06:08:50 +03:00
|
|
|
rb_str_buf_cat(str, p, clen);
|
2000-05-30 08:24:17 +04:00
|
|
|
}
|
2007-12-11 06:08:50 +03:00
|
|
|
p += clen;
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
}
|
1998-01-16 15:19:22 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
2023-12-24 17:08:01 +03:00
|
|
|
rb_reg_desc(VALUE re)
|
1998-01-16 15:19:22 +03:00
|
|
|
{
|
2010-06-05 15:32:05 +04:00
|
|
|
rb_encoding *enc = rb_enc_get(re);
|
2001-05-30 13:12:34 +04:00
|
|
|
VALUE str = rb_str_buf_new2("/");
|
2010-06-05 15:32:05 +04:00
|
|
|
rb_encoding *resenc = rb_default_internal_encoding();
|
|
|
|
if (resenc == NULL) resenc = rb_default_external_encoding();
|
|
|
|
|
|
|
|
if (re && rb_enc_asciicompat(enc)) {
|
2008-09-20 03:07:22 +04:00
|
|
|
rb_enc_copy(str, re);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
rb_enc_associate(str, rb_usascii_encoding());
|
|
|
|
}
|
2023-12-24 17:08:01 +03:00
|
|
|
|
|
|
|
VALUE src_str = RREGEXP_SRC(re);
|
|
|
|
rb_reg_expr_str(str, RSTRING_PTR(src_str), RSTRING_LEN(src_str), enc, resenc, '/');
|
|
|
|
RB_GC_GUARD(src_str);
|
|
|
|
|
2001-05-30 13:12:34 +04:00
|
|
|
rb_str_buf_cat2(str, "/");
|
1998-01-16 15:13:05 +03:00
|
|
|
if (re) {
|
2020-08-28 13:29:16 +03:00
|
|
|
char opts[OPTBUF_SIZE];
|
2000-05-24 11:36:46 +04:00
|
|
|
rb_reg_check(re);
|
2016-02-02 07:39:44 +03:00
|
|
|
if (*option_to_str(opts, RREGEXP_PTR(re)->options))
|
2007-08-02 18:42:59 +04:00
|
|
|
rb_str_buf_cat2(str, opts);
|
2010-06-05 15:32:05 +04:00
|
|
|
if (RBASIC(re)->flags & REG_ENCODING_NONE)
|
|
|
|
rb_str_buf_cat2(str, "n");
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
return str;
|
|
|
|
}
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-15 21:31:15 +03:00
|
|
|
* source -> string
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-15 21:31:15 +03:00
|
|
|
* Returns the original string of +self+:
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-15 21:31:15 +03:00
|
|
|
* /ab+c/ix.source # => "ab+c"
|
2007-12-09 16:35:38 +03:00
|
|
|
*
|
2022-06-21 01:17:23 +03:00
|
|
|
* Regexp escape sequences are retained:
|
2007-12-09 16:35:38 +03:00
|
|
|
*
|
2022-04-15 21:31:15 +03:00
|
|
|
* /\x20\+/.source # => "\\x20\\+"
|
2007-12-09 16:35:38 +03:00
|
|
|
*
|
2022-06-21 01:17:23 +03:00
|
|
|
* Lexer escape characters are not retained:
|
|
|
|
*
|
|
|
|
* /\//.source # => "/"
|
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
1998-01-16 15:19:22 +03:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_source(VALUE re)
|
1998-01-16 15:19:22 +03:00
|
|
|
{
|
2000-05-22 11:09:55 +04:00
|
|
|
VALUE str;
|
|
|
|
|
|
|
|
rb_reg_check(re);
|
2016-02-04 09:44:55 +03:00
|
|
|
str = rb_str_dup(RREGEXP_SRC(re));
|
1999-12-01 12:24:48 +03:00
|
|
|
return str;
|
1998-01-16 15:19:22 +03:00
|
|
|
}
|
|
|
|
|
2003-12-30 19:38:32 +03:00
|
|
|
/*
|
2022-04-15 21:31:15 +03:00
|
|
|
* call-seq:
|
|
|
|
* inspect -> string
|
2003-12-30 19:38:32 +03:00
|
|
|
*
|
2022-04-15 21:31:15 +03:00
|
|
|
* Returns a nicely-formatted string representation of +self+:
|
2003-12-30 19:38:32 +03:00
|
|
|
*
|
2022-04-15 21:31:15 +03:00
|
|
|
* /ab+c/ix.inspect # => "/ab+c/ix"
|
2007-12-09 16:35:38 +03:00
|
|
|
*
|
2022-04-15 21:31:15 +03:00
|
|
|
* Related: Regexp#to_s.
|
2008-03-09 04:04:46 +03:00
|
|
|
*/
|
2003-12-30 19:38:32 +03:00
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_inspect(VALUE re)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
2016-02-02 07:39:44 +03:00
|
|
|
if (!RREGEXP_PTR(re) || !RREGEXP_SRC(re) || !RREGEXP_SRC_PTR(re)) {
|
2008-08-18 19:56:38 +04:00
|
|
|
return rb_any_to_s(re);
|
|
|
|
}
|
2023-12-24 17:08:01 +03:00
|
|
|
return rb_reg_desc(re);
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
2018-03-16 16:37:44 +03:00
|
|
|
static VALUE rb_reg_str_with_term(VALUE re, int term);
|
2003-12-26 18:58:28 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-15 21:31:15 +03:00
|
|
|
* to_s -> string
|
|
|
|
*
|
|
|
|
* Returns a string showing the options and string of +self+:
|
|
|
|
*
|
|
|
|
* r0 = /ab+c/ix
|
|
|
|
* s0 = r0.to_s # => "(?ix-m:ab+c)"
|
|
|
|
*
|
|
|
|
* The returned string may be used as an argument to Regexp.new,
|
|
|
|
* or as interpolated text for a
|
2023-11-14 07:53:59 +03:00
|
|
|
* {Regexp interpolation}[rdoc-ref:Regexp@Interpolation+Mode]:
|
2022-04-15 21:31:15 +03:00
|
|
|
*
|
|
|
|
* r1 = Regexp.new(s0) # => /(?ix-m:ab+c)/
|
|
|
|
* r2 = /#{s0}/ # => /(?ix-m:ab+c)/
|
|
|
|
*
|
|
|
|
* Note that +r1+ and +r2+ are not equal to +r0+
|
|
|
|
* because their original strings are different:
|
|
|
|
*
|
|
|
|
* r0 == r1 # => false
|
|
|
|
* r0.source # => "ab+c"
|
|
|
|
* r1.source # => "(?ix-m:ab+c)"
|
|
|
|
*
|
|
|
|
* Related: Regexp#inspect.
|
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
2002-04-18 12:04:57 +04:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_to_s(VALUE re)
|
2018-03-16 16:37:44 +03:00
|
|
|
{
|
|
|
|
return rb_reg_str_with_term(re, '/');
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
rb_reg_str_with_term(VALUE re, int term)
|
2002-04-18 12:04:57 +04:00
|
|
|
{
|
2006-03-26 17:04:13 +04:00
|
|
|
int options, opt;
|
2005-02-17 17:43:38 +03:00
|
|
|
const int embeddable = ONIG_OPTION_MULTILINE|ONIG_OPTION_IGNORECASE|ONIG_OPTION_EXTEND;
|
2002-04-18 12:04:57 +04:00
|
|
|
VALUE str = rb_str_buf_new2("(?");
|
2020-08-28 13:29:16 +03:00
|
|
|
char optbuf[OPTBUF_SIZE + 1]; /* for '-' */
|
2010-06-05 15:32:05 +04:00
|
|
|
rb_encoding *enc = rb_enc_get(re);
|
2002-04-18 12:04:57 +04:00
|
|
|
|
|
|
|
rb_reg_check(re);
|
|
|
|
|
2007-10-16 09:48:40 +04:00
|
|
|
rb_enc_copy(str, re);
|
2016-02-02 07:39:44 +03:00
|
|
|
options = RREGEXP_PTR(re)->options;
|
2023-12-23 22:51:43 +03:00
|
|
|
VALUE src_str = RREGEXP_SRC(re);
|
|
|
|
const UChar *ptr = (UChar *)RSTRING_PTR(src_str);
|
|
|
|
long len = RSTRING_LEN(src_str);
|
2002-04-24 08:54:16 +04:00
|
|
|
again:
|
|
|
|
if (len >= 4 && ptr[0] == '(' && ptr[1] == '?') {
|
2002-04-19 09:59:45 +04:00
|
|
|
int err = 1;
|
2002-04-19 08:14:07 +04:00
|
|
|
ptr += 2;
|
2002-04-24 08:54:16 +04:00
|
|
|
if ((len -= 2) > 0) {
|
2002-04-19 08:14:07 +04:00
|
|
|
do {
|
2006-03-26 17:04:13 +04:00
|
|
|
opt = char_to_option((int )*ptr);
|
|
|
|
if (opt != 0) {
|
|
|
|
options |= opt;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
break;
|
|
|
|
}
|
2002-04-19 08:14:07 +04:00
|
|
|
++ptr;
|
|
|
|
} while (--len > 0);
|
|
|
|
}
|
|
|
|
if (len > 1 && *ptr == '-') {
|
|
|
|
++ptr;
|
|
|
|
--len;
|
|
|
|
do {
|
2006-03-26 17:04:13 +04:00
|
|
|
opt = char_to_option((int )*ptr);
|
|
|
|
if (opt != 0) {
|
|
|
|
options &= ~opt;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
break;
|
|
|
|
}
|
2002-04-19 08:14:07 +04:00
|
|
|
++ptr;
|
|
|
|
} while (--len > 0);
|
|
|
|
}
|
2002-04-24 08:54:16 +04:00
|
|
|
if (*ptr == ')') {
|
|
|
|
--len;
|
|
|
|
++ptr;
|
|
|
|
goto again;
|
|
|
|
}
|
|
|
|
if (*ptr == ':' && ptr[len-1] == ')') {
|
|
|
|
Regexp *rp;
|
2013-04-02 12:44:16 +04:00
|
|
|
VALUE verbose = ruby_verbose;
|
|
|
|
ruby_verbose = Qfalse;
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2010-03-02 00:54:59 +03:00
|
|
|
++ptr;
|
|
|
|
len -= 2;
|
2017-08-11 05:05:24 +03:00
|
|
|
err = onig_new(&rp, ptr, ptr + len, options,
|
2010-06-05 15:32:05 +04:00
|
|
|
enc, OnigDefaultSyntax, NULL);
|
2005-02-17 17:43:38 +03:00
|
|
|
onig_free(rp);
|
2013-04-02 12:44:16 +04:00
|
|
|
ruby_verbose = verbose;
|
2002-04-19 08:14:07 +04:00
|
|
|
}
|
2002-04-19 09:59:45 +04:00
|
|
|
if (err) {
|
2016-02-02 07:39:44 +03:00
|
|
|
options = RREGEXP_PTR(re)->options;
|
2008-06-28 16:25:45 +04:00
|
|
|
ptr = (UChar*)RREGEXP_SRC_PTR(re);
|
|
|
|
len = RREGEXP_SRC_LEN(re);
|
2002-04-19 08:14:07 +04:00
|
|
|
}
|
|
|
|
}
|
2002-04-18 12:04:57 +04:00
|
|
|
|
2007-08-02 18:42:59 +04:00
|
|
|
if (*option_to_str(optbuf, options)) rb_str_buf_cat2(str, optbuf);
|
2002-04-19 08:14:07 +04:00
|
|
|
|
|
|
|
if ((options & embeddable) != embeddable) {
|
2007-08-02 18:42:59 +04:00
|
|
|
optbuf[0] = '-';
|
|
|
|
option_to_str(optbuf + 1, ~options);
|
|
|
|
rb_str_buf_cat2(str, optbuf);
|
2002-04-18 12:04:57 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
rb_str_buf_cat2(str, ":");
|
2013-07-25 12:52:32 +04:00
|
|
|
if (rb_enc_asciicompat(enc)) {
|
2018-03-16 16:37:44 +03:00
|
|
|
rb_reg_expr_str(str, (char*)ptr, len, enc, NULL, term);
|
2013-07-25 12:52:32 +04:00
|
|
|
rb_str_buf_cat2(str, ")");
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
const char *s, *e;
|
|
|
|
char *paren;
|
|
|
|
ptrdiff_t n;
|
|
|
|
rb_str_buf_cat2(str, ")");
|
|
|
|
rb_enc_associate(str, rb_usascii_encoding());
|
|
|
|
str = rb_str_encode(str, rb_enc_from_encoding(enc), 0, Qnil);
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2013-07-25 12:52:32 +04:00
|
|
|
/* backup encoded ")" to paren */
|
|
|
|
s = RSTRING_PTR(str);
|
|
|
|
e = RSTRING_END(str);
|
|
|
|
s = rb_enc_left_char_head(s, e-1, e, enc);
|
|
|
|
n = e - s;
|
|
|
|
paren = ALLOCA_N(char, n);
|
|
|
|
memcpy(paren, s, n);
|
|
|
|
rb_str_resize(str, RSTRING_LEN(str) - n);
|
2022-07-21 19:23:58 +03:00
|
|
|
|
2018-03-16 16:37:44 +03:00
|
|
|
rb_reg_expr_str(str, (char*)ptr, len, enc, NULL, term);
|
2013-07-25 12:52:32 +04:00
|
|
|
rb_str_buf_cat(str, paren, n);
|
|
|
|
}
|
2007-10-16 09:48:40 +04:00
|
|
|
rb_enc_copy(str, re);
|
2002-04-18 12:04:57 +04:00
|
|
|
|
2023-12-23 22:51:43 +03:00
|
|
|
RB_GC_GUARD(src_str);
|
|
|
|
|
2002-04-18 12:04:57 +04:00
|
|
|
return str;
|
|
|
|
}
|
|
|
|
|
2023-12-24 17:08:01 +03:00
|
|
|
NORETURN(static void rb_reg_raise(const char *err, VALUE re));
|
2018-01-18 12:44:40 +03:00
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
static void
|
2023-12-24 17:08:01 +03:00
|
|
|
rb_reg_raise(const char *err, VALUE re)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
2023-12-24 17:08:01 +03:00
|
|
|
VALUE desc = rb_reg_desc(re);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2013-06-24 15:31:21 +04:00
|
|
|
rb_raise(rb_eRegexpError, "%s: %"PRIsVALUE, err, desc);
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
2007-08-25 11:06:47 +04:00
|
|
|
static VALUE
|
2014-06-03 00:23:47 +04:00
|
|
|
rb_enc_reg_error_desc(const char *s, long len, rb_encoding *enc, int options, const char *err)
|
2007-08-25 11:06:47 +04:00
|
|
|
{
|
2020-08-28 13:29:16 +03:00
|
|
|
char opts[OPTBUF_SIZE + 1]; /* for '/' */
|
2007-08-25 11:06:47 +04:00
|
|
|
VALUE desc = rb_str_buf_new2(err);
|
2010-06-05 15:32:05 +04:00
|
|
|
rb_encoding *resenc = rb_default_internal_encoding();
|
|
|
|
if (resenc == NULL) resenc = rb_default_external_encoding();
|
2007-08-25 11:06:47 +04:00
|
|
|
|
2008-01-04 19:30:33 +03:00
|
|
|
rb_enc_associate(desc, enc);
|
2007-08-25 11:06:47 +04:00
|
|
|
rb_str_buf_cat2(desc, ": /");
|
2018-03-16 16:37:44 +03:00
|
|
|
rb_reg_expr_str(desc, s, len, enc, resenc, '/');
|
2007-08-25 11:06:47 +04:00
|
|
|
opts[0] = '/';
|
|
|
|
option_to_str(opts + 1, options);
|
|
|
|
rb_str_buf_cat2(desc, opts);
|
|
|
|
return rb_exc_new3(rb_eRegexpError, desc);
|
|
|
|
}
|
|
|
|
|
2018-01-18 12:44:41 +03:00
|
|
|
NORETURN(static void rb_enc_reg_raise(const char *s, long len, rb_encoding *enc, int options, const char *err));

/*
 * Raises the RegexpError built by rb_enc_reg_error_desc() from the given
 * pattern bytes, encoding, option bits and error text.  Never returns.
 */
static void
rb_enc_reg_raise(const char *s, long len, rb_encoding *enc, int options, const char *err)
{
    VALUE exc = rb_enc_reg_error_desc(s, len, enc, options, err);

    rb_exc_raise(exc);
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
rb_reg_error_desc(VALUE str, int options, const char *err)
|
|
|
|
{
|
|
|
|
return rb_enc_reg_error_desc(RSTRING_PTR(str), RSTRING_LEN(str),
|
|
|
|
rb_enc_get(str), options, err);
|
|
|
|
}
|
|
|
|
|
2018-01-18 12:44:42 +03:00
|
|
|
NORETURN(static void rb_reg_raise_str(VALUE str, int options, const char *err));

/*
 * Raises the RegexpError that rb_reg_error_desc() builds for the pattern
 * string +str+, the option bits and the error text.  Never returns.
 */
static void
rb_reg_raise_str(VALUE str, int options, const char *err)
{
    VALUE exc = rb_reg_error_desc(str, options, err);

    rb_exc_raise(exc);
}
|
2003-12-26 18:58:28 +03:00
|
|
|
|
* encoding.c: provide basic features for M17N.
* parse.y: encoding aware parsing.
* parse.y (pragma_encoding): encoding specification pragma.
* parse.y (rb_intern3): encoding specified symbols.
* string.c (rb_str_length): length based on characters.
for older behavior, bytesize method added.
* string.c (rb_str_index_m): index based on characters. rindex as
well.
* string.c (succ_char): encoding aware succeeding string.
* string.c (rb_str_reverse): reverse based on characters.
* string.c (rb_str_inspect): encoding aware string description.
* string.c (rb_str_upcase_bang): encoding aware case conversion.
downcase, capitalize, swapcase as well.
* string.c (rb_str_tr_bang): tr based on characters. delete,
squeeze, tr_s, count as well.
* string.c (rb_str_split_m): split based on characters.
* string.c (rb_str_each_line): encoding aware each_line.
* string.c (rb_str_each_char): added. iteration based on
characters.
* string.c (rb_str_strip_bang): encoding aware whitespace
stripping. lstrip, rstrip as well.
* string.c (rb_str_justify): encoding aware justifying (ljust,
rjust, center).
* string.c (str_encoding): get encoding attribute from a string.
* re.c (rb_reg_initialize): encoding aware regular expression
* sprintf.c (rb_str_format): formatting (i.e. length count) based
on characters.
* io.c (rb_io_getc): getc to return one-character string.
for older behavior, getbyte method added.
* ext/stringio/stringio.c (strio_getc): ditto.
* io.c (rb_io_ungetc): allow pushing arbitrary string at the
current reading point.
* ext/stringio/stringio.c (strio_ungetc): ditto.
* ext/strscan/strscan.c: encoding support.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13261 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-08-25 07:29:39 +04:00
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-15 21:31:15 +03:00
|
|
|
* casefold?-> true or false
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-15 21:31:15 +03:00
|
|
|
* Returns +true+ if the case-insensitivity flag in +self+ is set,
|
|
|
|
* +false+ otherwise:
|
|
|
|
*
|
|
|
|
* /a/.casefold? # => false
|
|
|
|
* /a/i.casefold? # => true
|
|
|
|
* /(?i:a)/.casefold? # => false
|
2007-12-09 16:35:38 +03:00
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
1998-01-16 15:19:22 +03:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_casefold_p(VALUE re)
|
1998-01-16 15:19:22 +03:00
|
|
|
{
|
2000-05-22 11:09:55 +04:00
|
|
|
rb_reg_check(re);
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL(RREGEXP_PTR(re)->options & ONIG_OPTION_IGNORECASE);
|
1998-01-16 15:19:22 +03:00
|
|
|
}
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-15 21:31:15 +03:00
|
|
|
* options -> integer
|
|
|
|
*
|
|
|
|
* Returns an integer whose bits show the options set in +self+.
|
|
|
|
*
|
|
|
|
* The option bits are:
|
|
|
|
*
|
|
|
|
* Regexp::IGNORECASE # => 1
|
|
|
|
* Regexp::EXTENDED # => 2
|
|
|
|
* Regexp::MULTILINE # => 4
|
|
|
|
*
|
|
|
|
* Examples:
|
|
|
|
*
|
|
|
|
* /foo/.options # => 0
|
|
|
|
* /foo/i.options # => 1
|
|
|
|
* /foo/x.options # => 2
|
|
|
|
* /foo/m.options # => 4
|
|
|
|
* /foo/mix.options # => 7
|
|
|
|
*
|
|
|
|
* Note that additional bits may be set in the returned integer;
|
2023-02-10 12:32:21 +03:00
|
|
|
* these are maintained internally in +self+, are ignored if passed
|
|
|
|
* to Regexp.new, and may be ignored by the caller:
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2019-03-28 06:33:35 +03:00
|
|
|
* Returns the set of bits corresponding to the options used when
|
2022-04-15 21:31:15 +03:00
|
|
|
* creating this regexp (see Regexp::new for details). Note that
|
2019-03-28 06:33:35 +03:00
|
|
|
* additional bits may be set in the returned options: these are used
|
|
|
|
* internally by the regular expression code. These extra bits are
|
2022-04-15 21:31:15 +03:00
|
|
|
* ignored if the options are passed to Regexp::new:
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-15 21:31:15 +03:00
|
|
|
* r = /\xa1\xa2/e # => /\xa1\xa2/
|
|
|
|
* r.source # => "\\xa1\\xa2"
|
|
|
|
* r.options # => 16
|
|
|
|
* Regexp.new(r.source, r.options) # => /\xa1\xa2/
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
2001-02-08 12:19:27 +03:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_options_m(VALUE re)
|
2001-02-08 12:19:27 +03:00
|
|
|
{
|
2003-07-25 08:38:42 +04:00
|
|
|
int options = rb_reg_options(re);
|
|
|
|
return INT2NUM(options);
|
2001-02-08 12:19:27 +03:00
|
|
|
}
|
|
|
|
|
2007-12-10 00:44:19 +03:00
|
|
|
static int
|
|
|
|
reg_names_iter(const OnigUChar *name, const OnigUChar *name_end,
|
|
|
|
int back_num, int *back_refs, OnigRegex regex, void *arg)
|
|
|
|
{
|
|
|
|
VALUE ary = (VALUE)arg;
|
2015-12-17 03:46:07 +03:00
|
|
|
rb_ary_push(ary, rb_enc_str_new((const char *)name, name_end-name, regex->enc));
|
2007-12-10 00:44:19 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2022-04-15 21:31:15 +03:00
|
|
|
* call-seq:
|
|
|
|
* names -> array_of_names
|
2007-12-10 00:44:19 +03:00
|
|
|
*
|
2022-04-15 21:31:15 +03:00
|
|
|
* Returns an array of names of captures
|
|
|
|
* (see {Named Captures}[rdoc-ref:Regexp@Named+Captures]):
|
2007-12-10 00:44:19 +03:00
|
|
|
*
|
2022-04-15 21:31:15 +03:00
|
|
|
* /(?<foo>.)(?<bar>.)(?<baz>.)/.names # => ["foo", "bar", "baz"]
|
|
|
|
* /(?<foo>.)(?<foo>.)/.names # => ["foo"]
|
|
|
|
* /(.)(.)/.names # => []
|
2007-12-10 00:44:19 +03:00
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
rb_reg_names(VALUE re)
|
|
|
|
{
|
2016-05-18 18:17:31 +03:00
|
|
|
VALUE ary;
|
2008-06-02 16:45:42 +04:00
|
|
|
rb_reg_check(re);
|
2016-05-18 18:17:31 +03:00
|
|
|
ary = rb_ary_new_capa(onig_number_of_names(RREGEXP_PTR(re)));
|
2016-02-02 07:39:44 +03:00
|
|
|
onig_foreach_name(RREGEXP_PTR(re), reg_names_iter, (void*)ary);
|
2007-12-10 00:44:19 +03:00
|
|
|
return ary;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
reg_named_captures_iter(const OnigUChar *name, const OnigUChar *name_end,
|
|
|
|
int back_num, int *back_refs, OnigRegex regex, void *arg)
|
|
|
|
{
|
|
|
|
VALUE hash = (VALUE)arg;
|
|
|
|
VALUE ary = rb_ary_new2(back_num);
|
|
|
|
int i;
|
|
|
|
|
2012-12-29 16:22:04 +04:00
|
|
|
for (i = 0; i < back_num; i++)
|
2007-12-10 00:44:19 +03:00
|
|
|
rb_ary_store(ary, i, INT2NUM(back_refs[i]));
|
|
|
|
|
|
|
|
rb_hash_aset(hash, rb_str_new((const char*)name, name_end-name),ary);
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2022-04-15 21:31:15 +03:00
|
|
|
* call-seq:
|
|
|
|
* named_captures -> hash
|
2007-12-10 00:44:19 +03:00
|
|
|
*
|
2022-04-15 21:31:15 +03:00
|
|
|
* Returns a hash representing named captures of +self+
|
|
|
|
* (see {Named Captures}[rdoc-ref:Regexp@Named+Captures]):
|
2009-02-22 17:23:33 +03:00
|
|
|
*
|
2022-04-15 21:31:15 +03:00
|
|
|
* - Each key is the name of a named capture.
|
|
|
|
* - Each value is an array of integer indexes for that named capture.
|
2007-12-10 00:44:19 +03:00
|
|
|
*
|
2022-04-15 21:31:15 +03:00
|
|
|
* Examples:
|
2007-12-10 00:44:19 +03:00
|
|
|
*
|
2022-04-15 21:31:15 +03:00
|
|
|
* /(?<foo>.)(?<bar>.)/.named_captures # => {"foo"=>[1], "bar"=>[2]}
|
|
|
|
* /(?<foo>.)(?<foo>.)/.named_captures # => {"foo"=>[1, 2]}
|
|
|
|
* /(.)(.)/.named_captures # => {}
|
2007-12-10 00:44:19 +03:00
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
rb_reg_named_captures(VALUE re)
|
|
|
|
{
|
2017-10-02 15:23:17 +03:00
|
|
|
regex_t *reg = (rb_reg_check(re), RREGEXP_PTR(re));
|
|
|
|
VALUE hash = rb_hash_new_with_size(onig_number_of_names(reg));
|
|
|
|
onig_foreach_name(reg, reg_named_captures_iter, (void*)hash);
|
2007-12-10 00:44:19 +03:00
|
|
|
return hash;
|
|
|
|
}
|
2003-12-26 18:58:28 +03:00
|
|
|
|
2010-04-05 14:57:38 +04:00
|
|
|
static int
|
|
|
|
onig_new_with_source(regex_t** reg, const UChar* pattern, const UChar* pattern_end,
|
2015-10-07 11:26:56 +03:00
|
|
|
OnigOptionType option, OnigEncoding enc, const OnigSyntaxType* syntax,
|
|
|
|
OnigErrorInfo* einfo, const char *sourcefile, int sourceline)
|
2010-04-05 14:57:38 +04:00
|
|
|
{
|
2015-10-07 11:26:56 +03:00
|
|
|
int r;
|
2010-04-05 14:57:38 +04:00
|
|
|
|
2015-10-07 11:26:56 +03:00
|
|
|
*reg = (regex_t* )malloc(sizeof(regex_t));
|
|
|
|
if (IS_NULL(*reg)) return ONIGERR_MEMORY;
|
2010-04-05 14:57:38 +04:00
|
|
|
|
2015-10-07 11:26:56 +03:00
|
|
|
r = onig_reg_init(*reg, option, ONIGENC_CASE_FOLD_DEFAULT, enc, syntax);
|
|
|
|
if (r) goto err;
|
2010-04-05 14:57:38 +04:00
|
|
|
|
2016-12-10 20:47:04 +03:00
|
|
|
r = onig_compile_ruby(*reg, pattern, pattern_end, einfo, sourcefile, sourceline);
|
2015-10-07 11:26:56 +03:00
|
|
|
if (r) {
|
|
|
|
err:
|
|
|
|
onig_free(*reg);
|
|
|
|
*reg = NULL;
|
|
|
|
}
|
|
|
|
return r;
|
2010-04-05 14:57:38 +04:00
|
|
|
}
|
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
static Regexp*
|
2014-06-03 00:23:47 +04:00
|
|
|
make_regexp(const char *s, long len, rb_encoding *enc, int flags, onig_errmsg_buffer err,
|
2009-08-30 12:00:31 +04:00
|
|
|
const char *sourcefile, int sourceline)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
|
|
|
Regexp *rp;
|
2004-03-05 18:40:36 +03:00
|
|
|
int r;
|
2005-02-17 17:43:38 +03:00
|
|
|
OnigErrorInfo einfo;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
/* Handle escaped characters first. */
|
|
|
|
|
|
|
|
/* Build a copy of the string (in dest) with the
|
|
|
|
escaped characters translated, and generate the regex
|
|
|
|
from that.
|
|
|
|
*/
|
|
|
|
|
2010-04-05 14:57:38 +04:00
|
|
|
r = onig_new_with_source(&rp, (UChar*)s, (UChar*)(s + len), flags,
|
|
|
|
enc, OnigDefaultSyntax, &einfo, sourcefile, sourceline);
|
2004-03-05 18:40:36 +03:00
|
|
|
if (r) {
|
2010-03-02 00:54:59 +03:00
|
|
|
onig_error_code_to_str((UChar*)err, r, &einfo);
|
2005-05-23 07:24:28 +04:00
|
|
|
return 0;
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
return rp;
|
|
|
|
}
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* Document-class: MatchData
|
|
|
|
*
|
2019-03-28 06:33:35 +03:00
|
|
|
* MatchData encapsulates the result of matching a Regexp against
|
|
|
|
* a string. It is returned by Regexp#match and String#match, and also
|
|
|
|
* stored in a global variable returned by Regexp.last_match.
|
2003-12-26 18:58:28 +03:00
|
|
|
*
|
2018-12-12 09:10:29 +03:00
|
|
|
* Usage:
|
|
|
|
*
|
|
|
|
* url = 'https://docs.ruby-lang.org/en/2.5.0/MatchData.html'
|
|
|
|
* m = url.match(/(\d\.?)+/) # => #<MatchData "2.5.0" 1:"0">
|
|
|
|
* m.string # => "https://docs.ruby-lang.org/en/2.5.0/MatchData.html"
|
|
|
|
* m.regexp # => /(\d\.?)+/
|
|
|
|
* # entire matched substring:
|
|
|
|
* m[0] # => "2.5.0"
|
|
|
|
*
|
|
|
|
* # Working with unnamed captures
|
|
|
|
* m = url.match(%r{([^/]+)/([^/]+)\.html$})
|
|
|
|
* m.captures # => ["2.5.0", "MatchData"]
|
|
|
|
* m[1] # => "2.5.0"
|
|
|
|
* m.values_at(1, 2) # => ["2.5.0", "MatchData"]
|
|
|
|
*
|
|
|
|
* # Working with named captures
|
|
|
|
* m = url.match(%r{(?<version>[^/]+)/(?<module>[^/]+)\.html$})
|
|
|
|
* m.captures # => ["2.5.0", "MatchData"]
|
|
|
|
* m.named_captures # => {"version"=>"2.5.0", "module"=>"MatchData"}
|
|
|
|
* m[:version] # => "2.5.0"
|
|
|
|
* m.values_at(:version, :module)
|
|
|
|
* # => ["2.5.0", "MatchData"]
|
|
|
|
* # Numerical indexes are working, too
|
|
|
|
* m[1] # => "2.5.0"
|
|
|
|
* m.values_at(1, 2) # => ["2.5.0", "MatchData"]
|
|
|
|
*
|
|
|
|
* == Global variables equivalence
|
|
|
|
*
|
2019-03-28 06:33:35 +03:00
|
|
|
* Parts of last MatchData (returned by Regexp.last_match) are also
|
2018-12-13 23:25:36 +03:00
|
|
|
* aliased as global variables:
|
2018-12-12 09:10:29 +03:00
|
|
|
*
|
2019-03-28 06:33:35 +03:00
|
|
|
* * <code>$~</code> is Regexp.last_match;
|
2020-04-07 07:59:38 +03:00
|
|
|
* * <code>$&</code> is Regexp.last_match<code>[ 0 ]</code>;
|
2018-12-13 23:25:36 +03:00
|
|
|
* * <code>$1</code>, <code>$2</code>, and so on are
|
2020-04-07 07:59:38 +03:00
|
|
|
* Regexp.last_match<code>[ i ]</code> (captures by number);
|
2019-03-28 06:33:35 +03:00
|
|
|
* * <code>$`</code> is Regexp.last_match<code>.pre_match</code>;
|
|
|
|
* * <code>$'</code> is Regexp.last_match<code>.post_match</code>;
|
2020-04-07 07:59:38 +03:00
|
|
|
* * <code>$+</code> is Regexp.last_match<code>[ -1 ]</code> (the last capture).
|
2018-12-12 09:10:29 +03:00
|
|
|
*
|
|
|
|
* See also "Special global variables" section in Regexp documentation.
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
2006-07-20 21:36:36 +04:00
|
|
|
VALUE rb_cMatch;

/*
 * Allocation function for match objects of class +klass+.
 * The object is sized to hold struct RMatch immediately followed by an
 * rb_matchext_t.  The string and regexp slots start out as Qfalse and the
 * extension area is zero-filled.
 */
static VALUE
match_alloc(VALUE klass)
{
    const size_t ext_size = sizeof(rb_matchext_t);
    size_t total_size = sizeof(struct RMatch) + ext_size;
    VALUE obj_flags = T_MATCH | (RGENGC_WB_PROTECTED_MATCH ? FL_WB_PROTECTED : 0);
    NEWOBJ_OF(m, struct RMatch, klass, obj_flags, total_size, 0);

    m->str = Qfalse;
    m->regexp = Qfalse;
    memset(RMATCH_EXT(m), 0, ext_size);

    return (VALUE)m;
}
|
|
|
|
|
2014-12-02 00:30:58 +03:00
|
|
|
int
|
|
|
|
rb_reg_region_copy(struct re_registers *to, const struct re_registers *from)
|
|
|
|
{
|
|
|
|
onig_region_copy(to, (OnigRegion *)from);
|
|
|
|
if (to->allocated) return 0;
|
|
|
|
rb_gc();
|
|
|
|
onig_region_copy(to, (OnigRegion *)from);
|
|
|
|
if (to->allocated) return 0;
|
|
|
|
return ONIGERR_MEMORY;
|
|
|
|
}
|
|
|
|
|
2008-02-16 23:08:35 +03:00
|
|
|
/* A byte position together with the character position computed for it. */
typedef struct {
    long byte_pos;
    long char_pos;
} pair_t;

/*
 * qsort()/bsearch() comparator: orders pair_t entries by byte_pos only.
 *
 * Returns -1, 0 or 1.  The result is produced by comparison rather than
 * by subtracting and narrowing to int, so it cannot be truncated and does
 * not depend on the relative sizes of long and int.  The arguments are
 * read through const-qualified pointers.
 */
static int
pair_byte_cmp(const void *pair1, const void *pair2)
{
    const long lhs = ((const pair_t *)pair1)->byte_pos;
    const long rhs = ((const pair_t *)pair2)->byte_pos;

    return (lhs > rhs) - (lhs < rhs);
}
|
|
|
|
|
|
|
|
static void
|
|
|
|
update_char_offset(VALUE match)
|
|
|
|
{
|
2023-07-20 21:17:38 +03:00
|
|
|
rb_matchext_t *rm = RMATCH_EXT(match);
|
2008-02-16 23:08:35 +03:00
|
|
|
struct re_registers *regs;
|
2009-09-05 03:49:18 +04:00
|
|
|
int i, num_regs, num_pos;
|
|
|
|
long c;
|
2010-10-04 05:23:58 +04:00
|
|
|
char *s, *p, *q;
|
2008-02-16 23:08:35 +03:00
|
|
|
rb_encoding *enc;
|
|
|
|
pair_t *pairs;
|
|
|
|
|
2019-03-16 18:53:18 +03:00
|
|
|
if (rm->char_offset_num_allocated)
|
2008-02-16 23:08:35 +03:00
|
|
|
return;
|
|
|
|
|
|
|
|
regs = &rm->regs;
|
|
|
|
num_regs = rm->regs.num_regs;
|
|
|
|
|
|
|
|
if (rm->char_offset_num_allocated < num_regs) {
|
|
|
|
REALLOC_N(rm->char_offset, struct rmatch_offset, num_regs);
|
|
|
|
rm->char_offset_num_allocated = num_regs;
|
|
|
|
}
|
|
|
|
|
|
|
|
enc = rb_enc_get(RMATCH(match)->str);
|
|
|
|
if (rb_enc_mbmaxlen(enc) == 1) {
|
|
|
|
for (i = 0; i < num_regs; i++) {
|
|
|
|
rm->char_offset[i].beg = BEG(i);
|
|
|
|
rm->char_offset[i].end = END(i);
|
|
|
|
}
|
|
|
|
return;
|
|
|
|
}
|
|
|
|
|
|
|
|
pairs = ALLOCA_N(pair_t, num_regs*2);
|
|
|
|
num_pos = 0;
|
|
|
|
for (i = 0; i < num_regs; i++) {
|
|
|
|
if (BEG(i) < 0)
|
|
|
|
continue;
|
|
|
|
pairs[num_pos++].byte_pos = BEG(i);
|
|
|
|
pairs[num_pos++].byte_pos = END(i);
|
|
|
|
}
|
|
|
|
qsort(pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
|
|
|
|
|
|
|
|
s = p = RSTRING_PTR(RMATCH(match)->str);
|
|
|
|
c = 0;
|
|
|
|
for (i = 0; i < num_pos; i++) {
|
|
|
|
q = s + pairs[i].byte_pos;
|
|
|
|
c += rb_enc_strlen(p, q, enc);
|
|
|
|
pairs[i].char_pos = c;
|
|
|
|
p = q;
|
|
|
|
}
|
|
|
|
|
|
|
|
for (i = 0; i < num_regs; i++) {
|
|
|
|
pair_t key, *found;
|
|
|
|
if (BEG(i) < 0) {
|
|
|
|
rm->char_offset[i].beg = -1;
|
|
|
|
rm->char_offset[i].end = -1;
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
key.byte_pos = BEG(i);
|
|
|
|
found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
|
|
|
|
rm->char_offset[i].beg = found->char_pos;
|
|
|
|
|
|
|
|
key.byte_pos = END(i);
|
|
|
|
found = bsearch(&key, pairs, num_pos, sizeof(pair_t), pair_byte_cmp);
|
|
|
|
rm->char_offset[i].end = found->char_pos;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2022-11-08 09:06:15 +03:00
|
|
|
static VALUE
|
2008-06-02 16:45:42 +04:00
|
|
|
match_check(VALUE match)
|
|
|
|
{
|
|
|
|
if (!RMATCH(match)->regexp) {
|
2019-10-24 21:43:04 +03:00
|
|
|
rb_raise(rb_eTypeError, "uninitialized MatchData");
|
2008-06-02 16:45:42 +04:00
|
|
|
}
|
2022-11-08 09:06:15 +03:00
|
|
|
return match;
|
2008-06-02 16:45:42 +04:00
|
|
|
}
|
|
|
|
|
2004-01-18 17:16:47 +03:00
|
|
|
/* :nodoc: */
|
1998-01-16 15:19:22 +03:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
match_init_copy(VALUE obj, VALUE orig)
|
1998-01-16 15:19:22 +03:00
|
|
|
{
|
2023-07-20 21:17:38 +03:00
|
|
|
rb_matchext_t *rm;
|
2008-02-16 23:08:35 +03:00
|
|
|
|
2012-06-05 15:13:18 +04:00
|
|
|
if (!OBJ_INIT_COPY(obj, orig)) return obj;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
2023-02-10 16:32:46 +03:00
|
|
|
RB_OBJ_WRITE(obj, &RMATCH(obj)->str, RMATCH(orig)->str);
|
|
|
|
RB_OBJ_WRITE(obj, &RMATCH(obj)->regexp, RMATCH(orig)->regexp);
|
2008-02-16 23:08:35 +03:00
|
|
|
|
2023-07-20 21:17:38 +03:00
|
|
|
rm = RMATCH_EXT(obj);
|
2014-12-02 00:30:58 +03:00
|
|
|
if (rb_reg_region_copy(&rm->regs, RMATCH_REGS(orig)))
|
|
|
|
rb_memerror();
|
2008-02-16 23:08:35 +03:00
|
|
|
|
2023-07-20 21:17:38 +03:00
|
|
|
if (RMATCH_EXT(orig)->char_offset_num_allocated) {
|
2008-02-16 23:08:35 +03:00
|
|
|
if (rm->char_offset_num_allocated < rm->regs.num_regs) {
|
|
|
|
REALLOC_N(rm->char_offset, struct rmatch_offset, rm->regs.num_regs);
|
|
|
|
rm->char_offset_num_allocated = rm->regs.num_regs;
|
|
|
|
}
|
2023-07-20 21:17:38 +03:00
|
|
|
MEMCPY(rm->char_offset, RMATCH_EXT(orig)->char_offset,
|
2008-02-16 23:08:35 +03:00
|
|
|
struct rmatch_offset, rm->regs.num_regs);
|
2014-12-02 00:30:58 +03:00
|
|
|
RB_GC_GUARD(orig);
|
2008-02-16 23:08:35 +03:00
|
|
|
}
|
2002-08-27 12:31:08 +04:00
|
|
|
|
2002-08-30 12:22:09 +04:00
|
|
|
return obj;
|
2002-08-27 12:31:08 +04:00
|
|
|
}
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
|
2007-12-10 00:44:19 +03:00
|
|
|
/*
|
2022-04-18 19:31:30 +03:00
|
|
|
* call-seq:
|
|
|
|
* regexp -> regexp
|
|
|
|
*
|
|
|
|
* Returns the regexp that produced the match:
|
2007-12-10 00:44:19 +03:00
|
|
|
*
|
2022-04-18 19:31:30 +03:00
|
|
|
* m = /a.*b/.match("abc") # => #<MatchData "ab">
|
|
|
|
* m.regexp # => /a.*b/
|
2007-12-10 00:44:19 +03:00
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
match_regexp(VALUE match)
|
|
|
|
{
|
2014-03-27 13:58:12 +04:00
|
|
|
VALUE regexp;
|
2008-06-02 16:45:42 +04:00
|
|
|
match_check(match);
|
2014-03-27 13:58:12 +04:00
|
|
|
regexp = RMATCH(match)->regexp;
|
|
|
|
if (NIL_P(regexp)) {
|
|
|
|
VALUE str = rb_reg_nth_match(0, match);
|
|
|
|
regexp = rb_reg_regcomp(rb_reg_quote(str));
|
2023-02-10 16:32:46 +03:00
|
|
|
RB_OBJ_WRITE(match, &RMATCH(match)->regexp, regexp);
|
2014-03-27 13:58:12 +04:00
|
|
|
}
|
|
|
|
return regexp;
|
2007-12-10 00:44:19 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2022-04-18 19:31:30 +03:00
|
|
|
* call-seq:
|
|
|
|
* names -> array_of_names
|
|
|
|
*
|
|
|
|
* Returns an array of the capture names
|
|
|
|
* (see {Named Captures}[rdoc-ref:Regexp@Named+Captures]):
|
|
|
|
*
|
|
|
|
* m = /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge")
|
|
|
|
* # => #<MatchData "hog" foo:"h" bar:"o" baz:"g">
|
|
|
|
* m.names # => ["foo", "bar", "baz"]
|
2007-12-10 00:44:19 +03:00
|
|
|
*
|
2022-04-18 19:31:30 +03:00
|
|
|
* m = /foo/.match('foo') # => #<MatchData "foo">
|
|
|
|
* m.names # => [] # No named captures.
|
2007-12-10 00:44:19 +03:00
|
|
|
*
|
2022-04-18 19:31:30 +03:00
|
|
|
* Equivalent to:
|
|
|
|
*
|
|
|
|
* m = /(?<foo>.)(?<bar>.)(?<baz>.)/.match("hoge")
|
|
|
|
* m.regexp.names # => ["foo", "bar", "baz"]
|
2007-12-10 00:44:19 +03:00
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
match_names(VALUE match)
|
|
|
|
{
|
2008-06-02 16:45:42 +04:00
|
|
|
match_check(match);
|
2016-12-20 10:32:23 +03:00
|
|
|
if (NIL_P(RMATCH(match)->regexp))
|
|
|
|
return rb_ary_new_capa(0);
|
2007-12-10 00:44:19 +03:00
|
|
|
return rb_reg_names(RMATCH(match)->regexp);
|
|
|
|
}
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-18 19:31:30 +03:00
|
|
|
* size -> integer
|
|
|
|
*
|
|
|
|
* Returns size of the match array:
|
|
|
|
*
|
|
|
|
* m = /(.)(.)(\d+)(\d)/.match("THX1138.")
|
|
|
|
* # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
|
|
|
|
* m.size # => 5
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
match_size(VALUE match)
|
1999-08-13 09:45:20 +04:00
|
|
|
{
|
2008-06-02 16:45:42 +04:00
|
|
|
match_check(match);
|
2008-02-16 23:08:35 +03:00
|
|
|
return INT2FIX(RMATCH_REGS(match)->num_regs);
|
2007-11-23 05:10:44 +03:00
|
|
|
}
|
|
|
|
|
2016-12-18 14:43:51 +03:00
|
|
|
static int name_to_backref_number(struct re_registers *, VALUE, const char*, const char*);
NORETURN(static void name_to_backref_error(VALUE name));

/*
 * Raises IndexError reporting +name+ as an undefined group name
 * reference.  Never returns.
 */
static void
name_to_backref_error(VALUE name)
{
    static const char fmt[] = "undefined group name reference: % "PRIsVALUE;

    rb_raise(rb_eIndexError, fmt, name);
}
|
2016-12-18 14:43:51 +03:00
|
|
|
|
2021-09-11 16:50:59 +03:00
|
|
|
static void
|
|
|
|
backref_number_check(struct re_registers *regs, int i)
|
|
|
|
{
|
|
|
|
if (i < 0 || regs->num_regs <= i)
|
|
|
|
rb_raise(rb_eIndexError, "index %d out of matches", i);
|
|
|
|
}
|
|
|
|
|
2007-12-09 10:12:44 +03:00
|
|
|
static int
|
|
|
|
match_backref_number(VALUE match, VALUE backref)
|
|
|
|
{
|
|
|
|
const char *name;
|
|
|
|
int num;
|
|
|
|
|
2008-02-16 23:08:35 +03:00
|
|
|
struct re_registers *regs = RMATCH_REGS(match);
|
2007-12-09 10:12:44 +03:00
|
|
|
VALUE regexp = RMATCH(match)->regexp;
|
|
|
|
|
2008-06-02 16:45:42 +04:00
|
|
|
match_check(match);
|
2016-12-19 07:07:40 +03:00
|
|
|
if (SYMBOL_P(backref)) {
|
2014-07-30 17:46:41 +04:00
|
|
|
backref = rb_sym2str(backref);
|
2007-12-09 10:12:44 +03:00
|
|
|
}
|
2016-12-19 07:07:40 +03:00
|
|
|
else if (!RB_TYPE_P(backref, T_STRING)) {
|
|
|
|
return NUM2INT(backref);
|
|
|
|
}
|
|
|
|
name = StringValueCStr(backref);
|
2007-12-09 10:12:44 +03:00
|
|
|
|
2021-09-11 15:43:11 +03:00
|
|
|
num = name_to_backref_number(regs, regexp, name, name + RSTRING_LEN(backref));
|
2007-12-09 10:12:44 +03:00
|
|
|
|
|
|
|
if (num < 1) {
|
2021-09-11 15:43:11 +03:00
|
|
|
name_to_backref_error(backref);
|
2007-12-09 10:12:44 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return num;
|
|
|
|
}
|
|
|
|
|
2009-03-14 21:04:21 +03:00
|
|
|
int
|
|
|
|
rb_reg_backref_number(VALUE match, VALUE backref)
|
|
|
|
{
|
|
|
|
return match_backref_number(match, backref);
|
|
|
|
}
|
2003-12-26 18:58:28 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-18 19:31:30 +03:00
|
|
|
* offset(n) -> [start_offset, end_offset]
|
|
|
|
* offset(name) -> [start_offset, end_offset]
|
2007-12-09 10:35:54 +03:00
|
|
|
*
|
2022-04-18 19:31:30 +03:00
|
|
|
* :include: doc/matchdata/offset.rdoc
|
2007-12-09 10:35:54 +03:00
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
match_offset(VALUE match, VALUE n)
|
1999-08-13 09:45:20 +04:00
|
|
|
{
|
2007-12-09 10:12:44 +03:00
|
|
|
int i = match_backref_number(match, n);
|
2008-02-16 23:08:35 +03:00
|
|
|
struct re_registers *regs = RMATCH_REGS(match);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
2008-06-02 16:45:42 +04:00
|
|
|
match_check(match);
|
2021-09-11 16:50:59 +03:00
|
|
|
backref_number_check(regs, i);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
2008-02-16 23:08:35 +03:00
|
|
|
if (BEG(i) < 0)
|
1999-08-13 09:45:20 +04:00
|
|
|
return rb_assoc_new(Qnil, Qnil);
|
|
|
|
|
2008-02-16 23:08:35 +03:00
|
|
|
update_char_offset(match);
|
2023-07-20 21:17:38 +03:00
|
|
|
return rb_assoc_new(LONG2NUM(RMATCH_EXT(match)->char_offset[i].beg),
|
|
|
|
LONG2NUM(RMATCH_EXT(match)->char_offset[i].end));
|
1999-08-13 09:45:20 +04:00
|
|
|
}
|
|
|
|
|
2022-02-19 13:10:00 +03:00
|
|
|
/*
|
|
|
|
* call-seq:
|
|
|
|
* mtch.byteoffset(n) -> array
|
|
|
|
*
|
|
|
|
* Returns a two-element array containing the beginning and ending byte-based offsets of
|
|
|
|
* the <em>n</em>th match.
|
|
|
|
* <em>n</em> can be a string or symbol to reference a named capture.
|
|
|
|
*
|
|
|
|
* m = /(.)(.)(\d+)(\d)/.match("THX1138.")
|
|
|
|
* m.byteoffset(0) #=> [1, 7]
|
|
|
|
* m.byteoffset(4) #=> [6, 7]
|
|
|
|
*
|
|
|
|
* m = /(?<foo>.)(.)(?<bar>.)/.match("hoge")
|
|
|
|
* p m.byteoffset(:foo) #=> [0, 1]
|
|
|
|
* p m.byteoffset(:bar) #=> [2, 3]
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
match_byteoffset(VALUE match, VALUE n)
|
|
|
|
{
|
|
|
|
int i = match_backref_number(match, n);
|
|
|
|
struct re_registers *regs = RMATCH_REGS(match);
|
|
|
|
|
|
|
|
match_check(match);
|
|
|
|
backref_number_check(regs, i);
|
|
|
|
|
|
|
|
if (BEG(i) < 0)
|
|
|
|
return rb_assoc_new(Qnil, Qnil);
|
|
|
|
return rb_assoc_new(LONG2NUM(BEG(i)), LONG2NUM(END(i)));
|
|
|
|
}
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-18 21:02:35 +03:00
|
|
|
* begin(n) -> integer
|
|
|
|
* begin(name) -> integer
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-18 21:02:35 +03:00
|
|
|
* :include: doc/matchdata/begin.rdoc
|
2007-12-09 10:35:54 +03:00
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
match_begin(VALUE match, VALUE n)
|
1999-08-13 09:45:20 +04:00
|
|
|
{
|
2007-12-09 10:12:44 +03:00
|
|
|
int i = match_backref_number(match, n);
|
2008-02-16 23:08:35 +03:00
|
|
|
struct re_registers *regs = RMATCH_REGS(match);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
2008-06-02 16:45:42 +04:00
|
|
|
match_check(match);
|
2021-09-11 16:50:59 +03:00
|
|
|
backref_number_check(regs, i);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
2008-02-16 23:08:35 +03:00
|
|
|
if (BEG(i) < 0)
|
1999-08-13 09:45:20 +04:00
|
|
|
return Qnil;
|
|
|
|
|
2008-02-16 23:08:35 +03:00
|
|
|
update_char_offset(match);
|
2023-07-20 21:17:38 +03:00
|
|
|
return LONG2NUM(RMATCH_EXT(match)->char_offset[i].beg);
|
1999-08-13 09:45:20 +04:00
|
|
|
}
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-18 21:02:35 +03:00
|
|
|
* end(n) -> integer
|
|
|
|
* end(name) -> integer
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-18 21:02:35 +03:00
|
|
|
* :include: doc/matchdata/end.rdoc
|
2007-12-09 10:35:54 +03:00
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
match_end(VALUE match, VALUE n)
|
1999-08-13 09:45:20 +04:00
|
|
|
{
|
2007-12-09 10:12:44 +03:00
|
|
|
int i = match_backref_number(match, n);
|
2008-02-16 23:08:35 +03:00
|
|
|
struct re_registers *regs = RMATCH_REGS(match);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
2008-06-02 16:45:42 +04:00
|
|
|
match_check(match);
|
2021-09-11 16:50:59 +03:00
|
|
|
backref_number_check(regs, i);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
2008-02-16 23:08:35 +03:00
|
|
|
if (BEG(i) < 0)
|
1999-08-13 09:45:20 +04:00
|
|
|
return Qnil;
|
|
|
|
|
2008-02-16 23:08:35 +03:00
|
|
|
update_char_offset(match);
|
2023-07-20 21:17:38 +03:00
|
|
|
return LONG2NUM(RMATCH_EXT(match)->char_offset[i].end);
|
1999-08-13 09:45:20 +04:00
|
|
|
}
|
|
|
|
|
2021-09-16 13:37:52 +03:00
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-18 21:02:35 +03:00
|
|
|
* match(n) -> string or nil
|
|
|
|
* match(name) -> string or nil
|
2021-09-16 13:37:52 +03:00
|
|
|
*
|
2022-04-18 21:02:35 +03:00
|
|
|
* Returns the matched substring corresponding to the given argument.
|
|
|
|
*
|
|
|
|
* When non-negative argument +n+ is given,
|
|
|
|
* returns the matched substring for the <tt>n</tt>th match:
|
2021-09-16 13:37:52 +03:00
|
|
|
*
|
2022-04-18 21:02:35 +03:00
|
|
|
* m = /(.)(.)(\d+)(\d)(\w)?/.match("THX1138.")
|
|
|
|
* # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8" 5:nil>
|
|
|
|
* m.match(0) # => "HX1138"
|
|
|
|
* m.match(4) # => "8"
|
|
|
|
* m.match(5) # => nil
|
2021-09-16 13:37:52 +03:00
|
|
|
*
|
2022-04-18 21:02:35 +03:00
|
|
|
* When string or symbol argument +name+ is given,
|
|
|
|
* returns the matched substring for the given name:
|
|
|
|
*
|
|
|
|
* m = /(?<foo>.)(.)(?<bar>.+)/.match("hoge")
|
2022-04-18 23:52:07 +03:00
|
|
|
* # => #<MatchData "hoge" foo:"h" bar:"ge">
|
2022-04-18 21:02:35 +03:00
|
|
|
* m.match('foo') # => "h"
|
|
|
|
* m.match(:bar) # => "ge"
|
2021-09-16 13:37:52 +03:00
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
match_nth(VALUE match, VALUE n)
|
|
|
|
{
|
|
|
|
int i = match_backref_number(match, n);
|
|
|
|
struct re_registers *regs = RMATCH_REGS(match);
|
|
|
|
|
|
|
|
backref_number_check(regs, i);
|
|
|
|
|
|
|
|
long start = BEG(i), end = END(i);
|
|
|
|
if (start < 0)
|
|
|
|
return Qnil;
|
|
|
|
|
|
|
|
return rb_str_subseq(RMATCH(match)->str, start, end - start);
|
|
|
|
}
|
|
|
|
|
2021-09-16 13:50:29 +03:00
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-18 21:02:35 +03:00
|
|
|
* match_length(n) -> integer or nil
|
|
|
|
* match_length(name) -> integer or nil
|
|
|
|
*
|
|
|
|
* Returns the length (in characters) of the matched substring
|
|
|
|
* corresponding to the given argument.
|
|
|
|
*
|
|
|
|
* When non-negative argument +n+ is given,
|
|
|
|
* returns the length of the matched substring
|
|
|
|
* for the <tt>n</tt>th match:
|
|
|
|
*
|
|
|
|
* m = /(.)(.)(\d+)(\d)(\w)?/.match("THX1138.")
|
|
|
|
* # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8" 5:nil>
|
|
|
|
* m.match_length(0) # => 6
|
|
|
|
* m.match_length(4) # => 1
|
|
|
|
* m.match_length(5) # => nil
|
|
|
|
*
|
|
|
|
* When string or symbol argument +name+ is given,
|
|
|
|
* returns the length of the matched substring
|
|
|
|
* for the named match:
|
|
|
|
*
|
|
|
|
* m = /(?<foo>.)(.)(?<bar>.+)/.match("hoge")
|
|
|
|
* # => #<MatchData "hoge" foo:"h" bar:"ge">
|
|
|
|
* m.match_length('foo') # => 1
|
|
|
|
* m.match_length(:bar) # => 2
|
2021-09-16 13:50:29 +03:00
|
|
|
*
|
|
|
|
*/
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
match_nth_length(VALUE match, VALUE n)
|
|
|
|
{
|
|
|
|
int i = match_backref_number(match, n);
|
|
|
|
struct re_registers *regs = RMATCH_REGS(match);
|
|
|
|
|
|
|
|
match_check(match);
|
|
|
|
backref_number_check(regs, i);
|
|
|
|
|
|
|
|
if (BEG(i) < 0)
|
|
|
|
return Qnil;
|
|
|
|
|
|
|
|
update_char_offset(match);
|
|
|
|
const struct rmatch_offset *const ofs =
|
2023-07-20 21:17:38 +03:00
|
|
|
&RMATCH_EXT(match)->char_offset[i];
|
2021-09-16 13:50:29 +03:00
|
|
|
return LONG2NUM(ofs->end - ofs->beg);
|
|
|
|
}
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
#define MATCH_BUSY FL_USER2
|
|
|
|
|
|
|
|
void
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_match_busy(VALUE match)
|
1999-08-13 09:45:20 +04:00
|
|
|
{
|
2000-02-08 11:54:01 +03:00
|
|
|
FL_SET(match, MATCH_BUSY);
|
1999-08-13 09:45:20 +04:00
|
|
|
}
|
|
|
|
|
2019-07-28 01:33:21 +03:00
|
|
|
void
|
|
|
|
rb_match_unbusy(VALUE match)
|
|
|
|
{
|
|
|
|
FL_UNSET(match, MATCH_BUSY);
|
|
|
|
}
|
|
|
|
|
2016-01-14 11:36:49 +03:00
|
|
|
int
|
|
|
|
rb_match_count(VALUE match)
|
|
|
|
{
|
|
|
|
struct re_registers *regs;
|
|
|
|
if (NIL_P(match)) return -1;
|
|
|
|
regs = RMATCH_REGS(match);
|
|
|
|
if (!regs) return -1;
|
|
|
|
return regs->num_regs;
|
|
|
|
}
|
|
|
|
|
2014-03-27 13:58:12 +04:00
|
|
|
static void
|
|
|
|
match_set_string(VALUE m, VALUE string, long pos, long len)
|
|
|
|
{
|
|
|
|
struct RMatch *match = (struct RMatch *)m;
|
2023-07-20 21:17:38 +03:00
|
|
|
rb_matchext_t *rmatch = RMATCH_EXT(match);
|
2014-03-27 13:58:12 +04:00
|
|
|
|
2023-02-10 16:32:46 +03:00
|
|
|
RB_OBJ_WRITE(match, &RMATCH(match)->str, string);
|
|
|
|
RB_OBJ_WRITE(match, &RMATCH(match)->regexp, Qnil);
|
2019-10-12 16:43:34 +03:00
|
|
|
int err = onig_region_resize(&rmatch->regs, 1);
|
|
|
|
if (err) rb_memerror();
|
2014-03-27 13:58:12 +04:00
|
|
|
rmatch->regs.beg[0] = pos;
|
|
|
|
rmatch->regs.end[0] = pos + len;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
rb_backref_set_string(VALUE string, long pos, long len)
|
|
|
|
{
|
|
|
|
VALUE match = rb_backref_get();
|
|
|
|
if (NIL_P(match) || FL_TEST(match, MATCH_BUSY)) {
|
|
|
|
match = match_alloc(rb_cMatch);
|
|
|
|
}
|
|
|
|
match_set_string(match, string, pos, len);
|
|
|
|
rb_backref_set(match);
|
|
|
|
}
|
|
|
|
|
2007-12-09 14:57:06 +03:00
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-16 23:20:03 +03:00
|
|
|
* fixed_encoding? -> true or false
|
|
|
|
*
|
|
|
|
* Returns +false+ if +self+ is applicable to
|
|
|
|
* a string with any ASCII-compatible encoding;
|
|
|
|
* otherwise returns +true+:
|
|
|
|
*
|
|
|
|
* r = /a/ # => /a/
|
|
|
|
* r.fixed_encoding? # => false
|
|
|
|
* r.match?("\u{6666} a") # => true
|
|
|
|
* r.match?("\xa1\xa2 a".force_encoding("euc-jp")) # => true
|
|
|
|
* r.match?("abc".force_encoding("euc-jp")) # => true
|
|
|
|
*
|
|
|
|
* r = /a/u # => /a/
|
|
|
|
* r.fixed_encoding? # => true
|
|
|
|
* r.match?("\u{6666} a") # => true
|
|
|
|
* r.match?("\xa1\xa2".force_encoding("euc-jp")) # Raises exception.
|
|
|
|
* r.match?("abc".force_encoding("euc-jp")) # => true
|
|
|
|
*
|
|
|
|
* r = /\u{6666}/ # => /\u{6666}/
|
|
|
|
* r.fixed_encoding? # => true
|
|
|
|
* r.encoding # => #<Encoding:UTF-8>
|
|
|
|
* r.match?("\u{6666} a") # => true
|
|
|
|
* r.match?("\xa1\xa2".force_encoding("euc-jp")) # Raises exception.
|
|
|
|
* r.match?("abc".force_encoding("euc-jp")) # => false
|
|
|
|
*
|
2007-12-09 14:57:06 +03:00
|
|
|
*/
|
|
|
|
|
2007-11-26 05:27:59 +03:00
|
|
|
static VALUE
|
|
|
|
rb_reg_fixed_encoding_p(VALUE re)
|
|
|
|
{
|
2021-08-02 06:06:44 +03:00
|
|
|
return RBOOL(FL_TEST(re, KCODE_FIXED));
|
2007-11-26 05:27:59 +03:00
|
|
|
}
|
|
|
|
|
2007-12-01 19:56:19 +03:00
|
|
|
static VALUE
|
2014-06-03 00:23:47 +04:00
|
|
|
rb_reg_preprocess(const char *p, const char *end, rb_encoding *enc,
|
Ignore invalid escapes in regexp comments
Invalid escapes are handled at multiple levels. The first level
is in parse.y, so skip invalid unicode escape checks for regexps
in parse.y.
Make rb_reg_preprocess and unescape_nonascii accept the regexp
options. In unescape_nonascii, if the regexp is an extended
regexp, when "#" is encountered, ignore all characters until the
end of line or end of regexp.
Unfortunately, in extended regexps, you can use "#" as a non-comment
character inside a character class, so also parse "[" and "]"
specially for extended regexps, and only skip comments if "#" is
not inside a character class. Handle nested character classes as well.
This issue doesn't just affect extended regexps, it also affects
"(#?" comments inside all regexps. So for those comments, scan
until trailing ")" and ignore content inside.
I'm not sure if there are other corner cases not handled. A
better fix would be to redesign the regexp parser so that it
unescaped during parsing instead of before parsing, so you already
know the current parsing state.
Fixes [Bug #18294]
Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2022-06-06 23:50:03 +03:00
|
|
|
rb_encoding **fixed_enc, onig_errmsg_buffer err, int options);
|
2007-12-01 19:56:19 +03:00
|
|
|
|
2018-01-18 12:44:41 +03:00
|
|
|
NORETURN(static void reg_enc_error(VALUE re, VALUE str));

/*
 * Raises Encoding::CompatibilityError naming the encodings of the regexp
 * +re+ and the string +str+.  Never returns.
 */
static void
reg_enc_error(VALUE re, VALUE str)
{
    const char *const regexp_enc_name = rb_enc_name(rb_enc_get(re));
    const char *const string_enc_name = rb_enc_name(rb_enc_get(str));

    rb_raise(rb_eEncCompatError,
             "incompatible encoding regexp match (%s regexp with %s string)",
             regexp_enc_name,
             string_enc_name);
}
|
|
|
|
|
2016-05-02 15:04:04 +03:00
|
|
|
static inline int
|
|
|
|
str_coderange(VALUE str)
|
|
|
|
{
|
|
|
|
int cr = ENC_CODERANGE(str);
|
|
|
|
if (cr == ENC_CODERANGE_UNKNOWN) {
|
|
|
|
cr = rb_enc_str_coderange(str);
|
|
|
|
}
|
|
|
|
return cr;
|
|
|
|
}
|
|
|
|
|
2014-06-03 00:23:47 +04:00
|
|
|
static rb_encoding*
|
2008-04-14 19:47:51 +04:00
|
|
|
rb_reg_prepare_enc(VALUE re, VALUE str, int warn)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
2014-06-03 00:23:47 +04:00
|
|
|
rb_encoding *enc = 0;
|
2016-05-02 15:04:04 +03:00
|
|
|
int cr = str_coderange(str);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2016-02-02 20:38:00 +03:00
|
|
|
if (cr == ENC_CODERANGE_BROKEN) {
|
2008-01-04 08:01:58 +03:00
|
|
|
rb_raise(rb_eArgError,
|
2008-12-11 07:40:08 +03:00
|
|
|
"invalid byte sequence in %s",
|
2008-01-04 08:01:58 +03:00
|
|
|
rb_enc_name(rb_enc_get(str)));
|
|
|
|
}
|
|
|
|
|
2000-05-22 11:09:55 +04:00
|
|
|
rb_reg_check(re);
|
2008-05-15 14:45:58 +04:00
|
|
|
enc = rb_enc_get(str);
|
2016-05-02 15:04:04 +03:00
|
|
|
if (RREGEXP_PTR(re)->enc == enc) {
|
|
|
|
}
|
|
|
|
else if (cr == ENC_CODERANGE_7BIT &&
|
|
|
|
RREGEXP_PTR(re)->enc == rb_usascii_encoding()) {
|
|
|
|
enc = RREGEXP_PTR(re)->enc;
|
|
|
|
}
|
|
|
|
else if (!rb_enc_asciicompat(enc)) {
|
|
|
|
reg_enc_error(re, str);
|
2007-11-23 09:30:26 +03:00
|
|
|
}
|
2008-05-15 14:45:58 +04:00
|
|
|
else if (rb_reg_fixed_encoding_p(re)) {
|
2016-05-02 15:04:04 +03:00
|
|
|
if ((!rb_enc_asciicompat(RREGEXP_PTR(re)->enc) ||
|
2016-02-02 20:38:00 +03:00
|
|
|
cr != ENC_CODERANGE_7BIT)) {
|
2008-05-15 14:45:58 +04:00
|
|
|
reg_enc_error(re, str);
|
2008-04-14 19:47:51 +04:00
|
|
|
}
|
2016-02-02 07:39:44 +03:00
|
|
|
enc = RREGEXP_PTR(re)->enc;
|
2008-05-15 14:45:58 +04:00
|
|
|
}
|
2016-05-02 15:04:04 +03:00
|
|
|
else if (warn && (RBASIC(re)->flags & REG_ENCODING_NONE) &&
|
2008-05-15 14:45:58 +04:00
|
|
|
enc != rb_ascii8bit_encoding() &&
|
2016-02-02 20:38:00 +03:00
|
|
|
cr != ENC_CODERANGE_7BIT) {
|
2017-03-03 02:33:06 +03:00
|
|
|
rb_warn("historical binary regexp match /.../n against %s string",
|
2008-05-15 14:45:58 +04:00
|
|
|
rb_enc_name(enc));
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
2008-05-15 14:45:58 +04:00
|
|
|
return enc;
|
2008-04-14 19:47:51 +04:00
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2023-07-28 01:28:23 +03:00
|
|
|
regex_t *
|
2023-07-27 21:04:02 +03:00
|
|
|
rb_reg_prepare_re(VALUE re, VALUE str)
|
2008-04-14 19:47:51 +04:00
|
|
|
{
|
|
|
|
int r;
|
|
|
|
OnigErrorInfo einfo;
|
|
|
|
VALUE unescaped;
|
2014-06-03 00:23:47 +04:00
|
|
|
rb_encoding *fixed_enc = 0;
|
|
|
|
rb_encoding *enc = rb_reg_prepare_enc(re, str, 1);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2023-07-27 21:41:12 +03:00
|
|
|
regex_t *reg = RREGEXP_PTR(re);
|
2008-04-14 19:47:51 +04:00
|
|
|
if (reg->enc == enc) return reg;
|
2005-02-17 17:43:38 +03:00
|
|
|
|
2008-04-14 19:47:51 +04:00
|
|
|
rb_reg_check(re);
|
2023-12-24 04:24:15 +03:00
|
|
|
|
|
|
|
VALUE src_str = RREGEXP_SRC(re);
|
|
|
|
const char *pattern = RSTRING_PTR(src_str);
|
2007-12-01 19:56:19 +03:00
|
|
|
|
2023-07-27 21:04:02 +03:00
|
|
|
onig_errmsg_buffer err = "";
|
2008-04-14 19:47:51 +04:00
|
|
|
unescaped = rb_reg_preprocess(
|
2023-12-24 04:24:15 +03:00
|
|
|
pattern, pattern + RSTRING_LEN(src_str), enc,
|
Ignore invalid escapes in regexp comments
Invalid escapes are handled at multiple levels. The first level
is in parse.y, so skip invalid unicode escape checks for regexps
in parse.y.
Make rb_reg_preprocess and unescape_nonascii accept the regexp
options. In unescape_nonascii, if the regexp is an extended
regexp, when "#" is encountered, ignore all characters until the
end of line or end of regexp.
Unfortunately, in extended regexps, you can use "#" as a non-comment
character inside a character class, so also parse "[" and "]"
specially for extended regexps, and only skip comments if "#" is
not inside a character class. Handle nested character classes as well.
This issue doesn't just affect extended regexps, it also affects
"(#?" comments inside all regexps. So for those comments, scan
until trailing ")" and ignore content inside.
I'm not sure if there are other corner cases not handled. A
better fix would be to redesign the regexp parser so that it
unescaped during parsing instead of before parsing, so you already
know the current parsing state.
Fixes [Bug #18294]
Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2022-06-06 23:50:03 +03:00
|
|
|
&fixed_enc, err, 0);
|
2008-01-23 07:40:43 +03:00
|
|
|
|
2021-10-03 16:34:45 +03:00
|
|
|
if (NIL_P(unescaped)) {
|
2008-04-14 19:47:51 +04:00
|
|
|
rb_raise(rb_eArgError, "regexp preprocess failed: %s", err);
|
|
|
|
}
|
2006-05-15 16:39:25 +04:00
|
|
|
|
2022-03-24 10:59:11 +03:00
|
|
|
// inherit the timeout settings
|
|
|
|
rb_hrtime_t timelimit = reg->timelimit;
|
|
|
|
|
2020-08-13 14:56:23 +03:00
|
|
|
const char *ptr;
|
|
|
|
long len;
|
|
|
|
RSTRING_GETMEM(unescaped, ptr, len);
|
2023-07-28 18:28:44 +03:00
|
|
|
|
|
|
|
/* If there are no other users of this regex, then we can directly overwrite it. */
|
|
|
|
if (RREGEXP(re)->usecnt == 0) {
|
|
|
|
regex_t tmp_reg;
|
|
|
|
r = onig_new_without_alloc(&tmp_reg, (UChar *)ptr, (UChar *)(ptr + len),
|
|
|
|
reg->options, enc,
|
|
|
|
OnigDefaultSyntax, &einfo);
|
|
|
|
|
|
|
|
if (r) {
|
|
|
|
/* There was an error so perform cleanups. */
|
|
|
|
onig_free_body(&tmp_reg);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
onig_free_body(reg);
|
|
|
|
/* There are no errors so set reg to tmp_reg. */
|
|
|
|
*reg = tmp_reg;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
r = onig_new(®, (UChar *)ptr, (UChar *)(ptr + len),
|
|
|
|
reg->options, enc,
|
|
|
|
OnigDefaultSyntax, &einfo);
|
|
|
|
}
|
|
|
|
|
2008-04-14 19:47:51 +04:00
|
|
|
if (r) {
|
|
|
|
onig_error_code_to_str((UChar*)err, r, &einfo);
|
2023-12-24 17:08:01 +03:00
|
|
|
rb_reg_raise(err, re);
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
2008-04-14 19:47:51 +04:00
|
|
|
|
2022-03-24 10:59:11 +03:00
|
|
|
reg->timelimit = timelimit;
|
|
|
|
|
2008-04-14 19:47:51 +04:00
|
|
|
RB_GC_GUARD(unescaped);
|
2023-12-24 04:24:15 +03:00
|
|
|
RB_GC_GUARD(src_str);
|
2008-04-14 19:47:51 +04:00
|
|
|
return reg;
|
1998-01-16 15:19:22 +03:00
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2023-07-26 22:57:03 +03:00
|
|
|
OnigPosition
|
|
|
|
rb_reg_onig_match(VALUE re, VALUE str,
|
|
|
|
OnigPosition (*match)(regex_t *reg, VALUE str, struct re_registers *regs, void *args),
|
|
|
|
void *args, struct re_registers *regs)
|
2016-05-02 15:04:04 +03:00
|
|
|
{
|
2023-07-27 21:04:02 +03:00
|
|
|
regex_t *reg = rb_reg_prepare_re(re, str);
|
2023-07-26 22:57:03 +03:00
|
|
|
|
|
|
|
bool tmpreg = reg != RREGEXP_PTR(re);
|
|
|
|
if (!tmpreg) RREGEXP(re)->usecnt++;
|
|
|
|
|
|
|
|
OnigPosition result = match(reg, str, regs, args);
|
|
|
|
|
|
|
|
if (!tmpreg) RREGEXP(re)->usecnt--;
|
|
|
|
if (tmpreg) {
|
2023-07-28 18:28:44 +03:00
|
|
|
onig_free(reg);
|
2023-07-26 22:57:03 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (result < 0) {
|
|
|
|
onig_region_free(regs, 0);
|
|
|
|
|
|
|
|
if (result != ONIG_MISMATCH) {
|
2023-07-27 21:04:02 +03:00
|
|
|
onig_errmsg_buffer err = "";
|
2023-07-26 22:57:03 +03:00
|
|
|
onig_error_code_to_str((UChar*)err, (int)result);
|
2023-12-24 17:08:01 +03:00
|
|
|
rb_reg_raise(err, re);
|
2023-07-26 22:57:03 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
2016-05-02 15:04:04 +03:00
|
|
|
}
|
|
|
|
|
2009-06-30 06:08:54 +04:00
|
|
|
long
|
|
|
|
rb_reg_adjust_startpos(VALUE re, VALUE str, long pos, int reverse)
|
1999-12-14 09:50:43 +03:00
|
|
|
{
|
2009-06-30 06:08:54 +04:00
|
|
|
long range;
|
2014-06-03 00:23:47 +04:00
|
|
|
rb_encoding *enc;
|
2005-02-17 17:43:38 +03:00
|
|
|
UChar *p, *string;
|
1999-12-14 09:50:43 +03:00
|
|
|
|
2008-04-14 19:47:51 +04:00
|
|
|
enc = rb_reg_prepare_enc(re, str, 0);
|
1999-12-14 09:50:43 +03:00
|
|
|
|
|
|
|
if (reverse) {
|
|
|
|
range = -pos;
|
|
|
|
}
|
|
|
|
else {
|
2006-08-31 14:47:44 +04:00
|
|
|
range = RSTRING_LEN(str) - pos;
|
1999-12-14 09:50:43 +03:00
|
|
|
}
|
2005-02-17 17:43:38 +03:00
|
|
|
|
2006-08-31 14:47:44 +04:00
|
|
|
if (pos > 0 && ONIGENC_MBC_MAXLEN(enc) != 1 && pos < RSTRING_LEN(str)) {
|
|
|
|
string = (UChar*)RSTRING_PTR(str);
|
2005-02-17 17:43:38 +03:00
|
|
|
|
|
|
|
if (range > 0) {
|
2008-09-13 20:40:31 +04:00
|
|
|
p = onigenc_get_right_adjust_char_head(enc, string, string + pos, string + RSTRING_LEN(str));
|
2005-02-17 17:43:38 +03:00
|
|
|
}
|
|
|
|
else {
|
2008-09-13 23:23:52 +04:00
|
|
|
p = ONIGENC_LEFT_ADJUST_CHAR_HEAD(enc, string, string + pos, string + RSTRING_LEN(str));
|
2005-02-17 17:43:38 +03:00
|
|
|
}
|
|
|
|
return p - string;
|
|
|
|
}
|
|
|
|
|
|
|
|
return pos;
|
1999-12-14 09:50:43 +03:00
|
|
|
}
|
|
|
|
|
2023-07-26 22:57:03 +03:00
|
|
|
/*
 * Arguments handed to reg_onig_search() through its void pointer.
 * Both members are byte offsets added to the start of the subject string.
 */
struct reg_onig_search_args {
    long pos;   /* offset at which the search starts */
    long range; /* offset that bounds the search */
};
|
|
|
|
|
|
|
|
static OnigPosition
|
|
|
|
reg_onig_search(regex_t *reg, VALUE str, struct re_registers *regs, void *args_ptr)
|
|
|
|
{
|
|
|
|
struct reg_onig_search_args *args = (struct reg_onig_search_args *)args_ptr;
|
|
|
|
const char *ptr;
|
|
|
|
long len;
|
|
|
|
RSTRING_GETMEM(str, ptr, len);
|
|
|
|
|
|
|
|
return onig_search(
|
|
|
|
reg,
|
|
|
|
(UChar *)ptr,
|
|
|
|
(UChar *)(ptr + len),
|
|
|
|
(UChar *)(ptr + args->pos),
|
|
|
|
(UChar *)(ptr + args->range),
|
|
|
|
regs,
|
|
|
|
ONIG_OPTION_NONE);
|
|
|
|
}
|
|
|
|
|
2024-01-30 22:16:51 +03:00
|
|
|
struct rb_reg_onig_match_args {
|
|
|
|
VALUE re;
|
|
|
|
VALUE str;
|
|
|
|
struct reg_onig_search_args args;
|
|
|
|
struct re_registers regs;
|
|
|
|
|
|
|
|
OnigPosition result;
|
|
|
|
};
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
rb_reg_onig_match_try(VALUE value_args)
|
|
|
|
{
|
|
|
|
struct rb_reg_onig_match_args *args = (struct rb_reg_onig_match_args *)value_args;
|
|
|
|
args->result = rb_reg_onig_match(args->re, args->str, reg_onig_search, &args->args, &args->regs);
|
|
|
|
return Qnil;
|
|
|
|
}
|
|
|
|
|
2013-03-24 17:18:54 +04:00
|
|
|
/* returns byte offset */
|
2021-08-11 23:50:59 +03:00
|
|
|
static long
|
|
|
|
rb_reg_search_set_match(VALUE re, VALUE str, long pos, int reverse, int set_backref_str, VALUE *set_match)
|
1998-01-16 15:19:22 +03:00
|
|
|
{
|
2023-07-26 22:57:03 +03:00
|
|
|
long len = RSTRING_LEN(str);
|
2020-08-13 14:56:23 +03:00
|
|
|
if (pos > len || pos < 0) {
|
2002-03-19 12:03:11 +03:00
|
|
|
rb_backref_set(Qnil);
|
|
|
|
return -1;
|
|
|
|
}
|
1998-01-16 15:19:22 +03:00
|
|
|
|
2024-01-30 22:16:51 +03:00
|
|
|
struct rb_reg_onig_match_args args = {
|
|
|
|
.re = re,
|
|
|
|
.str = str,
|
|
|
|
.args = {
|
|
|
|
.pos = pos,
|
|
|
|
.range = reverse ? 0 : len,
|
|
|
|
},
|
|
|
|
.regs = {0}
|
2023-07-26 22:57:03 +03:00
|
|
|
};
|
1998-01-16 15:19:22 +03:00
|
|
|
|
2024-01-30 22:16:51 +03:00
|
|
|
/* If there is a timeout set, then rb_reg_onig_match could raise a
|
|
|
|
* Regexp::TimeoutError so we want to protect it from leaking memory. */
|
|
|
|
if (rb_reg_match_time_limit) {
|
|
|
|
int state;
|
|
|
|
rb_protect(rb_reg_onig_match_try, (VALUE)&args, &state);
|
|
|
|
if (state) {
|
|
|
|
onig_region_free(&args.regs, false);
|
|
|
|
rb_jump_tag(state);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
rb_reg_onig_match_try((VALUE)&args);
|
|
|
|
}
|
2000-02-08 11:54:01 +03:00
|
|
|
|
2024-01-30 22:16:51 +03:00
|
|
|
if (args.result == ONIG_MISMATCH) {
|
2023-07-26 22:57:03 +03:00
|
|
|
rb_backref_set(Qnil);
|
|
|
|
return ONIG_MISMATCH;
|
|
|
|
}
|
2002-01-04 17:15:33 +03:00
|
|
|
|
2023-12-30 21:26:59 +03:00
|
|
|
VALUE match = match_alloc(rb_cMatch);
|
|
|
|
rb_matchext_t *rm = RMATCH_EXT(match);
|
2024-01-30 22:16:51 +03:00
|
|
|
rm->regs = args.regs;
|
2023-12-30 21:26:59 +03:00
|
|
|
|
2014-03-26 03:46:05 +04:00
|
|
|
if (set_backref_str) {
|
2023-02-10 16:32:46 +03:00
|
|
|
RB_OBJ_WRITE(match, &RMATCH(match)->str, rb_str_new4(str));
|
2014-03-26 03:46:05 +04:00
|
|
|
}
|
2022-11-30 19:31:24 +03:00
|
|
|
else {
|
|
|
|
/* Note that a MatchData object with RMATCH(match)->str == 0 is incomplete!
|
|
|
|
* We need to hide the object from ObjectSpace.each_object.
|
|
|
|
* https://bugs.ruby-lang.org/issues/19159
|
|
|
|
*/
|
|
|
|
rb_obj_hide(match);
|
|
|
|
}
|
2014-03-26 03:46:05 +04:00
|
|
|
|
2023-02-10 16:32:46 +03:00
|
|
|
RB_OBJ_WRITE(match, &RMATCH(match)->regexp, re);
|
2000-02-08 11:54:01 +03:00
|
|
|
rb_backref_set(match);
|
2021-08-11 23:50:59 +03:00
|
|
|
if (set_match) *set_match = match;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2024-01-30 22:16:51 +03:00
|
|
|
return args.result;
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
2021-08-11 23:50:59 +03:00
|
|
|
long
|
|
|
|
rb_reg_search0(VALUE re, VALUE str, long pos, int reverse, int set_backref_str)
|
|
|
|
{
|
|
|
|
return rb_reg_search_set_match(re, str, pos, reverse, set_backref_str, NULL);
|
|
|
|
}
|
|
|
|
|
2014-03-26 03:46:05 +04:00
|
|
|
long
|
|
|
|
rb_reg_search(VALUE re, VALUE str, long pos, int reverse)
|
|
|
|
{
|
2014-03-27 06:58:34 +04:00
|
|
|
return rb_reg_search0(re, str, pos, reverse, 1);
|
2014-03-26 03:46:05 +04:00
|
|
|
}
|
|
|
|
|
2023-07-26 22:57:03 +03:00
|
|
|
static OnigPosition
|
|
|
|
reg_onig_match(regex_t *reg, VALUE str, struct re_registers *regs, void *_)
|
2017-10-21 09:51:01 +03:00
|
|
|
{
|
2020-08-13 14:56:23 +03:00
|
|
|
const char *ptr;
|
|
|
|
long len;
|
|
|
|
RSTRING_GETMEM(str, ptr, len);
|
2017-10-21 09:51:01 +03:00
|
|
|
|
2023-07-26 22:57:03 +03:00
|
|
|
return onig_match(
|
|
|
|
reg,
|
|
|
|
(UChar *)ptr,
|
|
|
|
(UChar *)(ptr + len),
|
|
|
|
(UChar *)ptr,
|
|
|
|
regs,
|
|
|
|
ONIG_OPTION_NONE);
|
|
|
|
}
|
|
|
|
|
|
|
|
bool
|
|
|
|
rb_reg_start_with_p(VALUE re, VALUE str)
|
|
|
|
{
|
|
|
|
VALUE match = rb_backref_get();
|
|
|
|
if (NIL_P(match) || FL_TEST(match, MATCH_BUSY)) {
|
2017-10-21 09:51:01 +03:00
|
|
|
match = match_alloc(rb_cMatch);
|
2023-07-26 22:57:03 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
struct re_registers *regs = RMATCH_REGS(match);
|
|
|
|
|
|
|
|
if (rb_reg_onig_match(re, str, reg_onig_match, NULL, regs) == ONIG_MISMATCH) {
|
|
|
|
rb_backref_set(Qnil);
|
|
|
|
return false;
|
2017-10-21 09:51:01 +03:00
|
|
|
}
|
|
|
|
|
2023-02-10 16:32:46 +03:00
|
|
|
RB_OBJ_WRITE(match, &RMATCH(match)->str, rb_str_new4(str));
|
|
|
|
RB_OBJ_WRITE(match, &RMATCH(match)->regexp, re);
|
2017-10-21 09:51:01 +03:00
|
|
|
rb_backref_set(match);
|
|
|
|
|
|
|
|
return true;
|
|
|
|
}
|
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_nth_defined(int nth, VALUE match)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
2008-02-16 23:08:35 +03:00
|
|
|
struct re_registers *regs;
|
1998-01-16 15:13:05 +03:00
|
|
|
if (NIL_P(match)) return Qnil;
|
2008-06-02 16:45:42 +04:00
|
|
|
match_check(match);
|
2008-02-16 23:08:35 +03:00
|
|
|
regs = RMATCH_REGS(match);
|
|
|
|
if (nth >= regs->num_regs) {
|
2001-10-03 11:19:19 +04:00
|
|
|
return Qnil;
|
|
|
|
}
|
|
|
|
if (nth < 0) {
|
2008-02-16 23:08:35 +03:00
|
|
|
nth += regs->num_regs;
|
2001-10-03 11:19:19 +04:00
|
|
|
if (nth <= 0) return Qnil;
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
2021-09-15 02:11:05 +03:00
|
|
|
return RBOOL(BEG(nth) != -1);
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_nth_match(int nth, VALUE match)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
2000-01-05 07:41:21 +03:00
|
|
|
VALUE str;
|
2002-08-28 12:05:23 +04:00
|
|
|
long start, end, len;
|
2008-02-16 23:08:35 +03:00
|
|
|
struct re_registers *regs;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
if (NIL_P(match)) return Qnil;
|
2008-06-02 16:45:42 +04:00
|
|
|
match_check(match);
|
2008-02-16 23:08:35 +03:00
|
|
|
regs = RMATCH_REGS(match);
|
|
|
|
if (nth >= regs->num_regs) {
|
1998-01-16 15:13:05 +03:00
|
|
|
return Qnil;
|
|
|
|
}
|
2001-10-03 11:19:19 +04:00
|
|
|
if (nth < 0) {
|
2008-02-16 23:08:35 +03:00
|
|
|
nth += regs->num_regs;
|
2001-10-03 11:19:19 +04:00
|
|
|
if (nth <= 0) return Qnil;
|
|
|
|
}
|
2008-02-16 23:08:35 +03:00
|
|
|
start = BEG(nth);
|
1998-01-16 15:13:05 +03:00
|
|
|
if (start == -1) return Qnil;
|
2008-02-16 23:08:35 +03:00
|
|
|
end = END(nth);
|
1998-01-16 15:13:05 +03:00
|
|
|
len = end - start;
|
2007-08-28 10:45:32 +04:00
|
|
|
str = rb_str_subseq(RMATCH(match)->str, start, len);
|
2000-01-05 07:41:21 +03:00
|
|
|
return str;
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_last_match(VALUE match)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
1999-01-20 07:59:39 +03:00
|
|
|
return rb_reg_nth_match(0, match);
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-18 22:34:40 +03:00
|
|
|
* pre_match -> string
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-18 22:34:40 +03:00
|
|
|
* Returns the substring of the target string from its beginning
|
|
|
|
* up to the first match in +self+ (that is, <tt>self[0]</tt>);
|
|
|
|
* equivalent to regexp global variable <tt>$`</tt>:
|
|
|
|
*
|
|
|
|
* m = /(.)(.)(\d+)(\d)/.match("THX1138.")
|
|
|
|
* # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
|
|
|
|
* m[0] # => "HX1138"
|
|
|
|
* m.pre_match # => "T"
|
|
|
|
*
|
|
|
|
* Related: MatchData#post_match.
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_match_pre(VALUE match)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
2001-10-02 08:31:23 +04:00
|
|
|
VALUE str;
|
2008-02-16 23:08:35 +03:00
|
|
|
struct re_registers *regs;
|
2001-10-02 08:31:23 +04:00
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
if (NIL_P(match)) return Qnil;
|
2008-06-02 16:45:42 +04:00
|
|
|
match_check(match);
|
2008-02-16 23:08:35 +03:00
|
|
|
regs = RMATCH_REGS(match);
|
|
|
|
if (BEG(0) == -1) return Qnil;
|
|
|
|
str = rb_str_subseq(RMATCH(match)->str, 0, BEG(0));
|
2001-10-02 08:31:23 +04:00
|
|
|
return str;
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-18 22:34:40 +03:00
|
|
|
* post_match -> string
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-18 22:34:40 +03:00
|
|
|
* Returns the substring of the target string from
|
|
|
|
* the end of the first match in +self+ (that is, <tt>self[0]</tt>)
|
|
|
|
* to the end of the string;
|
|
|
|
* equivalent to regexp global variable <tt>$'</tt>:
|
|
|
|
*
|
|
|
|
* m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
|
|
|
|
* # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
|
|
|
|
* m[0] # => "HX1138"
|
|
|
|
* m.post_match # => ": The Movie"
|
|
|
|
*
|
|
|
|
* Related: MatchData#pre_match.
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_match_post(VALUE match)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
2001-10-02 08:31:23 +04:00
|
|
|
VALUE str;
|
2002-12-19 18:21:01 +03:00
|
|
|
long pos;
|
2008-02-16 23:08:35 +03:00
|
|
|
struct re_registers *regs;
|
2001-10-02 08:31:23 +04:00
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
if (NIL_P(match)) return Qnil;
|
2008-06-02 16:45:42 +04:00
|
|
|
match_check(match);
|
2008-02-16 23:08:35 +03:00
|
|
|
regs = RMATCH_REGS(match);
|
|
|
|
if (BEG(0) == -1) return Qnil;
|
2002-12-19 18:21:01 +03:00
|
|
|
str = RMATCH(match)->str;
|
2008-02-16 23:08:35 +03:00
|
|
|
pos = END(0);
|
2007-08-28 10:45:32 +04:00
|
|
|
str = rb_str_subseq(str, pos, RSTRING_LEN(str) - pos);
|
2001-10-02 08:31:23 +04:00
|
|
|
return str;
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
2023-06-27 17:14:10 +03:00
|
|
|
static int
|
|
|
|
match_last_index(VALUE match)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
|
|
|
int i;
|
2008-02-16 23:08:35 +03:00
|
|
|
struct re_registers *regs;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2023-06-27 17:14:10 +03:00
|
|
|
if (NIL_P(match)) return -1;
|
2008-06-02 16:45:42 +04:00
|
|
|
match_check(match);
|
2008-02-16 23:08:35 +03:00
|
|
|
regs = RMATCH_REGS(match);
|
2023-06-27 17:14:10 +03:00
|
|
|
if (BEG(0) == -1) return -1;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2008-02-16 23:08:35 +03:00
|
|
|
for (i=regs->num_regs-1; BEG(i) == -1 && i > 0; i--)
|
1998-01-16 15:13:05 +03:00
|
|
|
;
|
2023-06-27 17:14:10 +03:00
|
|
|
return i;
|
|
|
|
}
|
|
|
|
|
|
|
|
VALUE
|
|
|
|
rb_reg_match_last(VALUE match)
|
|
|
|
{
|
|
|
|
int i = match_last_index(match);
|
|
|
|
if (i <= 0) return Qnil;
|
|
|
|
struct re_registers *regs = RMATCH_REGS(match);
|
|
|
|
return rb_str_subseq(RMATCH(match)->str, BEG(i), END(i) - BEG(i));
|
|
|
|
}
|
|
|
|
|
|
|
|
VALUE
|
|
|
|
rb_reg_last_defined(VALUE match)
|
|
|
|
{
|
|
|
|
int i = match_last_index(match);
|
|
|
|
if (i < 0) return Qnil;
|
|
|
|
return RBOOL(i);
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
2019-08-27 05:16:52 +03:00
|
|
|
last_match_getter(ID _x, VALUE *_y)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
1999-01-20 07:59:39 +03:00
|
|
|
return rb_reg_last_match(rb_backref_get());
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
2019-08-27 05:16:52 +03:00
|
|
|
prematch_getter(ID _x, VALUE *_y)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
1999-01-20 07:59:39 +03:00
|
|
|
return rb_reg_match_pre(rb_backref_get());
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
2019-08-27 05:16:52 +03:00
|
|
|
postmatch_getter(ID _x, VALUE *_y)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
1999-01-20 07:59:39 +03:00
|
|
|
return rb_reg_match_post(rb_backref_get());
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
2019-08-27 05:16:52 +03:00
|
|
|
last_paren_match_getter(ID _x, VALUE *_y)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
1999-01-20 07:59:39 +03:00
|
|
|
return rb_reg_match_last(rb_backref_get());
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
match_array(VALUE match, int start)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
2009-02-03 08:20:27 +03:00
|
|
|
struct re_registers *regs;
|
|
|
|
VALUE ary;
|
|
|
|
VALUE target;
|
1998-01-16 15:13:05 +03:00
|
|
|
int i;
|
2004-03-06 01:37:35 +03:00
|
|
|
|
2008-06-02 16:45:42 +04:00
|
|
|
match_check(match);
|
2009-02-03 08:20:27 +03:00
|
|
|
regs = RMATCH_REGS(match);
|
|
|
|
ary = rb_ary_new2(regs->num_regs);
|
|
|
|
target = RMATCH(match)->str;
|
|
|
|
|
2003-07-03 15:02:53 +04:00
|
|
|
for (i=start; i<regs->num_regs; i++) {
|
2001-12-21 12:23:28 +03:00
|
|
|
if (regs->beg[i] == -1) {
|
|
|
|
rb_ary_push(ary, Qnil);
|
2002-05-01 13:41:50 +04:00
|
|
|
}
|
|
|
|
else {
|
2007-08-28 10:45:32 +04:00
|
|
|
VALUE str = rb_str_subseq(target, regs->beg[i], regs->end[i]-regs->beg[i]);
|
2001-12-21 12:23:28 +03:00
|
|
|
rb_ary_push(ary, str);
|
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
return ary;
|
|
|
|
}
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-18 22:34:40 +03:00
|
|
|
* to_a -> array
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-18 22:34:40 +03:00
|
|
|
* Returns the array of matches:
|
|
|
|
*
|
|
|
|
* m = /(.)(.)(\d+)(\d)/.match("THX1138.")
|
|
|
|
* # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
|
2022-04-19 02:19:10 +03:00
|
|
|
* m.to_a # => ["HX1138", "H", "X", "113", "8"]
|
2022-04-18 22:34:40 +03:00
|
|
|
*
|
|
|
|
* Related: MatchData#captures.
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
2003-07-03 15:02:53 +04:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
match_to_a(VALUE match)
|
2003-07-03 15:02:53 +04:00
|
|
|
{
|
|
|
|
return match_array(match, 0);
|
|
|
|
}
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-18 22:34:40 +03:00
|
|
|
* captures -> array
|
|
|
|
*
|
|
|
|
* Returns the array of captures,
|
|
|
|
* which are all matches except <tt>m[0]</tt>:
|
|
|
|
*
|
|
|
|
* m = /(.)(.)(\d+)(\d)/.match("THX1138.")
|
|
|
|
* # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
|
|
|
|
* m[0] # => "HX1138"
|
|
|
|
* m.captures # => ["H", "X", "113", "8"]
|
2003-12-26 18:58:28 +03:00
|
|
|
*
|
2022-04-18 22:34:40 +03:00
|
|
|
* Related: MatchData#to_a.
|
2003-12-26 18:58:28 +03:00
|
|
|
*
|
|
|
|
*/
|
2003-07-03 15:02:53 +04:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
match_captures(VALUE match)
|
2003-07-03 15:02:53 +04:00
|
|
|
{
|
|
|
|
return match_array(match, 1);
|
|
|
|
}
|
|
|
|
|
2006-03-22 18:03:40 +03:00
|
|
|
static int
|
* sprintf.c (rb_str_format): allow %c to print one character
string (e.g. ?x).
* lib/tempfile.rb (Tempfile::make_tmpname): put dot between
basename and pid. [ruby-talk:196272]
* parse.y (do_block): remove -> style block.
* parse.y (parser_yylex): remove tLAMBDA_ARG.
* eval.c (rb_call0): binding for the return event hook should have
consistent scope. [ruby-core:07928]
* eval.c (proc_invoke): return behavior should depend whether it
is surrounded by a lambda or a mere block.
* eval.c (formal_assign): handles post splat arguments.
* eval.c (rb_call0): ditto.
* st.c (strhash): use FNV-1a hash.
* parse.y (parser_yylex): removed experimental ';;' terminator.
* eval.c (rb_node_arity): should be aware of post splat arguments.
* eval.c (rb_proc_arity): ditto.
* parse.y (f_args): syntax rule enhanced to support arguments
after the splat.
* parse.y (block_param): ditto for block parameters.
* parse.y (f_post_arg): mandatory formal arguments after the splat
argument.
* parse.y (new_args_gen): generate nodes for mandatory formal
arguments after the splat argument.
* eval.c (rb_eval): dispatch mandatory formal arguments after the
splat argument.
* parse.y (args): allow more than one splat in the argument list.
* parse.y (method_call): allow aref [] to accept all kind of
method argument, including assocs, splat, and block argument.
* eval.c (SETUP_ARGS0): prepare block argument as well.
* lib/mathn.rb (Integer): remove Integer#gcd2. [ruby-core:07931]
* eval.c (error_line): print receivers true/false/nil specially.
* eval.c (rb_proc_yield): handles parameters in yield semantics.
* eval.c (nil_yield): gives LocalJumpError to denote no block
error.
* io.c (rb_io_getc): now takes one-character string.
* string.c (rb_str_hash): use FNV-1a hash from Fowler/Noll/Vo
hashing algorithm.
* string.c (rb_str_aref): str[0] now returns 1 character string,
instead of a fixnum. [Ruby2]
* parse.y (parser_yylex): ?c now returns 1 character string,
instead of a fixnum. [Ruby2]
* string.c (rb_str_aset): no longer support fixnum insertion.
* eval.c (umethod_bind): should not update original class.
[ruby-dev:28636]
* eval.c (ev_const_get): should support constant access from
within instance_eval(). [ruby-dev:28327]
* time.c (time_timeval): should round for usec floating
number. [ruby-core:07896]
* time.c (time_add): ditto.
* dir.c (sys_warning): should not call a vararg function
rb_sys_warning() indirectly. [ruby-core:07886]
* numeric.c (flo_divmod): the first element of Float#divmod should
be an integer. [ruby-dev:28589]
* test/ruby/test_float.rb: add tests for divmod, div, modulo and remainder.
* re.c (rb_reg_initialize): should not allow modifying literal
regexps. frozen check moved from rb_reg_initialize_m as well.
* re.c (rb_reg_initialize): should not modify untainted objects in
safe levels higher than 3.
* re.c (rb_memcmp): type change from char* to const void*.
* dir.c (dir_close): should not close untainted dir stream.
* dir.c (GetDIR): add tainted/frozen check for each dir operation.
* lib/rdoc/parsers/parse_rb.rb (RDoc::RubyParser::parse_symbol_arg):
typo fixed. a patch from Florian Gross <florg at florg.net>.
* eval.c (EXEC_EVENT_HOOK): trace_func may remove itself from
event_hooks. no guarantee for arbitrary hook deletion.
[ruby-dev:28632]
* util.c (ruby_strtod): differ addition to minimize error.
[ruby-dev:28619]
* util.c (ruby_strtod): should not raise ERANGE when the input
string does not have any digits. [ruby-dev:28629]
* eval.c (proc_invoke): should restore old ruby_frame->block.
thanks to ts <decoux at moulon.inra.fr>. [ruby-core:07833]
also fix [ruby-dev:28614] as well.
* signal.c (trap): sig should be less then NSIG. Coverity found
this bug. a patch from Kevin Tew <tewk at tewk.com>.
[ruby-core:07823]
* math.c (math_log2): add new method inspired by
[ruby-talk:191237].
* math.c (math_log): add optional base argument to Math::log().
[ruby-talk:191308]
* ext/syck/emitter.c (syck_scan_scalar): avoid accessing
uninitialized array element. a patch from Pat Eyler
<rubypate at gmail.com>. [ruby-core:07809]
* array.c (rb_ary_fill): initialize local variables first. a
patch from Pat Eyler <rubypate at gmail.com>. [ruby-core:07810]
* ext/syck/yaml2byte.c (syck_yaml2byte_handler): need to free
type_tag. a patch from Pat Eyler <rubypate at gmail.com>.
[ruby-core:07808]
* ext/socket/socket.c (make_hostent_internal): accept ai_family
check from Sam Roberts <sroberts at uniserve.com>.
[ruby-core:07691]
* util.c (ruby_strtod): should not cut off 18 digits for no
reason. [ruby-core:07796]
* array.c (rb_ary_fill): internalize local variable "beg" to
pacify Coverity. [ruby-core:07770]
* pack.c (pack_unpack): now supports CRLF newlines. a patch from
<tommy at tmtm.org>. [ruby-dev:28601]
* applied code clean-up patch from Stefan Huehner
<stefan at huehner.org>. [ruby-core:07764]
* lib/jcode.rb (String::tr_s): should have translated non
squeezing character sequence (i.e. a character) as well. thanks
to Hiroshi Ichikawa <gimite at gimite.ddo.jp> [ruby-list:42090]
* ext/socket/socket.c: document update patch from Sam Roberts
<sroberts at uniserve.com>. [ruby-core:07701]
* lib/mathn.rb (Integer): need not to remove gcd2. a patch from
NARUSE, Yui <naruse at airemix.com>. [ruby-dev:28570]
* parse.y (arg): too much NEW_LIST()
* eval.c (SETUP_ARGS0): remove unnecessary access to nd_alen.
* eval.c (rb_eval): use ARGSCAT for NODE_OP_ASGN1.
[ruby-dev:28585]
* parse.y (arg): use NODE_ARGSCAT for placeholder.
* lib/getoptlong.rb (GetoptLong::get): RDoc update patch from
mathew <meta at pobox.com>. [ruby-core:07738]
* variable.c (rb_const_set): raise error when no target klass is
supplied. [ruby-dev:28582]
* prec.c (prec_prec_f): documentation patch from
<gerardo.santana at gmail.com>. [ruby-core:07689]
* bignum.c (rb_big_pow): second operand may be too big even if
it's a Fixnum. [ruby-talk:187984]
* README.EXT: update symbol description. [ruby-talk:188104]
* COPYING: explicitly note GPLv2. [ruby-talk:187922]
* parse.y: remove some obsolete syntax rules (unparenthesized
method calls in argument list).
* eval.c (rb_call0): insecure calling should be checked for non
NODE_SCOPE method invocations too.
* eval.c (rb_alias): should preserve the current safe level as
well as method definition.
* process.c (rb_f_sleep): remove RDoc description about SIGALRM
which is not valid on the current implementation. [ruby-dev:28464]
Thu Mar 23 21:40:47 2006 K.Kosako <sndgk393 AT ybb.ne.jp>
* eval.c (method_missing): should support argument splat in
super. a bug in combination of super, splat and
method_missing. [ruby-talk:185438]
* configure.in: Solaris SunPro compiler -rapth patch from
<kuwa at labs.fujitsu.com>. [ruby-dev:28443]
* configure.in: remove enable_rpath=no for Solaris.
[ruby-dev:28440]
* ext/win32ole/win32ole.c (ole_val2olevariantdata): change behavior
of converting OLE Variant object with VT_ARRAY|VT_UI1 and Ruby
String object.
* ruby.1: a clarification patch from David Lutterkort
<dlutter at redhat.com>. [ruby-core:7508]
* lib/rdoc/ri/ri_paths.rb (RI::Paths): adding paths from rubygems
directories. a patch from Eric Hodel <drbrain at segment7.net>.
[ruby-core:07423]
* eval.c (rb_clear_cache_by_class): clearing wrong cache.
* ext/extmk.rb: use :remove_destination to install extension libraries
to avoid SEGV. [ruby-dev:28417]
* eval.c (rb_thread_fd_writable): should not re-schedule output
from KILLED thread (must be error printing).
* array.c (rb_ary_flatten_bang): allow specifying recursion
level. [ruby-talk:182170]
* array.c (rb_ary_flatten): ditto.
* gc.c (add_heap): a heap_slots may overflow. a patch from Stefan
Weil <weil at mail.berlios.de>.
* eval.c (rb_call): use separate cache for fcall/vcall
invocation.
* eval.c (rb_eval): NODE_FCALL, NODE_VCALL can call local
functions.
* eval.c (rb_mod_local): a new method to specify newly added
visibility "local".
* eval.c (search_method): search for local methods which are
visible only from the current class.
* class.c (rb_class_local_methods): a method to list local methods.
* object.c (Init_Object): add BasicObject class as a top level
BlankSlate class.
* ruby.h (SYM2ID): should not cast to signed long.
[ruby-core:07414]
* class.c (rb_include_module): allow module duplication.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@10235 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2006-06-10 01:20:17 +04:00
|
|
|
name_to_backref_number(struct re_registers *regs, VALUE regexp, const char* name, const char* name_end)
|
2006-03-22 18:03:40 +03:00
|
|
|
{
|
2020-01-16 05:25:43 +03:00
|
|
|
if (NIL_P(regexp)) return -1;
|
2016-02-02 07:39:44 +03:00
|
|
|
return onig_name_to_backref_number(RREGEXP_PTR(regexp),
|
2016-12-18 14:43:51 +03:00
|
|
|
(const unsigned char *)name, (const unsigned char *)name_end, regs);
|
2014-06-04 16:33:18 +04:00
|
|
|
}
|
2012-04-14 03:45:37 +04:00
|
|
|
|
2016-12-19 06:11:57 +03:00
|
|
|
/*
 * Evaluates to 0 when `re` is nil or when rb_enc_compatible() rejects the
 * pairing of the regexp source with `name`; otherwise evaluates to
 * name_to_backref_number(regs, re, name_ptr, name_end).
 * Note that `re` is evaluated more than once.
 */
#define NAME_TO_NUMBER(regs, re, name, name_ptr, name_end) \
    ((NIL_P(re) || !rb_enc_compatible(RREGEXP_SRC(re), (name))) ? 0 : \
     name_to_backref_number((regs), (re), (name_ptr), (name_end)))
|
|
|
|
|
2016-05-17 20:10:01 +03:00
|
|
|
static int
|
|
|
|
namev_to_backref_number(struct re_registers *regs, VALUE re, VALUE name)
|
|
|
|
{
|
|
|
|
int num;
|
|
|
|
|
2016-12-19 07:07:40 +03:00
|
|
|
if (SYMBOL_P(name)) {
|
2016-05-17 20:10:01 +03:00
|
|
|
name = rb_sym2str(name);
|
2016-12-19 07:07:40 +03:00
|
|
|
}
|
|
|
|
else if (!RB_TYPE_P(name, T_STRING)) {
|
2016-05-17 20:10:01 +03:00
|
|
|
return -1;
|
|
|
|
}
|
2016-12-19 07:07:40 +03:00
|
|
|
num = NAME_TO_NUMBER(regs, re, name,
|
|
|
|
RSTRING_PTR(name), RSTRING_END(name));
|
|
|
|
if (num < 1) {
|
|
|
|
name_to_backref_error(name);
|
|
|
|
}
|
|
|
|
return num;
|
2016-05-17 20:10:01 +03:00
|
|
|
}
|
|
|
|
|
2016-05-18 07:56:02 +03:00
|
|
|
static VALUE
|
|
|
|
match_ary_subseq(VALUE match, long beg, long len, VALUE result)
|
|
|
|
{
|
|
|
|
long olen = RMATCH_REGS(match)->num_regs;
|
|
|
|
long j, end = olen < beg+len ? olen : beg+len;
|
|
|
|
if (NIL_P(result)) result = rb_ary_new_capa(len);
|
|
|
|
if (len == 0) return result;
|
|
|
|
|
|
|
|
for (j = beg; j < end; j++) {
|
|
|
|
rb_ary_push(result, rb_reg_nth_match((int)j, match));
|
|
|
|
}
|
|
|
|
if (beg + len > j) {
|
|
|
|
rb_ary_resize(result, RARRAY_LEN(result) + (beg + len) - j);
|
|
|
|
}
|
|
|
|
return result;
|
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
match_ary_aref(VALUE match, VALUE idx, VALUE result)
|
|
|
|
{
|
|
|
|
long beg, len;
|
|
|
|
int num_regs = RMATCH_REGS(match)->num_regs;
|
|
|
|
|
|
|
|
/* check if idx is Range */
|
|
|
|
switch (rb_range_beg_len(idx, &beg, &len, (long)num_regs, !NIL_P(result))) {
|
|
|
|
case Qfalse:
|
|
|
|
if (NIL_P(result)) return rb_reg_nth_match(NUM2INT(idx), match);
|
|
|
|
rb_ary_push(result, rb_reg_nth_match(NUM2INT(idx), match));
|
|
|
|
return result;
|
|
|
|
case Qnil:
|
|
|
|
return Qnil;
|
|
|
|
default:
|
|
|
|
return match_ary_subseq(match, beg, len, result);
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-18 23:52:07 +03:00
|
|
|
* matchdata[index] -> string or nil
|
|
|
|
* matchdata[start, length] -> array
|
|
|
|
* matchdata[range] -> array
|
|
|
|
* matchdata[name] -> string or nil
|
|
|
|
*
|
|
|
|
* When arguments +index+, +start+ and +length+, or +range+ are given,
|
|
|
|
* returns match and captures in the style of Array#[]:
|
|
|
|
*
|
|
|
|
* m = /(.)(.)(\d+)(\d)/.match("THX1138.")
|
|
|
|
* # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
|
|
|
|
* m[0] # => "HX1138"
|
|
|
|
* m[1, 2] # => ["H", "X"]
|
|
|
|
* m[1..3] # => ["H", "X", "113"]
|
|
|
|
* m[-3, 2] # => ["X", "113"]
|
|
|
|
*
|
|
|
|
* When string or symbol argument +name+ is given,
|
|
|
|
* returns the matched substring for the given name:
|
|
|
|
*
|
|
|
|
* m = /(?<foo>.)(.)(?<bar>.+)/.match("hoge")
|
|
|
|
* # => #<MatchData "hoge" foo:"h" bar:"ge">
|
|
|
|
* m['foo'] # => "h"
|
|
|
|
* m[:bar] # => "ge"
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2023-12-19 06:59:43 +03:00
|
|
|
* If multiple captures have the same name, returns the last matched
|
|
|
|
* substring.
|
|
|
|
*
|
|
|
|
* m = /(?<foo>.)(?<foo>.+)/.match("hoge")
|
|
|
|
* # => #<MatchData "hoge" foo:"h" foo:"oge">
|
|
|
|
* m[:foo] #=> "oge"
|
|
|
|
*
|
|
|
|
* m = /\W(?<foo>.+)|\w(?<foo>.+)|(?<foo>.+)/.match("hoge")
|
|
|
|
* # => #<MatchData "hoge" foo:nil foo:"oge" foo:nil>
|
|
|
|
* m[:foo] #=> "oge"
|
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
1998-01-16 15:19:22 +03:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
match_aref(int argc, VALUE *argv, VALUE match)
|
1998-01-16 15:19:22 +03:00
|
|
|
{
|
2016-05-18 07:56:02 +03:00
|
|
|
VALUE idx, length;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
2008-06-02 16:45:42 +04:00
|
|
|
match_check(match);
|
2016-05-18 07:56:02 +03:00
|
|
|
rb_scan_args(argc, argv, "11", &idx, &length);
|
1998-01-16 15:19:22 +03:00
|
|
|
|
2016-05-18 07:56:02 +03:00
|
|
|
if (NIL_P(length)) {
|
2010-02-13 22:31:42 +03:00
|
|
|
if (FIXNUM_P(idx)) {
|
2016-05-17 20:10:01 +03:00
|
|
|
return rb_reg_nth_match(FIX2INT(idx), match);
|
2010-02-13 22:31:42 +03:00
|
|
|
}
|
|
|
|
else {
|
2016-05-17 20:10:01 +03:00
|
|
|
int num = namev_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, idx);
|
|
|
|
if (num >= 0) {
|
2010-02-13 22:31:42 +03:00
|
|
|
return rb_reg_nth_match(num, match);
|
|
|
|
}
|
2016-05-18 07:56:02 +03:00
|
|
|
else {
|
|
|
|
return match_ary_aref(match, idx, Qnil);
|
|
|
|
}
|
2010-02-13 22:31:42 +03:00
|
|
|
}
|
1998-01-16 15:19:22 +03:00
|
|
|
}
|
2016-05-18 07:56:02 +03:00
|
|
|
else {
|
|
|
|
long beg = NUM2LONG(idx);
|
|
|
|
long len = NUM2LONG(length);
|
|
|
|
long num_regs = RMATCH_REGS(match)->num_regs;
|
|
|
|
if (len < 0) {
|
|
|
|
return Qnil;
|
|
|
|
}
|
|
|
|
if (beg < 0) {
|
|
|
|
beg += num_regs;
|
|
|
|
if (beg < 0) return Qnil;
|
|
|
|
}
|
|
|
|
else if (beg > num_regs) {
|
|
|
|
return Qnil;
|
|
|
|
}
|
2022-03-31 12:01:15 +03:00
|
|
|
if (beg+len > num_regs) {
|
2016-05-18 07:56:02 +03:00
|
|
|
len = num_regs - beg;
|
|
|
|
}
|
|
|
|
return match_ary_subseq(match, beg, len, Qnil);
|
|
|
|
}
|
1998-01-16 15:19:22 +03:00
|
|
|
}
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-18 23:52:07 +03:00
|
|
|
* values_at(*indexes) -> array
|
|
|
|
*
|
|
|
|
* Returns match and captures at the given +indexes+,
|
|
|
|
* which may include any mixture of:
|
|
|
|
*
|
|
|
|
* - Integers.
|
|
|
|
* - Ranges.
|
|
|
|
* - Names (strings and symbols).
|
2007-10-12 18:35:26 +04:00
|
|
|
*
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-18 23:52:07 +03:00
|
|
|
* Examples:
|
|
|
|
*
|
|
|
|
* m = /(.)(.)(\d+)(\d)/.match("THX1138: The Movie")
|
|
|
|
* # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
|
2022-04-19 02:19:10 +03:00
|
|
|
* m.values_at(0, 2, -2) # => ["HX1138", "X", "113"]
|
|
|
|
* m.values_at(1..2, -1) # => ["H", "X", "8"]
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-18 23:52:07 +03:00
|
|
|
* m = /(?<a>\d+) *(?<op>[+\-*\/]) *(?<b>\d+)/.match("1 + 2")
|
|
|
|
* # => #<MatchData "1 + 2" a:"1" op:"+" b:"2">
|
|
|
|
* m.values_at(0, 1..2, :a, :b, :op)
|
|
|
|
* # => ["1 + 2", "1", "+", "1", "2", "+"]
|
2016-05-17 20:10:01 +03:00
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
2001-12-11 06:48:08 +03:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
match_values_at(int argc, VALUE *argv, VALUE match)
|
2001-12-11 06:48:08 +03:00
|
|
|
{
|
2016-05-17 20:10:01 +03:00
|
|
|
VALUE result;
|
|
|
|
int i;
|
2009-02-03 08:20:27 +03:00
|
|
|
|
2008-06-02 16:45:42 +04:00
|
|
|
match_check(match);
|
2016-05-17 20:10:01 +03:00
|
|
|
result = rb_ary_new2(argc);
|
|
|
|
|
|
|
|
for (i=0; i<argc; i++) {
|
|
|
|
if (FIXNUM_P(argv[i])) {
|
|
|
|
rb_ary_push(result, rb_reg_nth_match(FIX2INT(argv[i]), match));
|
|
|
|
}
|
2016-05-18 07:56:02 +03:00
|
|
|
else {
|
|
|
|
int num = namev_to_backref_number(RMATCH_REGS(match), RMATCH(match)->regexp, argv[i]);
|
|
|
|
if (num >= 0) {
|
|
|
|
rb_ary_push(result, rb_reg_nth_match(num, match));
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
match_ary_aref(match, argv[i], result);
|
2016-05-17 21:16:08 +03:00
|
|
|
}
|
2016-05-17 20:10:01 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
return result;
|
2001-12-11 06:48:08 +03:00
|
|
|
}
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-19 02:19:10 +03:00
|
|
|
* to_s -> string
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-19 02:19:10 +03:00
|
|
|
* Returns the matched string:
|
|
|
|
*
|
|
|
|
* m = /(.)(.)(\d+)(\d)/.match("THX1138.")
|
|
|
|
* # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
|
|
|
|
* m.to_s # => "HX1138"
|
|
|
|
*
|
|
|
|
* m = /(?<foo>.)(.)(?<bar>.+)/.match("hoge")
|
|
|
|
* # => #<MatchData "hoge" foo:"h" bar:"ge">
|
|
|
|
* m.to_s # => "hoge"
|
|
|
|
*
|
|
|
|
* Related: MatchData#inspect.
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
match_to_s(VALUE match)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
2022-11-08 09:06:15 +03:00
|
|
|
VALUE str = rb_reg_last_match(match_check(match));
|
1998-01-16 15:13:05 +03:00
|
|
|
|
1999-12-01 12:24:48 +03:00
|
|
|
if (NIL_P(str)) str = rb_str_new(0,0);
|
1998-01-16 15:13:05 +03:00
|
|
|
return str;
|
|
|
|
}
|
|
|
|
|
2016-02-18 08:29:18 +03:00
|
|
|
static int
|
|
|
|
match_named_captures_iter(const OnigUChar *name, const OnigUChar *name_end,
|
2022-11-08 09:06:15 +03:00
|
|
|
int back_num, int *back_refs, OnigRegex regex, void *arg)
|
|
|
|
{
|
2016-02-18 08:29:18 +03:00
|
|
|
struct MEMO *memo = MEMO_CAST(arg);
|
|
|
|
VALUE hash = memo->v1;
|
|
|
|
VALUE match = memo->v2;
|
2022-08-06 03:13:09 +03:00
|
|
|
long symbolize = memo->u3.state;
|
2016-02-18 08:29:18 +03:00
|
|
|
|
|
|
|
VALUE key = rb_enc_str_new((const char *)name, name_end-name, regex->enc);
|
2022-08-06 03:13:09 +03:00
|
|
|
|
|
|
|
if (symbolize > 0) {
|
|
|
|
key = rb_str_intern(key);
|
|
|
|
}
|
|
|
|
|
2016-02-18 08:29:18 +03:00
|
|
|
VALUE value;
|
|
|
|
|
|
|
|
int i;
|
|
|
|
int found = 0;
|
|
|
|
|
|
|
|
for (i = 0; i < back_num; i++) {
|
|
|
|
value = rb_reg_nth_match(back_refs[i], match);
|
|
|
|
if (RTEST(value)) {
|
|
|
|
rb_hash_aset(hash, key, value);
|
|
|
|
found = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
if (found == 0) {
|
|
|
|
rb_hash_aset(hash, key, Qnil);
|
|
|
|
}
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* call-seq:
|
2023-04-19 02:19:31 +03:00
|
|
|
* named_captures(symbolize_names: false) -> hash
|
2016-02-18 08:29:18 +03:00
|
|
|
*
|
2022-04-19 02:19:10 +03:00
|
|
|
* Returns a hash of the named captures;
|
|
|
|
* each key is a capture name; each value is its captured string or +nil+:
|
2016-02-18 08:29:18 +03:00
|
|
|
*
|
2022-04-19 02:19:10 +03:00
|
|
|
* m = /(?<foo>.)(.)(?<bar>.+)/.match("hoge")
|
|
|
|
* # => #<MatchData "hoge" foo:"h" bar:"ge">
|
|
|
|
* m.named_captures # => {"foo"=>"h", "bar"=>"ge"}
|
2016-02-18 08:29:18 +03:00
|
|
|
*
|
2022-04-19 02:19:10 +03:00
|
|
|
* m = /(?<a>.)(?<b>.)/.match("01")
|
|
|
|
* # => #<MatchData "01" a:"0" b:"1">
|
|
|
|
* m.named_captures #=> {"a" => "0", "b" => "1"}
|
2016-02-18 08:29:18 +03:00
|
|
|
*
|
2022-04-19 02:19:10 +03:00
|
|
|
* m = /(?<a>.)(?<b>.)?/.match("0")
|
|
|
|
* # => #<MatchData "0" a:"0" b:nil>
|
|
|
|
* m.named_captures #=> {"a" => "0", "b" => nil}
|
2016-02-18 08:29:18 +03:00
|
|
|
*
|
2022-04-19 02:19:10 +03:00
|
|
|
* m = /(?<a>.)(?<a>.)/.match("01")
|
|
|
|
* # => #<MatchData "01" a:"0" a:"1">
|
|
|
|
* m.named_captures #=> {"a" => "1"}
|
2016-02-18 08:29:18 +03:00
|
|
|
*
|
2023-12-15 00:01:48 +03:00
|
|
|
* If keyword argument +symbolize_names+ is given
|
|
|
|
* a true value, the keys in the resulting hash are Symbols:
|
2023-04-19 02:19:31 +03:00
|
|
|
*
|
|
|
|
* m = /(?<a>.)(?<a>.)/.match("01")
|
|
|
|
* # => #<MatchData "01" a:"0" a:"1">
|
|
|
|
* m.named_captures(symbolize_names: true) #=> {:a => "1"}
|
|
|
|
*
|
2016-02-18 08:29:18 +03:00
|
|
|
*/
|
|
|
|
|
|
|
|
static VALUE
|
2023-04-19 02:19:31 +03:00
|
|
|
match_named_captures(int argc, VALUE *argv, VALUE match)
|
2016-02-18 08:29:18 +03:00
|
|
|
{
|
|
|
|
VALUE hash;
|
|
|
|
struct MEMO *memo;
|
|
|
|
|
|
|
|
match_check(match);
|
2016-12-20 10:32:23 +03:00
|
|
|
if (NIL_P(RMATCH(match)->regexp))
|
|
|
|
return rb_hash_new();
|
2016-02-18 08:29:18 +03:00
|
|
|
|
2023-04-19 02:19:31 +03:00
|
|
|
VALUE opt;
|
|
|
|
VALUE symbolize_names = 0;
|
|
|
|
|
|
|
|
rb_scan_args(argc, argv, "0:", &opt);
|
|
|
|
|
|
|
|
if (!NIL_P(opt)) {
|
|
|
|
static ID keyword_ids[1];
|
|
|
|
|
|
|
|
VALUE symbolize_names_val;
|
|
|
|
|
|
|
|
if (!keyword_ids[0]) {
|
|
|
|
keyword_ids[0] = rb_intern_const("symbolize_names");
|
|
|
|
}
|
|
|
|
rb_get_kwargs(opt, keyword_ids, 0, 1, &symbolize_names_val);
|
|
|
|
if (!UNDEF_P(symbolize_names_val) && RTEST(symbolize_names_val)) {
|
|
|
|
symbolize_names = 1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2016-02-18 08:29:18 +03:00
|
|
|
hash = rb_hash_new();
|
2023-04-19 02:19:31 +03:00
|
|
|
memo = MEMO_NEW(hash, match, symbolize_names);
|
2016-02-18 08:29:18 +03:00
|
|
|
|
|
|
|
onig_foreach_name(RREGEXP(RMATCH(match)->regexp)->ptr, match_named_captures_iter, (void*)memo);
|
|
|
|
|
|
|
|
return hash;
|
|
|
|
}
|
2003-12-26 18:58:28 +03:00
|
|
|
|
2022-08-06 03:13:09 +03:00
|
|
|
/*
|
|
|
|
* call-seq:
|
|
|
|
* deconstruct_keys(array_of_names) -> hash
|
|
|
|
*
|
|
|
|
* Returns a hash of the named captures for the given names.
|
|
|
|
*
|
|
|
|
* m = /(?<hours>\d{2}):(?<minutes>\d{2}):(?<seconds>\d{2})/.match("18:37:22")
|
|
|
|
* m.deconstruct_keys([:hours, :minutes]) # => {:hours => "18", :minutes => "37"}
|
|
|
|
* m.deconstruct_keys(nil) # => {:hours => "18", :minutes => "37", :seconds => "22"}
|
|
|
|
*
|
2023-10-20 01:18:03 +03:00
|
|
|
* Returns an empty hash if no named captures were defined:
|
2022-08-06 03:13:09 +03:00
|
|
|
*
|
|
|
|
* m = /(\d{2}):(\d{2}):(\d{2})/.match("18:37:22")
|
|
|
|
* m.deconstruct_keys(nil) # => {}
|
|
|
|
*
|
|
|
|
*/
|
|
|
|
static VALUE
|
|
|
|
match_deconstruct_keys(VALUE match, VALUE keys)
|
|
|
|
{
|
|
|
|
VALUE h;
|
|
|
|
long i;
|
|
|
|
|
|
|
|
match_check(match);
|
|
|
|
|
|
|
|
if (NIL_P(RMATCH(match)->regexp)) {
|
|
|
|
return rb_hash_new_with_size(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
if (NIL_P(keys)) {
|
|
|
|
h = rb_hash_new_with_size(onig_number_of_names(RREGEXP_PTR(RMATCH(match)->regexp)));
|
|
|
|
|
|
|
|
struct MEMO *memo;
|
|
|
|
memo = MEMO_NEW(h, match, 1);
|
|
|
|
|
|
|
|
onig_foreach_name(RREGEXP_PTR(RMATCH(match)->regexp), match_named_captures_iter, (void*)memo);
|
|
|
|
|
|
|
|
return h;
|
|
|
|
}
|
|
|
|
|
2022-10-10 07:21:57 +03:00
|
|
|
Check_Type(keys, T_ARRAY);
|
2022-08-06 03:13:09 +03:00
|
|
|
|
|
|
|
if (onig_number_of_names(RREGEXP_PTR(RMATCH(match)->regexp)) < RARRAY_LEN(keys)) {
|
|
|
|
return rb_hash_new_with_size(0);
|
|
|
|
}
|
|
|
|
|
|
|
|
h = rb_hash_new_with_size(RARRAY_LEN(keys));
|
|
|
|
|
|
|
|
for (i=0; i<RARRAY_LEN(keys); i++) {
|
|
|
|
VALUE key = RARRAY_AREF(keys, i);
|
|
|
|
VALUE name;
|
|
|
|
|
2022-10-10 07:22:15 +03:00
|
|
|
Check_Type(key, T_SYMBOL);
|
2022-08-06 03:13:09 +03:00
|
|
|
|
|
|
|
name = rb_sym2str(key);
|
|
|
|
|
|
|
|
int num = NAME_TO_NUMBER(RMATCH_REGS(match), RMATCH(match)->regexp, RMATCH(match)->regexp,
|
|
|
|
RSTRING_PTR(name), RSTRING_END(name));
|
|
|
|
|
|
|
|
if (num >= 0) {
|
|
|
|
rb_hash_aset(h, key, rb_reg_nth_match(num, match));
|
2022-10-10 07:21:57 +03:00
|
|
|
}
|
2022-10-10 07:22:15 +03:00
|
|
|
else {
|
2022-08-06 03:13:09 +03:00
|
|
|
return h;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
|
|
|
return h;
|
|
|
|
}
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-19 02:19:10 +03:00
|
|
|
* string -> string
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-19 02:19:10 +03:00
|
|
|
* Returns the target string if it was frozen;
|
|
|
|
* otherwise, returns a frozen copy of the target string:
|
|
|
|
*
|
|
|
|
* m = /(.)(.)(\d+)(\d)/.match("THX1138.")
|
|
|
|
* # => #<MatchData "HX1138" 1:"H" 2:"X" 3:"113" 4:"8">
|
|
|
|
* m.string # => "THX1138."
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
match_string(VALUE match)
|
1999-08-13 09:45:20 +04:00
|
|
|
{
|
2008-06-02 16:45:42 +04:00
|
|
|
match_check(match);
|
1999-10-15 12:52:18 +04:00
|
|
|
return RMATCH(match)->str; /* str is frozen */
|
1999-08-13 09:45:20 +04:00
|
|
|
}
|
|
|
|
|
2007-12-09 10:12:44 +03:00
|
|
|
struct backref_name_tag {
|
|
|
|
const UChar *name;
|
|
|
|
long len;
|
|
|
|
};
|
|
|
|
|
|
|
|
static int
|
|
|
|
match_inspect_name_iter(const OnigUChar *name, const OnigUChar *name_end,
|
|
|
|
int back_num, int *back_refs, OnigRegex regex, void *arg0)
|
|
|
|
{
|
|
|
|
struct backref_name_tag *arg = (struct backref_name_tag *)arg0;
|
|
|
|
int i;
|
|
|
|
|
|
|
|
for (i = 0; i < back_num; i++) {
|
|
|
|
arg[back_refs[i]].name = name;
|
|
|
|
arg[back_refs[i]].len = name_end - name;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-12-02 17:42:05 +03:00
|
|
|
/*
|
2023-10-20 12:26:37 +03:00
|
|
|
* call-seq:
|
|
|
|
* inspect -> string
|
2007-12-02 17:42:05 +03:00
|
|
|
*
|
2023-10-20 12:26:37 +03:00
|
|
|
* Returns a string representation of +self+:
|
2007-12-02 17:42:05 +03:00
|
|
|
*
|
2022-04-19 02:19:10 +03:00
|
|
|
* m = /.$/.match("foo")
|
|
|
|
* # => #<MatchData "o">
|
|
|
|
* m.inspect # => "#<MatchData \"o\">"
|
2007-12-02 17:42:05 +03:00
|
|
|
*
|
2022-04-19 02:19:10 +03:00
|
|
|
* m = /(.)(.)(.)/.match("foo")
|
|
|
|
* # => #<MatchData "foo" 1:"f" 2:"o" 3:"o">
|
|
|
|
* m.inspect # => "#<MatchData \"foo\" 1:\"f\" 2:\"o\" 3:\"o\">"
|
2007-12-09 10:35:54 +03:00
|
|
|
*
|
2022-04-19 02:19:10 +03:00
|
|
|
* m = /(.)(.)?(.)/.match("fo")
|
|
|
|
* # => #<MatchData "fo" 1:"f" 2:nil 3:"o">
|
|
|
|
* m.inspect # => "#<MatchData \"fo\" 1:\"f\" 2:nil 3:\"o\">"
|
2007-12-09 16:35:38 +03:00
|
|
|
*
|
2022-04-19 02:19:10 +03:00
|
|
|
* Related: MatchData#to_s.
|
2007-12-02 17:42:05 +03:00
|
|
|
*/
|
|
|
|
|
2007-06-23 12:26:08 +04:00
|
|
|
static VALUE
|
|
|
|
match_inspect(VALUE match)
|
|
|
|
{
|
2014-03-27 13:08:54 +04:00
|
|
|
VALUE cname = rb_class_path(rb_obj_class(match));
|
2007-06-23 12:26:08 +04:00
|
|
|
VALUE str;
|
|
|
|
int i;
|
2008-02-16 23:08:35 +03:00
|
|
|
struct re_registers *regs = RMATCH_REGS(match);
|
|
|
|
int num_regs = regs->num_regs;
|
2007-12-09 10:12:44 +03:00
|
|
|
struct backref_name_tag *names;
|
2007-12-10 00:44:19 +03:00
|
|
|
VALUE regexp = RMATCH(match)->regexp;
|
2007-12-09 10:12:44 +03:00
|
|
|
|
2008-02-16 14:13:47 +03:00
|
|
|
if (regexp == 0) {
|
2014-03-27 13:08:54 +04:00
|
|
|
return rb_sprintf("#<%"PRIsVALUE":%p>", cname, (void*)match);
|
2008-02-16 14:13:47 +03:00
|
|
|
}
|
2014-03-27 13:58:12 +04:00
|
|
|
else if (NIL_P(regexp)) {
|
|
|
|
return rb_sprintf("#<%"PRIsVALUE": %"PRIsVALUE">",
|
|
|
|
cname, rb_reg_nth_match(0, match));
|
|
|
|
}
|
2008-02-16 14:13:47 +03:00
|
|
|
|
2007-12-09 10:12:44 +03:00
|
|
|
names = ALLOCA_N(struct backref_name_tag, num_regs);
|
|
|
|
MEMZERO(names, struct backref_name_tag, num_regs);
|
|
|
|
|
2016-02-02 07:39:44 +03:00
|
|
|
onig_foreach_name(RREGEXP_PTR(regexp),
|
2007-12-09 10:12:44 +03:00
|
|
|
match_inspect_name_iter, names);
|
2007-06-23 12:26:08 +04:00
|
|
|
|
|
|
|
str = rb_str_buf_new2("#<");
|
2014-03-27 13:08:54 +04:00
|
|
|
rb_str_append(str, cname);
|
2007-06-23 12:26:08 +04:00
|
|
|
|
2007-12-09 10:12:44 +03:00
|
|
|
for (i = 0; i < num_regs; i++) {
|
2007-06-23 12:26:08 +04:00
|
|
|
VALUE v;
|
|
|
|
rb_str_buf_cat2(str, " ");
|
2007-12-09 10:12:44 +03:00
|
|
|
if (0 < i) {
|
|
|
|
if (names[i].name)
|
|
|
|
rb_str_buf_cat(str, (const char *)names[i].name, names[i].len);
|
|
|
|
else {
|
* compile.c (insn_data_to_s_detail), file.c (rb_stat_inspect),
iseq.c (ruby_iseq_disasm_insn, ruby_iseq_disasm),
process.c (pst_message), re.c (match_inspect): use rb_str_catf.
* dir.c (dir_inspect), iseq.c (iseq_inspect, insn_operand_intern): use
rb_sprintf.
* error.c (rb_name_error, rb_raise, rb_loaderror, rb_fatal): use
rb_vsprintf.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@18158 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2008-07-22 12:53:34 +04:00
|
|
|
rb_str_catf(str, "%d", i);
|
2007-12-09 10:12:44 +03:00
|
|
|
}
|
|
|
|
rb_str_buf_cat2(str, ":");
|
|
|
|
}
|
2007-06-23 12:26:08 +04:00
|
|
|
v = rb_reg_nth_match(i, match);
|
2021-10-03 16:34:45 +03:00
|
|
|
if (NIL_P(v))
|
2007-06-23 12:26:08 +04:00
|
|
|
rb_str_buf_cat2(str, "nil");
|
|
|
|
else
|
|
|
|
rb_str_buf_append(str, rb_str_inspect(v));
|
|
|
|
}
|
|
|
|
rb_str_buf_cat2(str, ">");
|
|
|
|
|
|
|
|
return str;
|
|
|
|
}
|
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
/* NOTE(review): presumably holds the Regexp class object; it is assigned outside this section -- confirm. */
VALUE rb_cRegexp;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2007-12-01 19:56:19 +03:00
|
|
|
static int
|
|
|
|
read_escaped_byte(const char **pp, const char *end, onig_errmsg_buffer err)
|
|
|
|
{
|
|
|
|
const char *p = *pp;
|
|
|
|
int code;
|
|
|
|
int meta_prefix = 0, ctrl_prefix = 0;
|
2009-03-14 12:25:20 +03:00
|
|
|
size_t len;
|
2007-12-01 19:56:19 +03:00
|
|
|
|
|
|
|
if (p == end || *p++ != '\\') {
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "too short escaped multibyte character");
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
again:
|
|
|
|
if (p == end) {
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "too short escape sequence");
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
switch (*p++) {
|
|
|
|
case '\\': code = '\\'; break;
|
|
|
|
case 'n': code = '\n'; break;
|
|
|
|
case 't': code = '\t'; break;
|
|
|
|
case 'r': code = '\r'; break;
|
|
|
|
case 'f': code = '\f'; break;
|
|
|
|
case 'v': code = '\013'; break;
|
|
|
|
case 'a': code = '\007'; break;
|
|
|
|
case 'e': code = '\033'; break;
|
|
|
|
|
|
|
|
/* \OOO */
|
|
|
|
case '0': case '1': case '2': case '3':
|
|
|
|
case '4': case '5': case '6': case '7':
|
|
|
|
p--;
|
2009-06-30 06:08:54 +04:00
|
|
|
code = scan_oct(p, end < p+3 ? end-p : 3, &len);
|
2007-12-01 19:56:19 +03:00
|
|
|
p += len;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'x': /* \xHH */
|
2009-06-30 06:08:54 +04:00
|
|
|
code = scan_hex(p, end < p+2 ? end-p : 2, &len);
|
2007-12-01 19:56:19 +03:00
|
|
|
if (len < 1) {
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "invalid hex escape");
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
p += len;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case 'M': /* \M-X, \M-\C-X, \M-\cX */
|
|
|
|
if (meta_prefix) {
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "duplicate meta escape");
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
meta_prefix = 1;
|
|
|
|
if (p+1 < end && *p++ == '-' && (*p & 0x80) == 0) {
|
|
|
|
if (*p == '\\') {
|
|
|
|
p++;
|
|
|
|
goto again;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
code = *p++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "too short meta escape");
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
|
|
|
|
case 'C': /* \C-X, \C-\M-X */
|
|
|
|
if (p == end || *p++ != '-') {
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "too short control escape");
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
case 'c': /* \cX, \c\M-X */
|
|
|
|
if (ctrl_prefix) {
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "duplicate control escape");
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
ctrl_prefix = 1;
|
|
|
|
if (p < end && (*p & 0x80) == 0) {
|
|
|
|
if (*p == '\\') {
|
|
|
|
p++;
|
|
|
|
goto again;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
code = *p++;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "too short control escape");
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
|
|
|
|
default:
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "unexpected escape sequence");
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (code < 0 || 0xff < code) {
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "invalid escape code");
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (ctrl_prefix)
|
|
|
|
code &= 0x1f;
|
|
|
|
if (meta_prefix)
|
|
|
|
code |= 0x80;
|
|
|
|
|
|
|
|
*pp = p;
|
|
|
|
return code;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
2014-06-03 00:23:47 +04:00
|
|
|
unescape_escaped_nonascii(const char **pp, const char *end, rb_encoding *enc,
|
|
|
|
VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
|
2007-12-01 19:56:19 +03:00
|
|
|
{
|
|
|
|
const char *p = *pp;
|
|
|
|
int chmaxlen = rb_enc_mbmaxlen(enc);
|
2018-11-12 05:39:24 +03:00
|
|
|
unsigned char *area = ALLOCA_N(unsigned char, chmaxlen);
|
|
|
|
char *chbuf = (char *)area;
|
2007-12-01 19:56:19 +03:00
|
|
|
int chlen = 0;
|
|
|
|
int byte;
|
2007-12-08 05:50:43 +03:00
|
|
|
int l;
|
2007-12-01 19:56:19 +03:00
|
|
|
|
|
|
|
memset(chbuf, 0, chmaxlen);
|
|
|
|
|
|
|
|
byte = read_escaped_byte(&p, end, err);
|
|
|
|
if (byte == -1) {
|
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
2018-11-12 05:39:24 +03:00
|
|
|
area[chlen++] = byte;
|
2007-12-08 05:50:43 +03:00
|
|
|
while (chlen < chmaxlen &&
|
2008-01-27 17:27:07 +03:00
|
|
|
MBCLEN_NEEDMORE_P(rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc))) {
|
2007-12-01 19:56:19 +03:00
|
|
|
byte = read_escaped_byte(&p, end, err);
|
|
|
|
if (byte == -1) {
|
|
|
|
return -1;
|
|
|
|
}
|
2018-11-12 05:39:24 +03:00
|
|
|
area[chlen++] = byte;
|
2007-12-01 19:56:19 +03:00
|
|
|
}
|
|
|
|
|
2007-12-08 05:50:43 +03:00
|
|
|
l = rb_enc_precise_mbclen(chbuf, chbuf+chlen, enc);
|
2008-01-27 17:27:07 +03:00
|
|
|
if (MBCLEN_INVALID_P(l)) {
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "invalid multibyte escape");
|
2008-10-22 08:27:32 +04:00
|
|
|
return -1;
|
2007-12-01 19:56:19 +03:00
|
|
|
}
|
2018-11-12 05:39:24 +03:00
|
|
|
if (1 < chlen || (area[0] & 0x80)) {
|
2007-12-01 19:56:19 +03:00
|
|
|
rb_str_buf_cat(buf, chbuf, chlen);
|
|
|
|
|
|
|
|
if (*encp == 0)
|
|
|
|
*encp = enc;
|
|
|
|
else if (*encp != enc) {
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "escaped non ASCII character in UTF-8 regexp");
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
char escbuf[5];
|
2018-11-12 05:39:24 +03:00
|
|
|
snprintf(escbuf, sizeof(escbuf), "\\x%02X", area[0]&0xff);
|
2007-12-01 19:56:19 +03:00
|
|
|
rb_str_buf_cat(buf, escbuf, 4);
|
|
|
|
}
|
|
|
|
*pp = p;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-12-09 06:50:11 +03:00
|
|
|
static int
|
|
|
|
check_unicode_range(unsigned long code, onig_errmsg_buffer err)
|
|
|
|
{
|
|
|
|
if ((0xd800 <= code && code <= 0xdfff) || /* Surrogates */
|
|
|
|
0x10ffff < code) {
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "invalid Unicode range");
|
2007-12-09 06:50:11 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
2007-12-01 19:56:19 +03:00
|
|
|
static int
|
|
|
|
append_utf8(unsigned long uv,
|
2014-06-03 00:23:47 +04:00
|
|
|
VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
|
2007-12-01 19:56:19 +03:00
|
|
|
{
|
2007-12-09 06:50:11 +03:00
|
|
|
if (check_unicode_range(uv, err) != 0)
|
|
|
|
return -1;
|
2007-12-01 19:56:19 +03:00
|
|
|
if (uv < 0x80) {
|
|
|
|
char escbuf[5];
|
2007-12-12 17:30:54 +03:00
|
|
|
snprintf(escbuf, sizeof(escbuf), "\\x%02X", (int)uv);
|
2007-12-01 19:56:19 +03:00
|
|
|
rb_str_buf_cat(buf, escbuf, 4);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
int len;
|
|
|
|
char utf8buf[6];
|
|
|
|
len = rb_uv_to_utf8(utf8buf, uv);
|
|
|
|
rb_str_buf_cat(buf, utf8buf, len);
|
|
|
|
|
|
|
|
if (*encp == 0)
|
2007-12-21 10:07:21 +03:00
|
|
|
*encp = rb_utf8_encoding();
|
|
|
|
else if (*encp != rb_utf8_encoding()) {
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "UTF-8 character in non UTF-8 regexp");
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
unescape_unicode_list(const char **pp, const char *end,
|
2014-06-03 00:23:47 +04:00
|
|
|
VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
|
2007-12-01 19:56:19 +03:00
|
|
|
{
|
|
|
|
const char *p = *pp;
|
|
|
|
int has_unicode = 0;
|
|
|
|
unsigned long code;
|
2009-03-14 12:25:20 +03:00
|
|
|
size_t len;
|
2007-12-01 19:56:19 +03:00
|
|
|
|
|
|
|
while (p < end && ISSPACE(*p)) p++;
|
|
|
|
|
|
|
|
while (1) {
|
|
|
|
code = ruby_scan_hex(p, end-p, &len);
|
|
|
|
if (len == 0)
|
|
|
|
break;
|
|
|
|
if (6 < len) { /* max 10FFFF */
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "invalid Unicode range");
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
p += len;
|
|
|
|
if (append_utf8(code, buf, encp, err) != 0)
|
|
|
|
return -1;
|
|
|
|
has_unicode = 1;
|
|
|
|
|
|
|
|
while (p < end && ISSPACE(*p)) p++;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (has_unicode == 0) {
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "invalid Unicode list");
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
|
|
|
|
*pp = p;
|
|
|
|
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
|
|
|
unescape_unicode_bmp(const char **pp, const char *end,
|
2014-06-03 00:23:47 +04:00
|
|
|
VALUE buf, rb_encoding **encp, onig_errmsg_buffer err)
|
2007-12-01 19:56:19 +03:00
|
|
|
{
|
|
|
|
const char *p = *pp;
|
2009-03-14 12:25:20 +03:00
|
|
|
size_t len;
|
2007-12-01 19:56:19 +03:00
|
|
|
unsigned long code;
|
|
|
|
|
|
|
|
if (end < p+4) {
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "invalid Unicode escape");
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
code = ruby_scan_hex(p, 4, &len);
|
|
|
|
if (len != 4) {
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "invalid Unicode escape");
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (append_utf8(code, buf, encp, err) != 0)
|
|
|
|
return -1;
|
|
|
|
*pp = p + 4;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
|
|
|
static int
|
Fix parsing of regexps that toggle extended mode on/off inside regexp
This was broken in ec3542229b29ec93062e9d90e877ea29d3c19472. That commit
didn't handle cases where extended mode was turned on/off inside the
regexp. There are two ways to turn extended mode on/off:
```
/(?-x:#y)#z
/x =~ '#y'
/(?-x)#y(?x)#z
/x =~ '#y'
```
These can be nested inside the same regexp:
```
/(?-x:(?x)#x
(?-x)#y)#z
/x =~ '#y'
```
As you can probably imagine, this makes handling these regexps
somewhat complex. Due to the nesting inside portions of regexps,
the unassign_nonascii function needs to be recursive. In
recursive mode, it needs to track both opening and closing
parentheses, similar to how it already tracked opening and
closing brackets for character classes.
When scanning the regexp and coming to `(?` not followed by `#`,
scan for options, and use `x` and `i` to determine whether to
turn on or off extended mode. For `:`, indicting only the
current regexp section should have the extended mode
switched, recurse with the extended mode set or unset. For `)`,
indicating the remainder of the regexp (or current regexp portion
if already recursing) should turn extended mode on or off, just
change the extended mode flag and keep scanning.
While testing this, I noticed that `a`, `d`, and `u` are accepted
as options, in addition to `i`, `m`, and `x`, but I can't see
where those options are documented. I'm not sure whether or not
handling `a`, `d`, and `u` as options is a bug.
Fixes [Bug #19379]
2023-01-27 22:08:49 +03:00
|
|
|
unescape_nonascii0(const char **pp, const char *end, rb_encoding *enc,
|
2014-06-03 00:23:47 +04:00
|
|
|
VALUE buf, rb_encoding **encp, int *has_property,
|
Fix parsing of regexps that toggle extended mode on/off inside regexp
This was broken in ec3542229b29ec93062e9d90e877ea29d3c19472. That commit
didn't handle cases where extended mode was turned on/off inside the
regexp. There are two ways to turn extended mode on/off:
```
/(?-x:#y)#z
/x =~ '#y'
/(?-x)#y(?x)#z
/x =~ '#y'
```
These can be nested inside the same regexp:
```
/(?-x:(?x)#x
(?-x)#y)#z
/x =~ '#y'
```
As you can probably imagine, this makes handling these regexps
somewhat complex. Due to the nesting inside portions of regexps,
the unassign_nonascii function needs to be recursive. In
recursive mode, it needs to track both opening and closing
parentheses, similar to how it already tracked opening and
closing brackets for character classes.
When scanning the regexp and coming to `(?` not followed by `#`,
scan for options, and use `x` and `i` to determine whether to
turn on or off extended mode. For `:`, indicting only the
current regexp section should have the extended mode
switched, recurse with the extended mode set or unset. For `)`,
indicating the remainder of the regexp (or current regexp portion
if already recursing) should turn extended mode on or off, just
change the extended mode flag and keep scanning.
While testing this, I noticed that `a`, `d`, and `u` are accepted
as options, in addition to `i`, `m`, and `x`, but I can't see
where those options are documented. I'm not sure whether or not
handling `a`, `d`, and `u` as options is a bug.
Fixes [Bug #19379]
2023-01-27 22:08:49 +03:00
|
|
|
onig_errmsg_buffer err, int options, int recurse)
|
2007-12-01 19:56:19 +03:00
|
|
|
{
|
Fix parsing of regexps that toggle extended mode on/off inside regexp
This was broken in ec3542229b29ec93062e9d90e877ea29d3c19472. That commit
didn't handle cases where extended mode was turned on/off inside the
regexp. There are two ways to turn extended mode on/off:
```
/(?-x:#y)#z
/x =~ '#y'
/(?-x)#y(?x)#z
/x =~ '#y'
```
These can be nested inside the same regexp:
```
/(?-x:(?x)#x
(?-x)#y)#z
/x =~ '#y'
```
As you can probably imagine, this makes handling these regexps
somewhat complex. Due to the nesting inside portions of regexps,
the unassign_nonascii function needs to be recursive. In
recursive mode, it needs to track both opening and closing
parentheses, similar to how it already tracked opening and
closing brackets for character classes.
When scanning the regexp and coming to `(?` not followed by `#`,
scan for options, and use `x` and `i` to determine whether to
turn on or off extended mode. For `:`, indicting only the
current regexp section should have the extended mode
switched, recurse with the extended mode set or unset. For `)`,
indicating the remainder of the regexp (or current regexp portion
if already recursing) should turn extended mode on or off, just
change the extended mode flag and keep scanning.
While testing this, I noticed that `a`, `d`, and `u` are accepted
as options, in addition to `i`, `m`, and `x`, but I can't see
where those options are documented. I'm not sure whether or not
handling `a`, `d`, and `u` as options is a bug.
Fixes [Bug #19379]
2023-01-27 22:08:49 +03:00
|
|
|
const char *p = *pp;
|
2018-11-21 11:51:39 +03:00
|
|
|
unsigned char c;
|
2007-12-01 19:56:19 +03:00
|
|
|
char smallbuf[2];
|
Ignore invalid escapes in regexp comments
Invalid escapes are handled at multiple levels. The first level
is in parse.y, so skip invalid unicode escape checks for regexps
in parse.y.
Make rb_reg_preprocess and unescape_nonascii accept the regexp
options. In unescape_nonascii, if the regexp is an extended
regexp, when "#" is encountered, ignore all characters until the
end of line or end of regexp.
Unfortunately, in extended regexps, you can use "#" as a non-comment
character inside a character class, so also parse "[" and "]"
specially for extended regexps, and only skip comments if "#" is
not inside a character class. Handle nested character classes as well.
This issue doesn't just affect extended regexps, it also affects
"(#?" comments inside all regexps. So for those comments, scan
until trailing ")" and ignore content inside.
I'm not sure if there are other corner cases not handled. A
better fix would be to redesign the regexp parser so that it
unescaped during parsing instead of before parsing, so you already
know the current parsing state.
Fixes [Bug #18294]
Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2022-06-06 23:50:03 +03:00
|
|
|
int in_char_class = 0;
|
Fix parsing of regexps that toggle extended mode on/off inside regexp
This was broken in ec3542229b29ec93062e9d90e877ea29d3c19472. That commit
didn't handle cases where extended mode was turned on/off inside the
regexp. There are two ways to turn extended mode on/off:
```
/(?-x:#y)#z
/x =~ '#y'
/(?-x)#y(?x)#z
/x =~ '#y'
```
These can be nested inside the same regexp:
```
/(?-x:(?x)#x
(?-x)#y)#z
/x =~ '#y'
```
As you can probably imagine, this makes handling these regexps
somewhat complex. Due to the nesting inside portions of regexps,
the unescape_nonascii function needs to be recursive. In
recursive mode, it needs to track both opening and closing
parentheses, similar to how it already tracked opening and
closing brackets for character classes.
When scanning the regexp and coming to `(?` not followed by `#`,
scan for options, and use `x` and `-` to determine whether to
turn on or off extended mode. For `:`, indicating only the
current regexp section should have the extended mode
switched, recurse with the extended mode set or unset. For `)`,
indicating the remainder of the regexp (or current regexp portion
if already recursing) should turn extended mode on or off, just
change the extended mode flag and keep scanning.
While testing this, I noticed that `a`, `d`, and `u` are accepted
as options, in addition to `i`, `m`, and `x`, but I can't see
where those options are documented. I'm not sure whether or not
handling `a`, `d`, and `u` as options is a bug.
Fixes [Bug #19379]
2023-01-27 22:08:49 +03:00
|
|
|
int parens = 1; /* ignored unless recurse is true */
|
|
|
|
int extended_mode = options & ONIG_OPTION_EXTEND;
|
2007-12-01 19:56:19 +03:00
|
|
|
|
Fix parsing of regexps that toggle extended mode on/off inside regexp
This was broken in ec3542229b29ec93062e9d90e877ea29d3c19472. That commit
didn't handle cases where extended mode was turned on/off inside the
regexp. There are two ways to turn extended mode on/off:
```
/(?-x:#y)#z
/x =~ '#y'
/(?-x)#y(?x)#z
/x =~ '#y'
```
These can be nested inside the same regexp:
```
/(?-x:(?x)#x
(?-x)#y)#z
/x =~ '#y'
```
As you can probably imagine, this makes handling these regexps
somewhat complex. Due to the nesting inside portions of regexps,
the unescape_nonascii function needs to be recursive. In
recursive mode, it needs to track both opening and closing
parentheses, similar to how it already tracked opening and
closing brackets for character classes.
When scanning the regexp and coming to `(?` not followed by `#`,
scan for options, and use `x` and `-` to determine whether to
turn on or off extended mode. For `:`, indicating only the
current regexp section should have the extended mode
switched, recurse with the extended mode set or unset. For `)`,
indicating the remainder of the regexp (or current regexp portion
if already recursing) should turn extended mode on or off, just
change the extended mode flag and keep scanning.
While testing this, I noticed that `a`, `d`, and `u` are accepted
as options, in addition to `i`, `m`, and `x`, but I can't see
where those options are documented. I'm not sure whether or not
handling `a`, `d`, and `u` as options is a bug.
Fixes [Bug #19379]
2023-01-27 22:08:49 +03:00
|
|
|
begin_scan:
|
2007-12-01 19:56:19 +03:00
|
|
|
while (p < end) {
|
2007-12-08 05:50:43 +03:00
|
|
|
int chlen = rb_enc_precise_mbclen(p, end, enc);
|
2008-01-27 17:27:07 +03:00
|
|
|
if (!MBCLEN_CHARFOUND_P(chlen)) {
|
2018-03-11 03:05:12 +03:00
|
|
|
invalid_multibyte:
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "invalid multibyte character");
|
2007-12-08 05:50:43 +03:00
|
|
|
return -1;
|
|
|
|
}
|
2008-01-27 17:27:07 +03:00
|
|
|
chlen = MBCLEN_CHARFOUND_LEN(chlen);
|
2007-12-01 19:56:19 +03:00
|
|
|
if (1 < chlen || (*p & 0x80)) {
|
2018-03-11 03:05:12 +03:00
|
|
|
multibyte:
|
2007-12-01 19:56:19 +03:00
|
|
|
rb_str_buf_cat(buf, p, chlen);
|
|
|
|
p += chlen;
|
|
|
|
if (*encp == 0)
|
|
|
|
*encp = enc;
|
|
|
|
else if (*encp != enc) {
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "non ASCII character in UTF-8 regexp");
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
switch (c = *p++) {
|
|
|
|
case '\\':
|
|
|
|
if (p == end) {
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "too short escape sequence");
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
}
|
2018-03-11 03:05:12 +03:00
|
|
|
chlen = rb_enc_precise_mbclen(p, end, enc);
|
|
|
|
if (!MBCLEN_CHARFOUND_P(chlen)) {
|
|
|
|
goto invalid_multibyte;
|
|
|
|
}
|
|
|
|
if ((chlen = MBCLEN_CHARFOUND_LEN(chlen)) > 1) {
|
|
|
|
/* include the previous backslash */
|
|
|
|
--p;
|
|
|
|
++chlen;
|
|
|
|
goto multibyte;
|
|
|
|
}
|
2007-12-01 19:56:19 +03:00
|
|
|
switch (c = *p++) {
|
|
|
|
case '1': case '2': case '3':
|
|
|
|
case '4': case '5': case '6': case '7': /* \O, \OO, \OOO or backref */
|
|
|
|
{
|
2016-05-25 14:51:37 +03:00
|
|
|
size_t len = end-(p-1), octlen;
|
|
|
|
if (ruby_scan_oct(p-1, len < 3 ? len : 3, &octlen) <= 0177) {
|
2007-12-01 19:56:19 +03:00
|
|
|
/* backref or 7bit octal.
|
|
|
|
no need to unescape anyway.
|
|
|
|
re-escaping may break backref */
|
|
|
|
goto escape_asis;
|
|
|
|
}
|
|
|
|
}
|
2009-02-22 17:23:33 +03:00
|
|
|
/* xxx: How about more than 199 subexpressions? */
|
2007-12-01 19:56:19 +03:00
|
|
|
|
|
|
|
case '0': /* \0, \0O, \0OO */
|
|
|
|
|
|
|
|
case 'x': /* \xHH */
|
|
|
|
case 'c': /* \cX, \c\M-X */
|
|
|
|
case 'C': /* \C-X, \C-\M-X */
|
|
|
|
case 'M': /* \M-X, \M-\C-X, \M-\cX */
|
|
|
|
p = p-2;
|
2022-11-15 07:21:45 +03:00
|
|
|
if (rb_is_usascii_enc(enc)) {
|
2014-12-29 12:58:48 +03:00
|
|
|
const char *pbeg = p;
|
2018-11-21 11:51:39 +03:00
|
|
|
int byte = read_escaped_byte(&p, end, err);
|
|
|
|
if (byte == -1) return -1;
|
|
|
|
c = byte;
|
2014-12-29 12:58:48 +03:00
|
|
|
rb_str_buf_cat(buf, pbeg, p-pbeg);
|
2014-10-17 10:06:43 +04:00
|
|
|
}
|
|
|
|
else {
|
|
|
|
if (unescape_escaped_nonascii(&p, end, enc, buf, encp, err) != 0)
|
|
|
|
return -1;
|
|
|
|
}
|
2007-12-01 19:56:19 +03:00
|
|
|
break;
|
|
|
|
|
|
|
|
case 'u':
|
|
|
|
if (p == end) {
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "too short escape sequence");
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
if (*p == '{') {
|
|
|
|
/* \u{H HH HHH HHHH HHHHH HHHHHH ...} */
|
|
|
|
p++;
|
|
|
|
if (unescape_unicode_list(&p, end, buf, encp, err) != 0)
|
|
|
|
return -1;
|
|
|
|
if (p == end || *p++ != '}') {
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "invalid Unicode list");
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* \uHHHH */
|
|
|
|
if (unescape_unicode_bmp(&p, end, buf, encp, err) != 0)
|
|
|
|
return -1;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
|
2008-07-05 03:33:04 +04:00
|
|
|
case 'p': /* \p{Hiragana} */
|
2010-06-01 18:20:59 +04:00
|
|
|
case 'P':
|
2008-07-05 03:33:04 +04:00
|
|
|
if (!*encp) {
|
2008-07-05 03:50:33 +04:00
|
|
|
*has_property = 1;
|
2008-07-05 03:33:04 +04:00
|
|
|
}
|
|
|
|
goto escape_asis;
|
|
|
|
|
2007-12-01 19:56:19 +03:00
|
|
|
default: /* \n, \\, \d, \9, etc. */
|
|
|
|
escape_asis:
|
|
|
|
smallbuf[0] = '\\';
|
|
|
|
smallbuf[1] = c;
|
|
|
|
rb_str_buf_cat(buf, smallbuf, 2);
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
break;
|
|
|
|
|
Ignore invalid escapes in regexp comments
Invalid escapes are handled at multiple levels. The first level
is in parse.y, so skip invalid unicode escape checks for regexps
in parse.y.
Make rb_reg_preprocess and unescape_nonascii accept the regexp
options. In unescape_nonascii, if the regexp is an extended
regexp, when "#" is encountered, ignore all characters until the
end of line or end of regexp.
Unfortunately, in extended regexps, you can use "#" as a non-comment
character inside a character class, so also parse "[" and "]"
specially for extended regexps, and only skip comments if "#" is
not inside a character class. Handle nested character classes as well.
This issue doesn't just affect extended regexps, it also affects
"(?#" comments inside all regexps. So for those comments, scan
until trailing ")" and ignore content inside.
I'm not sure if there are other corner cases not handled. A
better fix would be to redesign the regexp parser so that it
unescaped during parsing instead of before parsing, so you already
know the current parsing state.
Fixes [Bug #18294]
Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2022-06-06 23:50:03 +03:00
|
|
|
case '#':
|
Fix parsing of regexps that toggle extended mode on/off inside regexp
This was broken in ec3542229b29ec93062e9d90e877ea29d3c19472. That commit
didn't handle cases where extended mode was turned on/off inside the
regexp. There are two ways to turn extended mode on/off:
```
/(?-x:#y)#z
/x =~ '#y'
/(?-x)#y(?x)#z
/x =~ '#y'
```
These can be nested inside the same regexp:
```
/(?-x:(?x)#x
(?-x)#y)#z
/x =~ '#y'
```
As you can probably imagine, this makes handling these regexps
somewhat complex. Due to the nesting inside portions of regexps,
the unescape_nonascii function needs to be recursive. In
recursive mode, it needs to track both opening and closing
parentheses, similar to how it already tracked opening and
closing brackets for character classes.
When scanning the regexp and coming to `(?` not followed by `#`,
scan for options, and use `x` and `-` to determine whether to
turn on or off extended mode. For `:`, indicating only the
current regexp section should have the extended mode
switched, recurse with the extended mode set or unset. For `)`,
indicating the remainder of the regexp (or current regexp portion
if already recursing) should turn extended mode on or off, just
change the extended mode flag and keep scanning.
While testing this, I noticed that `a`, `d`, and `u` are accepted
as options, in addition to `i`, `m`, and `x`, but I can't see
where those options are documented. I'm not sure whether or not
handling `a`, `d`, and `u` as options is a bug.
Fixes [Bug #19379]
2023-01-27 22:08:49 +03:00
|
|
|
if (extended_mode && !in_char_class) {
|
Ignore invalid escapes in regexp comments
Invalid escapes are handled at multiple levels. The first level
is in parse.y, so skip invalid unicode escape checks for regexps
in parse.y.
Make rb_reg_preprocess and unescape_nonascii accept the regexp
options. In unescape_nonascii, if the regexp is an extended
regexp, when "#" is encountered, ignore all characters until the
end of line or end of regexp.
Unfortunately, in extended regexps, you can use "#" as a non-comment
character inside a character class, so also parse "[" and "]"
specially for extended regexps, and only skip comments if "#" is
not inside a character class. Handle nested character classes as well.
This issue doesn't just affect extended regexps, it also affects
"(?#" comments inside all regexps. So for those comments, scan
until trailing ")" and ignore content inside.
I'm not sure if there are other corner cases not handled. A
better fix would be to redesign the regexp parser so that it
unescaped during parsing instead of before parsing, so you already
know the current parsing state.
Fixes [Bug #18294]
Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2022-06-06 23:50:03 +03:00
|
|
|
/* consume and ignore comment in extended regexp */
|
2023-03-24 21:53:53 +03:00
|
|
|
while ((p < end) && ((c = *p++) != '\n')) {
|
|
|
|
if ((c & 0x80) && !*encp && enc == rb_utf8_encoding()) {
|
|
|
|
*encp = enc;
|
|
|
|
}
|
|
|
|
}
|
Ignore invalid escapes in regexp comments
Invalid escapes are handled at multiple levels. The first level
is in parse.y, so skip invalid unicode escape checks for regexps
in parse.y.
Make rb_reg_preprocess and unescape_nonascii accept the regexp
options. In unescape_nonascii, if the regexp is an extended
regexp, when "#" is encountered, ignore all characters until the
end of line or end of regexp.
Unfortunately, in extended regexps, you can use "#" as a non-comment
character inside a character class, so also parse "[" and "]"
specially for extended regexps, and only skip comments if "#" is
not inside a character class. Handle nested character classes as well.
This issue doesn't just affect extended regexps, it also affects
"(?#" comments inside all regexps. So for those comments, scan
until trailing ")" and ignore content inside.
I'm not sure if there are other corner cases not handled. A
better fix would be to redesign the regexp parser so that it
unescaped during parsing instead of before parsing, so you already
know the current parsing state.
Fixes [Bug #18294]
Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2022-06-06 23:50:03 +03:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
rb_str_buf_cat(buf, (char *)&c, 1);
|
|
|
|
break;
|
|
|
|
case '[':
|
|
|
|
in_char_class++;
|
|
|
|
rb_str_buf_cat(buf, (char *)&c, 1);
|
|
|
|
break;
|
|
|
|
case ']':
|
|
|
|
if (in_char_class) {
|
|
|
|
in_char_class--;
|
|
|
|
}
|
|
|
|
rb_str_buf_cat(buf, (char *)&c, 1);
|
|
|
|
break;
|
Fix parsing of regexps that toggle extended mode on/off inside regexp
This was broken in ec3542229b29ec93062e9d90e877ea29d3c19472. That commit
didn't handle cases where extended mode was turned on/off inside the
regexp. There are two ways to turn extended mode on/off:
```
/(?-x:#y)#z
/x =~ '#y'
/(?-x)#y(?x)#z
/x =~ '#y'
```
These can be nested inside the same regexp:
```
/(?-x:(?x)#x
(?-x)#y)#z
/x =~ '#y'
```
As you can probably imagine, this makes handling these regexps
somewhat complex. Due to the nesting inside portions of regexps,
the unescape_nonascii function needs to be recursive. In
recursive mode, it needs to track both opening and closing
parentheses, similar to how it already tracked opening and
closing brackets for character classes.
When scanning the regexp and coming to `(?` not followed by `#`,
scan for options, and use `x` and `-` to determine whether to
turn on or off extended mode. For `:`, indicating only the
current regexp section should have the extended mode
switched, recurse with the extended mode set or unset. For `)`,
indicating the remainder of the regexp (or current regexp portion
if already recursing) should turn extended mode on or off, just
change the extended mode flag and keep scanning.
While testing this, I noticed that `a`, `d`, and `u` are accepted
as options, in addition to `i`, `m`, and `x`, but I can't see
where those options are documented. I'm not sure whether or not
handling `a`, `d`, and `u` as options is a bug.
Fixes [Bug #19379]
2023-01-27 22:08:49 +03:00
|
|
|
case ')':
|
|
|
|
rb_str_buf_cat(buf, (char *)&c, 1);
|
|
|
|
if (!in_char_class && recurse) {
|
|
|
|
if (--parens == 0) {
|
|
|
|
*pp = p;
|
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
break;
|
Ignore invalid escapes in regexp comments
Invalid escapes are handled at multiple levels. The first level
is in parse.y, so skip invalid unicode escape checks for regexps
in parse.y.
Make rb_reg_preprocess and unescape_nonascii accept the regexp
options. In unescape_nonascii, if the regexp is an extended
regexp, when "#" is encountered, ignore all characters until the
end of line or end of regexp.
Unfortunately, in extended regexps, you can use "#" as a non-comment
character inside a character class, so also parse "[" and "]"
specially for extended regexps, and only skip comments if "#" is
not inside a character class. Handle nested character classes as well.
This issue doesn't just affect extended regexps, it also affects
"(?#" comments inside all regexps. So for those comments, scan
until trailing ")" and ignore content inside.
I'm not sure if there are other corner cases not handled. A
better fix would be to redesign the regexp parser so that it
unescaped during parsing instead of before parsing, so you already
know the current parsing state.
Fixes [Bug #18294]
Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2022-06-06 23:50:03 +03:00
|
|
|
case '(':
|
Fix parsing of regexps that toggle extended mode on/off inside regexp
This was broken in ec3542229b29ec93062e9d90e877ea29d3c19472. That commit
didn't handle cases where extended mode was turned on/off inside the
regexp. There are two ways to turn extended mode on/off:
```
/(?-x:#y)#z
/x =~ '#y'
/(?-x)#y(?x)#z
/x =~ '#y'
```
These can be nested inside the same regexp:
```
/(?-x:(?x)#x
(?-x)#y)#z
/x =~ '#y'
```
As you can probably imagine, this makes handling these regexps
somewhat complex. Due to the nesting inside portions of regexps,
the unescape_nonascii function needs to be recursive. In
recursive mode, it needs to track both opening and closing
parentheses, similar to how it already tracked opening and
closing brackets for character classes.
When scanning the regexp and coming to `(?` not followed by `#`,
scan for options, and use `x` and `-` to determine whether to
turn on or off extended mode. For `:`, indicating only the
current regexp section should have the extended mode
switched, recurse with the extended mode set or unset. For `)`,
indicating the remainder of the regexp (or current regexp portion
if already recursing) should turn extended mode on or off, just
change the extended mode flag and keep scanning.
While testing this, I noticed that `a`, `d`, and `u` are accepted
as options, in addition to `i`, `m`, and `x`, but I can't see
where those options are documented. I'm not sure whether or not
handling `a`, `d`, and `u` as options is a bug.
Fixes [Bug #19379]
2023-01-27 22:08:49 +03:00
|
|
|
if (!in_char_class && p + 1 < end && *p == '?') {
|
|
|
|
if (*(p+1) == '#') {
|
|
|
|
/* (?# is comment inside any regexp, and content inside should be ignored */
|
|
|
|
const char *orig_p = p;
|
|
|
|
int cont = 1;
|
|
|
|
|
|
|
|
while (cont && (p < end)) {
|
|
|
|
switch (c = *p++) {
|
|
|
|
default:
|
|
|
|
if (!(c & 0x80)) break;
|
2023-03-24 21:53:53 +03:00
|
|
|
if (!*encp && enc == rb_utf8_encoding()) {
|
|
|
|
*encp = enc;
|
|
|
|
}
|
Fix parsing of regexps that toggle extended mode on/off inside regexp
This was broken in ec3542229b29ec93062e9d90e877ea29d3c19472. That commit
didn't handle cases where extended mode was turned on/off inside the
regexp. There are two ways to turn extended mode on/off:
```
/(?-x:#y)#z
/x =~ '#y'
/(?-x)#y(?x)#z
/x =~ '#y'
```
These can be nested inside the same regexp:
```
/(?-x:(?x)#x
(?-x)#y)#z
/x =~ '#y'
```
As you can probably imagine, this makes handling these regexps
somewhat complex. Due to the nesting inside portions of regexps,
the unescape_nonascii function needs to be recursive. In
recursive mode, it needs to track both opening and closing
parentheses, similar to how it already tracked opening and
closing brackets for character classes.
When scanning the regexp and coming to `(?` not followed by `#`,
scan for options, and use `x` and `-` to determine whether to
turn on or off extended mode. For `:`, indicating only the
current regexp section should have the extended mode
switched, recurse with the extended mode set or unset. For `)`,
indicating the remainder of the regexp (or current regexp portion
if already recursing) should turn extended mode on or off, just
change the extended mode flag and keep scanning.
While testing this, I noticed that `a`, `d`, and `u` are accepted
as options, in addition to `i`, `m`, and `x`, but I can't see
where those options are documented. I'm not sure whether or not
handling `a`, `d`, and `u` as options is a bug.
Fixes [Bug #19379]
2023-01-27 22:08:49 +03:00
|
|
|
--p;
|
|
|
|
/* fallthrough */
|
|
|
|
case '\\':
|
|
|
|
chlen = rb_enc_precise_mbclen(p, end, enc);
|
|
|
|
if (!MBCLEN_CHARFOUND_P(chlen)) {
|
|
|
|
goto invalid_multibyte;
|
|
|
|
}
|
|
|
|
p += MBCLEN_CHARFOUND_LEN(chlen);
|
|
|
|
break;
|
|
|
|
case ')':
|
|
|
|
cont = 0;
|
|
|
|
break;
|
Ignore invalid escapes in regexp comments
Invalid escapes are handled at multiple levels. The first level
is in parse.y, so skip invalid unicode escape checks for regexps
in parse.y.
Make rb_reg_preprocess and unescape_nonascii accept the regexp
options. In unescape_nonascii, if the regexp is an extended
regexp, when "#" is encountered, ignore all characters until the
end of line or end of regexp.
Unfortunately, in extended regexps, you can use "#" as a non-comment
character inside a character class, so also parse "[" and "]"
specially for extended regexps, and only skip comments if "#" is
not inside a character class. Handle nested character classes as well.
This issue doesn't just affect extended regexps, it also affects
"(?#" comments inside all regexps. So for those comments, scan
until trailing ")" and ignore content inside.
I'm not sure if there are other corner cases not handled. A
better fix would be to redesign the regexp parser so that it
unescaped during parsing instead of before parsing, so you already
know the current parsing state.
Fixes [Bug #18294]
Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2022-06-06 23:50:03 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Fix parsing of regexps that toggle extended mode on/off inside regexp
This was broken in ec3542229b29ec93062e9d90e877ea29d3c19472. That commit
didn't handle cases where extended mode was turned on/off inside the
regexp. There are two ways to turn extended mode on/off:
```
/(?-x:#y)#z
/x =~ '#y'
/(?-x)#y(?x)#z
/x =~ '#y'
```
These can be nested inside the same regexp:
```
/(?-x:(?x)#x
(?-x)#y)#z
/x =~ '#y'
```
As you can probably imagine, this makes handling these regexps
somewhat complex. Due to the nesting inside portions of regexps,
the unescape_nonascii function needs to be recursive. In
recursive mode, it needs to track both opening and closing
parentheses, similar to how it already tracked opening and
closing brackets for character classes.
When scanning the regexp and coming to `(?` not followed by `#`,
scan for options, and use `x` and `-` to determine whether to
turn on or off extended mode. For `:`, indicating only the
current regexp section should have the extended mode
switched, recurse with the extended mode set or unset. For `)`,
indicating the remainder of the regexp (or current regexp portion
if already recursing) should turn extended mode on or off, just
change the extended mode flag and keep scanning.
While testing this, I noticed that `a`, `d`, and `u` are accepted
as options, in addition to `i`, `m`, and `x`, but I can't see
where those options are documented. I'm not sure whether or not
handling `a`, `d`, and `u` as options is a bug.
Fixes [Bug #19379]
2023-01-27 22:08:49 +03:00
|
|
|
if (cont) {
|
|
|
|
/* unterminated (?#, rewind so it is syntax error */
|
|
|
|
p = orig_p;
|
|
|
|
c = '(';
|
|
|
|
rb_str_buf_cat(buf, (char *)&c, 1);
|
|
|
|
}
|
|
|
|
break;
|
2023-02-26 07:20:43 +03:00
|
|
|
}
|
|
|
|
else {
|
Fix parsing of regexps that toggle extended mode on/off inside regexp
This was broken in ec3542229b29ec93062e9d90e877ea29d3c19472. That commit
didn't handle cases where extended mode was turned on/off inside the
regexp. There are two ways to turn extended mode on/off:
```
/(?-x:#y)#z
/x =~ '#y'
/(?-x)#y(?x)#z
/x =~ '#y'
```
These can be nested inside the same regexp:
```
/(?-x:(?x)#x
(?-x)#y)#z
/x =~ '#y'
```
As you can probably imagine, this makes handling these regexps
somewhat complex. Due to the nesting inside portions of regexps,
the unescape_nonascii function needs to be recursive. In
recursive mode, it needs to track both opening and closing
parentheses, similar to how it already tracked opening and
closing brackets for character classes.
When scanning the regexp and coming to `(?` not followed by `#`,
scan for options, and use `x` and `-` to determine whether to
turn on or off extended mode. For `:`, indicating only the
current regexp section should have the extended mode
switched, recurse with the extended mode set or unset. For `)`,
indicating the remainder of the regexp (or current regexp portion
if already recursing) should turn extended mode on or off, just
change the extended mode flag and keep scanning.
While testing this, I noticed that `a`, `d`, and `u` are accepted
as options, in addition to `i`, `m`, and `x`, but I can't see
where those options are documented. I'm not sure whether or not
handling `a`, `d`, and `u` as options is a bug.
Fixes [Bug #19379]
2023-01-27 22:08:49 +03:00
|
|
|
/* potential change of extended option */
|
|
|
|
int invert = 0;
|
|
|
|
int local_extend = 0;
|
|
|
|
const char *s;
|
|
|
|
|
|
|
|
if (recurse) {
|
|
|
|
parens++;
|
|
|
|
}
|
|
|
|
|
2024-01-07 18:50:41 +03:00
|
|
|
for (s = p+1; s < end; s++) {
|
Fix parsing of regexps that toggle extended mode on/off inside regexp
This was broken in ec3542229b29ec93062e9d90e877ea29d3c19472. That commit
didn't handle cases where extended mode was turned on/off inside the
regexp. There are two ways to turn extended mode on/off:
```
/(?-x:#y)#z
/x =~ '#y'
/(?-x)#y(?x)#z
/x =~ '#y'
```
These can be nested inside the same regexp:
```
/(?-x:(?x)#x
(?-x)#y)#z
/x =~ '#y'
```
As you can probably imagine, this makes handling these regexps
somewhat complex. Due to the nesting inside portions of regexps,
the unescape_nonascii function needs to be recursive. In
recursive mode, it needs to track both opening and closing
parentheses, similar to how it already tracked opening and
closing brackets for character classes.
When scanning the regexp and coming to `(?` not followed by `#`,
scan for options, and use `x` and `-` to determine whether to
turn on or off extended mode. For `:`, indicating only the
current regexp section should have the extended mode
switched, recurse with the extended mode set or unset. For `)`,
indicating the remainder of the regexp (or current regexp portion
if already recursing) should turn extended mode on or off, just
change the extended mode flag and keep scanning.
While testing this, I noticed that `a`, `d`, and `u` are accepted
as options, in addition to `i`, `m`, and `x`, but I can't see
where those options are documented. I'm not sure whether or not
handling `a`, `d`, and `u` as options is a bug.
Fixes [Bug #19379]
2023-01-27 22:08:49 +03:00
|
|
|
switch(*s) {
|
2024-01-07 18:50:59 +03:00
|
|
|
case 'x':
|
|
|
|
local_extend = invert ? -1 : 1;
|
|
|
|
break;
|
|
|
|
case '-':
|
|
|
|
invert = 1;
|
|
|
|
break;
|
|
|
|
case ':':
|
|
|
|
case ')':
|
|
|
|
if (local_extend == 0 ||
|
|
|
|
(local_extend == -1 && !extended_mode) ||
|
|
|
|
(local_extend == 1 && extended_mode)) {
|
|
|
|
/* no changes to extended flag */
|
|
|
|
goto fallthrough;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (*s == ':') {
|
|
|
|
/* change extended flag until ')' */
|
|
|
|
int local_options = options;
|
|
|
|
if (local_extend == 1) {
|
|
|
|
local_options |= ONIG_OPTION_EXTEND;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
local_options &= ~ONIG_OPTION_EXTEND;
|
|
|
|
}
|
|
|
|
|
|
|
|
rb_str_buf_cat(buf, (char *)&c, 1);
|
|
|
|
int ret = unescape_nonascii0(&p, end, enc, buf, encp,
|
|
|
|
has_property, err,
|
|
|
|
local_options, 1);
|
|
|
|
if (ret < 0) return ret;
|
|
|
|
goto begin_scan;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
/* change extended flag for rest of expression */
|
|
|
|
extended_mode = local_extend == 1;
|
|
|
|
goto fallthrough;
|
|
|
|
}
|
|
|
|
case 'i':
|
|
|
|
case 'm':
|
|
|
|
case 'a':
|
|
|
|
case 'd':
|
|
|
|
case 'u':
|
|
|
|
/* other option flags, ignored during scanning */
|
|
|
|
break;
|
|
|
|
default:
|
|
|
|
/* other character, no extended flag change*/
|
|
|
|
goto fallthrough;
|
Fix parsing of regexps that toggle extended mode on/off inside regexp
This was broken in ec3542229b29ec93062e9d90e877ea29d3c19472. That commit
didn't handle cases where extended mode was turned on/off inside the
regexp. There are two ways to turn extended mode on/off:
```
/(?-x:#y)#z
/x =~ '#y'
/(?-x)#y(?x)#z
/x =~ '#y'
```
These can be nested inside the same regexp:
```
/(?-x:(?x)#x
(?-x)#y)#z
/x =~ '#y'
```
As you can probably imagine, this makes handling these regexps
somewhat complex. Due to the nesting inside portions of regexps,
the unescape_nonascii function needs to be recursive. In
recursive mode, it needs to track both opening and closing
parentheses, similar to how it already tracked opening and
closing brackets for character classes.
When scanning the regexp and coming to `(?` not followed by `#`,
scan for options, and use `x` and `-` to determine whether to
turn on or off extended mode. For `:`, indicating only the
current regexp section should have the extended mode
switched, recurse with the extended mode set or unset. For `)`,
indicating the remainder of the regexp (or current regexp portion
if already recursing) should turn extended mode on or off, just
change the extended mode flag and keep scanning.
While testing this, I noticed that `a`, `d`, and `u` are accepted
as options, in addition to `i`, `m`, and `x`, but I can't see
where those options are documented. I'm not sure whether or not
handling `a`, `d`, and `u` as options is a bug.
Fixes [Bug #19379]
2023-01-27 22:08:49 +03:00
|
|
|
}
|
|
|
|
}
|
Ignore invalid escapes in regexp comments
Invalid escapes are handled at multiple levels. The first level
is in parse.y, so skip invalid unicode escape checks for regexps
in parse.y.
Make rb_reg_preprocess and unescape_nonascii accept the regexp
options. In unescape_nonascii, if the regexp is an extended
regexp, when "#" is encountered, ignore all characters until the
end of line or end of regexp.
Unfortunately, in extended regexps, you can use "#" as a non-comment
character inside a character class, so also parse "[" and "]"
specially for extended regexps, and only skip comments if "#" is
not inside a character class. Handle nested character classes as well.
This issue doesn't just affect extended regexps, it also affects
"(?#" comments inside all regexps. So for those comments, scan
until trailing ")" and ignore content inside.
I'm not sure if there are other corner cases not handled. A
better fix would be to redesign the regexp parser so that it
unescaped during parsing instead of before parsing, so you already
know the current parsing state.
Fixes [Bug #18294]
Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2022-06-06 23:50:03 +03:00
|
|
|
}
|
2023-02-26 07:20:43 +03:00
|
|
|
}
|
|
|
|
else if (!in_char_class && recurse) {
|
Fix parsing of regexps that toggle extended mode on/off inside regexp
This was broken in ec3542229b29ec93062e9d90e877ea29d3c19472. That commit
didn't handle cases where extended mode was turned on/off inside the
regexp. There are two ways to turn extended mode on/off:
```
/(?-x:#y)#z
/x =~ '#y'
/(?-x)#y(?x)#z
/x =~ '#y'
```
These can be nested inside the same regexp:
```
/(?-x:(?x)#x
(?-x)#y)#z
/x =~ '#y'
```
As you can probably imagine, this makes handling these regexps
somewhat complex. Due to the nesting inside portions of regexps,
the unescape_nonascii function needs to be recursive. In
recursive mode, it needs to track both opening and closing
parentheses, similar to how it already tracked opening and
closing brackets for character classes.
When scanning the regexp and coming to `(?` not followed by `#`,
scan for options, and use `x` and `-` to determine whether to
turn on or off extended mode. For `:`, indicating only the
current regexp section should have the extended mode
switched, recurse with the extended mode set or unset. For `)`,
indicating the remainder of the regexp (or current regexp portion
if already recursing) should turn extended mode on or off, just
change the extended mode flag and keep scanning.
While testing this, I noticed that `a`, `d`, and `u` are accepted
as options, in addition to `i`, `m`, and `x`, but I can't see
where those options are documented. I'm not sure whether or not
handling `a`, `d`, and `u` as options is a bug.
Fixes [Bug #19379]
2023-01-27 22:08:49 +03:00
|
|
|
parens++;
|
Ignore invalid escapes in regexp comments
Invalid escapes are handled at multiple levels. The first level
is in parse.y, so skip invalid unicode escape checks for regexps
in parse.y.
Make rb_reg_preprocess and unescape_nonascii accept the regexp
options. In unescape_nonascii, if the regexp is an extended
regexp, when "#" is encountered, ignore all characters until the
end of line or end of regexp.
Unfortunately, in extended regexps, you can use "#" as a non-comment
character inside a character class, so also parse "[" and "]"
specially for extended regexps, and only skip comments if "#" is
not inside a character class. Handle nested character classes as well.
This issue doesn't just affect extended regexps, it also affects
"(?#" comments inside all regexps. So for those comments, scan
until trailing ")" and ignore content inside.
I'm not sure if there are other corner cases not handled. A
better fix would be to redesign the regexp parser so that it
unescaped during parsing instead of before parsing, so you already
know the current parsing state.
Fixes [Bug #18294]
Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2022-06-06 23:50:03 +03:00
|
|
|
}
|
Fix parsing of regexps that toggle extended mode on/off inside regexp
This was broken in ec3542229b29ec93062e9d90e877ea29d3c19472. That commit
didn't handle cases where extended mode was turned on/off inside the
regexp. There are two ways to turn extended mode on/off:
```
/(?-x:#y)#z
/x =~ '#y'
/(?-x)#y(?x)#z
/x =~ '#y'
```
These can be nested inside the same regexp:
```
/(?-x:(?x)#x
(?-x)#y)#z
/x =~ '#y'
```
As you can probably imagine, this makes handling these regexps
somewhat complex. Due to the nesting inside portions of regexps,
the unescape_nonascii function needs to be recursive. In
recursive mode, it needs to track both opening and closing
parentheses, similar to how it already tracked opening and
closing brackets for character classes.
When scanning the regexp and coming to `(?` not followed by `#`,
scan for options, and use `x` and `-` to determine whether to
turn on or off extended mode. For `:`, indicating only the
current regexp section should have the extended mode
switched, recurse with the extended mode set or unset. For `)`,
indicating the remainder of the regexp (or current regexp portion
if already recursing) should turn extended mode on or off, just
change the extended mode flag and keep scanning.
While testing this, I noticed that `a`, `d`, and `u` are accepted
as options, in addition to `i`, `m`, and `x`, but I can't see
where those options are documented. I'm not sure whether or not
handling `a`, `d`, and `u` as options is a bug.
Fixes [Bug #19379]
2023-01-27 22:08:49 +03:00
|
|
|
/* FALLTHROUGH */
|
2007-12-01 19:56:19 +03:00
|
|
|
default:
|
Fix parsing of regexps that toggle extended mode on/off inside regexp
This was broken in ec3542229b29ec93062e9d90e877ea29d3c19472. That commit
didn't handle cases where extended mode was turned on/off inside the
regexp. There are two ways to turn extended mode on/off:
```
/(?-x:#y)#z
/x =~ '#y'
/(?-x)#y(?x)#z
/x =~ '#y'
```
These can be nested inside the same regexp:
```
/(?-x:(?x)#x
(?-x)#y)#z
/x =~ '#y'
```
As you can probably imagine, this makes handling these regexps
somewhat complex. Due to the nesting inside portions of regexps,
the unescape_nonascii function needs to be recursive. In
recursive mode, it needs to track both opening and closing
parentheses, similar to how it already tracked opening and
closing brackets for character classes.
When scanning the regexp and coming to `(?` not followed by `#`,
scan for options, and use `x` and `-` to determine whether to
turn on or off extended mode. For `:`, indicating only the
current regexp section should have the extended mode
switched, recurse with the extended mode set or unset. For `)`,
indicating the remainder of the regexp (or current regexp portion
if already recursing) should turn extended mode on or off, just
change the extended mode flag and keep scanning.
While testing this, I noticed that `a`, `d`, and `u` are accepted
as options, in addition to `i`, `m`, and `x`, but I can't see
where those options are documented. I'm not sure whether or not
handling `a`, `d`, and `u` as options is a bug.
Fixes [Bug #19379]
2023-01-27 22:08:49 +03:00
|
|
|
fallthrough:
|
2018-11-21 11:51:39 +03:00
|
|
|
rb_str_buf_cat(buf, (char *)&c, 1);
|
2007-12-01 19:56:19 +03:00
|
|
|
break;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
Fix parsing of regexps that toggle extended mode on/off inside regexp
This was broken in ec3542229b29ec93062e9d90e877ea29d3c19472. That commit
didn't handle cases where extended mode was turned on/off inside the
regexp. There are two ways to turn extended mode on/off:
```
/(?-x:#y)#z
/x =~ '#y'
/(?-x)#y(?x)#z
/x =~ '#y'
```
These can be nested inside the same regexp:
```
/(?-x:(?x)#x
(?-x)#y)#z
/x =~ '#y'
```
As you can probably imagine, this makes handling these regexps
somewhat complex. Due to the nesting inside portions of regexps,
the unescape_nonascii function needs to be recursive. In
recursive mode, it needs to track both opening and closing
parentheses, similar to how it already tracked opening and
closing brackets for character classes.
When scanning the regexp and coming to `(?` not followed by `#`,
scan for options, and use `x` and `-` to determine whether to
turn on or off extended mode. For `:`, indicating only the
current regexp section should have the extended mode
switched, recurse with the extended mode set or unset. For `)`,
indicating the remainder of the regexp (or current regexp portion
if already recursing) should turn extended mode on or off, just
change the extended mode flag and keep scanning.
While testing this, I noticed that `a`, `d`, and `u` are accepted
as options, in addition to `i`, `m`, and `x`, but I can't see
where those options are documented. I'm not sure whether or not
handling `a`, `d`, and `u` as options is a bug.
Fixes [Bug #19379]
2023-01-27 22:08:49 +03:00
|
|
|
if (recurse) {
|
|
|
|
*pp = p;
|
|
|
|
}
|
2007-12-01 19:56:19 +03:00
|
|
|
return 0;
|
|
|
|
}
|
|
|
|
|
Fix parsing of regexps that toggle extended mode on/off inside regexp
This was broken in ec3542229b29ec93062e9d90e877ea29d3c19472. That commit
didn't handle cases where extended mode was turned on/off inside the
regexp. There are two ways to turn extended mode on/off:
```
/(?-x:#y)#z
/x =~ '#y'
/(?-x)#y(?x)#z
/x =~ '#y'
```
These can be nested inside the same regexp:
```
/(?-x:(?x)#x
(?-x)#y)#z
/x =~ '#y'
```
As you can probably imagine, this makes handling these regexps
somewhat complex. Due to the nesting inside portions of regexps,
the unescape_nonascii function needs to be recursive. In
recursive mode, it needs to track both opening and closing
parentheses, similar to how it already tracked opening and
closing brackets for character classes.
When scanning the regexp and coming to `(?` not followed by `#`,
scan for options, and use `x` and `-` to determine whether to
turn on or off extended mode. For `:`, indicating only the
current regexp section should have the extended mode
switched, recurse with the extended mode set or unset. For `)`,
indicating the remainder of the regexp (or current regexp portion
if already recursing) should turn extended mode on or off, just
change the extended mode flag and keep scanning.
While testing this, I noticed that `a`, `d`, and `u` are accepted
as options, in addition to `i`, `m`, and `x`, but I can't see
where those options are documented. I'm not sure whether or not
handling `a`, `d`, and `u` as options is a bug.
Fixes [Bug #19379]
2023-01-27 22:08:49 +03:00
|
|
|
static int
|
|
|
|
unescape_nonascii(const char *p, const char *end, rb_encoding *enc,
|
|
|
|
VALUE buf, rb_encoding **encp, int *has_property,
|
|
|
|
onig_errmsg_buffer err, int options)
|
|
|
|
{
|
|
|
|
return unescape_nonascii0(&p, end, enc, buf, encp, has_property,
|
|
|
|
err, options, 0);
|
|
|
|
}
|
|
|
|
|
2007-12-01 19:56:19 +03:00
|
|
|
static VALUE
|
2014-06-03 00:23:47 +04:00
|
|
|
rb_reg_preprocess(const char *p, const char *end, rb_encoding *enc,
|
Ignore invalid escapes in regexp comments
Invalid escapes are handled at multiple levels. The first level
is in parse.y, so skip invalid unicode escape checks for regexps
in parse.y.
Make rb_reg_preprocess and unescape_nonascii accept the regexp
options. In unescape_nonascii, if the regexp is an extended
regexp, when "#" is encountered, ignore all characters until the
end of line or end of regexp.
Unfortunately, in extended regexps, you can use "#" as a non-comment
character inside a character class, so also parse "[" and "]"
specially for extended regexps, and only skip comments if "#" is
not inside a character class. Handle nested character classes as well.
This issue doesn't just affect extended regexps, it also affects
"(#?" comments inside all regexps. So for those comments, scan
until trailing ")" and ignore content inside.
I'm not sure if there are other corner cases not handled. A
better fix would be to redesign the regexp parser so that it
unescaped during parsing instead of before parsing, so you already
know the current parsing state.
Fixes [Bug #18294]
Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2022-06-06 23:50:03 +03:00
|
|
|
rb_encoding **fixed_enc, onig_errmsg_buffer err, int options)
|
2007-12-01 19:56:19 +03:00
|
|
|
{
|
|
|
|
VALUE buf;
|
2008-07-05 03:50:33 +04:00
|
|
|
int has_property = 0;
|
2007-12-01 19:56:19 +03:00
|
|
|
|
|
|
|
buf = rb_str_buf_new(0);
|
|
|
|
|
2008-01-27 00:01:52 +03:00
|
|
|
if (rb_enc_asciicompat(enc))
|
|
|
|
*fixed_enc = 0;
|
|
|
|
else {
|
|
|
|
*fixed_enc = enc;
|
|
|
|
rb_enc_associate(buf, enc);
|
|
|
|
}
|
|
|
|
|
Ignore invalid escapes in regexp comments
Invalid escapes are handled at multiple levels. The first level
is in parse.y, so skip invalid unicode escape checks for regexps
in parse.y.
Make rb_reg_preprocess and unescape_nonascii accept the regexp
options. In unescape_nonascii, if the regexp is an extended
regexp, when "#" is encountered, ignore all characters until the
end of line or end of regexp.
Unfortunately, in extended regexps, you can use "#" as a non-comment
character inside a character class, so also parse "[" and "]"
specially for extended regexps, and only skip comments if "#" is
not inside a character class. Handle nested character classes as well.
This issue doesn't just affect extended regexps, it also affects
"(#?" comments inside all regexps. So for those comments, scan
until trailing ")" and ignore content inside.
I'm not sure if there are other corner cases not handled. A
better fix would be to redesign the regexp parser so that it
unescaped during parsing instead of before parsing, so you already
know the current parsing state.
Fixes [Bug #18294]
Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2022-06-06 23:50:03 +03:00
|
|
|
if (unescape_nonascii(p, end, enc, buf, fixed_enc, &has_property, err, options) != 0)
|
2007-12-01 19:56:19 +03:00
|
|
|
return Qnil;
|
|
|
|
|
2008-07-05 03:50:33 +04:00
|
|
|
if (has_property && !*fixed_enc) {
|
|
|
|
*fixed_enc = enc;
|
|
|
|
}
|
|
|
|
|
2008-01-07 07:55:26 +03:00
|
|
|
if (*fixed_enc) {
|
2007-12-01 19:56:19 +03:00
|
|
|
rb_enc_associate(buf, *fixed_enc);
|
|
|
|
}
|
|
|
|
|
|
|
|
return buf;
|
|
|
|
}
|
|
|
|
|
2007-12-08 10:21:05 +03:00
|
|
|
VALUE
|
|
|
|
rb_reg_check_preprocess(VALUE str)
|
|
|
|
{
|
2014-06-03 00:23:47 +04:00
|
|
|
rb_encoding *fixed_enc = 0;
|
2008-01-14 07:51:10 +03:00
|
|
|
onig_errmsg_buffer err = "";
|
2007-12-08 10:21:05 +03:00
|
|
|
VALUE buf;
|
|
|
|
char *p, *end;
|
2014-06-03 00:23:47 +04:00
|
|
|
rb_encoding *enc;
|
2007-12-08 10:21:05 +03:00
|
|
|
|
|
|
|
StringValue(str);
|
|
|
|
p = RSTRING_PTR(str);
|
|
|
|
end = p + RSTRING_LEN(str);
|
|
|
|
enc = rb_enc_get(str);
|
|
|
|
|
Ignore invalid escapes in regexp comments
Invalid escapes are handled at multiple levels. The first level
is in parse.y, so skip invalid unicode escape checks for regexps
in parse.y.
Make rb_reg_preprocess and unescape_nonascii accept the regexp
options. In unescape_nonascii, if the regexp is an extended
regexp, when "#" is encountered, ignore all characters until the
end of line or end of regexp.
Unfortunately, in extended regexps, you can use "#" as a non-comment
character inside a character class, so also parse "[" and "]"
specially for extended regexps, and only skip comments if "#" is
not inside a character class. Handle nested character classes as well.
This issue doesn't just affect extended regexps, it also affects
"(#?" comments inside all regexps. So for those comments, scan
until trailing ")" and ignore content inside.
I'm not sure if there are other corner cases not handled. A
better fix would be to redesign the regexp parser so that it
unescaped during parsing instead of before parsing, so you already
know the current parsing state.
Fixes [Bug #18294]
Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2022-06-06 23:50:03 +03:00
|
|
|
buf = rb_reg_preprocess(p, end, enc, &fixed_enc, err, 0);
|
2007-12-08 10:21:05 +03:00
|
|
|
RB_GC_GUARD(str);
|
|
|
|
|
2021-10-03 16:34:45 +03:00
|
|
|
if (NIL_P(buf)) {
|
2007-12-08 10:21:05 +03:00
|
|
|
return rb_reg_error_desc(str, 0, err);
|
|
|
|
}
|
|
|
|
return Qnil;
|
|
|
|
}
|
|
|
|
|
2007-12-01 19:56:19 +03:00
|
|
|
static VALUE
|
2009-08-05 05:38:36 +04:00
|
|
|
rb_reg_preprocess_dregexp(VALUE ary, int options)
|
2007-12-01 19:56:19 +03:00
|
|
|
{
|
2014-06-03 00:23:47 +04:00
|
|
|
rb_encoding *fixed_enc = 0;
|
|
|
|
rb_encoding *regexp_enc = 0;
|
2008-01-29 11:03:51 +03:00
|
|
|
onig_errmsg_buffer err = "";
|
|
|
|
int i;
|
|
|
|
VALUE result = 0;
|
2014-06-03 00:23:47 +04:00
|
|
|
rb_encoding *ascii8bit = rb_ascii8bit_encoding();
|
2007-12-01 19:56:19 +03:00
|
|
|
|
2008-09-08 13:14:59 +04:00
|
|
|
if (RARRAY_LEN(ary) == 0) {
|
2008-01-29 11:03:51 +03:00
|
|
|
rb_raise(rb_eArgError, "no arguments given");
|
|
|
|
}
|
2007-12-01 19:56:19 +03:00
|
|
|
|
2008-09-08 13:14:59 +04:00
|
|
|
for (i = 0; i < RARRAY_LEN(ary); i++) {
|
2013-05-13 13:56:22 +04:00
|
|
|
VALUE str = RARRAY_AREF(ary, i);
|
2008-01-29 11:03:51 +03:00
|
|
|
VALUE buf;
|
|
|
|
char *p, *end;
|
2014-06-03 00:23:47 +04:00
|
|
|
rb_encoding *src_enc;
|
2007-12-01 19:56:19 +03:00
|
|
|
|
2009-08-05 05:38:36 +04:00
|
|
|
src_enc = rb_enc_get(str);
|
|
|
|
if (options & ARG_ENCODING_NONE &&
|
2009-08-27 10:10:30 +04:00
|
|
|
src_enc != ascii8bit) {
|
2016-05-02 15:04:04 +03:00
|
|
|
if (str_coderange(str) != ENC_CODERANGE_7BIT)
|
2009-08-27 10:10:30 +04:00
|
|
|
rb_raise(rb_eRegexpError, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
|
|
|
|
else
|
|
|
|
src_enc = ascii8bit;
|
2009-08-05 05:38:36 +04:00
|
|
|
}
|
|
|
|
|
2008-01-29 11:03:51 +03:00
|
|
|
StringValue(str);
|
|
|
|
p = RSTRING_PTR(str);
|
|
|
|
end = p + RSTRING_LEN(str);
|
|
|
|
|
Ignore invalid escapes in regexp comments
Invalid escapes are handled at multiple levels. The first level
is in parse.y, so skip invalid unicode escape checks for regexps
in parse.y.
Make rb_reg_preprocess and unescape_nonascii accept the regexp
options. In unescape_nonascii, if the regexp is an extended
regexp, when "#" is encountered, ignore all characters until the
end of line or end of regexp.
Unfortunately, in extended regexps, you can use "#" as a non-comment
character inside a character class, so also parse "[" and "]"
specially for extended regexps, and only skip comments if "#" is
not inside a character class. Handle nested character classes as well.
This issue doesn't just affect extended regexps, it also affects
"(#?" comments inside all regexps. So for those comments, scan
until trailing ")" and ignore content inside.
I'm not sure if there are other corner cases not handled. A
better fix would be to redesign the regexp parser so that it
unescaped during parsing instead of before parsing, so you already
know the current parsing state.
Fixes [Bug #18294]
Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2022-06-06 23:50:03 +03:00
|
|
|
buf = rb_reg_preprocess(p, end, src_enc, &fixed_enc, err, options);
|
2008-01-29 11:03:51 +03:00
|
|
|
|
2021-10-03 16:34:45 +03:00
|
|
|
if (NIL_P(buf))
|
2008-01-29 11:03:51 +03:00
|
|
|
rb_raise(rb_eArgError, "%s", err);
|
|
|
|
|
2008-02-14 06:34:12 +03:00
|
|
|
if (fixed_enc != 0) {
|
|
|
|
if (regexp_enc != 0 && regexp_enc != fixed_enc) {
|
2009-08-14 13:05:44 +04:00
|
|
|
rb_raise(rb_eRegexpError, "encoding mismatch in dynamic regexp : %s and %s",
|
2008-02-14 06:34:12 +03:00
|
|
|
rb_enc_name(regexp_enc), rb_enc_name(fixed_enc));
|
2008-01-29 11:03:51 +03:00
|
|
|
}
|
2008-02-14 06:34:12 +03:00
|
|
|
regexp_enc = fixed_enc;
|
2008-01-29 11:03:51 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (!result)
|
2008-02-14 06:34:12 +03:00
|
|
|
result = rb_str_new3(str);
|
2008-01-29 11:03:51 +03:00
|
|
|
else
|
2008-02-14 06:34:12 +03:00
|
|
|
rb_str_buf_append(result, str);
|
|
|
|
}
|
|
|
|
if (regexp_enc) {
|
|
|
|
rb_enc_associate(result, regexp_enc);
|
2008-01-29 11:03:51 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return result;
|
2007-12-01 19:56:19 +03:00
|
|
|
}
|
|
|
|
|
2023-06-09 10:10:30 +03:00
|
|
|
static void
|
|
|
|
rb_reg_initialize_check(VALUE obj)
|
|
|
|
{
|
|
|
|
rb_check_frozen(obj);
|
|
|
|
if (RREGEXP_PTR(obj)) {
|
|
|
|
rb_raise(rb_eTypeError, "already initialized regexp");
|
|
|
|
}
|
|
|
|
}
|
|
|
|
|
* include/ruby/{intern,ruby}.h, compile.[ch], error.c, eval.c,
eval_load.c, gc.c, iseq.c, main.c, parse.y, re.c, ruby.c,
yarvcore.[ch] (ruby_eval_tree, ruby_sourcefile, ruby_sourceline,
ruby_nerrs): purge global variables.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12700 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-07-05 12:12:18 +04:00
|
|
|
static int
|
2014-06-03 00:23:47 +04:00
|
|
|
rb_reg_initialize(VALUE obj, const char *s, long len, rb_encoding *enc,
|
2009-08-30 12:00:31 +04:00
|
|
|
int options, onig_errmsg_buffer err,
|
|
|
|
const char *sourcefile, int sourceline)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
2000-05-22 11:09:55 +04:00
|
|
|
struct RRegexp *re = RREGEXP(obj);
|
2007-12-01 19:56:19 +03:00
|
|
|
VALUE unescaped;
|
2014-06-03 00:23:47 +04:00
|
|
|
rb_encoding *fixed_enc = 0;
|
|
|
|
rb_encoding *a_enc = rb_ascii8bit_encoding();
|
2000-05-22 11:09:55 +04:00
|
|
|
|
2023-06-09 10:10:30 +03:00
|
|
|
rb_reg_initialize_check(obj);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2008-12-10 05:29:05 +03:00
|
|
|
if (rb_enc_dummy_p(enc)) {
|
2011-11-20 18:05:44 +04:00
|
|
|
errcpy(err, "can't make regexp with dummy encoding");
|
|
|
|
return -1;
|
2008-12-10 05:29:05 +03:00
|
|
|
}
|
|
|
|
|
Ignore invalid escapes in regexp comments
Invalid escapes are handled at multiple levels. The first level
is in parse.y, so skip invalid unicode escape checks for regexps
in parse.y.
Make rb_reg_preprocess and unescape_nonascii accept the regexp
options. In unescape_nonascii, if the regexp is an extended
regexp, when "#" is encountered, ignore all characters until the
end of line or end of regexp.
Unfortunately, in extended regexps, you can use "#" as a non-comment
character inside a character class, so also parse "[" and "]"
specially for extended regexps, and only skip comments if "#" is
not inside a character class. Handle nested character classes as well.
This issue doesn't just affect extended regexps, it also affects
"(#?" comments inside all regexps. So for those comments, scan
until trailing ")" and ignore content inside.
I'm not sure if there are other corner cases not handled. A
better fix would be to redesign the regexp parser so that it
unescaped during parsing instead of before parsing, so you already
know the current parsing state.
Fixes [Bug #18294]
Co-authored-by: Nobuyoshi Nakada <nobu@ruby-lang.org>
2022-06-06 23:50:03 +03:00
|
|
|
unescaped = rb_reg_preprocess(s, s+len, enc, &fixed_enc, err, options);
|
2021-10-03 16:34:45 +03:00
|
|
|
if (NIL_P(unescaped))
|
2007-12-01 19:56:19 +03:00
|
|
|
return -1;
|
|
|
|
|
2007-12-13 19:09:53 +03:00
|
|
|
if (fixed_enc) {
|
2007-12-21 19:39:36 +03:00
|
|
|
if ((fixed_enc != enc && (options & ARG_ENCODING_FIXED)) ||
|
2007-12-21 21:55:30 +03:00
|
|
|
(fixed_enc != a_enc && (options & ARG_ENCODING_NONE))) {
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "incompatible character encoding");
|
2007-12-13 19:09:53 +03:00
|
|
|
return -1;
|
|
|
|
}
|
2007-12-21 21:55:30 +03:00
|
|
|
if (fixed_enc != a_enc) {
|
2007-12-13 19:09:53 +03:00
|
|
|
options |= ARG_ENCODING_FIXED;
|
|
|
|
enc = fixed_enc;
|
|
|
|
}
|
|
|
|
}
|
2010-12-02 22:19:44 +03:00
|
|
|
else if (!(options & ARG_ENCODING_FIXED)) {
|
|
|
|
enc = rb_usascii_encoding();
|
|
|
|
}
|
2007-12-21 19:39:36 +03:00
|
|
|
|
2007-10-19 11:41:03 +04:00
|
|
|
rb_enc_associate((VALUE)re, enc);
|
2007-12-01 19:56:19 +03:00
|
|
|
if ((options & ARG_ENCODING_FIXED) || fixed_enc) {
|
2007-10-16 09:48:40 +04:00
|
|
|
re->basic.flags |= KCODE_FIXED;
|
* encoding.c: provide basic features for M17N.
* parse.y: encoding aware parsing.
* parse.y (pragma_encoding): encoding specification pragma.
* parse.y (rb_intern3): encoding specified symbols.
* string.c (rb_str_length): length based on characters.
for older behavior, bytesize method added.
* string.c (rb_str_index_m): index based on characters. rindex as
well.
* string.c (succ_char): encoding aware succeeding string.
* string.c (rb_str_reverse): reverse based on characters.
* string.c (rb_str_inspect): encoding aware string description.
* string.c (rb_str_upcase_bang): encoding aware case conversion.
downcase, capitalize, swapcase as well.
* string.c (rb_str_tr_bang): tr based on characters. delete,
squeeze, tr_s, count as well.
* string.c (rb_str_split_m): split based on characters.
* string.c (rb_str_each_line): encoding aware each_line.
* string.c (rb_str_each_char): added. iteration based on
characters.
* string.c (rb_str_strip_bang): encoding aware whitespace
stripping. lstrip, rstrip as well.
* string.c (rb_str_justify): encoding aware justifying (ljust,
rjust, center).
* string.c (str_encoding): get encoding attribute from a string.
* re.c (rb_reg_initialize): encoding aware regular expression
* sprintf.c (rb_str_format): formatting (i.e. length count) based
on characters.
* io.c (rb_io_getc): getc to return one-character string.
for older behavior, getbyte method added.
* ext/stringio/stringio.c (strio_getc): ditto.
* io.c (rb_io_ungetc): allow pushing arbitrary string at the
current reading point.
* ext/stringio/stringio.c (strio_ungetc): ditto.
* ext/strscan/strscan.c: encoding support.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13261 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-08-25 07:29:39 +04:00
|
|
|
}
|
2007-12-21 19:39:36 +03:00
|
|
|
if (options & ARG_ENCODING_NONE) {
|
|
|
|
re->basic.flags |= REG_ENCODING_NONE;
|
|
|
|
}
|
2009-02-22 17:23:33 +03:00
|
|
|
|
2007-12-01 19:56:19 +03:00
|
|
|
re->ptr = make_regexp(RSTRING_PTR(unescaped), RSTRING_LEN(unescaped), enc,
|
2009-08-30 12:00:31 +04:00
|
|
|
options & ARG_REG_OPTION_MASK, err,
|
|
|
|
sourcefile, sourceline);
|
* include/ruby/{intern,ruby}.h, compile.[ch], error.c, eval.c,
eval_load.c, gc.c, iseq.c, main.c, parse.y, re.c, ruby.c,
yarvcore.[ch] (ruby_eval_tree, ruby_sourcefile, ruby_sourceline,
ruby_nerrs): purge global variables.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12700 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-07-05 12:12:18 +04:00
|
|
|
if (!re->ptr) return -1;
|
2007-12-01 19:56:19 +03:00
|
|
|
RB_GC_GUARD(unescaped);
|
* include/ruby/{intern,ruby}.h, compile.[ch], error.c, eval.c,
eval_load.c, gc.c, iseq.c, main.c, parse.y, re.c, ruby.c,
yarvcore.[ch] (ruby_eval_tree, ruby_sourcefile, ruby_sourceline,
ruby_nerrs): purge global variables.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12700 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-07-05 12:12:18 +04:00
|
|
|
return 0;
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
2016-02-04 11:38:06 +03:00
|
|
|
static void
|
|
|
|
reg_set_source(VALUE reg, VALUE str, rb_encoding *enc)
|
|
|
|
{
|
|
|
|
rb_encoding *regenc = rb_enc_get(reg);
|
|
|
|
if (regenc != enc) {
|
|
|
|
str = rb_enc_associate(rb_str_dup(str), enc = regenc);
|
|
|
|
}
|
|
|
|
RB_OBJ_WRITE(reg, &RREGEXP(reg)->src, rb_fstring(str));
|
|
|
|
}
|
|
|
|
|
* encoding.c: provide basic features for M17N.
* parse.y: encoding aware parsing.
* parse.y (pragma_encoding): encoding specification pragma.
* parse.y (rb_intern3): encoding specified symbols.
* string.c (rb_str_length): length based on characters.
for older behavior, bytesize method added.
* string.c (rb_str_index_m): index based on characters. rindex as
well.
* string.c (succ_char): encoding aware succeeding string.
* string.c (rb_str_reverse): reverse based on characters.
* string.c (rb_str_inspect): encoding aware string description.
* string.c (rb_str_upcase_bang): encoding aware case conversion.
downcase, capitalize, swapcase as well.
* string.c (rb_str_tr_bang): tr based on characters. delete,
squeeze, tr_s, count as well.
* string.c (rb_str_split_m): split based on characters.
* string.c (rb_str_each_line): encoding aware each_line.
* string.c (rb_str_each_char): added. iteration based on
characters.
* string.c (rb_str_strip_bang): encoding aware whitespace
stripping. lstrip, rstrip as well.
* string.c (rb_str_justify): encoding aware justifying (ljust,
rjust, center).
* string.c (str_encoding): get encoding attribute from a string.
* re.c (rb_reg_initialize): encoding aware regular expression
* sprintf.c (rb_str_format): formatting (i.e. length count) based
on characters.
* io.c (rb_io_getc): getc to return one-character string.
for older behavior, getbyte method added.
* ext/stringio/stringio.c (strio_getc): ditto.
* io.c (rb_io_ungetc): allow pushing arbitrary string at the
current reading point.
* ext/stringio/stringio.c (strio_ungetc): ditto.
* ext/strscan/strscan.c: encoding support.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13261 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-08-25 07:29:39 +04:00
|
|
|
static int
|
2009-08-30 12:00:31 +04:00
|
|
|
rb_reg_initialize_str(VALUE obj, VALUE str, int options, onig_errmsg_buffer err,
|
|
|
|
const char *sourcefile, int sourceline)
|
* encoding.c: provide basic features for M17N.
* parse.y: encoding aware parsing.
* parse.y (pragma_encoding): encoding specification pragma.
* parse.y (rb_intern3): encoding specified symbols.
* string.c (rb_str_length): length based on characters.
for older behavior, bytesize method added.
* string.c (rb_str_index_m): index based on characters. rindex as
well.
* string.c (succ_char): encoding aware succeeding string.
* string.c (rb_str_reverse): reverse based on characters.
* string.c (rb_str_inspect): encoding aware string description.
* string.c (rb_str_upcase_bang): encoding aware case conversion.
downcase, capitalize, swapcase as well.
* string.c (rb_str_tr_bang): tr based on characters. delete,
squeeze, tr_s, count as well.
* string.c (rb_str_split_m): split based on characters.
* string.c (rb_str_each_line): encoding aware each_line.
* string.c (rb_str_each_char): added. iteration based on
characters.
* string.c (rb_str_strip_bang): encoding aware whitespace
stripping. lstrip, rstrip as well.
* string.c (rb_str_justify): encoding aware justifying (ljust,
rjust, center).
* string.c (str_encoding): get encoding attribute from a string.
* re.c (rb_reg_initialize): encoding aware regular expression
* sprintf.c (rb_str_format): formatting (i.e. length count) based
on characters.
* io.c (rb_io_getc): getc to return one-character string.
for older behavior, getbyte method added.
* ext/stringio/stringio.c (strio_getc): ditto.
* io.c (rb_io_ungetc): allow pushing arbitrary string at the
current reading point.
* ext/stringio/stringio.c (strio_ungetc): ditto.
* ext/strscan/strscan.c: encoding support.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13261 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-08-25 07:29:39 +04:00
|
|
|
{
|
2008-01-05 19:39:38 +03:00
|
|
|
int ret;
|
2016-02-04 11:38:06 +03:00
|
|
|
rb_encoding *str_enc = rb_enc_get(str), *enc = str_enc;
|
2008-01-06 15:15:48 +03:00
|
|
|
if (options & ARG_ENCODING_NONE) {
|
|
|
|
rb_encoding *ascii8bit = rb_ascii8bit_encoding();
|
|
|
|
if (enc != ascii8bit) {
|
2016-05-02 15:04:04 +03:00
|
|
|
if (str_coderange(str) != ENC_CODERANGE_7BIT) {
|
2008-12-16 13:44:36 +03:00
|
|
|
errcpy(err, "/.../n has a non escaped non ASCII character in non ASCII-8BIT script");
|
2008-01-06 15:15:48 +03:00
|
|
|
return -1;
|
|
|
|
}
|
|
|
|
enc = ascii8bit;
|
|
|
|
}
|
|
|
|
}
|
2008-01-05 19:39:38 +03:00
|
|
|
ret = rb_reg_initialize(obj, RSTRING_PTR(str), RSTRING_LEN(str), enc,
|
2009-08-30 12:00:31 +04:00
|
|
|
options, err, sourcefile, sourceline);
|
2016-02-04 11:38:06 +03:00
|
|
|
if (ret == 0) reg_set_source(obj, str, str_enc);
|
2008-01-05 19:39:38 +03:00
|
|
|
return ret;
|
* encoding.c: provide basic features for M17N.
* parse.y: encoding aware parsing.
* parse.y (pragma_encoding): encoding specification pragma.
* parse.y (rb_intern3): encoding specified symbols.
* string.c (rb_str_length): length based on characters.
for older behavior, bytesize method added.
* string.c (rb_str_index_m): index based on characters. rindex as
well.
* string.c (succ_char): encoding aware succeeding string.
* string.c (rb_str_reverse): reverse based on characters.
* string.c (rb_str_inspect): encoding aware string description.
* string.c (rb_str_upcase_bang): encoding aware case conversion.
downcase, capitalize, swapcase as well.
* string.c (rb_str_tr_bang): tr based on characters. delete,
squeeze, tr_s, count as well.
* string.c (rb_str_split_m): split based on characters.
* string.c (rb_str_each_line): encoding aware each_line.
* string.c (rb_str_each_char): added. iteration based on
characters.
* string.c (rb_str_strip_bang): encoding aware whitespace
stripping. lstrip, rstrip as well.
* string.c (rb_str_justify): encoding aware justifying (ljust,
rjust, center).
* string.c (str_encoding): get encoding attribute from a string.
* re.c (rb_reg_initialize): encoding aware regular expression
* sprintf.c (rb_str_format): formatting (i.e. length count) based
on characters.
* io.c (rb_io_getc): getc to return one-character string.
for older behavior, getbyte method added.
* ext/stringio/stringio.c (strio_getc): ditto.
* io.c (rb_io_ungetc): allow pushing arbitrary string at the
current reading point.
* ext/stringio/stringio.c (strio_ungetc): ditto.
* ext/strscan/strscan.c: encoding support.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13261 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-08-25 07:29:39 +04:00
|
|
|
}
|
|
|
|
|
2001-10-05 10:30:42 +04:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_s_alloc(VALUE klass)
|
2001-10-05 10:30:42 +04:00
|
|
|
{
|
2023-02-17 18:51:16 +03:00
|
|
|
NEWOBJ_OF(re, struct RRegexp, klass, T_REGEXP | (RGENGC_WB_PROTECTED_REGEXP ? FL_WB_PROTECTED : 0), sizeof(struct RRegexp), 0);
|
2001-10-05 10:30:42 +04:00
|
|
|
|
|
|
|
re->ptr = 0;
|
* include/ruby/ruby.h: rename OBJ_WRITE and OBJ_WRITTEN into
RB_OBJ_WRITE and RB_OBJ_WRITTEN.
* array.c, class.c, compile.c, hash.c, internal.h, iseq.c,
proc.c, process.c, re.c, string.c, variable.c, vm.c,
vm_eval.c, vm_insnhelper.c, vm_insnhelper.h,
vm_method.c: catch up this change.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@44299 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2013-12-20 12:07:47 +04:00
|
|
|
RB_OBJ_WRITE(re, &re->src, 0);
|
2008-06-28 16:25:45 +04:00
|
|
|
re->usecnt = 0;
|
2001-10-05 10:30:42 +04:00
|
|
|
|
|
|
|
return (VALUE)re;
|
|
|
|
}
|
|
|
|
|
2010-02-13 22:45:35 +03:00
|
|
|
VALUE
|
|
|
|
rb_reg_alloc(void)
|
|
|
|
{
|
|
|
|
return rb_reg_s_alloc(rb_cRegexp);
|
|
|
|
}
|
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
VALUE
|
2008-01-04 19:30:33 +03:00
|
|
|
rb_reg_new_str(VALUE s, int options)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
2010-02-13 22:45:35 +03:00
|
|
|
return rb_reg_init_str(rb_reg_alloc(), s, options);
|
|
|
|
}
|
|
|
|
|
|
|
|
VALUE
|
|
|
|
rb_reg_init_str(VALUE re, VALUE s, int options)
|
|
|
|
{
|
2008-01-14 07:51:10 +03:00
|
|
|
onig_errmsg_buffer err = "";
|
2000-05-22 11:09:55 +04:00
|
|
|
|
2009-08-30 12:00:31 +04:00
|
|
|
if (rb_reg_initialize_str(re, s, options, err, NULL, 0) != 0) {
|
2007-08-25 11:06:47 +04:00
|
|
|
rb_reg_raise_str(s, options, err);
|
* include/ruby/{intern,ruby}.h, compile.[ch], error.c, eval.c,
eval_load.c, gc.c, iseq.c, main.c, parse.y, re.c, ruby.c,
yarvcore.[ch] (ruby_eval_tree, ruby_sourcefile, ruby_sourceline,
ruby_nerrs): purge global variables.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12700 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-07-05 12:12:18 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
return re;
|
2004-09-24 09:53:43 +04:00
|
|
|
}
|
|
|
|
|
2016-02-04 11:38:06 +03:00
|
|
|
static VALUE
|
|
|
|
rb_reg_init_str_enc(VALUE re, VALUE s, rb_encoding *enc, int options)
|
|
|
|
{
|
|
|
|
onig_errmsg_buffer err = "";
|
|
|
|
|
|
|
|
if (rb_reg_initialize(re, RSTRING_PTR(s), RSTRING_LEN(s),
|
|
|
|
enc, options, err, NULL, 0) != 0) {
|
|
|
|
rb_reg_raise_str(s, options, err);
|
|
|
|
}
|
|
|
|
reg_set_source(re, s, enc);
|
|
|
|
|
|
|
|
return re;
|
|
|
|
}
|
|
|
|
|
2023-03-07 08:34:31 +03:00
|
|
|
VALUE
|
2008-01-29 11:03:51 +03:00
|
|
|
rb_reg_new_ary(VALUE ary, int opt)
|
|
|
|
{
|
2020-10-20 09:16:21 +03:00
|
|
|
VALUE re = rb_reg_new_str(rb_reg_preprocess_dregexp(ary, opt), opt);
|
|
|
|
rb_obj_freeze(re);
|
|
|
|
return re;
|
2008-01-29 11:03:51 +03:00
|
|
|
}
|
|
|
|
|
2008-01-04 19:30:33 +03:00
|
|
|
VALUE
|
2014-06-03 00:23:47 +04:00
|
|
|
rb_enc_reg_new(const char *s, long len, rb_encoding *enc, int options)
|
2008-01-04 19:30:33 +03:00
|
|
|
{
|
2010-02-13 22:45:35 +03:00
|
|
|
VALUE re = rb_reg_alloc();
|
2008-01-14 07:51:10 +03:00
|
|
|
onig_errmsg_buffer err = "";
|
2008-01-04 19:30:33 +03:00
|
|
|
|
2009-08-30 12:00:31 +04:00
|
|
|
if (rb_reg_initialize(re, s, len, enc, options, err, NULL, 0) != 0) {
|
2008-01-04 19:30:33 +03:00
|
|
|
rb_enc_reg_raise(s, len, enc, options, err);
|
|
|
|
}
|
2016-02-04 11:38:06 +03:00
|
|
|
RB_OBJ_WRITE(re, &RREGEXP(re)->src, rb_fstring(rb_enc_str_new(s, len, enc)));
|
2008-01-04 19:30:33 +03:00
|
|
|
|
|
|
|
return re;
|
|
|
|
}
|
|
|
|
|
|
|
|
VALUE
|
|
|
|
rb_reg_new(const char *s, long len, int options)
|
|
|
|
{
|
|
|
|
return rb_enc_reg_new(s, len, rb_ascii8bit_encoding(), options);
|
|
|
|
}
|
|
|
|
|
2004-09-24 09:53:43 +04:00
|
|
|
VALUE
|
2009-08-30 12:00:31 +04:00
|
|
|
rb_reg_compile(VALUE str, int options, const char *sourcefile, int sourceline)
|
2004-09-24 09:53:43 +04:00
|
|
|
{
|
2010-02-13 22:45:35 +03:00
|
|
|
VALUE re = rb_reg_alloc();
|
2008-01-14 07:51:10 +03:00
|
|
|
onig_errmsg_buffer err = "";
|
2004-09-24 09:53:43 +04:00
|
|
|
|
* encoding.c: provide basic features for M17N.
* parse.y: encoding aware parsing.
* parse.y (pragma_encoding): encoding specification pragma.
* parse.y (rb_intern3): encoding specified symbols.
* string.c (rb_str_length): length based on characters.
for older behavior, bytesize method added.
* string.c (rb_str_index_m): index based on characters. rindex as
well.
* string.c (succ_char): encoding aware succeeding string.
* string.c (rb_str_reverse): reverse based on characters.
* string.c (rb_str_inspect): encoding aware string description.
* string.c (rb_str_upcase_bang): encoding aware case conversion.
downcase, capitalize, swapcase as well.
* string.c (rb_str_tr_bang): tr based on characters. delete,
squeeze, tr_s, count as well.
* string.c (rb_str_split_m): split based on characters.
* string.c (rb_str_each_line): encoding aware each_line.
* string.c (rb_str_each_char): added. iteration based on
characters.
* string.c (rb_str_strip_bang): encoding aware whitespace
stripping. lstrip, rstrip as well.
* string.c (rb_str_justify): encoding aware justifying (ljust,
rjust, center).
* string.c (str_encoding): get encoding attribute from a string.
* re.c (rb_reg_initialize): encoding aware regular expression
* sprintf.c (rb_str_format): formatting (i.e. length count) based
on characters.
* io.c (rb_io_getc): getc to return one-character string.
for older behavior, getbyte method added.
* ext/stringio/stringio.c (strio_getc): ditto.
* io.c (rb_io_ungetc): allow pushing arbitrary string at the
current reading point.
* ext/stringio/stringio.c (strio_ungetc): ditto.
* ext/strscan/strscan.c: encoding support.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13261 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-08-25 07:29:39 +04:00
|
|
|
if (!str) str = rb_str_new(0,0);
|
2009-08-30 12:00:31 +04:00
|
|
|
if (rb_reg_initialize_str(re, str, options, err, sourcefile, sourceline) != 0) {
|
2007-08-25 11:06:47 +04:00
|
|
|
rb_set_errinfo(rb_reg_error_desc(str, options, err));
|
|
|
|
return Qnil;
|
* include/ruby/{intern,ruby}.h, compile.[ch], error.c, eval.c,
eval_load.c, gc.c, iseq.c, main.c, parse.y, re.c, ruby.c,
yarvcore.[ch] (ruby_eval_tree, ruby_sourcefile, ruby_sourceline,
ruby_nerrs): purge global variables.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12700 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-07-05 12:12:18 +04:00
|
|
|
}
|
2019-11-27 14:40:18 +03:00
|
|
|
rb_obj_freeze(re);
|
* include/ruby/{intern,ruby}.h, compile.[ch], error.c, eval.c,
eval_load.c, gc.c, iseq.c, main.c, parse.y, re.c, ruby.c,
yarvcore.[ch] (ruby_eval_tree, ruby_sourcefile, ruby_sourceline,
ruby_nerrs): purge global variables.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12700 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-07-05 12:12:18 +04:00
|
|
|
return re;
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE reg_cache;
|
|
|
|
|
|
|
|
VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_regcomp(VALUE str)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
2008-06-28 16:25:45 +04:00
|
|
|
if (reg_cache && RREGEXP_SRC_LEN(reg_cache) == RSTRING_LEN(str)
|
2007-10-16 09:48:40 +04:00
|
|
|
&& ENCODING_GET(reg_cache) == ENCODING_GET(str)
|
2008-06-28 16:25:45 +04:00
|
|
|
&& memcmp(RREGEXP_SRC_PTR(reg_cache), RSTRING_PTR(str), RSTRING_LEN(str)) == 0)
|
1998-01-16 15:13:05 +03:00
|
|
|
return reg_cache;
|
|
|
|
|
2014-02-13 13:36:40 +04:00
|
|
|
return reg_cache = rb_reg_new_str(str, 0);
|
1998-01-16 15:19:22 +03:00
|
|
|
}
|
|
|
|
|
2009-09-08 17:10:04 +04:00
|
|
|
static st_index_t reg_hash(VALUE re);
|
2003-12-30 19:38:32 +03:00
|
|
|
/*
|
2022-04-16 23:20:03 +03:00
|
|
|
* call-seq:
|
|
|
|
* hash -> integer
|
2003-12-30 19:38:32 +03:00
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* Returns the integer hash value for +self+.
|
|
|
|
*
|
|
|
|
* Related: Object#hash.
|
2014-03-14 05:27:43 +04:00
|
|
|
*
|
2003-12-30 19:38:32 +03:00
|
|
|
*/
|
|
|
|
|
2021-06-03 07:26:11 +03:00
|
|
|
VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_hash(VALUE re)
|
2002-12-12 12:17:32 +03:00
|
|
|
{
|
2009-09-08 17:11:32 +04:00
|
|
|
st_index_t hashval = reg_hash(re);
|
2016-10-04 19:25:01 +03:00
|
|
|
return ST2FIX(hashval);
|
2009-09-08 17:10:04 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
static st_index_t
|
|
|
|
reg_hash(VALUE re)
|
|
|
|
{
|
|
|
|
st_index_t hashval;
|
2002-12-12 12:17:32 +03:00
|
|
|
|
|
|
|
rb_reg_check(re);
|
2016-02-02 07:39:44 +03:00
|
|
|
hashval = RREGEXP_PTR(re)->options;
|
2009-09-08 17:10:04 +04:00
|
|
|
hashval = rb_hash_uint(hashval, rb_memhash(RREGEXP_SRC_PTR(re), RREGEXP_SRC_LEN(re)));
|
|
|
|
return rb_hash_end(hashval);
|
2002-12-12 12:17:32 +03:00
|
|
|
}
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-16 23:20:03 +03:00
|
|
|
* regexp == object -> true or false
|
|
|
|
*
|
|
|
|
* Returns +true+ if +object+ is another \Regexp whose pattern,
|
|
|
|
* flags, and encoding are the same as +self+, +false+ otherwise:
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* /foo/ == Regexp.new('foo') # => true
|
|
|
|
* /foo/ == /foo/i # => false
|
|
|
|
* /foo/ == Regexp.new('food') # => false
|
|
|
|
* /foo/ == Regexp.new("abc".force_encoding("euc-jp")) # => false
|
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
2021-06-03 07:26:11 +03:00
|
|
|
VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_equal(VALUE re1, VALUE re2)
|
1998-01-16 15:19:22 +03:00
|
|
|
{
|
1999-01-20 07:59:39 +03:00
|
|
|
if (re1 == re2) return Qtrue;
|
2011-09-29 15:07:45 +04:00
|
|
|
if (!RB_TYPE_P(re2, T_REGEXP)) return Qfalse;
|
2000-05-22 11:09:55 +04:00
|
|
|
rb_reg_check(re1); rb_reg_check(re2);
|
2007-12-09 16:35:38 +03:00
|
|
|
if (FL_TEST(re1, KCODE_FIXED) != FL_TEST(re2, KCODE_FIXED)) return Qfalse;
|
2016-02-02 07:39:44 +03:00
|
|
|
if (RREGEXP_PTR(re1)->options != RREGEXP_PTR(re2)->options) return Qfalse;
|
2008-06-28 16:25:45 +04:00
|
|
|
if (RREGEXP_SRC_LEN(re1) != RREGEXP_SRC_LEN(re2)) return Qfalse;
|
2007-10-16 09:48:40 +04:00
|
|
|
if (ENCODING_GET(re1) != ENCODING_GET(re2)) return Qfalse;
|
2021-09-15 02:11:05 +03:00
|
|
|
return RBOOL(memcmp(RREGEXP_SRC_PTR(re1), RREGEXP_SRC_PTR(re2), RREGEXP_SRC_LEN(re1)) == 0);
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
2009-09-05 03:51:44 +04:00
|
|
|
/*
|
2022-04-19 02:19:10 +03:00
|
|
|
* call-seq:
|
|
|
|
* hash -> integer
|
2009-09-05 03:51:44 +04:00
|
|
|
*
|
2022-04-19 02:19:10 +03:00
|
|
|
* Returns the integer hash value for +self+,
|
|
|
|
* based on the target string, regexp, match, and captures.
|
|
|
|
*
|
|
|
|
* See also Object#hash.
|
2014-03-14 05:27:43 +04:00
|
|
|
*
|
2009-09-05 03:51:44 +04:00
|
|
|
*/
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
match_hash(VALUE match)
|
|
|
|
{
|
|
|
|
const struct re_registers *regs;
|
2016-12-06 09:14:17 +03:00
|
|
|
st_index_t hashval;
|
2009-09-05 03:51:44 +04:00
|
|
|
|
2016-12-06 09:14:17 +03:00
|
|
|
match_check(match);
|
|
|
|
hashval = rb_hash_start(rb_str_hash(RMATCH(match)->str));
|
2016-12-20 10:32:23 +03:00
|
|
|
hashval = rb_hash_uint(hashval, reg_hash(match_regexp(match)));
|
2009-09-05 03:51:44 +04:00
|
|
|
regs = RMATCH_REGS(match);
|
|
|
|
hashval = rb_hash_uint(hashval, regs->num_regs);
|
|
|
|
hashval = rb_hash_uint(hashval, rb_memhash(regs->beg, regs->num_regs * sizeof(*regs->beg)));
|
|
|
|
hashval = rb_hash_uint(hashval, rb_memhash(regs->end, regs->num_regs * sizeof(*regs->end)));
|
|
|
|
hashval = rb_hash_end(hashval);
|
2016-10-04 19:25:01 +03:00
|
|
|
return ST2FIX(hashval);
|
2009-09-05 03:51:44 +04:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
2022-04-19 02:19:10 +03:00
|
|
|
* call-seq:
|
|
|
|
* matchdata == object -> true or false
|
|
|
|
*
|
|
|
|
* Returns +true+ if +object+ is another \MatchData object
|
|
|
|
* whose target string, regexp, match, and captures
|
|
|
|
* are the same as +self+, +false+ otherwise.
|
2009-09-05 03:51:44 +04:00
|
|
|
*/
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
match_equal(VALUE match1, VALUE match2)
|
|
|
|
{
|
|
|
|
const struct re_registers *regs1, *regs2;
|
2016-12-20 10:32:23 +03:00
|
|
|
|
2009-09-05 03:51:44 +04:00
|
|
|
if (match1 == match2) return Qtrue;
|
2011-09-29 15:07:45 +04:00
|
|
|
if (!RB_TYPE_P(match2, T_MATCH)) return Qfalse;
|
2016-12-06 09:14:17 +03:00
|
|
|
if (!RMATCH(match1)->regexp || !RMATCH(match2)->regexp) return Qfalse;
|
2009-09-05 03:51:44 +04:00
|
|
|
if (!rb_str_equal(RMATCH(match1)->str, RMATCH(match2)->str)) return Qfalse;
|
2016-12-20 10:32:23 +03:00
|
|
|
if (!rb_reg_equal(match_regexp(match1), match_regexp(match2))) return Qfalse;
|
2009-09-05 03:51:44 +04:00
|
|
|
regs1 = RMATCH_REGS(match1);
|
|
|
|
regs2 = RMATCH_REGS(match2);
|
|
|
|
if (regs1->num_regs != regs2->num_regs) return Qfalse;
|
|
|
|
if (memcmp(regs1->beg, regs2->beg, regs1->num_regs * sizeof(*regs1->beg))) return Qfalse;
|
|
|
|
if (memcmp(regs1->end, regs2->end, regs1->num_regs * sizeof(*regs1->end))) return Qfalse;
|
|
|
|
return Qtrue;
|
|
|
|
}
|
|
|
|
|
2007-02-14 07:57:25 +03:00
|
|
|
static VALUE
|
|
|
|
reg_operand(VALUE s, int check)
|
|
|
|
{
|
|
|
|
if (SYMBOL_P(s)) {
|
2015-04-14 06:31:28 +03:00
|
|
|
return rb_sym2str(s);
|
2007-02-14 07:57:25 +03:00
|
|
|
}
|
2018-09-29 20:49:33 +03:00
|
|
|
else if (RB_TYPE_P(s, T_STRING)) {
|
|
|
|
return s;
|
|
|
|
}
|
2007-02-14 07:57:25 +03:00
|
|
|
else {
|
2018-09-29 20:49:06 +03:00
|
|
|
return check ? rb_str_to_str(s) : rb_check_string_type(s);
|
2007-02-14 07:57:25 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-12-10 07:50:35 +03:00
|
|
|
static long
|
2021-08-11 23:50:59 +03:00
|
|
|
reg_match_pos(VALUE re, VALUE *strp, long pos, VALUE* set_match)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
2007-12-10 13:03:48 +03:00
|
|
|
VALUE str = *strp;
|
|
|
|
|
2004-07-17 12:02:20 +04:00
|
|
|
if (NIL_P(str)) {
|
|
|
|
rb_backref_set(Qnil);
|
2007-12-10 07:50:35 +03:00
|
|
|
return -1;
|
2004-07-17 12:02:20 +04:00
|
|
|
}
|
2009-09-05 03:49:18 +04:00
|
|
|
*strp = str = reg_operand(str, TRUE);
|
2004-07-17 10:28:10 +04:00
|
|
|
if (pos != 0) {
|
|
|
|
if (pos < 0) {
|
2007-12-19 20:02:29 +03:00
|
|
|
VALUE l = rb_str_length(str);
|
|
|
|
pos += NUM2INT(l);
|
2004-07-17 10:28:10 +04:00
|
|
|
if (pos < 0) {
|
2007-12-10 07:50:35 +03:00
|
|
|
return pos;
|
2004-07-17 10:28:10 +04:00
|
|
|
}
|
|
|
|
}
|
2009-06-30 13:06:48 +04:00
|
|
|
pos = rb_str_offset(str, pos);
|
2004-07-17 10:28:10 +04:00
|
|
|
}
|
2021-08-11 23:50:59 +03:00
|
|
|
return rb_reg_search_set_match(re, str, pos, 0, 1, set_match);
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
2004-10-20 05:38:04 +04:00
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-16 23:20:03 +03:00
|
|
|
* regexp =~ string -> integer or nil
|
|
|
|
*
|
|
|
|
* Returns the integer index (in characters) of the first match
|
|
|
|
* for +self+ and +string+, or +nil+ if none;
|
|
|
|
* also sets the
|
2023-06-20 16:28:21 +03:00
|
|
|
* {rdoc-ref:Regexp global variables}[rdoc-ref:Regexp@Global+Variables]:
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* /at/ =~ 'input data' # => 7
|
|
|
|
* $~ # => #<MatchData "at">
|
|
|
|
* /ax/ =~ 'input data' # => nil
|
|
|
|
* $~ # => nil
|
2004-10-20 05:38:04 +04:00
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* Assigns named captures to local variables of the same names
|
|
|
|
* if and only if +self+:
|
2007-12-18 14:26:24 +03:00
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* - Is a regexp literal;
|
|
|
|
* see {Regexp Literals}[rdoc-ref:literals.rdoc@Regexp+Literals].
|
|
|
|
* - Does not contain interpolations;
|
2023-06-20 16:28:21 +03:00
|
|
|
* see {Regexp interpolation}[rdoc-ref:Regexp@Interpolation+Mode].
|
2022-04-16 23:20:03 +03:00
|
|
|
* - Is at the left of the expression.
|
2007-12-18 14:26:24 +03:00
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* Example:
|
2007-12-18 14:26:24 +03:00
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ ' x = y '
|
|
|
|
* p lhs # => "x"
|
|
|
|
* p rhs # => "y"
|
2007-12-18 14:26:24 +03:00
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* Assigns +nil+ if not matched:
|
2007-12-18 14:26:24 +03:00
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ ' x = '
|
|
|
|
* p lhs # => nil
|
|
|
|
* p rhs # => nil
|
2008-10-11 13:56:07 +04:00
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* Does not make local variable assignments if +self+ is not a regexp literal:
|
2007-12-18 14:26:24 +03:00
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* r = /(?<foo>\w+)\s*=\s*(?<bar>\w+)/
|
|
|
|
* r =~ ' x = y '
|
|
|
|
* p foo # Undefined local variable
|
|
|
|
* p bar # Undefined local variable
|
2007-12-18 14:26:24 +03:00
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* The assignment does not occur if the regexp is not at the left:
|
2007-12-18 14:26:24 +03:00
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* ' x = y ' =~ /(?<foo>\w+)\s*=\s*(?<bar>\w+)/
|
|
|
|
* p foo, bar # Undefined local variables
|
2007-12-18 14:26:24 +03:00
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* A regexp interpolation, <tt>#{}</tt>, also disables
|
|
|
|
* the assignment:
|
2008-10-11 13:56:07 +04:00
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* r = /(?<foo>\w+)/
|
|
|
|
* /(?<foo>\w+)\s*=\s*#{r}/ =~ 'x = y'
|
|
|
|
* p foo # Undefined local variable
|
2008-10-11 13:56:07 +04:00
|
|
|
*
|
2004-10-20 05:38:04 +04:00
|
|
|
*/
|
|
|
|
|
2004-07-17 10:28:10 +04:00
|
|
|
VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_match(VALUE re, VALUE str)
|
2004-07-17 10:28:10 +04:00
|
|
|
{
|
2021-08-11 23:50:59 +03:00
|
|
|
long pos = reg_match_pos(re, &str, 0, NULL);
|
2007-12-10 07:50:35 +03:00
|
|
|
if (pos < 0) return Qnil;
|
|
|
|
pos = rb_str_sublen(str, pos);
|
|
|
|
return LONG2FIX(pos);
|
2004-07-17 10:28:10 +04:00
|
|
|
}
|
2003-12-26 18:58:28 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-18 18:45:29 +03:00
|
|
|
* regexp === string -> true or false
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-18 18:45:29 +03:00
|
|
|
* Returns +true+ if +self+ finds a match in +string+:
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-18 18:45:29 +03:00
|
|
|
* /^[a-z]*$/ === 'HELLO' # => false
|
|
|
|
* /^[A-Z]*$/ === 'HELLO' # => true
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-18 18:45:29 +03:00
|
|
|
* This method is called in case statements:
|
|
|
|
*
|
|
|
|
* s = 'HELLO'
|
|
|
|
* case s
|
|
|
|
* when /\A[a-z]*\z/; print "Lower case\n"
|
|
|
|
* when /\A[A-Z]*\z/; print "Upper case\n"
|
|
|
|
* else print "Mixed case\n"
|
|
|
|
* end # => "Upper case"
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
2021-06-01 10:59:33 +03:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_eqq(VALUE re, VALUE str)
|
2002-12-19 12:20:20 +03:00
|
|
|
{
|
|
|
|
long start;
|
|
|
|
|
2009-09-05 03:49:18 +04:00
|
|
|
str = reg_operand(str, FALSE);
|
2007-02-14 07:57:25 +03:00
|
|
|
if (NIL_P(str)) {
|
|
|
|
rb_backref_set(Qnil);
|
|
|
|
return Qfalse;
|
2002-12-19 12:20:20 +03:00
|
|
|
}
|
|
|
|
start = rb_reg_search(re, str, 0, 0);
|
2022-01-15 17:07:32 +03:00
|
|
|
return RBOOL(start >= 0);
|
2002-12-19 12:20:20 +03:00
|
|
|
}
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-18 18:45:29 +03:00
|
|
|
* ~ rxp -> integer or nil
|
|
|
|
*
|
|
|
|
* Equivalent to <tt><i>rxp</i> =~ $_</tt>:
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-18 18:45:29 +03:00
|
|
|
* $_ = "input data"
|
|
|
|
* ~ /at/ # => 7
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_match2(VALUE re)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
2002-08-28 12:05:23 +04:00
|
|
|
long start;
|
1999-01-20 07:59:39 +03:00
|
|
|
VALUE line = rb_lastline_get();
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2011-09-29 15:07:45 +04:00
|
|
|
if (!RB_TYPE_P(line, T_STRING)) {
|
2002-03-14 09:23:46 +03:00
|
|
|
rb_backref_set(Qnil);
|
1999-01-20 07:59:39 +03:00
|
|
|
return Qnil;
|
2002-03-14 09:23:46 +03:00
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
start = rb_reg_search(re, line, 0, 0);
|
1998-01-16 15:13:05 +03:00
|
|
|
if (start < 0) {
|
1999-01-20 07:59:39 +03:00
|
|
|
return Qnil;
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
2007-12-10 07:50:35 +03:00
|
|
|
start = rb_str_sublen(line, start);
|
2002-08-28 12:05:23 +04:00
|
|
|
return LONG2FIX(start);
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-16 23:20:03 +03:00
|
|
|
* match(string, offset = 0) -> matchdata or nil
|
|
|
|
* match(string, offset = 0) {|matchdata| ... } -> object
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* With no block given, returns the MatchData object
|
|
|
|
* that describes the match, if any, or +nil+ if none;
|
2022-08-18 17:25:05 +03:00
|
|
|
* the search begins at the given character +offset+ in +string+:
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* /abra/.match('abracadabra') # => #<MatchData "abra">
|
|
|
|
* /abra/.match('abracadabra', 4) # => #<MatchData "abra">
|
|
|
|
* /abra/.match('abracadabra', 8) # => nil
|
|
|
|
* /abra/.match('abracadabra', 800) # => nil
|
2009-02-22 17:23:33 +03:00
|
|
|
*
|
2022-08-18 17:25:05 +03:00
|
|
|
* string = "\u{5d0 5d1 5e8 5d0}cadabra"
|
|
|
|
* /abra/.match(string, 7) #=> #<MatchData "abra">
|
|
|
|
* /abra/.match(string, 8) #=> nil
|
|
|
|
* /abra/.match(string.b, 8) #=> #<MatchData "abra">
|
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* With a block given, calls the block if and only if a match is found;
|
|
|
|
* returns the block's value:
|
2009-02-22 17:23:33 +03:00
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* /abra/.match('abracadabra') {|matchdata| p matchdata }
|
|
|
|
* # => #<MatchData "abra">
|
|
|
|
* /abra/.match('abracadabra', 4) {|matchdata| p matchdata }
|
|
|
|
* # => #<MatchData "abra">
|
|
|
|
* /abra/.match('abracadabra', 8) {|matchdata| p matchdata }
|
|
|
|
* # => nil
|
|
|
|
* /abra/.match('abracadabra', 8) {|matchdata| fail 'Cannot happen' }
|
|
|
|
* # => nil
|
2009-02-22 17:23:33 +03:00
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* Output (from the first two blocks above):
|
2009-02-22 17:23:33 +03:00
|
|
|
*
|
2022-04-16 23:20:03 +03:00
|
|
|
* #<MatchData "abra">
|
|
|
|
* #<MatchData "abra">
|
|
|
|
*
|
2022-04-19 02:19:10 +03:00
|
|
|
* /(.)(.)(.)/.match("abc")[2] # => "b"
|
|
|
|
* /(.)(.)/.match("abc", 1)[2] # => "c"
|
2009-02-22 17:23:33 +03:00
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
1999-08-13 09:45:20 +04:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_match_m(int argc, VALUE *argv, VALUE re)
|
1999-08-13 09:45:20 +04:00
|
|
|
{
|
2021-08-11 23:50:59 +03:00
|
|
|
VALUE result = Qnil, str, initpos;
|
2004-07-17 10:28:10 +04:00
|
|
|
long pos;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
2004-07-17 10:28:10 +04:00
|
|
|
if (rb_scan_args(argc, argv, "11", &str, &initpos) == 2) {
|
|
|
|
pos = NUM2LONG(initpos);
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
pos = 0;
|
|
|
|
}
|
|
|
|
|
2021-08-11 23:50:59 +03:00
|
|
|
pos = reg_match_pos(re, &str, pos, &result);
|
2007-12-10 07:50:35 +03:00
|
|
|
if (pos < 0) {
|
2004-07-17 10:28:10 +04:00
|
|
|
rb_backref_set(Qnil);
|
|
|
|
return Qnil;
|
|
|
|
}
|
2000-07-14 11:18:58 +04:00
|
|
|
rb_match_busy(result);
|
2007-09-20 21:14:01 +04:00
|
|
|
if (!NIL_P(result) && rb_block_given_p()) {
|
|
|
|
return rb_yield(result);
|
|
|
|
}
|
2000-07-14 11:18:58 +04:00
|
|
|
return result;
|
1999-08-13 09:45:20 +04:00
|
|
|
}
|
|
|
|
|
2016-05-18 13:37:13 +03:00
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-16 23:20:03 +03:00
|
|
|
* match?(string) -> true or false
|
|
|
|
* match?(string, offset = 0) -> true or false
|
2016-05-18 13:37:13 +03:00
|
|
|
*
|
2021-09-01 03:24:34 +03:00
|
|
|
* Returns <code>true</code> or <code>false</code> to indicate whether the
|
2016-05-18 13:37:13 +03:00
|
|
|
* regexp is matched or not without updating $~ and other related variables.
|
|
|
|
* If the second parameter is present, it specifies the position in the string
|
|
|
|
* to begin the search.
|
|
|
|
*
|
2022-04-19 02:19:10 +03:00
|
|
|
* /R.../.match?("Ruby") # => true
|
|
|
|
* /R.../.match?("Ruby", 1) # => false
|
|
|
|
* /P.../.match?("Ruby") # => false
|
|
|
|
* $& # => nil
|
2016-05-18 13:37:13 +03:00
|
|
|
*/
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
rb_reg_match_m_p(int argc, VALUE *argv, VALUE re)
|
|
|
|
{
|
2016-12-12 05:56:12 +03:00
|
|
|
long pos = rb_check_arity(argc, 1, 2) > 1 ? NUM2LONG(argv[1]) : 0;
|
|
|
|
return rb_reg_match_p(re, argv[0], pos);
|
|
|
|
}
|
|
|
|
|
|
|
|
VALUE
|
|
|
|
rb_reg_match_p(VALUE re, VALUE str, long pos)
|
|
|
|
{
|
2019-12-03 19:26:29 +03:00
|
|
|
if (NIL_P(str)) return Qfalse;
|
2016-12-12 05:56:12 +03:00
|
|
|
str = SYMBOL_P(str) ? rb_sym2str(str) : StringValue(str);
|
|
|
|
if (pos) {
|
2016-05-18 13:37:13 +03:00
|
|
|
if (pos < 0) {
|
|
|
|
pos += NUM2LONG(rb_str_length(str));
|
2016-05-19 05:37:38 +03:00
|
|
|
if (pos < 0) return Qfalse;
|
2016-05-18 13:37:13 +03:00
|
|
|
}
|
2016-05-19 06:10:12 +03:00
|
|
|
if (pos > 0) {
|
2016-12-12 05:38:53 +03:00
|
|
|
long len = 1;
|
|
|
|
const char *beg = rb_str_subpos(str, pos, &len);
|
2016-05-19 06:10:12 +03:00
|
|
|
if (!beg) return Qfalse;
|
|
|
|
pos = beg - RSTRING_PTR(str);
|
|
|
|
}
|
2016-05-18 13:37:13 +03:00
|
|
|
}
|
2023-07-26 22:57:03 +03:00
|
|
|
|
|
|
|
struct reg_onig_search_args args = {
|
|
|
|
.pos = pos,
|
|
|
|
.range = RSTRING_LEN(str),
|
|
|
|
};
|
|
|
|
|
|
|
|
return rb_reg_onig_match(re, str, reg_onig_search, &args, NULL) == ONIG_MISMATCH ? Qfalse : Qtrue;
|
2016-05-18 13:37:13 +03:00
|
|
|
}
|
|
|
|
|
2003-12-30 19:38:32 +03:00
|
|
|
/*
|
|
|
|
* Document-method: compile
|
|
|
|
*
|
2019-03-28 06:33:35 +03:00
|
|
|
* Alias for Regexp.new
|
2003-12-30 19:38:32 +03:00
|
|
|
*/
|
|
|
|
|
2022-06-16 12:53:35 +03:00
|
|
|
static int
|
|
|
|
str_to_option(VALUE str)
|
|
|
|
{
|
|
|
|
int flag = 0;
|
|
|
|
const char *ptr;
|
|
|
|
long len;
|
|
|
|
str = rb_check_string_type(str);
|
|
|
|
if (NIL_P(str)) return -1;
|
|
|
|
RSTRING_GETMEM(str, ptr, len);
|
|
|
|
for (long i = 0; i < len; ++i) {
|
|
|
|
int f = char_to_option(ptr[i]);
|
|
|
|
if (!f) {
|
|
|
|
rb_raise(rb_eArgError, "unknown regexp option: %"PRIsVALUE, str);
|
|
|
|
}
|
|
|
|
flag |= f;
|
|
|
|
}
|
|
|
|
return flag;
|
|
|
|
}
|
|
|
|
|
2022-10-24 12:21:30 +03:00
|
|
|
static void
|
|
|
|
set_timeout(rb_hrtime_t *hrt, VALUE timeout)
|
|
|
|
{
|
|
|
|
double timeout_d = NIL_P(timeout) ? 0.0 : NUM2DBL(timeout);
|
|
|
|
if (!NIL_P(timeout) && timeout_d <= 0) {
|
|
|
|
rb_raise(rb_eArgError, "invalid timeout: %"PRIsVALUE, timeout);
|
|
|
|
}
|
|
|
|
double2hrtime(hrt, timeout_d);
|
|
|
|
}
|
|
|
|
|
2023-06-09 10:10:30 +03:00
|
|
|
static VALUE
|
|
|
|
reg_copy(VALUE copy, VALUE orig)
|
|
|
|
{
|
|
|
|
int r;
|
|
|
|
regex_t *re;
|
|
|
|
|
|
|
|
rb_reg_initialize_check(copy);
|
|
|
|
if ((r = onig_reg_copy(&re, RREGEXP_PTR(orig))) != 0) {
|
2023-06-09 15:45:58 +03:00
|
|
|
/* ONIGERR_MEMORY only */
|
|
|
|
rb_raise(rb_eRegexpError, "%s", onig_error_code_to_format(r));
|
2023-06-09 10:10:30 +03:00
|
|
|
}
|
|
|
|
RREGEXP_PTR(copy) = re;
|
|
|
|
RB_OBJ_WRITE(copy, &RREGEXP(copy)->src, RREGEXP(orig)->src);
|
|
|
|
RREGEXP_PTR(copy)->timelimit = RREGEXP_PTR(orig)->timelimit;
|
|
|
|
rb_enc_copy(copy, orig);
|
2023-12-07 06:25:29 +03:00
|
|
|
FL_SET_RAW(copy, FL_TEST_RAW(orig, KCODE_FIXED|REG_ENCODING_NONE));
|
|
|
|
|
2023-06-09 10:10:30 +03:00
|
|
|
return copy;
|
|
|
|
}
|
|
|
|
|
2022-12-21 17:17:37 +03:00
|
|
|
struct reg_init_args {
|
2022-12-22 09:59:31 +03:00
|
|
|
VALUE str;
|
|
|
|
VALUE timeout;
|
2022-12-21 17:17:37 +03:00
|
|
|
rb_encoding *enc;
|
|
|
|
int flags;
|
|
|
|
};
|
|
|
|
|
2022-12-22 09:59:31 +03:00
|
|
|
static VALUE reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args);
|
2022-12-21 17:17:37 +03:00
|
|
|
static VALUE reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags);
|
2022-12-20 23:44:11 +03:00
|
|
|
void rb_warn_deprecated_to_remove(const char *removal, const char *fmt, const char *suggest, ...);
|
2022-12-21 17:17:37 +03:00
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-12-20 23:44:11 +03:00
|
|
|
* Regexp.new(string, options = 0, timeout: nil) -> regexp
|
2022-06-19 20:32:17 +03:00
|
|
|
* Regexp.new(regexp, timeout: nil) -> regexp
|
2022-04-18 18:45:29 +03:00
|
|
|
*
|
|
|
|
* With argument +string+ given, returns a new regexp with the given string
|
|
|
|
* and options:
|
|
|
|
*
|
|
|
|
* r = Regexp.new('foo') # => /foo/
|
|
|
|
* r.source # => "foo"
|
|
|
|
* r.options # => 0
|
|
|
|
*
|
|
|
|
* Optional argument +options+ is one of the following:
|
|
|
|
*
|
2022-06-20 07:35:21 +03:00
|
|
|
* - A String of options:
|
|
|
|
*
|
|
|
|
* Regexp.new('foo', 'i') # => /foo/i
|
|
|
|
* Regexp.new('foo', 'im') # => /foo/im
|
|
|
|
*
|
2023-03-03 10:02:02 +03:00
|
|
|
* - The bit-wise OR of one or more of the constants
|
2022-12-20 23:44:11 +03:00
|
|
|
* Regexp::EXTENDED, Regexp::IGNORECASE, Regexp::MULTILINE, and
|
|
|
|
* Regexp::NOENCODING:
|
2022-04-18 18:45:29 +03:00
|
|
|
*
|
|
|
|
* Regexp.new('foo', Regexp::IGNORECASE) # => /foo/i
|
|
|
|
* Regexp.new('foo', Regexp::EXTENDED) # => /foo/x
|
|
|
|
* Regexp.new('foo', Regexp::MULTILINE) # => /foo/m
|
2022-12-20 23:44:11 +03:00
|
|
|
* Regexp.new('foo', Regexp::NOENCODING) # => /foo/n
|
2022-04-18 18:45:29 +03:00
|
|
|
* flags = Regexp::IGNORECASE | Regexp::EXTENDED | Regexp::MULTILINE
|
|
|
|
* Regexp.new('foo', flags) # => /foo/mix
|
|
|
|
*
|
|
|
|
* - +nil+ or +false+, which is ignored.
|
2023-01-16 20:02:23 +03:00
|
|
|
* - Any other truthy value, in which case the regexp will be
|
|
|
|
* case-insensitive.
|
2022-04-18 18:45:29 +03:00
|
|
|
*
|
|
|
|
* If optional keyword argument +timeout+ is given,
|
2022-06-20 11:47:44 +03:00
|
|
|
* its float value overrides the timeout interval for the class,
|
2022-04-18 18:45:29 +03:00
|
|
|
* Regexp.timeout.
|
2022-10-24 12:03:26 +03:00
|
|
|
* If +nil+ is passed as +timeout+, it uses the timeout interval
|
|
|
|
* for the class, Regexp.timeout.
|
2022-04-18 18:45:29 +03:00
|
|
|
*
|
2022-06-20 06:53:41 +03:00
|
|
|
* With argument +regexp+ given, returns a new regexp. The source,
|
|
|
|
* options, timeout are the same as +regexp+. An +options+ argument,
|
|
|
|
* if given, is ignored with a warning. The timeout can be overridden by
|
|
|
|
* +timeout+ keyword.
|
|
|
|
*
|
|
|
|
* options = Regexp::MULTILINE
|
2022-06-26 08:17:14 +03:00
|
|
|
* r = Regexp.new('foo', options, timeout: 1.1) # => /foo/m
|
2022-06-20 06:53:41 +03:00
|
|
|
* r2 = Regexp.new(r) # => /foo/m
|
|
|
|
* r2.timeout # => 1.1
|
|
|
|
* r3 = Regexp.new(r, timeout: 3.14) # => /foo/m
|
|
|
|
* r3.timeout # => 3.14
|
2022-04-18 18:45:29 +03:00
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_initialize_m(int argc, VALUE *argv, VALUE self)
|
2022-12-21 17:17:37 +03:00
|
|
|
{
|
|
|
|
struct reg_init_args args;
|
2023-06-09 10:10:30 +03:00
|
|
|
VALUE re = reg_extract_args(argc, argv, &args);
|
2022-12-21 17:17:37 +03:00
|
|
|
|
2023-06-09 10:10:30 +03:00
|
|
|
if (NIL_P(re)) {
|
2023-06-09 15:45:58 +03:00
|
|
|
reg_init_args(self, args.str, args.enc, args.flags);
|
2023-06-09 10:10:30 +03:00
|
|
|
}
|
|
|
|
else {
|
2023-06-09 15:45:58 +03:00
|
|
|
reg_copy(self, re);
|
2023-06-09 10:10:30 +03:00
|
|
|
}
|
2022-12-21 17:17:37 +03:00
|
|
|
|
|
|
|
set_timeout(&RREGEXP_PTR(self)->timelimit, args.timeout);
|
|
|
|
|
|
|
|
return self;
|
|
|
|
}
|
|
|
|
|
2022-12-22 09:59:31 +03:00
|
|
|
static VALUE
|
2022-12-21 17:17:37 +03:00
|
|
|
reg_extract_args(int argc, VALUE *argv, struct reg_init_args *args)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
2001-02-02 14:38:20 +03:00
|
|
|
int flags = 0;
|
2016-02-04 11:38:06 +03:00
|
|
|
rb_encoding *enc = 0;
|
2022-12-28 19:08:12 +03:00
|
|
|
VALUE str, src, opts = Qundef, kwargs;
|
2022-12-22 09:59:31 +03:00
|
|
|
VALUE re = Qnil;
|
2022-03-24 11:00:51 +03:00
|
|
|
|
2022-12-28 19:08:12 +03:00
|
|
|
rb_scan_args(argc, argv, "11:", &src, &opts, &kwargs);
|
2007-10-16 09:48:40 +04:00
|
|
|
|
2022-12-21 17:17:37 +03:00
|
|
|
args->timeout = Qnil;
|
2022-03-24 11:00:51 +03:00
|
|
|
if (!NIL_P(kwargs)) {
|
|
|
|
static ID keywords[1];
|
|
|
|
if (!keywords[0]) {
|
|
|
|
keywords[0] = rb_intern_const("timeout");
|
|
|
|
}
|
2022-12-21 17:17:37 +03:00
|
|
|
rb_get_kwargs(kwargs, keywords, 0, 1, &args->timeout);
|
2022-03-24 11:00:51 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
if (RB_TYPE_P(src, T_REGEXP)) {
|
2022-12-22 09:59:31 +03:00
|
|
|
re = src;
|
2022-03-24 11:00:51 +03:00
|
|
|
|
2022-12-21 17:17:37 +03:00
|
|
|
if (!NIL_P(opts)) {
|
2007-10-16 09:48:40 +04:00
|
|
|
rb_warn("flags ignored");
|
2003-02-03 11:45:26 +03:00
|
|
|
}
|
2007-10-16 09:48:40 +04:00
|
|
|
rb_reg_check(re);
|
2007-10-19 11:41:03 +04:00
|
|
|
flags = rb_reg_options(re);
|
2016-02-04 11:38:06 +03:00
|
|
|
str = RREGEXP_SRC(re);
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
1999-01-20 07:59:39 +03:00
|
|
|
else {
|
2023-03-03 09:52:24 +03:00
|
|
|
if (!NIL_P(opts)) {
|
2022-06-16 12:53:35 +03:00
|
|
|
int f;
|
2022-03-24 11:00:51 +03:00
|
|
|
if (FIXNUM_P(opts)) flags = FIX2INT(opts);
|
2022-06-16 12:53:35 +03:00
|
|
|
else if ((f = str_to_option(opts)) >= 0) flags = f;
|
2023-03-03 09:52:24 +03:00
|
|
|
else if (rb_bool_expected(opts, "ignorecase", FALSE))
|
2022-05-24 10:51:15 +03:00
|
|
|
flags = ONIG_OPTION_IGNORECASE;
|
2003-02-03 11:45:26 +03:00
|
|
|
}
|
2022-03-24 11:00:51 +03:00
|
|
|
str = StringValue(src);
|
* include/ruby/{intern,ruby}.h, compile.[ch], error.c, eval.c,
eval_load.c, gc.c, iseq.c, main.c, parse.y, re.c, ruby.c,
yarvcore.[ch] (ruby_eval_tree, ruby_sourcefile, ruby_sourceline,
ruby_nerrs): purge global variables.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12700 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-07-05 12:12:18 +04:00
|
|
|
}
|
2022-12-21 17:17:37 +03:00
|
|
|
args->str = str;
|
|
|
|
args->enc = enc;
|
|
|
|
args->flags = flags;
|
2022-12-22 09:59:31 +03:00
|
|
|
return re;
|
2022-12-21 17:17:37 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
reg_init_args(VALUE self, VALUE str, rb_encoding *enc, int flags)
|
|
|
|
{
|
2016-02-04 11:38:06 +03:00
|
|
|
if (enc && rb_enc_get(str) != enc)
|
|
|
|
rb_reg_init_str_enc(self, str, enc, flags);
|
|
|
|
else
|
|
|
|
rb_reg_init_str(self, str, flags);
|
2001-01-15 10:01:00 +03:00
|
|
|
return self;
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
2002-07-26 10:12:39 +04:00
|
|
|
VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_quote(VALUE str)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
* encoding.c: provide basic features for M17N.
* parse.y: encoding aware parsing.
* parse.y (pragma_encoding): encoding specification pragma.
* parse.y (rb_intern3): encoding specified symbols.
* string.c (rb_str_length): length based on characters.
for older behavior, bytesize method added.
* string.c (rb_str_index_m): index based on characters. rindex as
well.
* string.c (succ_char): encoding aware succeeding string.
* string.c (rb_str_reverse): reverse based on characters.
* string.c (rb_str_inspect): encoding aware string description.
* string.c (rb_str_upcase_bang): encoding aware case conversion.
downcase, capitalize, swapcase as well.
* string.c (rb_str_tr_bang): tr based on characters. delete,
squeeze, tr_s, count as well.
* string.c (rb_str_split_m): split based on characters.
* string.c (rb_str_each_line): encoding aware each_line.
* string.c (rb_str_each_char): added. iteration based on
characters.
* string.c (rb_str_strip_bang): encoding aware whitespace
stripping. lstrip, rstrip as well.
* string.c (rb_str_justify): encoding aware justifying (ljust,
rjust, center).
* string.c (str_encoding): get encoding attribute from a string.
* re.c (rb_reg_initialize): encoding aware regular expression
* sprintf.c (rb_str_format): formatting (i.e. length count) based
on characters.
* io.c (rb_io_getc): getc to return one-character string.
for older behavior, getbyte method added.
* ext/stringio/stringio.c (strio_getc): ditto.
* io.c (rb_io_ungetc): allow pushing arbitrary string at the
current reading point.
* ext/stringio/stringio.c (strio_ungetc): ditto.
* ext/strscan/strscan.c: encoding support.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@13261 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-08-25 07:29:39 +04:00
|
|
|
rb_encoding *enc = rb_enc_get(str);
|
1999-08-13 09:45:20 +04:00
|
|
|
char *s, *send, *t;
|
2000-10-10 11:03:36 +04:00
|
|
|
VALUE tmp;
|
2007-12-11 06:08:50 +03:00
|
|
|
int c, clen;
|
2007-11-25 16:25:34 +03:00
|
|
|
int ascii_only = rb_enc_str_asciionly_p(str);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
2006-08-31 14:47:44 +04:00
|
|
|
s = RSTRING_PTR(str);
|
|
|
|
send = s + RSTRING_LEN(str);
|
2007-12-11 06:08:50 +03:00
|
|
|
while (s < send) {
|
2007-12-11 10:39:16 +03:00
|
|
|
c = rb_enc_ascget(s, send, &clen, enc);
|
2007-12-08 05:50:43 +03:00
|
|
|
if (c == -1) {
|
2007-12-11 06:08:50 +03:00
|
|
|
s += mbclen(s, send, enc);
|
2002-07-26 10:12:39 +04:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
switch (c) {
|
|
|
|
case '[': case ']': case '{': case '}':
|
|
|
|
case '(': case ')': case '|': case '-':
|
|
|
|
case '*': case '.': case '\\':
|
|
|
|
case '?': case '+': case '^': case '$':
|
|
|
|
case ' ': case '#':
|
2007-11-04 18:03:31 +03:00
|
|
|
case '\t': case '\f': case '\v': case '\n': case '\r':
|
2002-07-26 10:12:39 +04:00
|
|
|
goto meta_found;
|
|
|
|
}
|
2007-12-11 06:08:50 +03:00
|
|
|
s += clen;
|
2002-07-26 10:12:39 +04:00
|
|
|
}
|
2008-04-08 05:53:35 +04:00
|
|
|
tmp = rb_str_new3(str);
|
2008-02-17 05:00:05 +03:00
|
|
|
if (ascii_only) {
|
2008-04-08 05:53:35 +04:00
|
|
|
rb_enc_associate(tmp, rb_usascii_encoding());
|
2007-11-25 16:25:34 +03:00
|
|
|
}
|
2008-04-08 05:53:35 +04:00
|
|
|
return tmp;
|
2002-07-26 10:12:39 +04:00
|
|
|
|
|
|
|
meta_found:
|
2006-08-31 14:47:44 +04:00
|
|
|
tmp = rb_str_new(0, RSTRING_LEN(str)*2);
|
2008-02-17 05:00:05 +03:00
|
|
|
if (ascii_only) {
|
|
|
|
rb_enc_associate(tmp, rb_usascii_encoding());
|
|
|
|
}
|
|
|
|
else {
|
2007-11-25 16:25:34 +03:00
|
|
|
rb_enc_copy(tmp, str);
|
|
|
|
}
|
2006-08-31 14:47:44 +04:00
|
|
|
t = RSTRING_PTR(tmp);
|
2002-07-29 10:14:10 +04:00
|
|
|
/* copy upto metacharacter */
|
2020-08-14 10:12:58 +03:00
|
|
|
const char *p = RSTRING_PTR(str);
|
2020-08-13 14:56:23 +03:00
|
|
|
memcpy(t, p, s - p);
|
|
|
|
t += s - p;
|
1999-08-13 09:45:20 +04:00
|
|
|
|
2007-12-11 06:08:50 +03:00
|
|
|
while (s < send) {
|
2007-12-11 10:39:16 +03:00
|
|
|
c = rb_enc_ascget(s, send, &clen, enc);
|
2007-12-08 05:50:43 +03:00
|
|
|
if (c == -1) {
|
2007-09-06 16:33:45 +04:00
|
|
|
int n = mbclen(s, send, enc);
|
1999-08-13 09:45:20 +04:00
|
|
|
|
2007-12-11 06:08:50 +03:00
|
|
|
while (n--)
|
1999-08-13 09:45:20 +04:00
|
|
|
*t++ = *s++;
|
|
|
|
continue;
|
|
|
|
}
|
2007-12-11 06:08:50 +03:00
|
|
|
s += clen;
|
2002-05-01 13:41:50 +04:00
|
|
|
switch (c) {
|
|
|
|
case '[': case ']': case '{': case '}':
|
|
|
|
case '(': case ')': case '|': case '-':
|
|
|
|
case '*': case '.': case '\\':
|
|
|
|
case '?': case '+': case '^': case '$':
|
2003-05-17 03:28:31 +04:00
|
|
|
case '#':
|
2008-09-15 20:01:08 +04:00
|
|
|
t += rb_enc_mbcput('\\', t, enc);
|
2003-05-17 03:28:31 +04:00
|
|
|
break;
|
|
|
|
case ' ':
|
2008-09-15 20:01:08 +04:00
|
|
|
t += rb_enc_mbcput('\\', t, enc);
|
|
|
|
t += rb_enc_mbcput(' ', t, enc);
|
2003-05-19 08:48:57 +04:00
|
|
|
continue;
|
2003-05-17 03:28:31 +04:00
|
|
|
case '\t':
|
2008-09-15 20:01:08 +04:00
|
|
|
t += rb_enc_mbcput('\\', t, enc);
|
|
|
|
t += rb_enc_mbcput('t', t, enc);
|
2003-05-19 08:48:57 +04:00
|
|
|
continue;
|
2003-05-17 03:28:31 +04:00
|
|
|
case '\n':
|
2008-09-15 20:01:08 +04:00
|
|
|
t += rb_enc_mbcput('\\', t, enc);
|
|
|
|
t += rb_enc_mbcput('n', t, enc);
|
2003-05-19 08:48:57 +04:00
|
|
|
continue;
|
2003-05-17 03:28:31 +04:00
|
|
|
case '\r':
|
2008-09-15 20:01:08 +04:00
|
|
|
t += rb_enc_mbcput('\\', t, enc);
|
|
|
|
t += rb_enc_mbcput('r', t, enc);
|
2003-05-19 08:48:57 +04:00
|
|
|
continue;
|
2003-05-17 03:28:31 +04:00
|
|
|
case '\f':
|
2008-09-15 20:01:08 +04:00
|
|
|
t += rb_enc_mbcput('\\', t, enc);
|
|
|
|
t += rb_enc_mbcput('f', t, enc);
|
2003-05-19 08:48:57 +04:00
|
|
|
continue;
|
2007-11-04 18:03:31 +03:00
|
|
|
case '\v':
|
2008-09-15 20:01:08 +04:00
|
|
|
t += rb_enc_mbcput('\\', t, enc);
|
|
|
|
t += rb_enc_mbcput('v', t, enc);
|
2007-11-04 18:03:31 +03:00
|
|
|
continue;
|
1999-08-13 09:45:20 +04:00
|
|
|
}
|
2008-09-15 20:01:08 +04:00
|
|
|
t += rb_enc_mbcput(c, t, enc);
|
1999-08-13 09:45:20 +04:00
|
|
|
}
|
2006-08-31 14:47:44 +04:00
|
|
|
rb_str_resize(tmp, t - RSTRING_PTR(tmp));
|
2000-10-10 11:03:36 +04:00
|
|
|
return tmp;
|
1999-01-20 07:59:39 +03:00
|
|
|
}
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
|
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-18 18:45:29 +03:00
|
|
|
* Regexp.escape(string) -> new_string
|
|
|
|
*
|
|
|
|
* Returns a new string that escapes any characters
|
|
|
|
* that have special meaning in a regular expression:
|
|
|
|
*
|
|
|
|
* s = Regexp.escape('\*?{}.') # => "\\\\\\*\\?\\{\\}\\."
|
|
|
|
*
|
|
|
|
* For any string +s+, this call returns a MatchData object:
|
* compile.c, dir.c, eval.c, eval_jump.h, eval_method.h, numeric.c,
pack.c, parse.y, re.c, thread.c, vm.c, vm_dump.c, call_cfunc.ci,
thread_pthread.ci, thread_win32.ci: fixed indentation.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@12431 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2007-06-05 08:25:10 +04:00
|
|
|
*
|
2022-04-18 18:45:29 +03:00
|
|
|
* r = Regexp.new(Regexp.escape(s)) # => /\\\\\\\*\\\?\\\{\\\}\\\./
|
|
|
|
* r.match(s) # => #<MatchData "\\\\\\*\\?\\{\\}\\.">
|
2007-12-09 16:35:38 +03:00
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
2002-07-26 10:12:39 +04:00
|
|
|
static VALUE
|
2007-10-10 18:34:42 +04:00
|
|
|
rb_reg_s_quote(VALUE c, VALUE str)
|
2002-07-26 10:12:39 +04:00
|
|
|
{
|
2009-09-05 03:49:18 +04:00
|
|
|
return rb_reg_quote(reg_operand(str, TRUE));
|
2002-07-26 10:12:39 +04:00
|
|
|
}
|
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
int
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_options(VALUE re)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
2003-07-25 08:38:42 +04:00
|
|
|
int options;
|
1998-01-16 15:19:22 +03:00
|
|
|
|
2000-05-22 11:09:55 +04:00
|
|
|
rb_reg_check(re);
|
2016-02-02 07:39:44 +03:00
|
|
|
options = RREGEXP_PTR(re)->options & ARG_REG_OPTION_MASK;
|
2007-10-19 11:41:03 +04:00
|
|
|
if (RBASIC(re)->flags & KCODE_FIXED) options |= ARG_ENCODING_FIXED;
|
2007-12-21 19:39:36 +03:00
|
|
|
if (RBASIC(re)->flags & REG_ENCODING_NONE) options |= ARG_ENCODING_NONE;
|
1999-01-20 07:59:39 +03:00
|
|
|
return options;
|
|
|
|
}
|
|
|
|
|
2021-06-01 10:59:33 +03:00
|
|
|
static VALUE
|
2007-08-02 18:42:59 +04:00
|
|
|
rb_check_regexp_type(VALUE re)
|
|
|
|
{
|
|
|
|
return rb_check_convert_type(re, T_REGEXP, "Regexp", "to_regexp");
|
|
|
|
}
|
|
|
|
|
2007-08-24 21:47:09 +04:00
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-18 18:45:29 +03:00
|
|
|
* Regexp.try_convert(object) -> regexp or nil
|
2007-08-24 21:47:09 +04:00
|
|
|
*
|
2022-04-18 18:45:29 +03:00
|
|
|
* Returns +object+ if it is a regexp:
|
2007-08-24 21:47:09 +04:00
|
|
|
*
|
2022-04-18 18:45:29 +03:00
|
|
|
* Regexp.try_convert(/re/) # => /re/
|
2007-12-09 16:35:38 +03:00
|
|
|
*
|
2022-04-18 18:45:29 +03:00
|
|
|
* Otherwise if +object+ responds to <tt>:to_regexp</tt>,
|
|
|
|
* calls <tt>object.to_regexp</tt> and returns the result.
|
|
|
|
*
|
|
|
|
* Returns +nil+ if +object+ does not respond to <tt>:to_regexp</tt>.
|
|
|
|
*
|
|
|
|
* Regexp.try_convert('re') # => nil
|
|
|
|
*
|
|
|
|
* Raises an exception unless <tt>object.to_regexp</tt> returns a regexp.
|
2007-12-09 16:35:38 +03:00
|
|
|
*
|
2007-08-24 21:47:09 +04:00
|
|
|
*/
|
|
|
|
static VALUE
|
|
|
|
rb_reg_s_try_convert(VALUE dummy, VALUE re)
|
|
|
|
{
|
|
|
|
return rb_check_regexp_type(re);
|
|
|
|
}
|
2003-12-26 18:58:28 +03:00
|
|
|
|
2003-10-29 11:31:43 +03:00
|
|
|
static VALUE
|
2007-10-05 16:26:35 +04:00
|
|
|
rb_reg_s_union(VALUE self, VALUE args0)
|
2003-10-29 11:31:43 +03:00
|
|
|
{
|
2007-10-05 16:26:35 +04:00
|
|
|
long argc = RARRAY_LEN(args0);
|
2007-10-16 09:06:30 +04:00
|
|
|
|
2003-10-29 11:31:43 +03:00
|
|
|
if (argc == 0) {
|
|
|
|
VALUE args[1];
|
|
|
|
args[0] = rb_str_new2("(?!)");
|
|
|
|
return rb_class_new_instance(1, args, rb_cRegexp);
|
|
|
|
}
|
|
|
|
else if (argc == 1) {
|
2007-11-25 16:25:34 +03:00
|
|
|
VALUE arg = rb_ary_entry(args0, 0);
|
|
|
|
VALUE re = rb_check_regexp_type(arg);
|
|
|
|
if (!NIL_P(re))
|
|
|
|
return re;
|
2003-10-29 11:31:43 +03:00
|
|
|
else {
|
2007-11-25 16:25:34 +03:00
|
|
|
VALUE quoted;
|
|
|
|
quoted = rb_reg_s_quote(Qnil, arg);
|
2008-01-04 19:30:33 +03:00
|
|
|
return rb_reg_new_str(quoted, 0);
|
2003-10-29 11:31:43 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
2007-10-16 09:48:40 +04:00
|
|
|
int i;
|
|
|
|
VALUE source = rb_str_buf_new(0);
|
2007-11-26 05:27:59 +03:00
|
|
|
rb_encoding *result_enc;
|
2007-11-25 16:25:34 +03:00
|
|
|
|
2007-11-26 05:27:59 +03:00
|
|
|
int has_asciionly = 0;
|
|
|
|
rb_encoding *has_ascii_compat_fixed = 0;
|
|
|
|
rb_encoding *has_ascii_incompat = 0;
|
2007-10-16 09:06:30 +04:00
|
|
|
|
2007-10-16 09:48:40 +04:00
|
|
|
for (i = 0; i < argc; i++) {
|
|
|
|
volatile VALUE v;
|
2007-10-16 14:48:02 +04:00
|
|
|
VALUE e = rb_ary_entry(args0, i);
|
2007-11-25 16:25:34 +03:00
|
|
|
|
2007-10-16 09:48:40 +04:00
|
|
|
if (0 < i)
|
2008-01-25 10:35:27 +03:00
|
|
|
rb_str_buf_cat_ascii(source, "|");
|
2007-11-25 16:25:34 +03:00
|
|
|
|
2007-10-16 14:48:02 +04:00
|
|
|
v = rb_check_regexp_type(e);
|
2007-10-16 09:48:40 +04:00
|
|
|
if (!NIL_P(v)) {
|
2007-11-26 05:27:59 +03:00
|
|
|
rb_encoding *enc = rb_enc_get(v);
|
|
|
|
if (!rb_enc_asciicompat(enc)) {
|
|
|
|
if (!has_ascii_incompat)
|
|
|
|
has_ascii_incompat = enc;
|
|
|
|
else if (has_ascii_incompat != enc)
|
2007-12-27 10:38:23 +03:00
|
|
|
rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
|
|
|
|
rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
|
2007-11-25 16:25:34 +03:00
|
|
|
}
|
2007-11-26 05:27:59 +03:00
|
|
|
else if (rb_reg_fixed_encoding_p(v)) {
|
|
|
|
if (!has_ascii_compat_fixed)
|
|
|
|
has_ascii_compat_fixed = enc;
|
|
|
|
else if (has_ascii_compat_fixed != enc)
|
2007-12-27 10:38:23 +03:00
|
|
|
rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
|
|
|
|
rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc));
|
2007-11-25 16:25:34 +03:00
|
|
|
}
|
|
|
|
else {
|
2007-11-26 05:27:59 +03:00
|
|
|
has_asciionly = 1;
|
2007-11-25 16:25:34 +03:00
|
|
|
}
|
2018-03-16 16:37:44 +03:00
|
|
|
v = rb_reg_str_with_term(v, -1);
|
2007-10-16 09:48:40 +04:00
|
|
|
}
|
|
|
|
else {
|
2008-07-20 23:44:05 +04:00
|
|
|
rb_encoding *enc;
|
2007-11-25 16:25:34 +03:00
|
|
|
StringValue(e);
|
2007-11-26 05:27:59 +03:00
|
|
|
enc = rb_enc_get(e);
|
2016-04-28 10:52:59 +03:00
|
|
|
if (!rb_enc_asciicompat(enc)) {
|
2007-11-26 05:27:59 +03:00
|
|
|
if (!has_ascii_incompat)
|
|
|
|
has_ascii_incompat = enc;
|
|
|
|
else if (has_ascii_incompat != enc)
|
2007-12-27 10:38:23 +03:00
|
|
|
rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
|
|
|
|
rb_enc_name(has_ascii_incompat), rb_enc_name(enc));
|
2007-11-25 16:25:34 +03:00
|
|
|
}
|
|
|
|
else if (rb_enc_str_asciionly_p(e)) {
|
2007-11-26 05:27:59 +03:00
|
|
|
has_asciionly = 1;
|
2007-11-25 16:25:34 +03:00
|
|
|
}
|
|
|
|
else {
|
2007-11-26 05:27:59 +03:00
|
|
|
if (!has_ascii_compat_fixed)
|
|
|
|
has_ascii_compat_fixed = enc;
|
|
|
|
else if (has_ascii_compat_fixed != enc)
|
2007-12-27 10:38:23 +03:00
|
|
|
rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
|
|
|
|
rb_enc_name(has_ascii_compat_fixed), rb_enc_name(enc));
|
2007-11-25 16:25:34 +03:00
|
|
|
}
|
2007-10-16 14:48:02 +04:00
|
|
|
v = rb_reg_s_quote(Qnil, e);
|
2007-10-17 09:21:10 +04:00
|
|
|
}
|
2007-12-27 10:38:23 +03:00
|
|
|
if (has_ascii_incompat) {
|
|
|
|
if (has_asciionly) {
|
|
|
|
rb_raise(rb_eArgError, "ASCII incompatible encoding: %s",
|
|
|
|
rb_enc_name(has_ascii_incompat));
|
|
|
|
}
|
|
|
|
if (has_ascii_compat_fixed) {
|
|
|
|
rb_raise(rb_eArgError, "incompatible encodings: %s and %s",
|
|
|
|
rb_enc_name(has_ascii_incompat), rb_enc_name(has_ascii_compat_fixed));
|
|
|
|
}
|
2007-11-26 05:27:59 +03:00
|
|
|
}
|
|
|
|
|
2008-01-25 10:35:27 +03:00
|
|
|
if (i == 0) {
|
|
|
|
rb_enc_copy(source, v);
|
|
|
|
}
|
2007-10-16 22:37:09 +04:00
|
|
|
rb_str_append(source, v);
|
2007-08-02 18:42:59 +04:00
|
|
|
}
|
2007-11-26 05:27:59 +03:00
|
|
|
|
|
|
|
if (has_ascii_incompat) {
|
|
|
|
result_enc = has_ascii_incompat;
|
2007-11-25 16:25:34 +03:00
|
|
|
}
|
2007-11-26 05:27:59 +03:00
|
|
|
else if (has_ascii_compat_fixed) {
|
|
|
|
result_enc = has_ascii_compat_fixed;
|
2007-11-25 16:25:34 +03:00
|
|
|
}
|
|
|
|
else {
|
2008-01-28 12:03:09 +03:00
|
|
|
result_enc = rb_ascii8bit_encoding();
|
2007-11-25 16:25:34 +03:00
|
|
|
}
|
|
|
|
|
2007-11-26 05:27:59 +03:00
|
|
|
rb_enc_associate(source, result_enc);
|
2007-10-16 09:48:40 +04:00
|
|
|
return rb_class_new_instance(1, &source, rb_cRegexp);
|
2003-10-29 11:31:43 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-10-08 20:03:53 +04:00
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-18 18:45:29 +03:00
|
|
|
* Regexp.union(*patterns) -> regexp
|
|
|
|
* Regexp.union(array_of_patterns) -> regexp
|
|
|
|
*
|
|
|
|
* Returns a new regexp that is the union of the given patterns:
|
|
|
|
*
|
|
|
|
* r = Regexp.union(%w[cat dog]) # => /cat|dog/
|
|
|
|
* r.match('cat') # => #<MatchData "cat">
|
|
|
|
* r.match('dog') # => #<MatchData "dog">
|
|
|
|
* r.match('cog') # => nil
|
|
|
|
*
|
|
|
|
* For each pattern that is a string, its escaped form (see Regexp.escape) is used:
|
|
|
|
*
|
|
|
|
* Regexp.union('penzance') # => /penzance/
|
|
|
|
* Regexp.union('a+b*c') # => /a\+b\*c/
|
|
|
|
* Regexp.union('skiing', 'sledding') # => /skiing|sledding/
|
|
|
|
* Regexp.union(['skiing', 'sledding']) # => /skiing|sledding/
|
|
|
|
*
|
|
|
|
* For each pattern that is a regexp, it is used as is,
|
|
|
|
* including its flags:
|
|
|
|
*
|
|
|
|
* Regexp.union(/foo/i, /bar/m, /baz/x)
|
|
|
|
* # => /(?i-mx:foo)|(?m-ix:bar)|(?x-mi:baz)/
|
|
|
|
* Regexp.union([/foo/i, /bar/m, /baz/x])
|
|
|
|
* # => /(?i-mx:foo)|(?m-ix:bar)|(?x-mi:baz)/
|
|
|
|
*
|
|
|
|
* With no arguments, returns <tt>/(?!)/</tt>:
|
|
|
|
*
|
|
|
|
* Regexp.union # => /(?!)/
|
|
|
|
*
|
|
|
|
* If any regexp pattern contains captures, the behavior is unspecified.
|
|
|
|
*
|
2007-10-08 20:03:53 +04:00
|
|
|
*/
|
2007-10-05 16:26:35 +04:00
|
|
|
static VALUE
|
|
|
|
rb_reg_s_union_m(VALUE self, VALUE args)
|
|
|
|
{
|
|
|
|
VALUE v;
|
|
|
|
if (RARRAY_LEN(args) == 1 &&
|
|
|
|
!NIL_P(v = rb_check_array_type(rb_ary_entry(args, 0)))) {
|
|
|
|
return rb_reg_s_union(self, v);
|
|
|
|
}
|
|
|
|
return rb_reg_s_union(self, args);
|
|
|
|
}
|
|
|
|
|
2022-12-14 06:57:14 +03:00
|
|
|
/*
|
|
|
|
* call-seq:
|
|
|
|
* Regexp.linear_time?(re)
|
|
|
|
* Regexp.linear_time?(string, options = 0)
|
|
|
|
*
|
|
|
|
* Returns +true+ if matching against <tt>re</tt> can be
|
|
|
|
* done in linear time to the input string.
|
|
|
|
*
|
|
|
|
* Regexp.linear_time?(/re/) # => true
|
|
|
|
*
|
2022-12-19 04:28:51 +03:00
|
|
|
* Note that this is a property of the ruby interpreter, not of the argument
|
|
|
|
* regular expression. Identical regexp can or cannot run in linear time
|
|
|
|
* depending on your ruby binary. Neither forward nor backward compatibility
|
|
|
|
* is guaranteed about the return value of this method. Our current algorithm
|
|
|
|
* is (*1) but this is subject to change in the future. Alternative
|
|
|
|
* implementations can also behave differently. They might always return
|
2022-12-19 05:20:55 +03:00
|
|
|
* false for everything.
|
2022-12-19 04:28:51 +03:00
|
|
|
*
|
|
|
|
* (*1): https://doi.org/10.1109/SP40001.2021.00032
|
|
|
|
*
|
2022-12-14 06:57:14 +03:00
|
|
|
*/
|
|
|
|
static VALUE
|
|
|
|
rb_reg_s_linear_time_p(int argc, VALUE *argv, VALUE self)
|
|
|
|
{
|
2022-12-21 17:17:37 +03:00
|
|
|
struct reg_init_args args;
|
2022-12-22 09:59:31 +03:00
|
|
|
VALUE re = reg_extract_args(argc, argv, &args);
|
2022-12-14 06:57:14 +03:00
|
|
|
|
2022-12-22 09:59:31 +03:00
|
|
|
if (NIL_P(re)) {
|
2022-12-21 17:17:37 +03:00
|
|
|
re = reg_init_args(rb_reg_alloc(), args.str, args.enc, args.flags);
|
2022-12-14 06:57:14 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return RBOOL(onig_check_linear_time(RREGEXP_PTR(re)));
|
|
|
|
}
|
|
|
|
|
2004-01-18 17:16:47 +03:00
|
|
|
/* :nodoc: */
|
1999-01-20 07:59:39 +03:00
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
rb_reg_init_copy(VALUE copy, VALUE re)
|
2000-01-05 07:41:21 +03:00
|
|
|
{
|
2012-06-05 15:13:18 +04:00
|
|
|
if (!OBJ_INIT_COPY(copy, re)) return copy;
|
2000-05-22 11:09:55 +04:00
|
|
|
rb_reg_check(re);
|
2023-06-09 10:10:30 +03:00
|
|
|
return reg_copy(copy, re);
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
VALUE
|
2006-03-22 18:03:40 +03:00
|
|
|
rb_reg_regsub(VALUE str, VALUE src, struct re_registers *regs, VALUE regexp)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
|
|
|
VALUE val = 0;
|
* ascii.c, euc_jp.c, oniggnu.h, oniguruma.h, regcomp.c, regenc.c, regenc.h, regerror.c, regexec.c, reggnu.c, regint.h, regparse.c, regparse.h, sjis.c, utf8.c:
imported Oni Guruma 3.4.0.
* parse.y, re.c: Now mbclen() takes unsigned char as its argument.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@7206 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2004-11-04 17:43:08 +03:00
|
|
|
char *p, *s, *e;
|
2007-12-11 06:08:50 +03:00
|
|
|
int no, clen;
|
2008-01-06 12:25:09 +03:00
|
|
|
rb_encoding *str_enc = rb_enc_get(str);
|
|
|
|
rb_encoding *src_enc = rb_enc_get(src);
|
2008-05-19 12:25:03 +04:00
|
|
|
int acompat = rb_enc_asciicompat(str_enc);
|
2020-08-13 14:56:23 +03:00
|
|
|
long n;
|
2010-12-27 12:27:43 +03:00
|
|
|
#define ASCGET(s,e,cl) (acompat ? (*(cl)=1,ISASCII((s)[0])?(s)[0]:-1) : rb_enc_ascget((s), (e), (cl), str_enc))
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2020-08-13 14:56:23 +03:00
|
|
|
RSTRING_GETMEM(str, s, n);
|
|
|
|
p = s;
|
|
|
|
e = s + n;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
while (s < e) {
|
2008-05-19 12:25:03 +04:00
|
|
|
int c = ASCGET(s, e, &clen);
|
2007-12-11 06:08:50 +03:00
|
|
|
char *ss;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2007-12-08 05:50:43 +03:00
|
|
|
if (c == -1) {
|
2008-01-06 12:25:09 +03:00
|
|
|
s += mbclen(s, e, str_enc);
|
1999-01-20 07:59:39 +03:00
|
|
|
continue;
|
|
|
|
}
|
2007-12-11 06:08:50 +03:00
|
|
|
ss = s;
|
|
|
|
s += clen;
|
|
|
|
|
2007-12-08 05:50:43 +03:00
|
|
|
if (c != '\\' || s == e) continue;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2001-05-30 13:12:34 +04:00
|
|
|
if (!val) {
|
|
|
|
val = rb_str_buf_new(ss-p);
|
|
|
|
}
|
2008-01-06 12:25:09 +03:00
|
|
|
rb_enc_str_buf_cat(val, p, ss-p, str_enc);
|
2007-12-11 06:08:50 +03:00
|
|
|
|
2008-05-19 12:25:03 +04:00
|
|
|
c = ASCGET(s, e, &clen);
|
2007-12-11 06:08:50 +03:00
|
|
|
if (c == -1) {
|
2008-01-06 12:25:09 +03:00
|
|
|
s += mbclen(s, e, str_enc);
|
|
|
|
rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
|
2008-02-17 18:35:09 +03:00
|
|
|
p = s;
|
2007-12-11 06:08:50 +03:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
s += clen;
|
1998-01-16 15:13:05 +03:00
|
|
|
|
|
|
|
p = s;
|
2007-12-11 06:08:50 +03:00
|
|
|
switch (c) {
|
2006-03-23 15:43:51 +03:00
|
|
|
case '1': case '2': case '3': case '4':
|
1998-01-16 15:13:05 +03:00
|
|
|
case '5': case '6': case '7': case '8': case '9':
|
2016-02-02 07:39:44 +03:00
|
|
|
if (!NIL_P(regexp) && onig_noname_group_capture_is_active(RREGEXP_PTR(regexp))) {
|
2007-12-11 06:08:50 +03:00
|
|
|
no = c - '0';
|
2006-03-23 15:43:51 +03:00
|
|
|
}
|
|
|
|
else {
|
2007-12-11 06:08:50 +03:00
|
|
|
continue;
|
2006-03-23 15:43:51 +03:00
|
|
|
}
|
1998-01-16 15:13:05 +03:00
|
|
|
break;
|
2006-03-22 18:03:40 +03:00
|
|
|
|
|
|
|
case 'k':
|
2008-05-19 12:25:03 +04:00
|
|
|
if (s < e && ASCGET(s, e, &clen) == '<') {
|
2007-12-11 06:08:50 +03:00
|
|
|
char *name, *name_end;
|
2009-02-22 17:23:33 +03:00
|
|
|
|
2007-12-11 06:08:50 +03:00
|
|
|
name_end = name = s + clen;
|
|
|
|
while (name_end < e) {
|
2008-05-19 12:25:03 +04:00
|
|
|
c = ASCGET(name_end, e, &clen);
|
2007-12-11 06:08:50 +03:00
|
|
|
if (c == '>') break;
|
2008-01-06 12:25:09 +03:00
|
|
|
name_end += c == -1 ? mbclen(name_end, e, str_enc) : clen;
|
2007-12-11 06:08:50 +03:00
|
|
|
}
|
|
|
|
if (name_end < e) {
|
2014-06-04 17:09:57 +04:00
|
|
|
VALUE n = rb_str_subseq(str, (long)(name - RSTRING_PTR(str)),
|
|
|
|
(long)(name_end - name));
|
2016-12-19 06:11:57 +03:00
|
|
|
if ((no = NAME_TO_NUMBER(regs, regexp, n, name, name_end)) < 1) {
|
2014-06-04 16:33:18 +04:00
|
|
|
name_to_backref_error(n);
|
|
|
|
}
|
2007-12-11 06:08:50 +03:00
|
|
|
p = s = name_end + clen;
|
|
|
|
break;
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
rb_raise(rb_eRuntimeError, "invalid group name reference format");
|
|
|
|
}
|
2006-03-22 18:03:40 +03:00
|
|
|
}
|
|
|
|
|
2008-01-06 12:25:09 +03:00
|
|
|
rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
|
2006-03-22 18:03:40 +03:00
|
|
|
continue;
|
|
|
|
|
2006-03-23 15:43:51 +03:00
|
|
|
case '0':
|
1998-01-16 15:13:05 +03:00
|
|
|
case '&':
|
|
|
|
no = 0;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '`':
|
2008-01-06 12:25:09 +03:00
|
|
|
rb_enc_str_buf_cat(val, RSTRING_PTR(src), BEG(0), src_enc);
|
1998-01-16 15:13:05 +03:00
|
|
|
continue;
|
|
|
|
|
|
|
|
case '\'':
|
2008-01-06 12:25:09 +03:00
|
|
|
rb_enc_str_buf_cat(val, RSTRING_PTR(src)+END(0), RSTRING_LEN(src)-END(0), src_enc);
|
1998-01-16 15:13:05 +03:00
|
|
|
continue;
|
|
|
|
|
|
|
|
case '+':
|
|
|
|
no = regs->num_regs-1;
|
|
|
|
while (BEG(no) == -1 && no > 0) no--;
|
|
|
|
if (no == 0) continue;
|
|
|
|
break;
|
|
|
|
|
|
|
|
case '\\':
|
2008-01-06 12:25:09 +03:00
|
|
|
rb_enc_str_buf_cat(val, s-clen, clen, str_enc);
|
1998-01-16 15:13:05 +03:00
|
|
|
continue;
|
|
|
|
|
|
|
|
default:
|
2008-01-06 12:25:09 +03:00
|
|
|
rb_enc_str_buf_cat(val, ss, s-ss, str_enc);
|
1998-01-16 15:13:05 +03:00
|
|
|
continue;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (no >= 0) {
|
2000-02-08 11:54:01 +03:00
|
|
|
if (no >= regs->num_regs) continue;
|
1998-01-16 15:13:05 +03:00
|
|
|
if (BEG(no) == -1) continue;
|
2008-01-06 12:25:09 +03:00
|
|
|
rb_enc_str_buf_cat(val, RSTRING_PTR(src)+BEG(no), END(no)-BEG(no), src_enc);
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
}
|
|
|
|
|
2007-12-29 16:44:32 +03:00
|
|
|
if (!val) return str;
|
1998-01-16 15:13:05 +03:00
|
|
|
if (p < e) {
|
2008-01-06 12:25:09 +03:00
|
|
|
rb_enc_str_buf_cat(val, p, e-p, str_enc);
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
return val;
|
|
|
|
}
|
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
static VALUE
|
2019-08-27 05:16:52 +03:00
|
|
|
ignorecase_getter(ID _x, VALUE *_y)
|
1999-01-20 07:59:39 +03:00
|
|
|
{
|
2020-09-28 20:10:31 +03:00
|
|
|
rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "variable $= is no longer effective");
|
2007-10-04 11:31:50 +04:00
|
|
|
return Qfalse;
|
1999-01-20 07:59:39 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static void
|
2019-08-27 05:16:52 +03:00
|
|
|
ignorecase_setter(VALUE val, ID id, VALUE *_)
|
1999-01-20 07:59:39 +03:00
|
|
|
{
|
2020-09-28 20:10:31 +03:00
|
|
|
rb_category_warn(RB_WARN_CATEGORY_DEPRECATED, "variable $= is no longer effective; ignored");
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
static VALUE
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
match_getter(void)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
1999-08-13 09:45:20 +04:00
|
|
|
VALUE match = rb_backref_get();
|
|
|
|
|
|
|
|
if (NIL_P(match)) return Qnil;
|
2000-02-08 11:54:01 +03:00
|
|
|
rb_match_busy(match);
|
|
|
|
return match;
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
2019-08-27 05:16:52 +03:00
|
|
|
static VALUE
|
2019-08-27 08:21:18 +03:00
|
|
|
get_LAST_MATCH_INFO(ID _x, VALUE *_y)
|
2019-08-27 05:16:52 +03:00
|
|
|
{
|
|
|
|
return match_getter();
|
|
|
|
}
|
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
static void
|
2019-08-27 05:16:52 +03:00
|
|
|
match_setter(VALUE val, ID _x, VALUE *_y)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
2002-04-01 11:39:09 +04:00
|
|
|
if (!NIL_P(val)) {
|
|
|
|
Check_Type(val, T_MATCH);
|
|
|
|
}
|
1999-01-20 07:59:39 +03:00
|
|
|
rb_backref_set(val);
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-04-18 18:45:29 +03:00
|
|
|
* Regexp.last_match -> matchdata or nil
|
|
|
|
* Regexp.last_match(n) -> string or nil
|
|
|
|
* Regexp.last_match(name) -> string or nil
|
|
|
|
*
|
|
|
|
* With no argument, returns the value of <tt>$~</tt>,
|
|
|
|
* which is the result of the most recent pattern match
|
2023-06-20 16:28:21 +03:00
|
|
|
* (see {Regexp global variables}[rdoc-ref:Regexp@Global+Variables]):
|
2022-04-18 18:45:29 +03:00
|
|
|
*
|
|
|
|
* /c(.)t/ =~ 'cat' # => 0
|
|
|
|
* Regexp.last_match # => #<MatchData "cat" 1:"a">
|
|
|
|
* /a/ =~ 'foo' # => nil
|
|
|
|
* Regexp.last_match # => nil
|
|
|
|
*
|
|
|
|
* With non-negative integer argument +n+, returns the _n_th field in the
|
|
|
|
* matchdata, if any, or nil if none:
|
|
|
|
*
|
|
|
|
* /c(.)t/ =~ 'cat' # => 0
|
|
|
|
* Regexp.last_match(0) # => "cat"
|
|
|
|
* Regexp.last_match(1) # => "a"
|
|
|
|
* Regexp.last_match(2) # => nil
|
|
|
|
*
|
|
|
|
* With negative integer argument +n+, counts backwards from the last field:
|
|
|
|
*
|
|
|
|
* Regexp.last_match(-1) # => "a"
|
|
|
|
*
|
|
|
|
* With string or symbol argument +name+,
|
|
|
|
* returns the string value for the named capture, if any:
|
|
|
|
*
|
|
|
|
* /(?<lhs>\w+)\s*=\s*(?<rhs>\w+)/ =~ 'var = val'
|
|
|
|
* Regexp.last_match # => #<MatchData "var = val" lhs:"var" rhs:"val">
|
|
|
|
* Regexp.last_match(:lhs) # => "var"
|
|
|
|
* Regexp.last_match('rhs') # => "val"
|
|
|
|
* Regexp.last_match('foo') # Raises IndexError.
|
|
|
|
*
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
2001-01-15 10:01:00 +03:00
|
|
|
static VALUE
|
2019-08-29 05:47:20 +03:00
|
|
|
rb_reg_s_last_match(int argc, VALUE *argv, VALUE _)
|
2001-01-15 10:01:00 +03:00
|
|
|
{
|
2018-12-04 05:24:15 +03:00
|
|
|
if (rb_check_arity(argc, 0, 1) == 1) {
|
2007-12-09 16:35:38 +03:00
|
|
|
VALUE match = rb_backref_get();
|
|
|
|
int n;
|
|
|
|
if (NIL_P(match)) return Qnil;
|
2018-12-04 05:24:15 +03:00
|
|
|
n = match_backref_number(match, argv[0]);
|
2007-12-09 16:35:38 +03:00
|
|
|
return rb_reg_nth_match(n, match);
|
2001-01-15 10:01:00 +03:00
|
|
|
}
|
|
|
|
return match_getter();
|
|
|
|
}
|
|
|
|
|
2008-02-18 05:52:10 +03:00
|
|
|
/*
 * Warning callback handed to onig_set_warn_func() and
 * onig_set_verb_warn_func(): forwards the message +s+ to rb_warn().
 * The fixed "%s" format keeps +s+ from being interpreted as a format.
 */
static void
re_warn(const char *s)
{
    rb_warn("%s", s);
}
|
2003-12-26 18:58:28 +03:00
|
|
|
|
2022-03-24 10:59:11 +03:00
|
|
|
// This function is periodically called during regexp matching
|
2024-01-30 22:15:56 +03:00
|
|
|
bool
|
|
|
|
rb_reg_timeout_p(regex_t *reg, void *end_time_)
|
2022-03-24 10:59:11 +03:00
|
|
|
{
|
|
|
|
rb_hrtime_t *end_time = (rb_hrtime_t *)end_time_;
|
|
|
|
|
|
|
|
if (*end_time == 0) {
|
|
|
|
// This is the first time to check interrupts;
|
|
|
|
// just measure the current time and determine the end time
|
|
|
|
// if timeout is set.
|
|
|
|
rb_hrtime_t timelimit = reg->timelimit;
|
|
|
|
|
|
|
|
if (!timelimit) {
|
|
|
|
// no per-object timeout.
|
|
|
|
timelimit = rb_reg_match_time_limit;
|
|
|
|
}
|
|
|
|
|
|
|
|
if (timelimit) {
|
|
|
|
*end_time = rb_hrtime_add(timelimit, rb_hrtime_now());
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
// no timeout is set
|
|
|
|
*end_time = RB_HRTIME_MAX;
|
|
|
|
}
|
|
|
|
}
|
|
|
|
else {
|
|
|
|
if (*end_time < rb_hrtime_now()) {
|
2024-01-30 22:15:56 +03:00
|
|
|
// Timeout has exceeded
|
|
|
|
return true;
|
2022-03-24 10:59:11 +03:00
|
|
|
}
|
|
|
|
}
|
2024-01-30 22:15:56 +03:00
|
|
|
|
|
|
|
return false;
|
|
|
|
}
|
|
|
|
|
|
|
|
void
|
|
|
|
rb_reg_raise_timeout(void)
|
|
|
|
{
|
|
|
|
rb_raise(rb_eRegexpTimeoutError, "regexp match timeout");
|
2022-03-24 10:59:11 +03:00
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-06-20 11:47:44 +03:00
|
|
|
* Regexp.timeout -> float or nil
|
2022-03-24 10:59:11 +03:00
|
|
|
*
|
|
|
|
* It returns the current default timeout interval for Regexp matching in seconds.
|
|
|
|
* +nil+ means no default timeout configuration.
|
|
|
|
*/
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
rb_reg_s_timeout_get(VALUE dummy)
|
|
|
|
{
|
|
|
|
double d = hrtime2double(rb_reg_match_time_limit);
|
|
|
|
if (d == 0.0) return Qnil;
|
|
|
|
return DBL2NUM(d);
|
|
|
|
}
|
|
|
|
|
|
|
|
/*
|
|
|
|
* call-seq:
|
2022-06-20 11:47:44 +03:00
|
|
|
* Regexp.timeout = float or nil
|
2022-03-24 10:59:11 +03:00
|
|
|
*
|
|
|
|
* It sets the default timeout interval for Regexp matching in seconds.
|
|
|
|
* +nil+ means no default timeout configuration.
|
|
|
|
* This configuration is process-global. If you want to set timeout for
|
|
|
|
* each Regexp, use +timeout+ keyword for <code>Regexp.new</code>.
|
|
|
|
*
|
|
|
|
* Regexp.timeout = 1
|
|
|
|
* /^a*b?a*$/ =~ "a" * 100000 + "x" #=> regexp match timeout (Regexp::TimeoutError)
|
|
|
|
*/
|
|
|
|
|
|
|
|
static VALUE
|
2022-10-24 12:12:49 +03:00
|
|
|
rb_reg_s_timeout_set(VALUE dummy, VALUE timeout)
|
2022-03-24 10:59:11 +03:00
|
|
|
{
|
|
|
|
rb_ractor_ensure_main_ractor("can not access Regexp.timeout from non-main Ractors");
|
|
|
|
|
2022-10-24 12:21:30 +03:00
|
|
|
set_timeout(&rb_reg_match_time_limit, timeout);
|
2022-03-24 10:59:11 +03:00
|
|
|
|
2022-10-24 12:12:49 +03:00
|
|
|
return timeout;
|
2022-03-24 10:59:11 +03:00
|
|
|
}
|
|
|
|
|
2022-03-24 11:00:51 +03:00
|
|
|
/*
|
|
|
|
* call-seq:
|
|
|
|
* rxp.timeout -> float or nil
|
|
|
|
*
|
|
|
|
* It returns the timeout interval for Regexp matching in seconds.
|
|
|
|
* +nil+ means no per-object timeout configuration.
|
|
|
|
*
|
|
|
|
* This configuration is per-object. The global configuration set by
|
|
|
|
* Regexp.timeout= is ignored if per-object configuration is set.
|
|
|
|
*
|
|
|
|
* re = Regexp.new("^a*b?a*$", timeout: 1)
|
|
|
|
* re.timeout #=> 1.0
|
|
|
|
* re =~ "a" * 100000 + "x" #=> regexp match timeout (Regexp::TimeoutError)
|
|
|
|
*/
|
|
|
|
|
|
|
|
static VALUE
|
|
|
|
rb_reg_timeout_get(VALUE re)
|
|
|
|
{
|
|
|
|
rb_reg_check(re);
|
|
|
|
double d = hrtime2double(RREGEXP_PTR(re)->timelimit);
|
|
|
|
if (d == 0.0) return Qnil;
|
|
|
|
return DBL2NUM(d);
|
|
|
|
}
|
|
|
|
|
2010-05-08 08:50:09 +04:00
|
|
|
/*
|
|
|
|
* Document-class: RegexpError
|
|
|
|
*
|
|
|
|
* Raised when given an invalid regexp expression.
|
|
|
|
*
|
|
|
|
* Regexp.new("?")
|
|
|
|
*
|
|
|
|
* <em>raises the exception:</em>
|
|
|
|
*
|
|
|
|
* RegexpError: target of repeat operator is not specified: /?/
|
|
|
|
*/
|
|
|
|
|
2003-12-26 18:58:28 +03:00
|
|
|
/*
|
|
|
|
* Document-class: Regexp
|
|
|
|
*
|
2023-11-14 07:53:59 +03:00
|
|
|
* :include: doc/_regexp.rdoc
|
2003-12-26 18:58:28 +03:00
|
|
|
*/
|
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
void
|
* array.c: moved to ANSI function style from K&R function style.
(used protoize on windows, so still K&R remains on #ifdef part of
other platforms. And `foo _((boo))' stuff is still there)
[ruby-dev:26975]
* bignum.c, class.c, compar.c, dir.c, dln.c, dmyext.c, enum.c,
enumerator.c, error.c, eval.c, file.c, gc.c, hash.c, inits.c,
io.c, main.c, marshal.c, math.c, numeric.c, object.c, pack.c,
prec.c, process.c, random.c, range.c, re.c, regcomp.c, regenc.c,
regerror.c, regexec.c, regparse.c, regparse.h, ruby.c, signal.c,
sprintf.c, st.c, string.c, struct.c, time.c, util.h, variable.c,
version.c: ditto.
git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@9126 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
2005-09-12 14:44:21 +04:00
|
|
|
Init_Regexp(void)
|
1998-01-16 15:13:05 +03:00
|
|
|
{
|
2000-08-28 13:53:42 +04:00
|
|
|
rb_eRegexpError = rb_define_class("RegexpError", rb_eStandardError);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2007-10-05 21:39:59 +04:00
|
|
|
onigenc_set_default_encoding(ONIG_ENCODING_ASCII);
|
2008-02-18 05:52:10 +03:00
|
|
|
onig_set_warn_func(re_warn);
|
|
|
|
onig_set_verb_warn_func(re_warn);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2019-08-27 08:21:18 +03:00
|
|
|
rb_define_virtual_variable("$~", get_LAST_MATCH_INFO, match_setter);
|
1998-01-16 15:13:05 +03:00
|
|
|
rb_define_virtual_variable("$&", last_match_getter, 0);
|
|
|
|
rb_define_virtual_variable("$`", prematch_getter, 0);
|
|
|
|
rb_define_virtual_variable("$'", postmatch_getter, 0);
|
|
|
|
rb_define_virtual_variable("$+", last_paren_match_getter, 0);
|
|
|
|
|
Some global variables can be accessed from ractors
Some global variables should be used from non-main Ractors.
[Bug #17268]
```ruby
# ractor-local (derived from created ractor): debug
'$DEBUG' => $DEBUG,
'$-d' => $-d,
# ractor-local (derived from created ractor): verbose
'$VERBOSE' => $VERBOSE,
'$-w' => $-w,
'$-W' => $-W,
'$-v' => $-v,
# process-local (readonly): other commandline parameters
'$-p' => $-p,
'$-l' => $-l,
'$-a' => $-a,
# process-local (readonly): getpid
'$$' => $$,
# thread local: process result
'$?' => $?,
# scope local: match
'$~' => $~.inspect,
'$&' => $&,
'$`' => $`,
'$\'' => $',
'$+' => $+,
'$1' => $1,
# scope local: last line
'$_' => $_,
# scope local: last backtrace
'$@' => $@,
'$!' => $!,
# ractor local: stdin, out, err
'$stdin' => $stdin.inspect,
'$stdout' => $stdout.inspect,
'$stderr' => $stderr.inspect,
```
2020-10-20 04:46:43 +03:00
|
|
|
rb_gvar_ractor_local("$~");
|
|
|
|
rb_gvar_ractor_local("$&");
|
|
|
|
rb_gvar_ractor_local("$`");
|
|
|
|
rb_gvar_ractor_local("$'");
|
|
|
|
rb_gvar_ractor_local("$+");
|
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
rb_define_virtual_variable("$=", ignorecase_getter, ignorecase_setter);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
1999-01-20 07:59:39 +03:00
|
|
|
rb_cRegexp = rb_define_class("Regexp", rb_cObject);
|
2002-12-20 11:33:17 +03:00
|
|
|
rb_define_alloc_func(rb_cRegexp, rb_reg_s_alloc);
|
2023-03-03 05:07:36 +03:00
|
|
|
rb_define_singleton_method(rb_cRegexp, "compile", rb_class_new_instance_pass_kw, -1);
|
2007-10-10 18:34:42 +04:00
|
|
|
rb_define_singleton_method(rb_cRegexp, "quote", rb_reg_s_quote, 1);
|
|
|
|
rb_define_singleton_method(rb_cRegexp, "escape", rb_reg_s_quote, 1);
|
2007-10-05 16:26:35 +04:00
|
|
|
rb_define_singleton_method(rb_cRegexp, "union", rb_reg_s_union_m, -2);
|
2001-01-15 10:01:00 +03:00
|
|
|
rb_define_singleton_method(rb_cRegexp, "last_match", rb_reg_s_last_match, -1);
|
2007-08-24 21:47:09 +04:00
|
|
|
rb_define_singleton_method(rb_cRegexp, "try_convert", rb_reg_s_try_convert, 1);
|
2022-12-14 06:57:14 +03:00
|
|
|
rb_define_singleton_method(rb_cRegexp, "linear_time?", rb_reg_s_linear_time_p, -1);
|
1999-01-20 07:59:39 +03:00
|
|
|
|
2000-05-22 11:09:55 +04:00
|
|
|
rb_define_method(rb_cRegexp, "initialize", rb_reg_initialize_m, -1);
|
2003-05-19 09:41:08 +04:00
|
|
|
rb_define_method(rb_cRegexp, "initialize_copy", rb_reg_init_copy, 1);
|
2002-12-12 12:17:32 +03:00
|
|
|
rb_define_method(rb_cRegexp, "hash", rb_reg_hash, 0);
|
|
|
|
rb_define_method(rb_cRegexp, "eql?", rb_reg_equal, 1);
|
1999-01-20 07:59:39 +03:00
|
|
|
rb_define_method(rb_cRegexp, "==", rb_reg_equal, 1);
|
|
|
|
rb_define_method(rb_cRegexp, "=~", rb_reg_match, 1);
|
2002-12-19 12:20:20 +03:00
|
|
|
rb_define_method(rb_cRegexp, "===", rb_reg_eqq, 1);
|
1999-01-20 07:59:39 +03:00
|
|
|
rb_define_method(rb_cRegexp, "~", rb_reg_match2, 0);
|
2004-07-17 10:28:10 +04:00
|
|
|
rb_define_method(rb_cRegexp, "match", rb_reg_match_m, -1);
|
2016-05-18 13:37:13 +03:00
|
|
|
rb_define_method(rb_cRegexp, "match?", rb_reg_match_m_p, -1);
|
2002-04-18 12:04:57 +04:00
|
|
|
rb_define_method(rb_cRegexp, "to_s", rb_reg_to_s, 0);
|
1999-01-20 07:59:39 +03:00
|
|
|
rb_define_method(rb_cRegexp, "inspect", rb_reg_inspect, 0);
|
|
|
|
rb_define_method(rb_cRegexp, "source", rb_reg_source, 0);
|
|
|
|
rb_define_method(rb_cRegexp, "casefold?", rb_reg_casefold_p, 0);
|
2001-02-08 12:19:27 +03:00
|
|
|
rb_define_method(rb_cRegexp, "options", rb_reg_options_m, 0);
|
2007-10-04 10:57:19 +04:00
|
|
|
rb_define_method(rb_cRegexp, "encoding", rb_obj_encoding, 0); /* in encoding.c */
|
2007-11-26 11:33:11 +03:00
|
|
|
rb_define_method(rb_cRegexp, "fixed_encoding?", rb_reg_fixed_encoding_p, 0);
|
2007-12-10 00:44:19 +03:00
|
|
|
rb_define_method(rb_cRegexp, "names", rb_reg_names, 0);
|
|
|
|
rb_define_method(rb_cRegexp, "named_captures", rb_reg_named_captures, 0);
|
2022-03-24 11:00:51 +03:00
|
|
|
rb_define_method(rb_cRegexp, "timeout", rb_reg_timeout_get, 0);
|
1998-01-16 15:13:05 +03:00
|
|
|
|
2022-03-28 09:03:17 +03:00
|
|
|
rb_eRegexpTimeoutError = rb_define_class_under(rb_cRegexp, "TimeoutError", rb_eRegexpError);
|
2022-03-24 10:59:11 +03:00
|
|
|
rb_define_singleton_method(rb_cRegexp, "timeout", rb_reg_s_timeout_get, 0);
|
|
|
|
rb_define_singleton_method(rb_cRegexp, "timeout=", rb_reg_s_timeout_set, 1);
|
|
|
|
|
2011-05-13 00:39:11 +04:00
|
|
|
/* see Regexp.options and Regexp.new */
|
2005-02-17 17:43:38 +03:00
|
|
|
rb_define_const(rb_cRegexp, "IGNORECASE", INT2FIX(ONIG_OPTION_IGNORECASE));
|
2011-05-13 00:39:11 +04:00
|
|
|
/* see Regexp.options and Regexp.new */
|
2005-02-17 17:43:38 +03:00
|
|
|
rb_define_const(rb_cRegexp, "EXTENDED", INT2FIX(ONIG_OPTION_EXTEND));
|
2011-05-13 00:39:11 +04:00
|
|
|
/* see Regexp.options and Regexp.new */
|
2005-02-17 17:43:38 +03:00
|
|
|
rb_define_const(rb_cRegexp, "MULTILINE", INT2FIX(ONIG_OPTION_MULTILINE));
|
2011-05-13 00:39:11 +04:00
|
|
|
/* see Regexp.options and Regexp.new */
|
2009-02-22 09:12:21 +03:00
|
|
|
rb_define_const(rb_cRegexp, "FIXEDENCODING", INT2FIX(ARG_ENCODING_FIXED));
|
2011-05-13 00:39:11 +04:00
|
|
|
/* see Regexp.options and Regexp.new */
|
2011-02-03 01:18:14 +03:00
|
|
|
rb_define_const(rb_cRegexp, "NOENCODING", INT2FIX(ARG_ENCODING_NONE));
|
1999-08-13 09:45:20 +04:00
|
|
|
|
1998-01-16 15:13:05 +03:00
|
|
|
rb_global_variable(®_cache);
|
|
|
|
|
2000-02-23 08:23:12 +03:00
|
|
|
rb_cMatch = rb_define_class("MatchData", rb_cObject);
|
2002-12-20 11:33:17 +03:00
|
|
|
rb_define_alloc_func(rb_cMatch, match_alloc);
|
1999-10-15 12:52:18 +04:00
|
|
|
rb_undef_method(CLASS_OF(rb_cMatch), "new");
|
2019-11-06 02:54:32 +03:00
|
|
|
rb_undef_method(CLASS_OF(rb_cMatch), "allocate");
|
1999-10-15 12:52:18 +04:00
|
|
|
|
2003-05-19 09:41:08 +04:00
|
|
|
rb_define_method(rb_cMatch, "initialize_copy", match_init_copy, 1);
|
2007-12-10 00:44:19 +03:00
|
|
|
rb_define_method(rb_cMatch, "regexp", match_regexp, 0);
|
|
|
|
rb_define_method(rb_cMatch, "names", match_names, 0);
|
1999-08-13 09:45:20 +04:00
|
|
|
rb_define_method(rb_cMatch, "size", match_size, 0);
|
|
|
|
rb_define_method(rb_cMatch, "length", match_size, 0);
|
|
|
|
rb_define_method(rb_cMatch, "offset", match_offset, 1);
|
2022-02-19 13:10:00 +03:00
|
|
|
rb_define_method(rb_cMatch, "byteoffset", match_byteoffset, 1);
|
1999-08-13 09:45:20 +04:00
|
|
|
rb_define_method(rb_cMatch, "begin", match_begin, 1);
|
|
|
|
rb_define_method(rb_cMatch, "end", match_end, 1);
|
2021-09-16 13:37:52 +03:00
|
|
|
rb_define_method(rb_cMatch, "match", match_nth, 1);
|
2021-09-16 13:50:29 +03:00
|
|
|
rb_define_method(rb_cMatch, "match_length", match_nth_length, 1);
|
1999-01-20 07:59:39 +03:00
|
|
|
rb_define_method(rb_cMatch, "to_a", match_to_a, 0);
|
|
|
|
rb_define_method(rb_cMatch, "[]", match_aref, -1);
|
2003-07-17 13:53:16 +04:00
|
|
|
rb_define_method(rb_cMatch, "captures", match_captures, 0);
|
2022-08-06 03:13:09 +03:00
|
|
|
rb_define_alias(rb_cMatch, "deconstruct", "captures");
|
2023-04-19 02:19:31 +03:00
|
|
|
rb_define_method(rb_cMatch, "named_captures", match_named_captures, -1);
|
2022-08-06 03:13:09 +03:00
|
|
|
rb_define_method(rb_cMatch, "deconstruct_keys", match_deconstruct_keys, 1);
|
2003-05-04 20:03:24 +04:00
|
|
|
rb_define_method(rb_cMatch, "values_at", match_values_at, -1);
|
1999-08-13 09:45:20 +04:00
|
|
|
rb_define_method(rb_cMatch, "pre_match", rb_reg_match_pre, 0);
|
|
|
|
rb_define_method(rb_cMatch, "post_match", rb_reg_match_post, 0);
|
1999-01-20 07:59:39 +03:00
|
|
|
rb_define_method(rb_cMatch, "to_s", match_to_s, 0);
|
2007-06-23 12:26:08 +04:00
|
|
|
rb_define_method(rb_cMatch, "inspect", match_inspect, 0);
|
2001-09-05 10:54:57 +04:00
|
|
|
rb_define_method(rb_cMatch, "string", match_string, 0);
|
2009-09-05 04:33:54 +04:00
|
|
|
rb_define_method(rb_cMatch, "hash", match_hash, 0);
|
|
|
|
rb_define_method(rb_cMatch, "eql?", match_equal, 1);
|
|
|
|
rb_define_method(rb_cMatch, "==", match_equal, 1);
|
1998-01-16 15:13:05 +03:00
|
|
|
}
|