* ext/strscan/strscan.c: merge documentation from 1.8 branch.

git-svn-id: svn+ssh://ci.ruby-lang.org/ruby/trunk@5774 b2dd03c8-39d4-4d8f-98ff-823fe69b080e
This commit is contained in:
aamine 2004-02-18 17:38:33 +00:00
Родитель fbac5a54a1
Коммит 6f77e4b4ba
2 изменённых файлов: 460 добавлений и 5 удалений

Просмотреть файл

@ -1,8 +1,15 @@
Thu Feb 19 02:37:28 2004 Minero Aoki <aamine@loveruby.net>
* ext/strscan/strscan.c: merge documentation from 1.8 branch. Thanks Gavin Sinclair.
Wed Feb 19 00:20:00 2004 Gavin Sinclair <gsinclair@soyabean.com.au>
* lib/cgi-lib.rb: deprecated after 1.8.1
* lib/getopts.rb: ditto
* lib/importenv.rb: ditto
* lib/parsearg.rb: ditto
Thu Feb 19 00:11:05 2004 Dave Thomas <dave@pragprog.com>
@ -123,7 +130,9 @@ Mon Feb 16 23:28:14 2004 NAKAMURA, Hiroshi <nakahiro@sarion.co.jp>
Mon Feb 16 22:22:00 2004 Gavin Sinclair <gsinclair@soyabean.com.au>
* lib/generator.rb: corrected doc format
* lib/rinda/rinda.rb: added documentation (from Hugh Sasse)
* lib/rinda/tuplespace.rb: ditto
[Note: rinda files actually committed Wed Feb 18 07:27:00 2004]

Просмотреть файл

@ -194,6 +194,12 @@ strscan_s_allocate(klass)
return Data_Wrap_Struct(klass, strscan_mark, strscan_free, p);
}
/*
* call-seq: StringScanner.new(string, dup = false)
*
* Creates a new StringScanner object to scan over the given +string+.
* The +dup+ argument is obsolete and is no longer used.
*/
static VALUE
strscan_initialize(argc, argv, self)
int argc;
@ -216,6 +222,11 @@ strscan_initialize(argc, argv, self)
Instance Methods
======================================================================= */
/*
* call-seq: StringScanner.must_C_version
*
* This method is defined for backward compatibility.
*/
static VALUE
strscan_s_mustc(self)
VALUE self;
@ -223,6 +234,9 @@ strscan_s_mustc(self)
return self;
}
/*
* Reset the scan pointer (index 0) and clear matching data.
*/
static VALUE
strscan_reset(self)
VALUE self;
@ -235,6 +249,13 @@ strscan_reset(self)
return self;
}
/*
* call-seq:
* terminate
* clear
*
* Set the scan pointer to the end of the string and clear matching data.
*/
static VALUE
strscan_terminate(self)
VALUE self;
@ -247,6 +268,9 @@ strscan_terminate(self)
return self;
}
/*
* Returns the string being scanned.
*/
static VALUE
strscan_clear(self)
VALUE self;
@ -265,6 +289,12 @@ strscan_get_string(self)
return p->str;
}
/*
* call-seq: string=(str)
*
* Changes the string being scanned to +str+ and resets the scanner.
* Returns +str+.
*/
static VALUE
strscan_set_string(self, str)
VALUE self, str;
@ -280,6 +310,20 @@ strscan_set_string(self, str)
return str;
}
/*
* call-seq:
* concat(str)
* <<(str)
*
* Appends +str+ to the string being scanned.
* This method does not affect the scan pointer.
*
* s = StringScanner.new("Fri Dec 12 1975 14:39")
* s.scan(/Fri /)
* s << " +1000 GMT"
* s.string # -> "Fri Dec 12 1975 14:39 +1000 GMT"
* s.scan(/Dec/) # -> "Dec"
*/
static VALUE
strscan_concat(self, str)
VALUE self, str;
@ -292,6 +336,20 @@ strscan_concat(self, str)
return self;
}
/*
* Returns the position of the scan pointer. In the 'reset' position, this
* value is zero. In the 'terminated' position (i.e. the string is exhausted),
* this value is the length of the string.
*
* In short, it's a 0-based index into the string.
*
* s = StringScanner.new('test string')
* s.pos # -> 0
* s.scan_until /str/ # -> "test str"
* s.pos # -> 8
* s.terminate # -> #<StringScanner fin>
* s.pos # -> 11
*/
static VALUE
strscan_get_pos(self)
VALUE self;
@ -302,6 +360,15 @@ strscan_get_pos(self)
return INT2FIX(p->curr);
}
/*
* call-seq: pos=(n)
*
* Modify the scan pointer.
*
* s = StringScanner.new('test string')
* s.pos = 7 # -> 7
* s.rest # -> "ring"
*/
static VALUE
strscan_set_pos(self, v)
VALUE self, v;
@ -371,6 +438,27 @@ strscan_do_scan(self, regex, succptr, getstr, headonly)
}
}
/*
* call-seq:
* scanner.scan(pattern) => String
*
*/
/*
* call-seq: scan(pattern)
*
* Tries to match with +pattern+ at the current position. If there's a match,
* the scanner advances the "scan pointer" and returns the matched string.
* Otherwise, the scanner returns +nil+.
*
* s = StringScanner.new('test string')
* p s.scan(/\w+/) # -> "test"
* p s.scan(/\w+/) # -> nil
* p s.scan(/\s+/) # -> " "
* p s.scan(/\w+/) # -> "string"
* p s.scan(/./) # -> nil
*
*/
static VALUE
strscan_scan(self, re)
VALUE self, re;
@ -378,6 +466,17 @@ strscan_scan(self, re)
return strscan_do_scan(self, re, 1, 1, 1);
}
/*
* call-seq: match?(pattern)
*
* Tests whether the given +pattern+ is matched from the current scan pointer.
* Returns the length of the match, or +nil+. The scan pointer is not advanced.
*
* s = StringScanner.new('test string')
* p s.match?(/\w+/) # -> 4
* p s.match?(/\w+/) # -> 4
* p s.match?(/\s+/) # -> nil
*/
static VALUE
strscan_match_p(self, re)
VALUE self, re;
@ -385,6 +484,23 @@ strscan_match_p(self, re)
return strscan_do_scan(self, re, 0, 0, 1);
}
/*
* call-seq: skip(pattern)
*
* Attempts to skip over the given +pattern+ beginning with the scan pointer.
* If it matches, the scan pointer is advanced to the end of the match, and the
* length of the match is returned. Otherwise, +nil+ is returned.
*
* It's similar to #scan, but without returning the matched string.
*
* s = StringScanner.new('test string')
* p s.skip(/\w+/) # -> 4
* p s.skip(/\w+/) # -> nil
* p s.skip(/\s+/) # -> 1
* p s.skip(/\w+/) # -> 6
* p s.skip(/./) # -> nil
*
*/
static VALUE
strscan_skip(self, re)
VALUE self, re;
@ -392,6 +508,21 @@ strscan_skip(self, re)
return strscan_do_scan(self, re, 1, 0, 1);
}
/*
* call-seq: check(pattern)
*
* This returns the value that #scan would return, without advancing the scan
* pointer. The match register is affected, though.
*
* s = StringScanner.new("Fri Dec 12 1975 14:39")
* s.check /Fri/ # -> "Fri"
* s.pos # -> 0
* s.matched # -> "Fri"
* s.check /12/ # -> nil
* s.matched # -> nil
*
* Mnemonic: it "checks" to see whether a #scan will return a value.
*/
static VALUE
strscan_check(self, re)
VALUE self, re;
@ -399,6 +530,16 @@ strscan_check(self, re)
return strscan_do_scan(self, re, 0, 1, 1);
}
/*
* call-seq: scan_full(pattern, advance_pointer_p, return_string_p)
*
* Tests whether the given +pattern+ is matched from the current scan pointer.
* Returns the matched string if +return_string_p+ is true.
* Advances the scan pointer if +advance_pointer_p+ is true.
* The match register is affected.
*
* "full" means "#scan with full parameters".
*/
static VALUE
strscan_scan_full(self, re, s, f)
VALUE self, re, s, f;
@ -407,6 +548,18 @@ strscan_scan_full(self, re, s, f)
}
/*
* call-seq: scan_until(pattern)
*
* Scans the string _until_ the +pattern+ is matched. Returns the substring up
* to and including the end of the match, advancing the scan pointer to that
* location. If there is no match, +nil+ is returned.
*
* s = StringScanner.new("Fri Dec 12 1975 14:39")
* s.scan_until(/1/) # -> "Fri Dec 1"
* s.pre_match # -> "Fri Dec "
* s.scan_until(/XYZ/) # -> nil
*/
static VALUE
strscan_scan_until(self, re)
VALUE self, re;
@ -414,6 +567,19 @@ strscan_scan_until(self, re)
return strscan_do_scan(self, re, 1, 1, 0);
}
/*
* call-seq: exist?(pattern)
*
* Looks _ahead_ to see if the +pattern+ exists _anywhere_ in the string,
* without advancing the scan pointer. This predicts whether a #scan_until
* will return a value.
*
* s = StringScanner.new('test string')
* s.exist? /s/ # -> 3
* s.scan /test/ # -> "test"
* s.exist? /s/ # -> 2
* s.exist? /e/ # -> nil
*/
static VALUE
strscan_exist_p(self, re)
VALUE self, re;
@ -421,6 +587,22 @@ strscan_exist_p(self, re)
return strscan_do_scan(self, re, 0, 0, 0);
}
/*
* call-seq: skip_until(pattern)
*
* Advances the scan pointer until +pattern+ is matched and consumed. Returns
* the number of bytes advanced, or +nil+ if no match was found.
*
* Look ahead to match +pattern+, and advance the scan pointer to the _end_
* of the match. Return the number of characters advanced, or +nil+ if the
* match was unsuccessful.
*
* It's similar to #scan_until, but without returning the intervening string.
*
* s = StringScanner.new("Fri Dec 12 1975 14:39")
* s.skip_until /12/ # -> 10
* s # -> #<StringScanner 10/21 "...ec 12" @ " 1975...">
*/
static VALUE
strscan_skip_until(self, re)
VALUE self, re;
@ -428,6 +610,19 @@ strscan_skip_until(self, re)
return strscan_do_scan(self, re, 1, 0, 0);
}
/*
* call-seq: check_until(pattern)
*
* This returns the value that #scan_until would return, without advancing the
* scan pointer. The match register is affected, though.
*
* s = StringScanner.new("Fri Dec 12 1975 14:39")
* s.check_until /12/ # -> "Fri Dec 12"
* s.pos # -> 0
* s.matched # -> "12"
*
* Mnemonic: it "checks" to see whether a #scan_until will return a value.
*/
static VALUE
strscan_check_until(self, re)
VALUE self, re;
@ -435,6 +630,15 @@ strscan_check_until(self, re)
return strscan_do_scan(self, re, 0, 1, 0);
}
/*
* call-seq: search_full(pattern, advance_pointer_p, return_string_p)
*
* Scans the string _until_ the +pattern+ is matched.
* Returns the matched string if +return_string_p+ is true, otherwise
* returns the number of bytes advanced.
* Advances the scan pointer if +advance_pointer_p+ is true, otherwise not.
* This method does affect the match register.
*/
static VALUE
strscan_search_full(self, re, s, f)
VALUE self, re, s, f;
@ -457,6 +661,14 @@ adjust_registers_to_matched(p)
p->regs.end[0] = p->curr - p->prev;
}
/*
* Scans one character and returns it.
*
* s = StringScanner.new('ab')
* s.getch # => "a"
* s.getch # => "b"
* s.getch # => nil
*/
static VALUE
strscan_getch(self)
VALUE self;
@ -480,6 +692,14 @@ strscan_getch(self)
p->prev + p->regs.end[0]);
}
/*
* Scans one byte and returns it. Similar to, but not the same as, #getch.
*
* s = StringScanner.new('ab')
* s.get_byte # => "a"
* s.get_byte # => "b"
* s.get_byte # => nil
*/
static VALUE
strscan_get_byte(self)
VALUE self;
@ -507,7 +727,17 @@ strscan_getbyte(self)
return strscan_get_byte(self);
}
/*
* call-seq: peek(len)
*
* Extracts a string corresponding to <tt>string[pos,len]</tt>, without
* advancing the scan pointer.
*
* s = StringScanner.new('test string')
* s.peek(7) # => "test st"
* s.peek(7) # => "test st"
*
*/
static VALUE
strscan_peek(self, vlen)
VALUE self, vlen;
@ -526,6 +756,17 @@ strscan_peek(self, vlen)
return extract_beg_len(p, p->curr, len);
}
/*
* Set the scan pointer to the previous position. Only one previous position is
* remembered, and it changes with each scanning operation.
*
* s = StringScanner.new('test string')
* s.scan(/\w+/) # => "test"
* s.unscan
* s.scan(/../) # => "te"
* s.scan(/\d/) # => nil
* s.unscan # ScanError: cannot unscan: prev match had failed
*/
static VALUE
strscan_peep(self, vlen)
VALUE self, vlen;
@ -549,6 +790,18 @@ strscan_unscan(self)
return self;
}
/*
* Returns +true+ iff the scan pointer is at the beginning of the line.
*
* s = StringScanner.new("test\ntest\n")
* s.bol? # => true
* s.scan(/te/)
* s.bol? # => false
* s.scan(/st\n/)
* s.bol? # => true
* s.terminate
* s.bol? # => true
*/
static VALUE
strscan_bol_p(self)
VALUE self;
@ -561,6 +814,16 @@ strscan_bol_p(self)
return (*(CURPTR(p) - 1) == '\n') ? Qtrue : Qfalse;
}
/*
* Returns +true+ if the scan pointer is at the end of the string.
*
* s = StringScanner.new('test string')
* p s.eos? # => false
* s.scan(/test/)
* p s.eos? # => false
* s.terminate
* p s.eos? # => true
*/
static VALUE
strscan_eos_p(self)
VALUE self;
@ -574,14 +837,26 @@ strscan_eos_p(self)
return Qfalse;
}
/*
* Equivalent to #eos?.
* This method is obsolete; use #eos? instead.
*/
static VALUE
strscan_empty_p(self)
VALUE self;
{
rb_warning("StringScanner#empty_p is obsolete; use #eos? instead");
rb_warning("StringScanner#empty? is obsolete; use #eos? instead");
return strscan_eos_p(self);
}
/*
* Returns true iff there is more data in the string. See #eos?.
* This method is obsolete; use #eos? instead.
*
* s = StringScanner.new('test string')
* s.eos? # These two
* s.rest? # are opposites.
*/
static VALUE
strscan_rest_p(self)
VALUE self;
@ -595,7 +870,15 @@ strscan_rest_p(self)
return Qtrue;
}
/*
* Returns +true+ iff the last match was successful.
*
* s = StringScanner.new('test string')
* s.match?(/\w+/) # => 4
* s.matched? # => true
* s.match?(/\d+/) # => nil
* s.matched? # => false
*/
static VALUE
strscan_matched_p(self)
VALUE self;
@ -609,6 +892,13 @@ strscan_matched_p(self)
return Qfalse;
}
/*
* Returns the last matched string.
*
* s = StringScanner.new('test string')
* s.match?(/\w+/) # -> 4
* s.matched # -> "test"
*/
static VALUE
strscan_matched(self)
VALUE self;
@ -622,6 +912,16 @@ strscan_matched(self)
p->prev + p->regs.end[0]);
}
/*
* Returns the size of the most recent match (see #matched), or +nil+ if there
* was no recent match.
*
* s = StringScanner.new('test string')
* s.check /\w+/ # -> "test"
* s.matched_size # -> 4
* s.check /\d+/ # -> nil
* s.matched_size # -> nil
*/
static VALUE
strscan_matched_size(self)
VALUE self;
@ -634,6 +934,20 @@ strscan_matched_size(self)
return INT2NUM(p->regs.end[0] - p->regs.beg[0]);
}
/*
* call-seq: [](n)
*
* Return the n-th subgroup in the most recent match.
*
* s = StringScanner.new("Fri Dec 12 1975 14:39")
* s.scan(/(\w+) (\w+) (\d+) /) # -> "Fri Dec 12 "
* s[0] # -> "Fri Dec 12 "
* s[1] # -> "Fri"
* s[2] # -> "Dec"
* s[3] # -> "12"
* s.post_match # -> "1975 14:39"
* s.pre_match # -> ""
*/
static VALUE
strscan_aref(self, idx)
VALUE self, idx;
@ -655,6 +969,15 @@ strscan_aref(self, idx)
p->prev + p->regs.end[i]);
}
/*
* Return the <i><b>pre</b>-match</i> (in the regular expression sense) of the last scan.
*
* s = StringScanner.new('test string')
* s.scan(/\w+/) # -> "test"
* s.scan(/\s+/) # -> " "
* s.pre_match # -> "test"
* s.post_match # -> "string"
*/
static VALUE
strscan_pre_match(self)
VALUE self;
@ -667,6 +990,15 @@ strscan_pre_match(self)
return extract_range(p, 0, p->prev + p->regs.beg[0]);
}
/*
* Return the <i><b>post</b>-match</i> (in the regular expression sense) of the last scan.
*
* s = StringScanner.new('test string')
* s.scan(/\w+/) # -> "test"
* s.scan(/\s+/) # -> " "
* s.pre_match # -> "test"
* s.post_match # -> "string"
*/
static VALUE
strscan_post_match(self)
VALUE self;
@ -679,7 +1011,10 @@ strscan_post_match(self)
return extract_range(p, p->prev + p->regs.end[0], S_LEN(p));
}
/*
* Returns the "rest" of the string (i.e. everything after the scan pointer).
* If there is no more data, it returns <tt>""</tt>.
*/
static VALUE
strscan_rest(self)
VALUE self;
@ -693,6 +1028,9 @@ strscan_rest(self)
return extract_range(p, p->curr, S_LEN(p));
}
/*
* <tt>s.rest_size</tt> is equivalent to <tt>s.rest.size</tt>.
*/
static VALUE
strscan_rest_size(self)
VALUE self;
@ -709,10 +1047,20 @@ strscan_rest_size(self)
return INT2FIX(i);
}
#define INSPECT_LENGTH 5
#define BUFSIZE 256
/*
* Returns a string that represents the StringScanner object, showing:
* - the current position
* - the size of the string
* - the characters surrounding the scan pointer
*
* s = StringScanner.new("Fri Dec 12 1975 14:39")
* s.inspect # -> '#<StringScanner 0/21 @ "Fri D...">'
* s.scan_until /12/ # -> "Fri Dec 12"
* s.inspect # -> '#<StringScanner 10/21 "...ec 12" @ " 1975...">'
*/
static VALUE
strscan_inspect(self)
VALUE self;
@ -796,6 +1144,104 @@ inspect2(p)
Ruby Interface
======================================================================= */
/*
* StringScanner provides for lexical scanning operations on a String. Here is
* an example of its usage:
*
* s = StringScanner.new('This is an example string')
* s.eos? # -> false
*
* p s.scan(/\w+/) # -> "This"
* p s.scan(/\w+/) # -> nil
* p s.scan(/\s+/) # -> " "
* p s.scan(/\s+/) # -> nil
* p s.scan(/\w+/) # -> "is"
* s.eos? # -> false
*
* p s.scan(/\s+/) # -> " "
* p s.scan(/\w+/) # -> "an"
* p s.scan(/\s+/) # -> " "
* p s.scan(/\w+/) # -> "example"
* p s.scan(/\s+/) # -> " "
* p s.scan(/\w+/) # -> "string"
* s.eos? # -> true
*
* p s.scan(/\s+/) # -> nil
* p s.scan(/\w+/) # -> nil
*
* Scanning a string means remembering the position of a <i>scan pointer</i>,
* which is just an index. The scan pointer effectively points _between_
* characters. (XXX: get this right - is it between or not?)
*
* Given the string "test string", here are the pertinent scan pointer
* positions:
*
*     t e s t   s t r i n g
*   0 1 2 ...             1
*                         0
*
* When you #scan for a pattern (a regular expression), the match must occur
* at the character after the scan pointer. If you use #scan_until, then the
* match can occur anywhere after the scan pointer. In both cases, the scan
* pointer moves <i>just beyond</i> the last character of the match, ready to
* scan again from the next character onwards. This is demonstrated by the
* example above.
*
* == Method Categories
*
* There are other methods besides the plain scanners. You can look ahead in
* the string without actually scanning. You can access the most recent match.
* You can modify the string being scanned, reset or terminate the scanner,
* find out or change the position of the scan pointer, skip ahead, and so on.
*
* === Advancing the Scan Pointer
*
* getch
* getbyte
* scan
* scan_until
* skip
* skip_until
*
* === Looking Ahead
*
* check
* check_until
* exist?
* match?
* peek
*
* === Finding Where we Are
*
* bol?
* eos?
* pos
*
* === Setting Where we Are
*
* reset
* terminate
* pos=
*
* === Match Data
*
* matched
* matched?
* matched_size
* []
* pre_match
* post_match
*
* === Miscellaneous
*
* <<
* concat
* string
* string=
* unscan
*
* There are aliases to several of the methods.
*/
void
Init_strscan()
{