From 8d302c914c15af4a29c8b8af801281fa117a7ad2 Mon Sep 17 00:00:00 2001 From: Yusuke Endoh Date: Mon, 12 Aug 2019 23:12:27 +0900 Subject: [PATCH] string.c (rb_str_sub, _gsub): improve the rdoc This change: * Added an explanation about back references except \n and \k (\` \& \' \+ \0) * Added an explanation about an escape (\\) * Added some rdoc references * Rephrased and clarified the reason why double escape is needed, added some examples, and moved the note to the end (because it is not specific to the method itself). --- string.c | 79 +++++++++++++++++++++++++++++++++++++++++--------------- 1 file changed, 58 insertions(+), 21 deletions(-) diff --git a/string.c b/string.c index 2890f46e24..77d67577a0 100644 --- a/string.c +++ b/string.c @@ -5129,27 +5129,31 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str) * Returns a copy of +str+ with the _first_ occurrence of +pattern+ * replaced by the second argument. The +pattern+ is typically a Regexp; if * given as a String, any regular expression metacharacters it contains will - * be interpreted literally, e.g. '\\\d' will match a backslash + * be interpreted literally, e.g. \d will match a backslash * followed by 'd', instead of a digit. * * If +replacement+ is a String it will be substituted for the matched text. * It may contain back-references to the pattern's capture groups of the form - * "\\d", where d is a group number, or - * "\\k<n>", where n is a group name. If it is a - * double-quoted string, both back-references must be preceded by an - * additional backslash. However, within +replacement+ the special match - * variables, such as $&, will not refer to the current match. - * If +replacement+ is a String that looks like a pattern's capture group but - * is actually not a pattern capture group e.g. "\\'", then it - * will have to be preceded by two backslashes like so "\\\\'". + * \d, where d is a group number, or + * \k<n>, where n is a group name.
+ * Similarly, \&, \', \`, and + * \+ correspond to the special variables $&, + * $', $`, and $+, respectively. + * (See rdoc-ref:regexp.rdoc for details.) + * \0 is the same as \&. + * \\\\ is interpreted as an escape, i.e., a single backslash. + * Note that, within +replacement+ the special match variables, such as + * $&, will not refer to the current match. * * If the second argument is a Hash, and the matched text is one of its keys, * the corresponding value is the replacement string. * * In the block form, the current match string is passed in as a parameter, * and variables such as $1, $2, $`, - * $&, and $' will be set appropriately. The value - * returned by the block will be substituted for the match on each call. + * $&, and $' will be set appropriately. + * (See rdoc-ref:regexp.rdoc for details.) + * The value returned by the block will be substituted for the match on each + * call. * * The result inherits any tainting in the original string or any supplied * replacement string. @@ -5160,6 +5164,19 @@ rb_str_sub_bang(int argc, VALUE *argv, VALUE str) * "hello".sub(/(?<foo>[aeiou])/, '*\k<foo>*') #=> "h*e*llo" * 'Is SHELL your preferred shell?'.sub(/[[:upper:]]{2,}/, ENV) * #=> "Is /bin/bash your preferred shell?" + * + * Note that a string literal consumes backslashes. + * (See rdoc-ref:syntax/literals.rdoc for details of string literals.) + * So, back-references are typically preceded by an additional backslash. + * For example, if you want to write a back-reference \& in + * +replacement+ with a double-quoted string literal, you need to write: + * "..\\\\&..". + * If you want to write a non-back-reference string \& in + * +replacement+, you first need to escape the backslash to prevent + * this method from interpreting it as a back-reference, and then you + * need to escape the backslashes again to prevent a string literal from + * consuming them: "..\\\\\\\\&..". + * You may want to use the block form to avoid a lot of backslashes.
*/ static VALUE @@ -5332,24 +5349,31 @@ rb_str_gsub_bang(int argc, VALUE *argv, VALUE str) * pattern substituted for the second argument. The pattern is * typically a Regexp; if given as a String, any * regular expression metacharacters it contains will be interpreted - * literally, e.g. '\\\d' will match a backslash followed by 'd', + * literally, e.g. \d will match a backslash followed by 'd', * instead of a digit. * - * If replacement is a String it will be substituted for - * the matched text. It may contain back-references to the pattern's capture - * groups of the form \\\d, where d is a group number, or - * \\\k<n>, where n is a group name. If it is a - * double-quoted string, both back-references must be preceded by an - * additional backslash. However, within replacement the special match - * variables, such as $&, will not refer to the current match. + * If +replacement+ is a String it will be substituted for the matched text. + * It may contain back-references to the pattern's capture groups of the form + * \d, where d is a group number, or + * \k<n>, where n is a group name. + * Similarly, \&, \', \`, and + * \+ correspond to the special variables $&, + * $', $`, and $+, respectively. + * (See rdoc-ref:regexp.rdoc for details.) + * \0 is the same as \&. + * \\\\ is interpreted as an escape, i.e., a single backslash. + * Note that, within +replacement+ the special match variables, such as + * $&, will not refer to the current match. * * If the second argument is a Hash, and the matched text is one * of its keys, the corresponding value is the replacement string. * * In the block form, the current match string is passed in as a parameter, * and variables such as $1, $2, $`, - * $&, and $' will be set appropriately. The value - * returned by the block will be substituted for the match on each call. + * $&, and $' will be set appropriately. + * (See rdoc-ref:regexp.rdoc for details.)
+ * The value returned by the block will be substituted for the match on each + * call. * * The result inherits any tainting in the original string or any supplied * replacement string. @@ -5362,6 +5386,19 @@ rb_str_gsub_bang(int argc, VALUE *argv, VALUE str) * "hello".gsub(/./) {|s| s.ord.to_s + ' '} #=> "104 101 108 108 111 " * "hello".gsub(/(?<foo>[aeiou])/, '{\k<foo>}') #=> "h{e}ll{o}" * 'hello'.gsub(/[eo]/, 'e' => 3, 'o' => '*') #=> "h3ll*" + * + * Note that a string literal consumes backslashes. + * (See rdoc-ref:syntax/literals.rdoc for details of string literals.) + * So, back-references are typically preceded by an additional backslash. + * For example, if you want to write a back-reference \& in + * +replacement+ with a double-quoted string literal, you need to write: + * "..\\\\&..". + * If you want to write a non-back-reference string \& in + * +replacement+, you first need to escape the backslash to prevent + * this method from interpreting it as a back-reference, and then you + * need to escape the backslashes again to prevent a string literal from + * consuming them: "..\\\\\\\\&..". + * You may want to use the block form to avoid a lot of backslashes. */ static VALUE