зеркало из https://github.com/github/ruby.git
[ruby/yarp] Encoding-dependent escapes
https://github.com/ruby/yarp/commit/36a5b801c4
This commit is contained in:
Родитель
9aba46c666
Коммит
432702a427
120
yarp/unescape.c
120
yarp/unescape.c
|
@ -14,6 +14,20 @@ yp_char_is_hexadecimal_digits(const char *c, size_t length) {
|
||||||
return true;
|
return true;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// We don't call the char_width function unless we have to because it's
|
||||||
|
// expensive to go through the indirection of the function pointer. Instead we
|
||||||
|
// provide a fast path that will check if we can just return 1.
|
||||||
|
static inline size_t
|
||||||
|
yp_char_width(yp_parser_t *parser, const char *start, const char *end) {
|
||||||
|
const unsigned char *uc = (const unsigned char *) start;
|
||||||
|
|
||||||
|
if (parser->encoding_changed || (*uc >= 0x80)) {
|
||||||
|
return parser->encoding.char_width(start, end - start);
|
||||||
|
} else {
|
||||||
|
return 1;
|
||||||
|
}
|
||||||
|
}
|
||||||
|
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
/* Lookup tables for characters */
|
/* Lookup tables for characters */
|
||||||
/******************************************************************************/
|
/******************************************************************************/
|
||||||
|
@ -178,7 +192,7 @@ unescape_char(const unsigned char value, const unsigned char flags) {
|
||||||
|
|
||||||
// Read a specific escape sequence into the given destination.
|
// Read a specific escape sequence into the given destination.
|
||||||
static const char *
|
static const char *
|
||||||
unescape(char *dest, size_t *dest_length, const char *backslash, const char *end, yp_list_t *error_list, const unsigned char flags, bool write_to_str) {
|
unescape(yp_parser_t *parser, char *dest, size_t *dest_length, const char *backslash, const char *end, const unsigned char flags, bool write_to_str) {
|
||||||
switch (backslash[1]) {
|
switch (backslash[1]) {
|
||||||
case 'a':
|
case 'a':
|
||||||
case 'b':
|
case 'b':
|
||||||
|
@ -218,7 +232,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
||||||
// \unnnn Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F])
|
// \unnnn Unicode character, where nnnn is exactly 4 hexadecimal digits ([0-9a-fA-F])
|
||||||
case 'u': {
|
case 'u': {
|
||||||
if ((flags & YP_UNESCAPE_FLAG_CONTROL) | (flags & YP_UNESCAPE_FLAG_META)) {
|
if ((flags & YP_UNESCAPE_FLAG_CONTROL) | (flags & YP_UNESCAPE_FLAG_META)) {
|
||||||
yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Unicode escape sequence cannot be used with control or meta flags.");
|
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Unicode escape sequence cannot be used with control or meta flags.");
|
||||||
return backslash + 2;
|
return backslash + 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -235,11 +249,11 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
||||||
|
|
||||||
// \u{nnnn} character literal allows only 1-6 hexadecimal digits
|
// \u{nnnn} character literal allows only 1-6 hexadecimal digits
|
||||||
if (hexadecimal_length > 6)
|
if (hexadecimal_length > 6)
|
||||||
yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "invalid Unicode escape.");
|
yp_diagnostic_list_append(&parser->error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "invalid Unicode escape.");
|
||||||
|
|
||||||
// there are not hexadecimal characters
|
// there are not hexadecimal characters
|
||||||
if (hexadecimal_length == 0) {
|
if (hexadecimal_length == 0) {
|
||||||
yp_diagnostic_list_append(error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "unterminated Unicode escape");
|
yp_diagnostic_list_append(&parser->error_list, unicode_cursor, unicode_cursor + hexadecimal_length, "unterminated Unicode escape");
|
||||||
return unicode_cursor;
|
return unicode_cursor;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -252,7 +266,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
||||||
uint32_t value;
|
uint32_t value;
|
||||||
unescape_unicode(unicode_start, (size_t) (unicode_cursor - unicode_start), &value);
|
unescape_unicode(unicode_start, (size_t) (unicode_cursor - unicode_start), &value);
|
||||||
if (write_to_str) {
|
if (write_to_str) {
|
||||||
*dest_length += unescape_unicode_write(dest + *dest_length, value, unicode_start, unicode_cursor, error_list);
|
*dest_length += unescape_unicode_write(dest + *dest_length, value, unicode_start, unicode_cursor, &parser->error_list);
|
||||||
}
|
}
|
||||||
|
|
||||||
unicode_cursor += yp_strspn_whitespace(unicode_cursor, end - unicode_cursor);
|
unicode_cursor += yp_strspn_whitespace(unicode_cursor, end - unicode_cursor);
|
||||||
|
@ -260,7 +274,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
||||||
|
|
||||||
// ?\u{nnnn} character literal should contain only one codepoint and cannot be like ?\u{nnnn mmmm}
|
// ?\u{nnnn} character literal should contain only one codepoint and cannot be like ?\u{nnnn mmmm}
|
||||||
if (flags & YP_UNESCAPE_FLAG_EXPECT_SINGLE && codepoints_count > 1)
|
if (flags & YP_UNESCAPE_FLAG_EXPECT_SINGLE && codepoints_count > 1)
|
||||||
yp_diagnostic_list_append(error_list, extra_codepoints_start, unicode_cursor - 1, "Multiple codepoints at single character literal");
|
yp_diagnostic_list_append(&parser->error_list, extra_codepoints_start, unicode_cursor - 1, "Multiple codepoints at single character literal");
|
||||||
|
|
||||||
return unicode_cursor + 1;
|
return unicode_cursor + 1;
|
||||||
}
|
}
|
||||||
|
@ -270,12 +284,12 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
||||||
unescape_unicode(backslash + 2, 4, &value);
|
unescape_unicode(backslash + 2, 4, &value);
|
||||||
|
|
||||||
if (write_to_str) {
|
if (write_to_str) {
|
||||||
*dest_length += unescape_unicode_write(dest + *dest_length, value, backslash + 2, backslash + 6, error_list);
|
*dest_length += unescape_unicode_write(dest + *dest_length, value, backslash + 2, backslash + 6, &parser->error_list);
|
||||||
}
|
}
|
||||||
return backslash + 6;
|
return backslash + 6;
|
||||||
}
|
}
|
||||||
|
|
||||||
yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid Unicode escape sequence");
|
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid Unicode escape sequence");
|
||||||
return backslash + 2;
|
return backslash + 2;
|
||||||
}
|
}
|
||||||
// \c\M-x meta control character, where x is an ASCII printable character
|
// \c\M-x meta control character, where x is an ASCII printable character
|
||||||
|
@ -283,18 +297,18 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
||||||
// \cx control character, where x is an ASCII printable character
|
// \cx control character, where x is an ASCII printable character
|
||||||
case 'c':
|
case 'c':
|
||||||
if (backslash + 2 >= end) {
|
if (backslash + 2 >= end) {
|
||||||
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
||||||
return end;
|
return end;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & YP_UNESCAPE_FLAG_CONTROL) {
|
if (flags & YP_UNESCAPE_FLAG_CONTROL) {
|
||||||
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
|
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
|
||||||
return backslash + 2;
|
return backslash + 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (backslash[2]) {
|
switch (backslash[2]) {
|
||||||
case '\\':
|
case '\\':
|
||||||
return unescape(dest, dest_length, backslash + 2, end, error_list, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
|
return unescape(parser, dest, dest_length, backslash + 2, end, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
|
||||||
case '?':
|
case '?':
|
||||||
if (write_to_str) {
|
if (write_to_str) {
|
||||||
dest[(*dest_length)++] = (char) unescape_char(0x7f, flags);
|
dest[(*dest_length)++] = (char) unescape_char(0x7f, flags);
|
||||||
|
@ -302,7 +316,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
||||||
return backslash + 3;
|
return backslash + 3;
|
||||||
default: {
|
default: {
|
||||||
if (!char_is_ascii_printable(backslash[2])) {
|
if (!char_is_ascii_printable(backslash[2])) {
|
||||||
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
||||||
return backslash + 2;
|
return backslash + 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -316,23 +330,23 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
||||||
// \C-? delete, ASCII 7Fh (DEL)
|
// \C-? delete, ASCII 7Fh (DEL)
|
||||||
case 'C':
|
case 'C':
|
||||||
if (backslash + 3 >= end) {
|
if (backslash + 3 >= end) {
|
||||||
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
||||||
return end;
|
return end;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & YP_UNESCAPE_FLAG_CONTROL) {
|
if (flags & YP_UNESCAPE_FLAG_CONTROL) {
|
||||||
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
|
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Control escape sequence cannot be doubled.");
|
||||||
return backslash + 2;
|
return backslash + 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (backslash[2] != '-') {
|
if (backslash[2] != '-') {
|
||||||
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
||||||
return backslash + 2;
|
return backslash + 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
switch (backslash[3]) {
|
switch (backslash[3]) {
|
||||||
case '\\':
|
case '\\':
|
||||||
return unescape(dest, dest_length, backslash + 3, end, error_list, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
|
return unescape(parser, dest, dest_length, backslash + 3, end, flags | YP_UNESCAPE_FLAG_CONTROL, write_to_str);
|
||||||
case '?':
|
case '?':
|
||||||
if (write_to_str) {
|
if (write_to_str) {
|
||||||
dest[(*dest_length)++] = (char) unescape_char(0x7f, flags);
|
dest[(*dest_length)++] = (char) unescape_char(0x7f, flags);
|
||||||
|
@ -340,7 +354,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
||||||
return backslash + 4;
|
return backslash + 4;
|
||||||
default:
|
default:
|
||||||
if (!char_is_ascii_printable(backslash[3])) {
|
if (!char_is_ascii_printable(backslash[3])) {
|
||||||
yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid control escape sequence");
|
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid control escape sequence");
|
||||||
return backslash + 2;
|
return backslash + 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -354,22 +368,22 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
||||||
// \M-x meta character, where x is an ASCII printable character
|
// \M-x meta character, where x is an ASCII printable character
|
||||||
case 'M': {
|
case 'M': {
|
||||||
if (backslash + 3 >= end) {
|
if (backslash + 3 >= end) {
|
||||||
yp_diagnostic_list_append(error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 1, "Invalid control escape sequence");
|
||||||
return end;
|
return end;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (flags & YP_UNESCAPE_FLAG_META) {
|
if (flags & YP_UNESCAPE_FLAG_META) {
|
||||||
yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Meta escape sequence cannot be doubled.");
|
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Meta escape sequence cannot be doubled.");
|
||||||
return backslash + 2;
|
return backslash + 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (backslash[2] != '-') {
|
if (backslash[2] != '-') {
|
||||||
yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid meta escape sequence");
|
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid meta escape sequence");
|
||||||
return backslash + 2;
|
return backslash + 2;
|
||||||
}
|
}
|
||||||
|
|
||||||
if (backslash[3] == '\\') {
|
if (backslash[3] == '\\') {
|
||||||
return unescape(dest, dest_length, backslash + 3, end, error_list, flags | YP_UNESCAPE_FLAG_META, write_to_str);
|
return unescape(parser, dest, dest_length, backslash + 3, end, flags | YP_UNESCAPE_FLAG_META, write_to_str);
|
||||||
}
|
}
|
||||||
|
|
||||||
if (char_is_ascii_printable(backslash[3])) {
|
if (char_is_ascii_printable(backslash[3])) {
|
||||||
|
@ -379,7 +393,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
||||||
return backslash + 4;
|
return backslash + 4;
|
||||||
}
|
}
|
||||||
|
|
||||||
yp_diagnostic_list_append(error_list, backslash, backslash + 2, "Invalid meta escape sequence");
|
yp_diagnostic_list_append(&parser->error_list, backslash, backslash + 2, "Invalid meta escape sequence");
|
||||||
return backslash + 3;
|
return backslash + 3;
|
||||||
}
|
}
|
||||||
// \n
|
// \n
|
||||||
|
@ -390,14 +404,17 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
||||||
if (backslash + 2 < end && backslash[2] == '\n') {
|
if (backslash + 2 < end && backslash[2] == '\n') {
|
||||||
return backslash + 3;
|
return backslash + 3;
|
||||||
}
|
}
|
||||||
|
/* fallthrough */
|
||||||
/* fallthrough */
|
|
||||||
// In this case we're escaping something that doesn't need escaping.
|
// In this case we're escaping something that doesn't need escaping.
|
||||||
default: {
|
default: {
|
||||||
|
size_t width = yp_char_width(parser, backslash + 1, end);
|
||||||
|
|
||||||
if (write_to_str) {
|
if (write_to_str) {
|
||||||
dest[(*dest_length)++] = backslash[1];
|
memcpy(dest + *dest_length, backslash + 1, width);
|
||||||
|
*dest_length += width;
|
||||||
}
|
}
|
||||||
return backslash + 2;
|
|
||||||
|
return backslash + 1 + width;
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
@ -431,7 +448,7 @@ unescape(char *dest, size_t *dest_length, const char *backslash, const char *end
|
||||||
// \c? or \C-? delete, ASCII 7Fh (DEL)
|
// \c? or \C-? delete, ASCII 7Fh (DEL)
|
||||||
//
|
//
|
||||||
YP_EXPORTED_FUNCTION void
|
YP_EXPORTED_FUNCTION void
|
||||||
yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list) {
|
yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type) {
|
||||||
if (unescape_type == YP_UNESCAPE_NONE) {
|
if (unescape_type == YP_UNESCAPE_NONE) {
|
||||||
// If we're not unescaping then we can reference the source directly.
|
// If we're not unescaping then we can reference the source directly.
|
||||||
return;
|
return;
|
||||||
|
@ -448,7 +465,7 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc
|
||||||
// within the string.
|
// within the string.
|
||||||
char *allocated = malloc(string->length);
|
char *allocated = malloc(string->length);
|
||||||
if (allocated == NULL) {
|
if (allocated == NULL) {
|
||||||
yp_diagnostic_list_append(error_list, string->source, string->source + string->length, "Failed to allocate memory for unescaping.");
|
yp_diagnostic_list_append(&parser->error_list, string->source, string->source + string->length, "Failed to allocate memory for unescaping.");
|
||||||
return;
|
return;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -493,7 +510,7 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc
|
||||||
// This is the only type of unescaping left. In this case we need to
|
// This is the only type of unescaping left. In this case we need to
|
||||||
// handle all of the different unescapes.
|
// handle all of the different unescapes.
|
||||||
assert(unescape_type == YP_UNESCAPE_ALL);
|
assert(unescape_type == YP_UNESCAPE_ALL);
|
||||||
cursor = unescape(dest, &dest_length, backslash, end, error_list, YP_UNESCAPE_FLAG_NONE, true);
|
cursor = unescape(parser, dest, &dest_length, backslash, end, YP_UNESCAPE_FLAG_NONE, true);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -521,29 +538,11 @@ yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unesc
|
||||||
yp_string_owned_init(string, allocated, dest_length + ((size_t) (end - cursor)));
|
yp_string_owned_init(string, allocated, dest_length + ((size_t) (end - cursor)));
|
||||||
}
|
}
|
||||||
|
|
||||||
YP_EXPORTED_FUNCTION bool
|
|
||||||
yp_unescape_string(const char *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result) {
|
|
||||||
bool success;
|
|
||||||
|
|
||||||
yp_parser_t parser;
|
|
||||||
yp_parser_init(&parser, start, length, "");
|
|
||||||
|
|
||||||
yp_list_t error_list = YP_LIST_EMPTY;
|
|
||||||
yp_string_shared_init(result, start, start + length);
|
|
||||||
yp_unescape_manipulate_string(&parser, result, unescape_type, &error_list);
|
|
||||||
success = yp_list_empty_p(&error_list);
|
|
||||||
|
|
||||||
yp_list_free(&error_list);
|
|
||||||
yp_parser_free(&parser);
|
|
||||||
|
|
||||||
return success;
|
|
||||||
}
|
|
||||||
|
|
||||||
// This function is similar to yp_unescape_manipulate_string, except it doesn't
|
// This function is similar to yp_unescape_manipulate_string, except it doesn't
|
||||||
// actually perform any string manipulations. Instead, it calculates how long
|
// actually perform any string manipulations. Instead, it calculates how long
|
||||||
// the unescaped character is, and returns that value
|
// the unescaped character is, and returns that value
|
||||||
YP_EXPORTED_FUNCTION size_t
|
size_t
|
||||||
yp_unescape_calculate_difference(const char *backslash, const char *end, yp_unescape_type_t unescape_type, bool expect_single_codepoint, yp_list_t *error_list) {
|
yp_unescape_calculate_difference(yp_parser_t *parser, const char *backslash, yp_unescape_type_t unescape_type, bool expect_single_codepoint) {
|
||||||
assert(unescape_type != YP_UNESCAPE_NONE);
|
assert(unescape_type != YP_UNESCAPE_NONE);
|
||||||
|
|
||||||
switch (backslash[1]) {
|
switch (backslash[1]) {
|
||||||
|
@ -551,7 +550,9 @@ yp_unescape_calculate_difference(const char *backslash, const char *end, yp_unes
|
||||||
case '\'':
|
case '\'':
|
||||||
return 2;
|
return 2;
|
||||||
default: {
|
default: {
|
||||||
if (unescape_type == YP_UNESCAPE_MINIMAL) return 2;
|
if (unescape_type == YP_UNESCAPE_MINIMAL) {
|
||||||
|
return 1 + yp_char_width(parser, backslash + 1, parser->end);
|
||||||
|
}
|
||||||
|
|
||||||
// This is the only type of unescaping left. In this case we need to
|
// This is the only type of unescaping left. In this case we need to
|
||||||
// handle all of the different unescapes.
|
// handle all of the different unescapes.
|
||||||
|
@ -561,10 +562,27 @@ yp_unescape_calculate_difference(const char *backslash, const char *end, yp_unes
|
||||||
if (expect_single_codepoint)
|
if (expect_single_codepoint)
|
||||||
flags |= YP_UNESCAPE_FLAG_EXPECT_SINGLE;
|
flags |= YP_UNESCAPE_FLAG_EXPECT_SINGLE;
|
||||||
|
|
||||||
const char *cursor = unescape(NULL, 0, backslash, end, error_list, flags, false);
|
const char *cursor = unescape(parser, NULL, 0, backslash, parser->end, flags, false);
|
||||||
assert(cursor > backslash);
|
assert(cursor > backslash);
|
||||||
|
|
||||||
return (size_t) (cursor - backslash);
|
return (size_t) (cursor - backslash);
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
}
|
}
|
||||||
|
|
||||||
|
// This is one of the main entry points into the extension. It accepts a source
|
||||||
|
// string, a type of unescaping, and a pointer to a result string. It returns a
|
||||||
|
// boolean indicating whether or not the unescaping was successful.
|
||||||
|
YP_EXPORTED_FUNCTION bool
|
||||||
|
yp_unescape_string(const char *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result) {
|
||||||
|
yp_parser_t parser;
|
||||||
|
yp_parser_init(&parser, start, length, NULL);
|
||||||
|
|
||||||
|
yp_string_shared_init(result, start, start + length);
|
||||||
|
yp_unescape_manipulate_string(&parser, result, unescape_type);
|
||||||
|
|
||||||
|
bool success = yp_list_empty_p(&parser.error_list);
|
||||||
|
yp_parser_free(&parser);
|
||||||
|
|
||||||
|
return success;
|
||||||
|
}
|
||||||
|
|
|
@ -31,12 +31,14 @@ typedef enum {
|
||||||
|
|
||||||
// Unescape the contents of the given token into the given string using the
|
// Unescape the contents of the given token into the given string using the
|
||||||
// given unescape mode.
|
// given unescape mode.
|
||||||
YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type, yp_list_t *error_list);
|
YP_EXPORTED_FUNCTION void yp_unescape_manipulate_string(yp_parser_t *parser, yp_string_t *string, yp_unescape_type_t unescape_type);
|
||||||
|
|
||||||
// Accepts a source string and a type of unescaping and returns the unescaped version.
|
// Accepts a source string and a type of unescaping and returns the unescaped version.
|
||||||
// The caller must yp_string_free(result); after calling this function.
|
// The caller must yp_string_free(result); after calling this function.
|
||||||
YP_EXPORTED_FUNCTION bool yp_unescape_string(const char *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result);
|
YP_EXPORTED_FUNCTION bool yp_unescape_string(const char *start, size_t length, yp_unescape_type_t unescape_type, yp_string_t *result);
|
||||||
|
|
||||||
YP_EXPORTED_FUNCTION size_t yp_unescape_calculate_difference(const char *value, const char *end, yp_unescape_type_t unescape_type, bool expect_single_codepoint, yp_list_t *error_list);
|
// Returns the number of bytes that encompass the first escape sequence in the
|
||||||
|
// given string.
|
||||||
|
size_t yp_unescape_calculate_difference(yp_parser_t *parser, const char *value, yp_unescape_type_t unescape_type, bool expect_single_codepoint);
|
||||||
|
|
||||||
#endif
|
#endif
|
||||||
|
|
22
yarp/yarp.c
22
yarp/yarp.c
|
@ -3600,7 +3600,7 @@ yp_symbol_node_label_create(yp_parser_t *parser, const yp_token_t *token) {
|
||||||
assert((label.end - label.start) >= 0);
|
assert((label.end - label.start) >= 0);
|
||||||
yp_string_shared_init(&node->unescaped, label.start, label.end);
|
yp_string_shared_init(&node->unescaped, label.start, label.end);
|
||||||
|
|
||||||
yp_unescape_manipulate_string(parser, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
|
yp_unescape_manipulate_string(parser, &node->unescaped, YP_UNESCAPE_ALL);
|
||||||
break;
|
break;
|
||||||
}
|
}
|
||||||
case YP_TOKEN_MISSING: {
|
case YP_TOKEN_MISSING: {
|
||||||
|
@ -5104,7 +5104,7 @@ lex_question_mark(yp_parser_t *parser) {
|
||||||
|
|
||||||
if (parser->current.start[1] == '\\') {
|
if (parser->current.start[1] == '\\') {
|
||||||
lex_state_set(parser, YP_LEX_STATE_END);
|
lex_state_set(parser, YP_LEX_STATE_END);
|
||||||
parser->current.end += yp_unescape_calculate_difference(parser->current.start + 1, parser->end, YP_UNESCAPE_ALL, true, &parser->error_list);
|
parser->current.end += yp_unescape_calculate_difference(parser, parser->current.start + 1, YP_UNESCAPE_ALL, true);
|
||||||
return YP_TOKEN_CHARACTER_LITERAL;
|
return YP_TOKEN_CHARACTER_LITERAL;
|
||||||
} else {
|
} else {
|
||||||
size_t encoding_width = parser->encoding.char_width(parser->current.end, parser->end - parser->current.end);
|
size_t encoding_width = parser->encoding.char_width(parser->current.end, parser->end - parser->current.end);
|
||||||
|
@ -6493,7 +6493,7 @@ parser_lex(yp_parser_t *parser) {
|
||||||
// and find the next breakpoint.
|
// and find the next breakpoint.
|
||||||
if (*breakpoint == '\\') {
|
if (*breakpoint == '\\') {
|
||||||
yp_unescape_type_t unescape_type = lex_mode->as.list.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
|
yp_unescape_type_t unescape_type = lex_mode->as.list.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
|
||||||
size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);
|
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
|
||||||
|
|
||||||
// If the result is an escaped newline, then we need to
|
// If the result is an escaped newline, then we need to
|
||||||
// track that newline.
|
// track that newline.
|
||||||
|
@ -6606,7 +6606,7 @@ parser_lex(yp_parser_t *parser) {
|
||||||
// literally. In this case we'll skip past the next character
|
// literally. In this case we'll skip past the next character
|
||||||
// and find the next breakpoint.
|
// and find the next breakpoint.
|
||||||
if (*breakpoint == '\\') {
|
if (*breakpoint == '\\') {
|
||||||
size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, YP_UNESCAPE_ALL, false, &parser->error_list);
|
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, YP_UNESCAPE_ALL, false);
|
||||||
|
|
||||||
// If the result is an escaped newline, then we need to
|
// If the result is an escaped newline, then we need to
|
||||||
// track that newline.
|
// track that newline.
|
||||||
|
@ -6746,7 +6746,7 @@ parser_lex(yp_parser_t *parser) {
|
||||||
// literally. In this case we'll skip past the next character and
|
// literally. In this case we'll skip past the next character and
|
||||||
// find the next breakpoint.
|
// find the next breakpoint.
|
||||||
yp_unescape_type_t unescape_type = parser->lex_modes.current->as.string.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
|
yp_unescape_type_t unescape_type = parser->lex_modes.current->as.string.interpolation ? YP_UNESCAPE_ALL : YP_UNESCAPE_MINIMAL;
|
||||||
size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);
|
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
|
||||||
|
|
||||||
// If the result is an escaped newline, then we need to
|
// If the result is an escaped newline, then we need to
|
||||||
// track that newline.
|
// track that newline.
|
||||||
|
@ -6907,7 +6907,7 @@ parser_lex(yp_parser_t *parser) {
|
||||||
breakpoint += le_len;
|
breakpoint += le_len;
|
||||||
} else {
|
} else {
|
||||||
yp_unescape_type_t unescape_type = (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL;
|
yp_unescape_type_t unescape_type = (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL;
|
||||||
size_t difference = yp_unescape_calculate_difference(breakpoint, parser->end, unescape_type, false, &parser->error_list);
|
size_t difference = yp_unescape_calculate_difference(parser, breakpoint, unescape_type, false);
|
||||||
|
|
||||||
yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);
|
yp_newline_list_check_append(&parser->newline_list, breakpoint + difference - 1);
|
||||||
|
|
||||||
|
@ -6963,7 +6963,7 @@ yp_regular_expression_node_create_and_unescape(yp_parser_t *parser, const yp_tok
|
||||||
assert((content->end - content->start) >= 0);
|
assert((content->end - content->start) >= 0);
|
||||||
yp_string_shared_init(&node->unescaped, content->start, content->end);
|
yp_string_shared_init(&node->unescaped, content->start, content->end);
|
||||||
|
|
||||||
yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type, &parser->error_list);
|
yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6974,7 +6974,7 @@ yp_symbol_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openin
|
||||||
assert((content->end - content->start) >= 0);
|
assert((content->end - content->start) >= 0);
|
||||||
yp_string_shared_init(&node->unescaped, content->start, content->end);
|
yp_string_shared_init(&node->unescaped, content->start, content->end);
|
||||||
|
|
||||||
yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type, &parser->error_list);
|
yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6985,7 +6985,7 @@ yp_string_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openin
|
||||||
assert((content->end - content->start) >= 0);
|
assert((content->end - content->start) >= 0);
|
||||||
yp_string_shared_init(&node->unescaped, content->start, content->end);
|
yp_string_shared_init(&node->unescaped, content->start, content->end);
|
||||||
|
|
||||||
yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type, &parser->error_list);
|
yp_unescape_manipulate_string(parser, &node->unescaped, unescape_type);
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -6996,7 +6996,7 @@ yp_xstring_node_create_and_unescape(yp_parser_t *parser, const yp_token_t *openi
|
||||||
assert((content->end - content->start) >= 0);
|
assert((content->end - content->start) >= 0);
|
||||||
yp_string_shared_init(&node->unescaped, content->start, content->end);
|
yp_string_shared_init(&node->unescaped, content->start, content->end);
|
||||||
|
|
||||||
yp_unescape_manipulate_string(parser, &node->unescaped, YP_UNESCAPE_ALL, &parser->error_list);
|
yp_unescape_manipulate_string(parser, &node->unescaped, YP_UNESCAPE_ALL);
|
||||||
return node;
|
return node;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
@ -9334,7 +9334,7 @@ parse_heredoc_dedent(yp_parser_t *parser, yp_node_t *node, yp_heredoc_quote_t qu
|
||||||
yp_node_destroy(parser, node);
|
yp_node_destroy(parser, node);
|
||||||
} else {
|
} else {
|
||||||
string->length = dest_length;
|
string->length = dest_length;
|
||||||
yp_unescape_manipulate_string(parser, string, (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL, &parser->error_list);
|
yp_unescape_manipulate_string(parser, string, (quote == YP_HEREDOC_QUOTE_SINGLE) ? YP_UNESCAPE_MINIMAL : YP_UNESCAPE_ALL);
|
||||||
nodes->nodes[write_index++] = node;
|
nodes->nodes[write_index++] = node;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
|
Загрузка…
Ссылка в новой задаче