Heredocs are parsed line-by-line, so we need to keep track of the
temporary encoding of the string.  Previously, a heredoc would
only detect mixed encoding errors if they were on the same line,
this changes things so they will be caught on different lines.

Fixes [Bug #15839]
This commit is contained in:
Jeremy Evans 2019-05-14 22:04:46 -07:00
Родитель f91b1ab33d
Коммит c05eaa9325
2 изменённых файлов: 50 добавлений и 19 удалений

40
parse.y
Просмотреть файл

@ -5233,7 +5233,7 @@ none : /* none */
# define yylval (*p->lval)
static int regx_options(struct parser_params*);
static int tokadd_string(struct parser_params*,int,int,int,long*,rb_encoding**);
static int tokadd_string(struct parser_params*,int,int,int,long*,rb_encoding**,rb_encoding**);
static void tokaddmbc(struct parser_params *p, int c, rb_encoding *enc);
static enum yytokentype parse_string(struct parser_params*,rb_strterm_literal_t*);
static enum yytokentype here_document(struct parser_params*,rb_strterm_heredoc_t*);
@ -6510,10 +6510,9 @@ parser_mixed_escape(struct parser_params *p, const char *beg, rb_encoding *enc1,
static int
tokadd_string(struct parser_params *p,
int func, int term, int paren, long *nest,
rb_encoding **encp)
rb_encoding **encp, rb_encoding **enc)
{
int c;
rb_encoding *enc = 0;
bool erred = false;
#define mixed_error(enc1, enc2) \
@ -6569,7 +6568,7 @@ tokadd_string(struct parser_params *p,
tokadd(p, '\\');
break;
}
if (!parser_tokadd_utf8(p, &enc, term,
if (!parser_tokadd_utf8(p, enc, term,
func & STR_FUNC_SYMBOL,
func & STR_FUNC_REGEXP)) {
continue;
@ -6588,17 +6587,17 @@ tokadd_string(struct parser_params *p,
continue;
}
pushback(p, c);
if ((c = tokadd_escape(p, &enc)) < 0)
if ((c = tokadd_escape(p, enc)) < 0)
return -1;
if (enc && enc != *encp) {
mixed_escape(p->lex.ptok+2, enc, *encp);
if (*enc && *enc != *encp) {
mixed_escape(p->lex.ptok+2, *enc, *encp);
}
continue;
}
else if (func & STR_FUNC_EXPAND) {
pushback(p, c);
if (func & STR_FUNC_ESCAPE) tokadd(p, '\\');
c = read_escape(p, 0, &enc);
c = read_escape(p, 0, enc);
}
else if ((func & STR_FUNC_QWORDS) && ISSPACE(c)) {
/* ignore backslashed spaces in %w */
@ -6612,11 +6611,11 @@ tokadd_string(struct parser_params *p,
}
else if (!parser_isascii(p)) {
non_ascii:
if (!enc) {
enc = *encp;
if (!*enc) {
*enc = *encp;
}
else if (enc != *encp) {
mixed_error(enc, *encp);
else if (*enc != *encp) {
mixed_error(*enc, *encp);
continue;
}
if (tokadd_mbchar(p, c) == -1) return -1;
@ -6627,18 +6626,18 @@ tokadd_string(struct parser_params *p,
break;
}
if (c & 0x80) {
if (!enc) {
enc = *encp;
if (!*enc) {
*enc = *encp;
}
else if (enc != *encp) {
mixed_error(enc, *encp);
else if (*enc != *encp) {
mixed_error(*enc, *encp);
continue;
}
}
tokadd(p, c);
}
terminate:
if (enc) *encp = enc;
if (*enc) *encp = *enc;
return c;
}
@ -6774,6 +6773,7 @@ parse_string(struct parser_params *p, rb_strterm_literal_t *quote)
int paren = (int)quote->u2.paren;
int c, space = 0;
rb_encoding *enc = p->enc;
rb_encoding *base_enc = 0;
VALUE lit;
if (func & STR_FUNC_TERM) {
@ -6814,7 +6814,7 @@ parse_string(struct parser_params *p, rb_strterm_literal_t *quote)
}
pushback(p, c);
if (tokadd_string(p, func, term, paren, &quote->u0.nest,
&enc) == -1) {
&enc, &base_enc) == -1) {
if (p->eofp) {
#ifndef RIPPER
# define unterminated_literal(mesg) yyerror0(mesg)
@ -7171,6 +7171,7 @@ here_document(struct parser_params *p, rb_strterm_heredoc_t *here)
long len;
VALUE str = 0;
rb_encoding *enc = p->enc;
rb_encoding *base_enc = 0;
int bol;
eos = RSTRING_PTR(here->lastline) + here->offset;
@ -7282,7 +7283,8 @@ here_document(struct parser_params *p, rb_strterm_heredoc_t *here)
}
do {
pushback(p, c);
if ((c = tokadd_string(p, func, '\n', 0, NULL, &enc)) == -1) {
enc = p->enc;
if ((c = tokadd_string(p, func, '\n', 0, NULL, &enc, &base_enc)) == -1) {
if (p->eofp) goto error;
goto restore;
}

Просмотреть файл

@ -774,6 +774,35 @@ eom
assert_equal("\n0\n1", eval("<<~0 '1'\n \n0\#{}\n0"))
end
def test_heredoc_mixed_encoding
assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
#encoding: cp932
<<-TEXT
\xe9\x9d\u1234
TEXT
HEREDOC
assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
#encoding: cp932
<<-TEXT
\xe9\x9d
\u1234
TEXT
HEREDOC
assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
#encoding: cp932
<<-TEXT
\u1234\xe9\x9d
TEXT
HEREDOC
assert_syntax_error(<<-'HEREDOC', 'UTF-8 mixed within Windows-31J source')
#encoding: cp932
<<-TEXT
\u1234
\xe9\x9d
TEXT
HEREDOC
end
def test_lineno_operation_brace_block
expected = __LINE__ + 1
actual = caller_lineno\