зеркало из https://github.com/microsoft/clang-1.git
PR13099: Teach -Wformat about raw string literals, UTF-8 strings and Unicode escape sequences.
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@158390 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Родитель
3cc509b5ac
Коммит
df9ef1bc8c
|
@ -679,7 +679,8 @@ void StringLiteral::setString(ASTContext &C, StringRef Str,
|
|||
SourceLocation StringLiteral::
|
||||
getLocationOfByte(unsigned ByteNo, const SourceManager &SM,
|
||||
const LangOptions &Features, const TargetInfo &Target) const {
|
||||
assert(Kind == StringLiteral::Ascii && "This only works for ASCII strings");
|
||||
assert((Kind == StringLiteral::Ascii || Kind == StringLiteral::UTF8) &&
|
||||
"Only narrow string literals are currently supported");
|
||||
|
||||
// Loop over all of the tokens in this string until we find the one that
|
||||
// contains the byte we're looking for.
|
||||
|
|
|
@ -250,6 +250,39 @@ static bool ProcessUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
|
|||
return true;
|
||||
}
|
||||
|
||||
/// MeasureUCNEscape - Determine the number of bytes within the resulting string
|
||||
/// which this UCN will occupy.
|
||||
static int MeasureUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
|
||||
const char *ThisTokEnd, unsigned CharByteWidth,
|
||||
const LangOptions &Features, bool &HadError) {
|
||||
// UTF-32: 4 bytes per escape.
|
||||
if (CharByteWidth == 4)
|
||||
return 4;
|
||||
|
||||
uint32_t UcnVal = 0;
|
||||
unsigned short UcnLen = 0;
|
||||
FullSourceLoc Loc;
|
||||
|
||||
if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal,
|
||||
UcnLen, Loc, 0, Features, true)) {
|
||||
HadError = true;
|
||||
return 0;
|
||||
}
|
||||
|
||||
// UTF-16: 2 bytes for BMP, 4 bytes otherwise.
|
||||
if (CharByteWidth == 2)
|
||||
return UcnVal <= 0xFFFF ? 2 : 4;
|
||||
|
||||
// UTF-8.
|
||||
if (UcnVal < 0x80)
|
||||
return 1;
|
||||
if (UcnVal < 0x800)
|
||||
return 2;
|
||||
if (UcnVal < 0x10000)
|
||||
return 3;
|
||||
return 4;
|
||||
}
|
||||
|
||||
/// EncodeUCNEscape - Read the Universal Character Name, check constraints and
|
||||
/// convert the UTF32 to UTF8 or UTF16. This is a subroutine of
|
||||
/// StringLiteralParser. When we decide to implement UCN's for identifiers,
|
||||
|
@ -265,7 +298,7 @@ static void EncodeUCNEscape(const char *ThisTokBegin, const char *&ThisTokBuf,
|
|||
unsigned short UcnLen = 0;
|
||||
if (!ProcessUCNEscape(ThisTokBegin, ThisTokBuf, ThisTokEnd, UcnVal, UcnLen,
|
||||
Loc, Diags, Features, true)) {
|
||||
HadError = 1;
|
||||
HadError = true;
|
||||
return;
|
||||
}
|
||||
|
||||
|
@ -1369,14 +1402,31 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
|
|||
if (StringInvalid)
|
||||
return 0;
|
||||
|
||||
assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&
|
||||
SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet");
|
||||
|
||||
|
||||
const char *SpellingStart = SpellingPtr;
|
||||
const char *SpellingEnd = SpellingPtr+TokLen;
|
||||
|
||||
// Skip over the leading quote.
|
||||
// Handle UTF-8 strings just like narrow strings.
|
||||
if (SpellingPtr[0] == 'u' && SpellingPtr[1] == '8')
|
||||
SpellingPtr += 2;
|
||||
|
||||
assert(SpellingPtr[0] != 'L' && SpellingPtr[0] != 'u' &&
|
||||
SpellingPtr[0] != 'U' && "Doesn't handle wide or utf strings yet");
|
||||
|
||||
// For raw string literals, this is easy.
|
||||
if (SpellingPtr[0] == 'R') {
|
||||
assert(SpellingPtr[1] == '"' && "Should be a raw string literal!");
|
||||
// Skip 'R"'.
|
||||
SpellingPtr += 2;
|
||||
while (*SpellingPtr != '(') {
|
||||
++SpellingPtr;
|
||||
assert(SpellingPtr < SpellingEnd && "Missing ( for raw string literal");
|
||||
}
|
||||
// Skip '('.
|
||||
++SpellingPtr;
|
||||
return SpellingPtr - SpellingStart + ByteNo;
|
||||
}
|
||||
|
||||
// Skip over the leading quote
|
||||
assert(SpellingPtr[0] == '"' && "Should be a string literal!");
|
||||
++SpellingPtr;
|
||||
|
||||
|
@ -1393,11 +1443,23 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,
|
|||
|
||||
// Otherwise, this is an escape character. Advance over it.
|
||||
bool HadError = false;
|
||||
ProcessCharEscape(SpellingPtr, SpellingEnd, HadError,
|
||||
FullSourceLoc(Tok.getLocation(), SM),
|
||||
CharByteWidth*8, Diags);
|
||||
if (SpellingPtr[1] == 'u' || SpellingPtr[1] == 'U') {
|
||||
const char *EscapePtr = SpellingPtr;
|
||||
unsigned Len = MeasureUCNEscape(SpellingStart, SpellingPtr, SpellingEnd,
|
||||
1, Features, HadError);
|
||||
if (Len > ByteNo) {
|
||||
// ByteNo is somewhere within the escape sequence.
|
||||
SpellingPtr = EscapePtr;
|
||||
break;
|
||||
}
|
||||
ByteNo -= Len;
|
||||
} else {
|
||||
ProcessCharEscape(SpellingPtr, SpellingEnd, HadError,
|
||||
FullSourceLoc(Tok.getLocation(), SM),
|
||||
CharByteWidth*8, Diags);
|
||||
--ByteNo;
|
||||
}
|
||||
assert(!HadError && "This method isn't valid on erroneous strings");
|
||||
--ByteNo;
|
||||
}
|
||||
|
||||
return SpellingPtr-SpellingStart;
|
||||
|
|
|
@ -2633,7 +2633,7 @@ void Sema::CheckFormatString(const StringLiteral *FExpr,
|
|||
bool inFunctionCall) {
|
||||
|
||||
// CHECK: is the format string a wide literal?
|
||||
if (!FExpr->isAscii()) {
|
||||
if (!FExpr->isAscii() && !FExpr->isUTF8()) {
|
||||
CheckFormatHandler::EmitFormatDiagnostic(
|
||||
*this, inFunctionCall, Args[format_idx],
|
||||
PDiag(diag::warn_format_string_is_wide_literal), FExpr->getLocStart(),
|
||||
|
|
|
@ -12,4 +12,16 @@ void f(char **sp, float *fp) {
|
|||
scanf("%afoobar", fp);
|
||||
printf(nullptr);
|
||||
printf(*sp); // expected-warning {{not a string literal}}
|
||||
|
||||
// PR13099
|
||||
printf(
|
||||
R"foobar(%)foobar"
|
||||
R"bazquux(d)bazquux" // expected-warning {{more '%' conversions than data arguments}}
|
||||
R"xyzzy()xyzzy");
|
||||
|
||||
printf(u8"this is %d test", 0); // ok
|
||||
printf(u8R"foo(
|
||||
\u1234\U0010fffe
|
||||
%d)foo" // expected-warning {{more '%' conversions than data arguments}}
|
||||
);
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче