Introduced raw_identifier token kind.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@122394 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Abramo Bagnara 2010-12-22 08:23:18 +00:00
Родитель eee1df1e7a
Коммит c4bf2b9afb
14 изменённых файлов: 115 добавлений и 89 удалений

Просмотреть файл

@ -103,6 +103,7 @@ TOK(comment) // Comment (only in -E -C[C] mode)
// C99 6.4.2: Identifiers.
TOK(identifier) // abcde123
TOK(raw_identifier) // Used only in raw lexing mode.
// C99 6.4.4.1: Integer Constants
// C99 6.4.4.2: Floating Constants

Просмотреть файл

@ -751,10 +751,10 @@ public:
// Preprocessor callback methods. These are invoked by a lexer as various
// directives and events are found.
/// LookUpIdentifierInfo - Given a tok::identifier token, look up the
/// identifier information for the token and install it into the token.
IdentifierInfo *LookUpIdentifierInfo(Token &Identifier,
const char *BufPtr = 0) const;
/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
/// identifier information for the token and install it into the token,
/// updating the token kind accordingly.
IdentifierInfo *LookUpIdentifierInfo(Token &Identifier) const;
/// HandleIdentifier - This callback is invoked when the lexer reads an
/// identifier and has filled in the tokens IdentifierInfo member. This

Просмотреть файл

@ -88,6 +88,12 @@ public:
bool is(tok::TokenKind K) const { return Kind == (unsigned) K; }
bool isNot(tok::TokenKind K) const { return Kind != (unsigned) K; }
/// isAnyIdentifier - Return true if this is a raw identifier (when lexing
/// in raw mode) or a non-keyword identifier (when lexing in non-raw mode).
bool isAnyIdentifier() const {
return is(tok::identifier) || is(tok::raw_identifier);
}
/// isLiteral - Return true if this is a "literal", like a numeric
/// constant, string, etc.
bool isLiteral() const {
@ -154,7 +160,10 @@ public:
}
IdentifierInfo *getIdentifierInfo() const {
assert(!isAnnotation() && "Used IdentInfo on annotation token!");
assert(isNot(tok::raw_identifier) &&
"getIdentifierInfo() on a tok::raw_identifier token!");
assert(!isAnnotation() &&
"getIdentifierInfo() on an annotation token!");
if (isLiteral()) return 0;
return (IdentifierInfo*) PtrData;
}
@ -162,6 +171,18 @@ public:
PtrData = (void*) II;
}
/// getRawIdentifierData - For a raw identifier token (i.e., an identifier
/// lexed in raw mode), returns a pointer to the start of it in the text
/// buffer if known, null otherwise.
const char *getRawIdentifierData() const {
assert(is(tok::raw_identifier));
return reinterpret_cast<const char*>(PtrData);
}
void setRawIdentifierData(const char *Ptr) {
assert(is(tok::raw_identifier));
PtrData = const_cast<char*>(Ptr);
}
/// getLiteralData - For a literal token (numeric constant, string, etc), this
/// returns a pointer to the start of it in the text buffer if known, null
/// otherwise.

Просмотреть файл

@ -300,7 +300,7 @@ PTHEntry PTHWriter::LexTokens(Lexer& L) {
ParsingPreprocessorDirective = false;
}
if (Tok.is(tok::identifier)) {
if (Tok.is(tok::raw_identifier)) {
PP.LookUpIdentifierInfo(Tok);
EmitToken(Tok);
continue;
@ -320,13 +320,13 @@ PTHEntry PTHWriter::LexTokens(Lexer& L) {
// this case, discard both tokens.
if (NextTok.isAtStartOfLine())
goto NextToken;
// The token is the start of a directive. Emit it.
EmitToken(Tok);
Tok = NextTok;
// Did we see 'include'/'import'/'include_next'?
if (Tok.isNot(tok::identifier)) {
if (Tok.isNot(tok::raw_identifier)) {
EmitToken(Tok);
continue;
}
@ -353,7 +353,7 @@ PTHEntry PTHWriter::LexTokens(Lexer& L) {
L.LexIncludeFilename(Tok);
L.setParsingPreprocessorDirective(false);
assert(!Tok.isAtStartOfLine());
if (Tok.is(tok::identifier))
if (Tok.is(tok::raw_identifier))
PP.LookUpIdentifierInfo(Tok);
break;

Просмотреть файл

@ -266,21 +266,23 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
const SourceManager &SourceMgr,
const LangOptions &Features, bool *Invalid) {
assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
// If this token is an identifier, just return the string from the identifier
// table, which is very quick.
if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
const char *TokStart = 0;
// NOTE: this has to be checked *before* testing for an IdentifierInfo.
if (Tok.is(tok::raw_identifier))
TokStart = Tok.getRawIdentifierData();
else if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
// Just return the string from the identifier table, which is very quick.
Buffer = II->getNameStart();
return II->getLength();
}
// Otherwise, compute the start of the token in the input lexer buffer.
const char *TokStart = 0;
// NOTE: this can be checked even after testing for an IdentifierInfo.
if (Tok.isLiteral())
TokStart = Tok.getLiteralData();
if (TokStart == 0) {
// Compute the start of the token in the input lexer buffer.
bool CharDataInvalid = false;
TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid);
if (Invalid)
@ -290,13 +292,13 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
return 0;
}
}
// If this token contains nothing interesting, return it directly.
if (!Tok.needsCleaning()) {
Buffer = TokStart;
return Tok.getLength();
}
// Otherwise, hard case, relex the characters into the string.
char *OutBuf = const_cast<char*>(Buffer);
for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
@ -307,7 +309,7 @@ unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer,
}
assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
"NeedsCleaning flag set on something that didn't need cleaning!");
return OutBuf-Buffer;
}
@ -473,10 +475,9 @@ Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer, unsigned MaxLines) {
// we don't have an identifier table available. Instead, just look at
// the raw identifier to recognize and categorize preprocessor directives.
TheLexer.LexFromRawLexer(TheTok);
if (TheTok.getKind() == tok::identifier && !TheTok.needsCleaning()) {
const char *IdStart = Buffer->getBufferStart()
+ TheTok.getLocation().getRawEncoding() - 1;
llvm::StringRef Keyword(IdStart, TheTok.getLength());
if (TheTok.getKind() == tok::raw_identifier && !TheTok.needsCleaning()) {
llvm::StringRef Keyword(TheTok.getRawIdentifierData(),
TheTok.getLength());
PreambleDirectiveKind PDK
= llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
.Case("include", PDK_Skipped)
@ -1046,19 +1047,17 @@ void Lexer::LexIdentifier(Token &Result, const char *CurPtr) {
if (C != '\\' && C != '?' && (C != '$' || !Features.DollarIdents)) {
FinishIdentifier:
const char *IdStart = BufferPtr;
FormTokenWithChars(Result, CurPtr, tok::identifier);
FormTokenWithChars(Result, CurPtr, tok::raw_identifier);
Result.setRawIdentifierData(IdStart);
// If we are in raw mode, return this identifier raw. There is no need to
// look up identifier information or attempt to macro expand it.
if (LexingRawMode) return;
if (LexingRawMode)
return;
// Fill in Result.IdentifierInfo, looking up the identifier in the
// identifier table.
IdentifierInfo *II = PP->LookUpIdentifierInfo(Result, IdStart);
// Change the kind of this identifier to the appropriate token kind, e.g.
// turning "for" into a keyword.
Result.setKind(II->getTokenID());
// Fill in Result.IdentifierInfo and update the token kind,
// looking up the identifier in the identifier table.
IdentifierInfo *II = PP->LookUpIdentifierInfo(Result);
// Finally, now that we know we have an identifier, pass this off to the
// preprocessor, which may macro expand it or something.

Просмотреть файл

@ -245,7 +245,7 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
// If this isn't an identifier directive (e.g. is "# 1\n" or "#\n", or
// something bogus), skip it.
if (Tok.isNot(tok::identifier)) {
if (Tok.isNot(tok::raw_identifier)) {
CurPPLexer->ParsingPreprocessorDirective = false;
// Restore comment saving mode.
if (CurLexer) CurLexer->SetCommentRetentionState(KeepComments);
@ -257,12 +257,8 @@ void Preprocessor::SkipExcludedConditionalBlock(SourceLocation IfTokenLoc,
// to spell an i/e in a strange way that is another letter. Skipping this
// allows us to avoid looking up the identifier info for #define/#undef and
// other common directives.
bool Invalid = false;
const char *RawCharData = SourceMgr.getCharacterData(Tok.getLocation(),
&Invalid);
if (Invalid)
return;
const char *RawCharData = Tok.getRawIdentifierData();
char FirstChar = RawCharData[0];
if (FirstChar >= 'a' && FirstChar <= 'z' &&
FirstChar != 'i' && FirstChar != 'e') {

Просмотреть файл

@ -292,7 +292,7 @@ void Preprocessor::HandlePragmaPoison(Token &PoisonTok) {
if (Tok.is(tok::eom)) return;
// Can only poison identifiers.
if (Tok.isNot(tok::identifier)) {
if (Tok.isNot(tok::raw_identifier)) {
Diag(Tok, diag::err_pp_invalid_poison);
return;
}
@ -599,7 +599,7 @@ IdentifierInfo *Preprocessor::ParsePragmaPushOrPopMacro(Token &Tok) {
// Create a Token from the string.
Token MacroTok;
MacroTok.startToken();
MacroTok.setKind(tok::identifier);
MacroTok.setKind(tok::raw_identifier);
CreateString(&StrVal[1], StrVal.size() - 2, MacroTok);
// Get the IdentifierInfo of MacroToPushTok.

Просмотреть файл

@ -285,9 +285,12 @@ void Preprocessor::CodeCompleteNaturalLanguage() {
llvm::StringRef Preprocessor::getSpelling(const Token &Tok,
llvm::SmallVectorImpl<char> &Buffer,
bool *Invalid) const {
// Try the fast path.
if (const IdentifierInfo *II = Tok.getIdentifierInfo())
return II->getName();
// NOTE: this has to be checked *before* testing for an IdentifierInfo.
if (Tok.isNot(tok::raw_identifier)) {
// Try the fast path.
if (const IdentifierInfo *II = Tok.getIdentifierInfo())
return II->getName();
}
// Resize the buffer if we need to copy into it.
if (Tok.needsCleaning())
@ -313,8 +316,10 @@ void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok,
InstantiationLoc, Len);
Tok.setLocation(Loc);
// If this is a literal token, set the pointer data.
if (Tok.isLiteral())
// If this is a raw identifier or a literal token, set the pointer data.
if (Tok.is(tok::raw_identifier))
Tok.setRawIdentifierData(DestPtr);
else if (Tok.isLiteral())
Tok.setLiteralData(DestPtr);
}
@ -369,25 +374,29 @@ void Preprocessor::EndSourceFile() {
// Lexer Event Handling.
//===----------------------------------------------------------------------===//
/// LookUpIdentifierInfo - Given a tok::identifier token, look up the
/// identifier information for the token and install it into the token.
IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier,
const char *BufPtr) const {
assert(Identifier.is(tok::identifier) && "Not an identifier!");
assert(Identifier.getIdentifierInfo() == 0 && "Identinfo already exists!");
/// LookUpIdentifierInfo - Given a tok::raw_identifier token, look up the
/// identifier information for the token and install it into the token,
/// updating the token kind accordingly.
IdentifierInfo *Preprocessor::LookUpIdentifierInfo(Token &Identifier) const {
assert(Identifier.getRawIdentifierData() != 0 && "No raw identifier data!");
// Look up this token, see if it is a macro, or if it is a language keyword.
IdentifierInfo *II;
if (BufPtr && !Identifier.needsCleaning()) {
if (!Identifier.needsCleaning()) {
// No cleaning needed, just use the characters from the lexed buffer.
II = getIdentifierInfo(llvm::StringRef(BufPtr, Identifier.getLength()));
II = getIdentifierInfo(llvm::StringRef(Identifier.getRawIdentifierData(),
Identifier.getLength()));
} else {
// Cleaning needed, alloca a buffer, clean into it, then use the buffer.
llvm::SmallString<64> IdentifierBuffer;
llvm::StringRef CleanedStr = getSpelling(Identifier, IdentifierBuffer);
II = getIdentifierInfo(CleanedStr);
}
// Update the token info (identifier info and appropriate token kind).
Identifier.setIdentifierInfo(II);
Identifier.setKind(II->getTokenID());
return II;
}

Просмотреть файл

@ -13,6 +13,7 @@
#include "clang/Lex/TokenConcatenation.h"
#include "clang/Lex/Preprocessor.h"
#include "llvm/Support/ErrorHandling.h"
using namespace clang;
@ -165,7 +166,14 @@ bool TokenConcatenation::AvoidConcat(const Token &PrevPrevTok,
}
switch (PrevKind) {
default: assert(0 && "InitAvoidConcatTokenInfo built wrong");
default:
llvm_unreachable("InitAvoidConcatTokenInfo built wrong");
return true;
case tok::raw_identifier:
llvm_unreachable("tok::raw_identifier in non-raw lexing mode!");
return true;
case tok::identifier: // id+id or id+number or id+L"foo".
// id+'.'... will not append.
if (Tok.is(tok::numeric_constant))

Просмотреть файл

@ -435,12 +435,13 @@ bool TokenLexer::PasteTokens(Token &Tok) {
// Lex the resultant pasted token into Result.
Token Result;
if (Tok.is(tok::identifier) && RHS.is(tok::identifier)) {
if (Tok.isAnyIdentifier() && RHS.isAnyIdentifier()) {
// Common paste case: identifier+identifier = identifier. Avoid creating
// a lexer and other overhead.
PP.IncrementPasteCounter(true);
Result.startToken();
Result.setKind(tok::identifier);
Result.setKind(tok::raw_identifier);
Result.setRawIdentifierData(ResultTokStrPtr);
Result.setLocation(ResultTokLoc);
Result.setLength(LHSLen+RHSLen);
} else {
@ -524,10 +525,10 @@ bool TokenLexer::PasteTokens(Token &Tok) {
// Now that we got the result token, it will be subject to expansion. Since
// token pasting re-lexes the result token in raw mode, identifier information
// isn't looked up. As such, if the result is an identifier, look up id info.
if (Tok.is(tok::identifier)) {
if (Tok.is(tok::raw_identifier)) {
// Look up the identifier info for the token. We disabled identifier lookup
// by saying we're skipping contents, so we need to do this manually.
PP.LookUpIdentifierInfo(Tok, ResultTokStrPtr);
PP.LookUpIdentifierInfo(Tok);
}
return false;
}

Просмотреть файл

@ -20,6 +20,7 @@
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/SmallString.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/ErrorHandling.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;
@ -378,14 +379,16 @@ void html::SyntaxHighlight(Rewriter &R, FileID FID, const Preprocessor &PP) {
unsigned TokLen = Tok.getLength();
switch (Tok.getKind()) {
default: break;
case tok::identifier: {
// Fill in Result.IdentifierInfo, looking up the identifier in the
// identifier table.
const IdentifierInfo *II =
PP.LookUpIdentifierInfo(Tok, BufferStart+TokOffs);
case tok::identifier:
llvm_unreachable("tok::identifier in raw lexing mode!");
break;
case tok::raw_identifier: {
// Fill in Result.IdentifierInfo and update the token kind,
// looking up the identifier in the identifier table.
PP.LookUpIdentifierInfo(Tok);
// If this is a pp-identifier, for a keyword, highlight it as such.
if (II->getTokenID() != tok::identifier)
if (Tok.isNot(tok::identifier))
HighlightRange(RB, TokOffs, TokOffs+TokLen, BufferStart,
"<span class='keyword'>", "</span>");
break;
@ -473,11 +476,8 @@ void html::HighlightMacros(Rewriter &R, FileID FID, const Preprocessor& PP) {
// If this raw token is an identifier, the raw lexer won't have looked up
// the corresponding identifier info for it. Do this now so that it will be
// macro expanded when we re-preprocess it.
if (Tok.is(tok::identifier)) {
// Change the kind of this identifier to the appropriate token kind, e.g.
// turning "for" into a keyword.
Tok.setKind(PP.LookUpIdentifierInfo(Tok)->getTokenID());
}
if (Tok.is(tok::raw_identifier))
PP.LookUpIdentifierInfo(Tok);
TokenStream.push_back(Tok);

Просмотреть файл

@ -78,7 +78,7 @@ static void LexRawTokensFromMainFile(Preprocessor &PP,
// If we have an identifier with no identifier info for our raw token, look
// up the indentifier info. This is important for equality comparison of
// identifier tokens.
if (RawTok.is(tok::identifier) && !RawTok.getIdentifierInfo())
if (RawTok.is(tok::raw_identifier))
PP.LookUpIdentifierInfo(RawTok);
RawTokens.push_back(RawTok);

Просмотреть файл

@ -34,10 +34,10 @@ TokenRewriter::TokenRewriter(FileID FID, SourceManager &SM,
RawLex.LexFromRawLexer(RawTok);
while (RawTok.isNot(tok::eof)) {
#if 0
if (Tok.is(tok::identifier)) {
if (Tok.is(tok::raw_identifier)) {
// Look up the identifier info for the token. This should use
// IdentifierTable directly instead of PP.
Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok));
PP.LookUpIdentifierInfo(Tok);
}
#endif

Просмотреть файл

@ -4010,27 +4010,18 @@ void clang_tokenize(CXTranslationUnit TU, CXSourceRange Range,
if (Tok.isLiteral()) {
CXTok.int_data[0] = CXToken_Literal;
CXTok.ptr_data = (void *)Tok.getLiteralData();
} else if (Tok.is(tok::identifier)) {
} else if (Tok.is(tok::raw_identifier)) {
// Lookup the identifier to determine whether we have a keyword.
std::pair<FileID, unsigned> LocInfo
= SourceMgr.getDecomposedLoc(Tok.getLocation());
bool Invalid = false;
llvm::StringRef Buf
= CXXUnit->getSourceManager().getBufferData(LocInfo.first, &Invalid);
if (Invalid)
return;
const char *StartPos = Buf.data() + LocInfo.second;
IdentifierInfo *II
= CXXUnit->getPreprocessor().LookUpIdentifierInfo(Tok, StartPos);
= CXXUnit->getPreprocessor().LookUpIdentifierInfo(Tok);
if ((II->getObjCKeywordID() != tok::objc_not_keyword) && previousWasAt) {
CXTok.int_data[0] = CXToken_Keyword;
}
else {
CXTok.int_data[0] = II->getTokenID() == tok::identifier?
CXToken_Identifier
: CXToken_Keyword;
CXTok.int_data[0] = Tok.is(tok::identifier)
? CXToken_Identifier
: CXToken_Keyword;
}
CXTok.ptr_data = II;
} else if (Tok.is(tok::comment)) {