This change refactors some of the low-level lexer interfaces a bit.

Token now has a class of kinds for "literals", which includes
numeric constants, character constants, and string literals.  These
tokens can optionally carry a pointer to the start of the token in the
lexer buffer.  This makes it faster to get the spelling and do other
gymnastics, because we don't have to go through source locations.

This change is performance neutral, but will make other changes
more feasible down the road.



git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@63028 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2009-01-26 19:29:26 +00:00
Родитель 550faa3a6b
Коммит 47246be8ac
15 изменённых файлов: 153 добавлений и 112 удалений

Просмотреть файл

@ -140,32 +140,19 @@ void PTHWriter::EmitToken(const Token& T) {
Emit32(ResolveID(T.getIdentifierInfo())); Emit32(ResolveID(T.getIdentifierInfo()));
Emit32(fpos); Emit32(fpos);
// For specific tokens we cache their spelling. // Literals (strings, numbers, characters) get cached spellings.
if (T.getIdentifierInfo()) if (T.isLiteral()) {
return; // FIXME: This uses the slow getSpelling(). Perhaps we do better
// in the future? This only slows down PTH generation.
const std::string &spelling = PP.getSpelling(T);
const char* s = spelling.c_str();
// Get the string entry.
llvm::StringMapEntry<Offset> *E =
&CachedStrs.GetOrCreateValue(s, s+spelling.size());
switch (T.getKind()) { // Store the address of the string entry in our spelling map.
default: CurSpellMap->push_back(std::make_pair(fpos, E));
break;
case tok::string_literal:
case tok::wide_string_literal:
case tok::angle_string_literal:
case tok::numeric_constant:
case tok::char_constant: {
// FIXME: This uses the slow getSpelling(). Perhaps we do better
// in the future? This only slows down PTH generation.
const std::string& spelling = PP.getSpelling(T);
const char* s = spelling.c_str();
// Get the string entry.
llvm::StringMapEntry<Offset> *E =
&CachedStrs.GetOrCreateValue(s, s+spelling.size());
// Store the address of the string entry in our spelling map.
(*CurSpellMap).push_back(std::make_pair(fpos, E));
break;
}
} }
} }

Просмотреть файл

@ -430,6 +430,7 @@ bool PrintPPOutputPPCallbacks::AvoidConcat(const Token &PrevTok,
// Avoid spelling identifiers, the most common form of token. // Avoid spelling identifiers, the most common form of token.
FirstChar = II->getName()[0]; FirstChar = II->getName()[0];
} else if (!Tok.needsCleaning()) { } else if (!Tok.needsCleaning()) {
// FIXME: SPEED UP LITERALS!
SourceManager &SrcMgr = PP.getSourceManager(); SourceManager &SrcMgr = PP.getSourceManager();
FirstChar = FirstChar =
*SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation())); *SrcMgr.getCharacterData(SrcMgr.getSpellingLoc(Tok.getLocation()));
@ -556,6 +557,7 @@ void clang::DoPrintPreprocessedInput(Preprocessor &PP,
const char *Str = II->getName(); const char *Str = II->getName();
unsigned Len = Tok.needsCleaning() ? strlen(Str) : Tok.getLength(); unsigned Len = Tok.needsCleaning() ? strlen(Str) : Tok.getLength();
OS.write(Str, Len); OS.write(Str, Len);
// FIXME: ACCELERATE LITERALS
} else if (Tok.getLength() < 256) { } else if (Tok.getLength() < 256) {
const char *TokPtr = Buffer; const char *TokPtr = Buffer;
unsigned Len = PP.getSpelling(Tok, TokPtr); unsigned Len = PP.getSpelling(Tok, TokPtr);

Просмотреть файл

@ -415,7 +415,7 @@ public:
/// cached tokens doesn't get re-parsed and re-resolved after a backtrack is /// cached tokens doesn't get re-parsed and re-resolved after a backtrack is
/// invoked. /// invoked.
void AnnotateCachedTokens(const Token &Tok) { void AnnotateCachedTokens(const Token &Tok) {
assert(Tok.isAnnotationToken() && "Expected annotation token"); assert(Tok.isAnnotation() && "Expected annotation token");
if (CachedLexPos != 0 && isBacktrackEnabled()) if (CachedLexPos != 0 && isBacktrackEnabled())
AnnotatePreviousCachedTokens(Tok); AnnotatePreviousCachedTokens(Tok);
} }
@ -463,11 +463,11 @@ public:
return *SourceMgr.getCharacterData(SL); return *SourceMgr.getCharacterData(SL);
} }
/// CreateString - Plop the specified string into a scratch buffer and return /// CreateString - Plop the specified string into a scratch buffer and set the
/// a location for it. If specified, the source location provides a source /// specified token's location and length to it. If specified, the source
/// location for the token. /// location provides a location of the instantiation point of the token.
SourceLocation CreateString(const char *Buf, unsigned Len, void CreateString(const char *Buf, unsigned Len,
SourceLocation SourceLoc = SourceLocation()); Token &Tok, SourceLocation SourceLoc = SourceLocation());
/// DumpToken - Print the token to stderr, used for debugging. /// DumpToken - Print the token to stderr, used for debugging.
/// ///

Просмотреть файл

@ -30,17 +30,11 @@ class ScratchBuffer {
public: public:
ScratchBuffer(SourceManager &SM); ScratchBuffer(SourceManager &SM);
/// getToken - Splat the specified text into a temporary MemoryBuffer and
/// return a SourceLocation that refers to the token. The SourceLoc value
/// gives a virtual location that the token will appear to be from.
SourceLocation getToken(const char *Buf, unsigned Len,
SourceLocation SourceLoc);
/// getToken - Splat the specified text into a temporary MemoryBuffer and /// getToken - Splat the specified text into a temporary MemoryBuffer and
/// return a SourceLocation that refers to the token. This is just like the /// return a SourceLocation that refers to the token. This is just like the
/// previous method, but returns a location that indicates the physloc of the /// previous method, but returns a location that indicates the physloc of the
/// token. /// token.
SourceLocation getToken(const char *Buf, unsigned Len); SourceLocation getToken(const char *Buf, unsigned Len, const char *&DestPtr);
private: private:
void AllocScratchBuffer(unsigned RequestLen); void AllocScratchBuffer(unsigned RequestLen);

Просмотреть файл

@ -34,21 +34,28 @@ class Token {
/// The location of the token. /// The location of the token.
SourceLocation Loc; SourceLocation Loc;
// Conceptually these next two fields could be in a union with // Conceptually these next two fields could be in a union. However, this
// access depending on isAnnotationToken(). However, this causes gcc // causes gcc 4.2 to pessimize LexTokenInternal, a very performance critical
// 4.2 to pessimize LexTokenInternal, a very performance critical // routine. Keeping as separate members with casts until a more beautiful fix
// routine. Keeping as separate members with casts until a more // presents itself.
// beautiful fix presents itself.
/// UintData - This holds either the length of the token text, when /// UintData - This holds either the length of the token text, when
/// a normal token, or the end of the SourceRange when an annotation /// a normal token, or the end of the SourceRange when an annotation
/// token. /// token.
unsigned UintData; unsigned UintData;
/// PtrData - For normal tokens, this points to the uniqued /// PtrData - This is a union of four different pointer types, which depends
/// information for the identifier (if an identifier token) or /// on what type of token this is:
/// null. For annotation tokens, this points to information specific /// Identifiers, keywords, etc:
/// to the annotation token. /// This is an IdentifierInfo*, which contains the uniqued identifier
/// spelling.
/// Literals: isLiteral() returns true.
/// This is a pointer to the start of the token in a text buffer, which
/// may be dirty (have trigraphs / escaped newlines).
/// Annotations (resolved type names, C++ scopes, etc): isAnnotation().
/// This is a pointer to sema-specific data for the annotation token.
/// Other:
/// This is null.
void *PtrData; void *PtrData;
/// Kind - The actual flavor of token this is. /// Kind - The actual flavor of token this is.
@ -77,32 +84,40 @@ public:
bool is(tok::TokenKind K) const { return Kind == (unsigned) K; } bool is(tok::TokenKind K) const { return Kind == (unsigned) K; }
bool isNot(tok::TokenKind K) const { return Kind != (unsigned) K; } bool isNot(tok::TokenKind K) const { return Kind != (unsigned) K; }
bool isAnnotationToken() const { /// isLiteral - Return true if this is a "literal", like a numeric
/// constant, string, etc.
bool isLiteral() const {
return is(tok::numeric_constant) || is(tok::char_constant) ||
is(tok::string_literal) || is(tok::wide_string_literal) ||
is(tok::angle_string_literal);
}
bool isAnnotation() const {
return is(tok::annot_typename) || return is(tok::annot_typename) ||
is(tok::annot_cxxscope) || is(tok::annot_cxxscope) ||
is(tok::annot_template_id); is(tok::annot_template_id);
} }
/// getLocation - Return a source location identifier for the specified /// getLocation - Return a source location identifier for the specified
/// offset in the current file. /// offset in the current file.
SourceLocation getLocation() const { return Loc; } SourceLocation getLocation() const { return Loc; }
unsigned getLength() const { unsigned getLength() const {
assert(!isAnnotationToken() && "Annotation tokens have no length field"); assert(!isAnnotation() && "Annotation tokens have no length field");
return UintData; return UintData;
} }
void setLocation(SourceLocation L) { Loc = L; } void setLocation(SourceLocation L) { Loc = L; }
void setLength(unsigned Len) { void setLength(unsigned Len) {
assert(!isAnnotationToken() && "Annotation tokens have no length field"); assert(!isAnnotation() && "Annotation tokens have no length field");
UintData = Len; UintData = Len;
} }
SourceLocation getAnnotationEndLoc() const { SourceLocation getAnnotationEndLoc() const {
assert(isAnnotationToken() && "Used AnnotEndLocID on non-annotation token"); assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
return SourceLocation::getFromRawEncoding(UintData); return SourceLocation::getFromRawEncoding(UintData);
} }
void setAnnotationEndLoc(SourceLocation L) { void setAnnotationEndLoc(SourceLocation L) {
assert(isAnnotationToken() && "Used AnnotEndLocID on non-annotation token"); assert(isAnnotation() && "Used AnnotEndLocID on non-annotation token");
UintData = L.getRawEncoding(); UintData = L.getRawEncoding();
} }
@ -130,19 +145,32 @@ public:
} }
IdentifierInfo *getIdentifierInfo() const { IdentifierInfo *getIdentifierInfo() const {
assert(!isAnnotationToken() && "Used IdentInfo on annotation token"); assert(!isAnnotation() && "Used IdentInfo on annotation token!");
if (isLiteral()) return 0;
return (IdentifierInfo*) PtrData; return (IdentifierInfo*) PtrData;
} }
void setIdentifierInfo(IdentifierInfo *II) { void setIdentifierInfo(IdentifierInfo *II) {
PtrData = (void*) II; PtrData = (void*) II;
} }
/// getLiteralData - For a literal token (numeric constant, string, etc), this
/// returns a pointer to the start of it in the text buffer if known, null
/// otherwise.
const char *getLiteralData() const {
assert(isLiteral() && "Cannot get literal data of non-literal");
return reinterpret_cast<const char*>(PtrData);
}
void setLiteralData(const char *Ptr) {
assert(isLiteral() && "Cannot set literal data of non-literal");
PtrData = (void*)Ptr;
}
void *getAnnotationValue() const { void *getAnnotationValue() const {
assert(isAnnotationToken() && "Used AnnotVal on non-annotation token"); assert(isAnnotation() && "Used AnnotVal on non-annotation token");
return PtrData; return PtrData;
} }
void setAnnotationValue(void *val) { void setAnnotationValue(void *val) {
assert(isAnnotationToken() && "Used AnnotVal on non-annotation token"); assert(isAnnotation() && "Used AnnotVal on non-annotation token");
PtrData = val; PtrData = val;
} }

Просмотреть файл

@ -690,10 +690,11 @@ private:
if (!Tok.is(tok::identifier)) if (!Tok.is(tok::identifier))
return false; return false;
if (Actions.isTypeName(*Tok.getIdentifierInfo(), CurScope)) IdentifierInfo *II = Tok.getIdentifierInfo();
if (Actions.isTypeName(*II, CurScope))
return true; return true;
return Tok.getIdentifierInfo() == Ident_super; return II == Ident_super;
} }
OwningExprResult ParseObjCAtExpression(SourceLocation AtLocation); OwningExprResult ParseObjCAtExpression(SourceLocation AtLocation);

Просмотреть файл

@ -624,7 +624,9 @@ void Lexer::LexNumericConstant(Token &Result, const char *CurPtr) {
return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result)); return LexNumericConstant(Result, ConsumeChar(CurPtr, Size, Result));
// Update the location of token as well as BufferPtr. // Update the location of token as well as BufferPtr.
const char *TokStart = BufferPtr;
FormTokenWithChars(Result, CurPtr, tok::numeric_constant); FormTokenWithChars(Result, CurPtr, tok::numeric_constant);
Result.setLiteralData(TokStart);
} }
/// LexStringLiteral - Lex the remainder of a string literal, after having lexed /// LexStringLiteral - Lex the remainder of a string literal, after having lexed
@ -655,8 +657,10 @@ void Lexer::LexStringLiteral(Token &Result, const char *CurPtr, bool Wide) {
Diag(NulCharacter, diag::null_in_string); Diag(NulCharacter, diag::null_in_string);
// Update the location of the token as well as the BufferPtr instance var. // Update the location of the token as well as the BufferPtr instance var.
const char *TokStart = BufferPtr;
FormTokenWithChars(Result, CurPtr, FormTokenWithChars(Result, CurPtr,
Wide ? tok::wide_string_literal : tok::string_literal); Wide ? tok::wide_string_literal : tok::string_literal);
Result.setLiteralData(TokStart);
} }
/// LexAngledStringLiteral - Lex the remainder of an angled string literal, /// LexAngledStringLiteral - Lex the remainder of an angled string literal,
@ -687,7 +691,9 @@ void Lexer::LexAngledStringLiteral(Token &Result, const char *CurPtr) {
Diag(NulCharacter, diag::null_in_string); Diag(NulCharacter, diag::null_in_string);
// Update the location of token as well as BufferPtr. // Update the location of token as well as BufferPtr.
const char *TokStart = BufferPtr;
FormTokenWithChars(Result, CurPtr, tok::angle_string_literal); FormTokenWithChars(Result, CurPtr, tok::angle_string_literal);
Result.setLiteralData(TokStart);
} }
@ -735,7 +741,9 @@ void Lexer::LexCharConstant(Token &Result, const char *CurPtr) {
Diag(NulCharacter, diag::null_in_char); Diag(NulCharacter, diag::null_in_char);
// Update the location of token as well as BufferPtr. // Update the location of token as well as BufferPtr.
const char *TokStart = BufferPtr;
FormTokenWithChars(Result, CurPtr, tok::char_constant); FormTokenWithChars(Result, CurPtr, tok::char_constant);
Result.setLiteralData(TokStart);
} }
/// SkipWhitespace - Efficiently skip over a series of whitespace characters. /// SkipWhitespace - Efficiently skip over a series of whitespace characters.
@ -901,9 +909,8 @@ bool Lexer::SaveBCPLComment(Token &Result, const char *CurPtr) {
Spelling += "*/"; // add suffix. Spelling += "*/"; // add suffix.
Result.setKind(tok::comment); Result.setKind(tok::comment);
Result.setLocation(PP->CreateString(&Spelling[0], Spelling.size(), PP->CreateString(&Spelling[0], Spelling.size(), Result,
Result.getLocation())); Result.getLocation());
Result.setLength(Spelling.size());
return true; return true;
} }

Просмотреть файл

@ -225,8 +225,7 @@ Token MacroArgs::StringifyArgument(const Token *ArgToks,
} }
} }
Tok.setLength(Result.size()); PP.CreateString(&Result[0], Result.size(), Tok);
Tok.setLocation(PP.CreateString(&Result[0], Result.size()));
return Tok; return Tok;
} }

Просмотреть файл

@ -89,7 +89,7 @@ const Token &Preprocessor::PeekAhead(unsigned N) {
} }
void Preprocessor::AnnotatePreviousCachedTokens(const Token &Tok) { void Preprocessor::AnnotatePreviousCachedTokens(const Token &Tok) {
assert(Tok.isAnnotationToken() && "Expected annotation token"); assert(Tok.isAnnotation() && "Expected annotation token");
assert(CachedLexPos != 0 && "Expected to have some cached tokens"); assert(CachedLexPos != 0 && "Expected to have some cached tokens");
assert(CachedTokens[CachedLexPos-1].getLocation() == Tok.getAnnotationEndLoc() assert(CachedTokens[CachedLexPos-1].getLocation() == Tok.getAnnotationEndLoc()
&& "The annotation should be until the most recent cached token"); && "The annotation should be until the most recent cached token");

Просмотреть файл

@ -429,10 +429,15 @@ static void ComputeDATE_TIME(SourceLocation &DATELoc, SourceLocation &TIMELoc,
char TmpBuffer[100]; char TmpBuffer[100];
sprintf(TmpBuffer, "\"%s %2d %4d\"", Months[TM->tm_mon], TM->tm_mday, sprintf(TmpBuffer, "\"%s %2d %4d\"", Months[TM->tm_mon], TM->tm_mday,
TM->tm_year+1900); TM->tm_year+1900);
DATELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer));
Token TmpTok;
TmpTok.startToken();
PP.CreateString(TmpBuffer, strlen(TmpBuffer), TmpTok);
DATELoc = TmpTok.getLocation();
sprintf(TmpBuffer, "\"%02d:%02d:%02d\"", TM->tm_hour, TM->tm_min, TM->tm_sec); sprintf(TmpBuffer, "\"%02d:%02d:%02d\"", TM->tm_hour, TM->tm_min, TM->tm_sec);
TIMELoc = PP.CreateString(TmpBuffer, strlen(TmpBuffer)); PP.CreateString(TmpBuffer, strlen(TmpBuffer), TmpTok);
TIMELoc = TmpTok.getLocation();
} }
/// ExpandBuiltinMacro - If an identifier token is read that is to be expanded /// ExpandBuiltinMacro - If an identifier token is read that is to be expanded
@ -463,8 +468,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
SourceMgr.getInstantiationLineNumber(Tok.getLocation())); SourceMgr.getInstantiationLineNumber(Tok.getLocation()));
unsigned Length = strlen(TmpBuffer)-1; unsigned Length = strlen(TmpBuffer)-1;
Tok.setKind(tok::numeric_constant); Tok.setKind(tok::numeric_constant);
Tok.setLength(Length); CreateString(TmpBuffer, Length+1, Tok, Tok.getLocation());
Tok.setLocation(CreateString(TmpBuffer, Length+1, Tok.getLocation())); Tok.setLength(Length); // Trim off space.
} else if (II == Ident__FILE__ || II == Ident__BASE_FILE__) { } else if (II == Ident__FILE__ || II == Ident__BASE_FILE__) {
SourceLocation Loc = Tok.getLocation(); SourceLocation Loc = Tok.getLocation();
if (II == Ident__BASE_FILE__) { if (II == Ident__BASE_FILE__) {
@ -480,8 +485,7 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
std::string FN =SourceMgr.getSourceName(SourceMgr.getInstantiationLoc(Loc)); std::string FN =SourceMgr.getSourceName(SourceMgr.getInstantiationLoc(Loc));
FN = '"' + Lexer::Stringify(FN) + '"'; FN = '"' + Lexer::Stringify(FN) + '"';
Tok.setKind(tok::string_literal); Tok.setKind(tok::string_literal);
Tok.setLength(FN.size()); CreateString(&FN[0], FN.size(), Tok, Tok.getLocation());
Tok.setLocation(CreateString(&FN[0], FN.size(), Tok.getLocation()));
} else if (II == Ident__DATE__) { } else if (II == Ident__DATE__) {
if (!DATELoc.isValid()) if (!DATELoc.isValid())
ComputeDATE_TIME(DATELoc, TIMELoc, *this); ComputeDATE_TIME(DATELoc, TIMELoc, *this);
@ -511,8 +515,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
sprintf(TmpBuffer, "%u ", Depth); sprintf(TmpBuffer, "%u ", Depth);
unsigned Length = strlen(TmpBuffer)-1; unsigned Length = strlen(TmpBuffer)-1;
Tok.setKind(tok::numeric_constant); Tok.setKind(tok::numeric_constant);
Tok.setLength(Length); CreateString(TmpBuffer, Length, Tok, Tok.getLocation());
Tok.setLocation(CreateString(TmpBuffer, Length, Tok.getLocation())); Tok.setLength(Length); // Trim off space.
} else if (II == Ident__TIMESTAMP__) { } else if (II == Ident__TIMESTAMP__) {
// MSVC, ICC, GCC, VisualAge C++ extension. The generated string should be // MSVC, ICC, GCC, VisualAge C++ extension. The generated string should be
// of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime. // of the form "Ddd Mmm dd hh::mm::ss yyyy", which is returned by asctime.
@ -540,8 +544,8 @@ void Preprocessor::ExpandBuiltinMacro(Token &Tok) {
unsigned Len = strlen(TmpBuffer); unsigned Len = strlen(TmpBuffer);
TmpBuffer[Len-1] = '"'; // Replace the newline with a quote. TmpBuffer[Len-1] = '"'; // Replace the newline with a quote.
Tok.setKind(tok::string_literal); Tok.setKind(tok::string_literal);
Tok.setLength(Len); CreateString(TmpBuffer, Len+1, Tok, Tok.getLocation());
Tok.setLocation(CreateString(TmpBuffer, Len+1, Tok.getLocation())); Tok.setLength(Len); // Trim off space.
} else { } else {
assert(0 && "Unknown identifier!"); assert(0 && "Unknown identifier!");
} }

Просмотреть файл

@ -156,7 +156,10 @@ void Preprocessor::Handle_Pragma(Token &Tok) {
// Plop the string (including the newline and trailing null) into a buffer // Plop the string (including the newline and trailing null) into a buffer
// where we can lex it. // where we can lex it.
SourceLocation TokLoc = CreateString(&StrVal[0], StrVal.size()); Token TmpTok;
TmpTok.startToken();
CreateString(&StrVal[0], StrVal.size(), TmpTok);
SourceLocation TokLoc = TmpTok.getLocation();
// Make and enter a lexer object so that we lex and expand the tokens just // Make and enter a lexer object so that we lex and expand the tokens just
// like any others. // like any others.

Просмотреть файл

@ -266,13 +266,20 @@ unsigned Preprocessor::getSpelling(const Token &Tok,
} }
// Otherwise, compute the start of the token in the input lexer buffer. // Otherwise, compute the start of the token in the input lexer buffer.
const char *TokStart = SourceMgr.getCharacterData(Tok.getLocation()); const char *TokStart = 0;
if (Tok.isLiteral())
TokStart = Tok.getLiteralData();
if (TokStart == 0)
TokStart = SourceMgr.getCharacterData(Tok.getLocation());
// If this token contains nothing interesting, return it directly. // If this token contains nothing interesting, return it directly.
if (!Tok.needsCleaning()) { if (!Tok.needsCleaning()) {
Buffer = TokStart; Buffer = TokStart;
return Tok.getLength(); return Tok.getLength();
} }
// Otherwise, hard case, relex the characters into the string. // Otherwise, hard case, relex the characters into the string.
char *OutBuf = const_cast<char*>(Buffer); char *OutBuf = const_cast<char*>(Buffer);
for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength(); for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
@ -291,11 +298,20 @@ unsigned Preprocessor::getSpelling(const Token &Tok,
/// CreateString - Plop the specified string into a scratch buffer and return a /// CreateString - Plop the specified string into a scratch buffer and return a
/// location for it. If specified, the source location provides a source /// location for it. If specified, the source location provides a source
/// location for the token. /// location for the token.
SourceLocation Preprocessor:: void Preprocessor::CreateString(const char *Buf, unsigned Len, Token &Tok,
CreateString(const char *Buf, unsigned Len, SourceLocation SLoc) { SourceLocation InstantiationLoc) {
if (SLoc.isValid()) Tok.setLength(Len);
return ScratchBuf->getToken(Buf, Len, SLoc);
return ScratchBuf->getToken(Buf, Len); const char *DestPtr;
SourceLocation Loc = ScratchBuf->getToken(Buf, Len, DestPtr);
if (InstantiationLoc.isValid())
Loc = SourceMgr.createInstantiationLoc(Loc, InstantiationLoc, Len);
Tok.setLocation(Loc);
// If this is a literal token, set the pointer data.
if (Tok.isLiteral())
Tok.setLiteralData(DestPtr);
} }

Просмотреть файл

@ -30,10 +30,14 @@ ScratchBuffer::ScratchBuffer(SourceManager &SM) : SourceMgr(SM), CurBuffer(0) {
/// return a SourceLocation that refers to the token. This is just like the /// return a SourceLocation that refers to the token. This is just like the
/// method below, but returns a location that indicates the physloc of the /// method below, but returns a location that indicates the physloc of the
/// token. /// token.
SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len) { SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
const char *&DestPtr) {
if (BytesUsed+Len > ScratchBufSize) if (BytesUsed+Len > ScratchBufSize)
AllocScratchBuffer(Len); AllocScratchBuffer(Len);
// Return a pointer to the character data.
DestPtr = CurBuffer+BytesUsed;
// Copy the token data into the buffer. // Copy the token data into the buffer.
memcpy(CurBuffer+BytesUsed, Buf, Len); memcpy(CurBuffer+BytesUsed, Buf, Len);
@ -43,16 +47,6 @@ SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len) {
return BufferStartLoc.getFileLocWithOffset(BytesUsed-Len); return BufferStartLoc.getFileLocWithOffset(BytesUsed-Len);
} }
/// getToken - Splat the specified text into a temporary MemoryBuffer and
/// return a SourceLocation that refers to the token. The SourceLoc value
/// gives a virtual location that the token will appear to be from.
SourceLocation ScratchBuffer::getToken(const char *Buf, unsigned Len,
SourceLocation SourceLoc) {
// Map the physloc to the specified sourceloc.
return SourceMgr.createInstantiationLoc(getToken(Buf, Len), SourceLoc, Len);
}
void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) { void ScratchBuffer::AllocScratchBuffer(unsigned RequestLen) {
// Only pay attention to the requested length if it is larger than our default // Only pay attention to the requested length if it is larger than our default
// page size. If it is, we allocate an entire chunk for it. This is to // page size. If it is, we allocate an entire chunk for it. This is to

Просмотреть файл

@ -352,6 +352,7 @@ void TokenLexer::Lex(Token &Tok) {
/// If this returns true, the caller should immediately return the token. /// If this returns true, the caller should immediately return the token.
bool TokenLexer::PasteTokens(Token &Tok) { bool TokenLexer::PasteTokens(Token &Tok) {
llvm::SmallVector<char, 128> Buffer; llvm::SmallVector<char, 128> Buffer;
const char *ResultTokStrPtr = 0;
do { do {
// Consume the ## operator. // Consume the ## operator.
SourceLocation PasteOpLoc = Tokens[CurToken].getLocation(); SourceLocation PasteOpLoc = Tokens[CurToken].getLocation();
@ -386,8 +387,16 @@ bool TokenLexer::PasteTokens(Token &Tok) {
// Plop the pasted result (including the trailing newline and null) into a // Plop the pasted result (including the trailing newline and null) into a
// scratch buffer where we can lex it. // scratch buffer where we can lex it.
SourceLocation ResultTokLoc = PP.CreateString(&Buffer[0], Buffer.size()); Token ResultTokTmp;
ResultTokTmp.startToken();
// Claim that the tmp token is a string_literal so that we can get the
// character pointer back from CreateString.
ResultTokTmp.setKind(tok::string_literal);
PP.CreateString(&Buffer[0], Buffer.size(), ResultTokTmp);
SourceLocation ResultTokLoc = ResultTokTmp.getLocation();
ResultTokStrPtr = ResultTokTmp.getLiteralData();
// Lex the resultant pasted token into Result. // Lex the resultant pasted token into Result.
Token Result; Token Result;
@ -405,20 +414,16 @@ bool TokenLexer::PasteTokens(Token &Tok) {
assert(ResultTokLoc.isFileID() && assert(ResultTokLoc.isFileID() &&
"Should be a raw location into scratch buffer"); "Should be a raw location into scratch buffer");
SourceManager &SourceMgr = PP.getSourceManager(); SourceManager &SourceMgr = PP.getSourceManager();
std::pair<FileID, unsigned> LocInfo = FileID LocFileID = SourceMgr.getFileID(ResultTokLoc);
SourceMgr.getDecomposedLoc(ResultTokLoc);
const char *ScratchBufStart =SourceMgr.getBufferData(LocInfo.first).first; const char *ScratchBufStart = SourceMgr.getBufferData(LocFileID).first;
// Make a lexer to lex this string from. Lex just this one token. // Make a lexer to lex this string from. Lex just this one token.
const char *ResultStrData = ScratchBufStart+LocInfo.second;
// Make a lexer object so that we lex and expand the paste result. // Make a lexer object so that we lex and expand the paste result.
Lexer TL(SourceMgr.getLocForStartOfFile(LocInfo.first), Lexer TL(SourceMgr.getLocForStartOfFile(LocFileID),
PP.getLangOptions(), PP.getLangOptions(), ScratchBufStart,
ScratchBufStart, ResultTokStrPtr,
ResultStrData, ResultTokStrPtr+LHSLen+RHSLen /*don't include null*/);
ResultStrData+LHSLen+RHSLen /*don't include null*/);
// Lex a token in raw mode. This way it won't look up identifiers // Lex a token in raw mode. This way it won't look up identifiers
// automatically, lexing off the end will return an eof token, and // automatically, lexing off the end will return an eof token, and
@ -442,12 +447,12 @@ bool TokenLexer::PasteTokens(Token &Tok) {
RHS.is(tok::slash)) { RHS.is(tok::slash)) {
HandleMicrosoftCommentPaste(Tok); HandleMicrosoftCommentPaste(Tok);
return true; return true;
} else {
// TODO: If not in assembler language mode.
PP.Diag(PasteOpLoc, diag::err_pp_bad_paste)
<< std::string(Buffer.begin(), Buffer.end()-1);
return false;
} }
// TODO: If not in assembler language mode.
PP.Diag(PasteOpLoc, diag::err_pp_bad_paste)
<< std::string(Buffer.begin(), Buffer.end()-1);
return false;
} }
// Turn ## into 'unknown' to avoid # ## # from looking like a paste // Turn ## into 'unknown' to avoid # ## # from looking like a paste
@ -471,7 +476,7 @@ bool TokenLexer::PasteTokens(Token &Tok) {
if (Tok.is(tok::identifier)) { if (Tok.is(tok::identifier)) {
// Look up the identifier info for the token. We disabled identifier lookup // Look up the identifier info for the token. We disabled identifier lookup
// by saying we're skipping contents, so we need to do this manually. // by saying we're skipping contents, so we need to do this manually.
Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok)); Tok.setIdentifierInfo(PP.LookUpIdentifierInfo(Tok, ResultTokStrPtr));
} }
return false; return false;
} }

Просмотреть файл

@ -78,14 +78,15 @@ TokenRewriter::AddToken(const Token &T, TokenRefTy Where) {
TokenRewriter::token_iterator TokenRewriter::token_iterator
TokenRewriter::AddTokenBefore(token_iterator I, const char *Val){ TokenRewriter::AddTokenBefore(token_iterator I, const char *Val) {
unsigned Len = strlen(Val); unsigned Len = strlen(Val);
// Plop the string into the scratch buffer, then create a token for this // Plop the string into the scratch buffer, then create a token for this
// string. // string.
Token Tok; Token Tok;
Tok.startToken(); Tok.startToken();
Tok.setLocation(ScratchBuf->getToken(Val, Len)); const char *Spelling;
Tok.setLocation(ScratchBuf->getToken(Val, Len, Spelling));
Tok.setLength(Len); Tok.setLength(Len);
// TODO: Form a whole lexer around this and relex the token! For now, just // TODO: Form a whole lexer around this and relex the token! For now, just