From 97ba77cf09bf7b83b679165ce67ad7d49ffd568c Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 16 Jul 2007 06:48:38 +0000 Subject: [PATCH] Add a new Preprocessor::AdvanceToTokenCharacter method which, given a sloc specifying the start of a token and a logical (phase 3) character number, returns a sloc representing the input character corresponding to it. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@39905 91177308-0d34-0410-b5e6-96231b3b80d8 --- Lex/Preprocessor.cpp | 37 ++++++++++++++++++++++++++++++++ include/clang/Lex/Lexer.h | 2 ++ include/clang/Lex/Preprocessor.h | 4 ++++ 3 files changed, 43 insertions(+) diff --git a/Lex/Preprocessor.cpp b/Lex/Preprocessor.cpp index 8376b9f8a3..93017937d5 100644 --- a/Lex/Preprocessor.cpp +++ b/Lex/Preprocessor.cpp @@ -36,6 +36,7 @@ #include "clang/Basic/SourceManager.h" #include "clang/Basic/TargetInfo.h" #include "llvm/ADT/SmallVector.h" +#include "llvm/Support/MemoryBuffer.h" #include using namespace clang; @@ -253,6 +254,42 @@ CreateString(const char *Buf, unsigned Len, SourceLocation SLoc) { } +/// AdvanceToTokenCharacter - Given a location that specifies the start of a +/// token, return a new location that specifies a character within the token. +SourceLocation Preprocessor::AdvanceToTokenCharacter(SourceLocation TokStart, + unsigned CharNo) { + // If they request the first char of the token, we're trivially done. + if (CharNo == 0) return TokStart; + + // Figure out how many physical characters away the specified logical + // character is. This needs to take into consideration newlines and + // trigraphs. + const char *TokStartPtr = SourceMgr.getCharacterData(TokStart); + const char *TokPtr = TokStartPtr; + + // The usual case is that tokens don't contain anything interesting. Skip + // over the uninteresting characters. If a token only consists of simple + // chars, this method is extremely fast. 
+ while (CharNo && Lexer::isObviouslySimpleCharacter(*TokPtr)) + ++TokPtr, --CharNo; + + // If we have a character that may be a trigraph or escaped newline, create a + // lexer to parse it correctly. + unsigned FileID = TokStart.getFileID(); + const llvm::MemoryBuffer *SrcBuf = SourceMgr.getBuffer(FileID); + if (CharNo != 0) { + // Create a lexer starting at this token position. + Lexer TheLexer(SrcBuf, FileID, *this, TokPtr); + LexerToken Tok; + // Skip over the remaining characters. + for (; CharNo; --CharNo) + TheLexer.getAndAdvanceChar(TokPtr, Tok); + } + return SourceLocation(FileID, TokPtr-SrcBuf->getBufferStart()); +} + + + //===----------------------------------------------------------------------===// // Source File Location Methods. //===----------------------------------------------------------------------===// diff --git a/include/clang/Lex/Lexer.h b/include/clang/Lex/Lexer.h index 4a8965efa2..173e72f3f1 100644 --- a/include/clang/Lex/Lexer.h +++ b/include/clang/Lex/Lexer.h @@ -200,6 +200,7 @@ private: //===--------------------------------------------------------------------===// // Lexer character reading interfaces. +public: // This lexer is built on two interfaces for reading characters, both of which // automatically provide phase 1/2 translation. getAndAdvanceChar is used @@ -239,6 +240,7 @@ private: return C; } +private: /// ConsumeChar - When a character (identified by PeekCharAndSize) is consumed /// and added to a given token, check to see if there are diagnostics that /// need to be emitted or flags that need to be set on the token. 
If so, do diff --git a/include/clang/Lex/Preprocessor.h b/include/clang/Lex/Preprocessor.h index 4b6ea1f436..d1fa97872d 100644 --- a/include/clang/Lex/Preprocessor.h +++ b/include/clang/Lex/Preprocessor.h @@ -279,6 +279,10 @@ public: void DumpToken(const LexerToken &Tok, bool DumpFlags = false) const; void DumpMacro(const MacroInfo &MI) const; + /// AdvanceToTokenCharacter - Given a location that specifies the start of a + /// token, return a new location that specifies a character within the token. + SourceLocation AdvanceToTokenCharacter(SourceLocation TokStart,unsigned Char); + /// IncrementPasteCounter - Increment the counters for the number of token /// paste operations performed. If fast was specified, this is a 'fast paste' /// case we handled.