Move token length calculation out of the diagnostics machinery into the lexer, where it can be shared.


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@43090 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Chris Lattner 2007-10-17 21:18:47 +00:00
Родитель 2b65a9ddab
Коммит 9a61194376
4 изменённых файлов: 42 добавлений и 27 удалений

Просмотреть файл

@ -80,7 +80,7 @@ void TextDiagnosticPrinter::HighlightRange(const SourceRange &R,
--EndColNo; // Zero base the col #.
// Add in the length of the token, so that we cover multi-char tokens.
EndColNo += GetTokenLength(R.getEnd());
EndColNo += Lexer::MeasureTokenLength(R.getEnd(), SourceMgr);
} else {
EndColNo = CaratLine.size();
}
@ -97,31 +97,6 @@ void TextDiagnosticPrinter::HighlightRange(const SourceRange &R,
CaratLine[i] = '~';
}
/// GetTokenLength - Given the source location of a token, determine its length.
/// This is a fully general function that uses a lexer to relex the token.
unsigned TextDiagnosticPrinter::GetTokenLength(SourceLocation Loc) {
  // Map the location back through any macro expansion: we want to measure the
  // macro name as written, not the token it expanded to.
  Loc = SourceMgr.getLogicalLoc(Loc);

  // TODO: this could be special cased for common tokens like identifiers, ')',
  // etc to make this faster, if it mattered.  This could use
  // Lexer::isObviouslySimpleCharacter for example.

  // A minimal LangOptions with trigraphs enabled is all the raw lexer needs
  // to measure a token correctly.
  LangOptions MeasureOpts;
  MeasureOpts.Trigraphs = true;

  // Locate the token's first character and the end of its buffer.
  const char *TokStart = SourceMgr.getCharacterData(Loc);
  const char *BufferEnd = SourceMgr.getBufferData(Loc.getFileID()).second;

  // Relex exactly one raw token starting at the token's begin location and
  // report how many bytes it occupies in the input.
  Lexer RawLexer(Loc, MeasureOpts, TokStart, BufferEnd);
  Token Result;
  RawLexer.LexRawToken(Result);
  return Result.getLength();
}
void TextDiagnosticPrinter::HandleDiagnostic(Diagnostic::Level Level,
SourceLocation Pos,
diag::kind ID,

Просмотреть файл

@ -31,7 +31,6 @@ public:
void HighlightRange(const SourceRange &R, unsigned LineNo,
std::string &CaratLine,
const std::string &SourceLine);
unsigned GetTokenLength(SourceLocation Loc);
virtual void HandleDiagnostic(Diagnostic::Level DiagLevel,
SourceLocation Pos,

Просмотреть файл

@ -163,6 +163,39 @@ void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) {
}
/// MeasureTokenLength - Relex the token at the specified location and return
/// its length in bytes in the input file.  If the token needs cleaning (e.g.
/// includes a trigraph or an escaped newline) then this count includes bytes
/// that are part of that.
unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
                                   const SourceManager &SM) {
  // Resolve through macro expansions: the caller wants the length of the
  // macro name as spelled, not of the token it expanded into.
  Loc = SM.getLogicalLoc(Loc);

  // TODO: this could be special cased for common tokens like identifiers, ')',
  // etc to make this faster, if it mattered.  Just look at StrData[0] to handle
  // all obviously single-char tokens.  This could use
  // Lexer::isObviouslySimpleCharacter for example to handle identifiers or
  // something.

  // Find where the token's text begins and where its buffer ends.
  const char *TokStart = SM.getCharacterData(Loc);
  const char *BufferEnd = SM.getBufferData(Loc.getFileID()).second;

  // Trigraph support alone is sufficient language configuration for raw
  // token measurement.
  LangOptions MeasureOpts;
  MeasureOpts.Trigraphs = true;

  // Lex a single raw token from the token's start and return its byte count.
  Lexer RawLexer(Loc, MeasureOpts, TokStart, BufferEnd);
  Token Result;
  RawLexer.LexRawToken(Result);
  return Result.getLength();
}
//===----------------------------------------------------------------------===//
// Character information.
//===----------------------------------------------------------------------===//

Просмотреть файл

@ -24,6 +24,7 @@
namespace clang {
class Diagnostic;
class SourceManager;
class Preprocessor;
/// Lexer - This provides a simple interface that turns a text buffer into a
@ -178,6 +179,13 @@ public:
/// and " characters. This does not add surrounding ""'s to the string.
static void Stringify(llvm::SmallVectorImpl<char> &Str);
/// MeasureTokenLength - Relex the token at the specified location and return
/// its length in bytes in the input file. If the token needs cleaning (e.g.
/// includes a trigraph or an escaped newline) then this count includes bytes
/// that are part of that.
static unsigned MeasureTokenLength(SourceLocation Loc,
const SourceManager &SM);
//===--------------------------------------------------------------------===//
// Internal implementation interfaces.
private: