move getSpelling from Preprocessor to Lexer, which it is more conceptually related to.

git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@119479 91177308-0d34-0410-b5e6-96231b3b80d8
2010-11-17 07:26:20 +00:00 · 2010-11-17 07:26:20 +00:00 · b0607279cb
--- a/include/clang/Lex/Lexer.h
+++ b/include/clang/Lex/Lexer.h
@ -211,6 +211,32 @@ public:
  /// and " characters.  This does not add surrounding ""'s to the string.
  static void Stringify(llvm::SmallVectorImpl<char> &Str);

+  
+  /// getSpelling - This method is used to get the spelling of a token into a
+  /// preallocated buffer, instead of as an std::string.  The caller is required
+  /// to allocate enough space for the token, which is guaranteed to be at least
+  /// Tok.getLength() bytes long.  The length of the actual result is returned.
+  ///
+  /// Note that this method may do two possible things: it may either fill in
+  /// the buffer specified with characters, or it may *change the input pointer*
+  /// to point to a constant buffer with the data already in it (avoiding a
+  /// copy).  The caller is not allowed to modify the returned buffer pointer
+  /// if an internal buffer is returned.
+  static unsigned getSpelling(const Token &Tok, const char *&Buffer, 
+                              const SourceManager &SourceMgr,
+                              const LangOptions &Features,
+                              bool *Invalid = 0);
+  
+  /// getSpelling() - Return the 'spelling' of the Tok token.  The spelling of a
+  /// token is the characters used to represent the token in the source file
+  /// after trigraph expansion and escaped-newline folding.  In particular, this
+  /// wants to get the true, uncanonicalized, spelling of things like digraphs
+  /// UCNs, etc.
+  static std::string getSpelling(const Token &Tok,
+                                 const SourceManager &SourceMgr,
+                                 const LangOptions &Features, 
+                                 bool *Invalid = 0);
+  
  /// MeasureTokenLength - Relex the token at the specified location and return
  /// its length in bytes in the input file.  If the token needs cleaning (e.g.
  /// includes a trigraph or an escaped newline) then this count includes bytes
--- a/include/clang/Lex/Preprocessor.h
+++ b/include/clang/Lex/Preprocessor.h
@ -641,17 +641,9 @@ public:
  /// UCNs, etc.
  ///
  /// \param Invalid If non-NULL, will be set \c true if an error occurs.
-  std::string getSpelling(const Token &Tok, bool *Invalid = 0) const;
-
-  /// getSpelling() - Return the 'spelling' of the Tok token.  The spelling of a
-  /// token is the characters used to represent the token in the source file
-  /// after trigraph expansion and escaped-newline folding.  In particular, this
-  /// wants to get the true, uncanonicalized, spelling of things like digraphs
-  /// UCNs, etc.
-  static std::string getSpelling(const Token &Tok,
-                                 const SourceManager &SourceMgr,
-                                 const LangOptions &Features, 
-                                 bool *Invalid = 0);
+  std::string getSpelling(const Token &Tok, bool *Invalid = 0) const {
+    return Lexer::getSpelling(Tok, SourceMgr, Features, Invalid);
+  }

  /// getSpelling - This method is used to get the spelling of a token into a
  /// preallocated buffer, instead of as an std::string.  The caller is required
@ -665,12 +657,8 @@ public:
  /// if an internal buffer is returned.
  unsigned getSpelling(const Token &Tok, const char *&Buffer, 
                       bool *Invalid = 0) const {
-    return getSpelling(Tok, Buffer, SourceMgr, Features, Invalid);
+    return Lexer::getSpelling(Tok, Buffer, SourceMgr, Features, Invalid);
  }
-  static unsigned getSpelling(const Token &Tok, const char *&Buffer, 
-                              const SourceManager &SourceMgr,
-                              const LangOptions &Features,
-                              bool *Invalid = 0);

  /// getSpelling - This method is used to get the spelling of a token into a
  /// SmallVector. Note that the returned StringRef may not point to the
--- a/lib/Lex/Lexer.cpp
+++ b/lib/Lex/Lexer.cpp
@ -212,6 +212,107 @@ void Lexer::Stringify(llvm::SmallVectorImpl<char> &Str) {
  }
 }

+//===----------------------------------------------------------------------===//
+// Token Spelling
+//===----------------------------------------------------------------------===//
+
+/// getSpelling() - Return the 'spelling' of this token.  The spelling of a
+/// token are the characters used to represent the token in the source file
+/// after trigraph expansion and escaped-newline folding.  In particular, this
+/// wants to get the true, uncanonicalized, spelling of things like digraphs
+/// UCNs, etc.
+std::string Lexer::getSpelling(const Token &Tok, const SourceManager &SourceMgr,
+                               const LangOptions &Features, bool *Invalid) {
+  assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
+  
+  // If this token contains nothing interesting, return it directly.
+  bool CharDataInvalid = false;
+  const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation(), 
+                                                    &CharDataInvalid);
+  if (Invalid)
+    *Invalid = CharDataInvalid;
+  if (CharDataInvalid)
+    return std::string();
+  
+  if (!Tok.needsCleaning())
+    return std::string(TokStart, TokStart+Tok.getLength());
+  
+  std::string Result;
+  Result.reserve(Tok.getLength());
+  
+  // Otherwise, hard case, relex the characters into the string.
+  for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
+       Ptr != End; ) {
+    unsigned CharSize;
+    Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features));
+    Ptr += CharSize;
+  }
+  assert(Result.size() != unsigned(Tok.getLength()) &&
+         "NeedsCleaning flag set on something that didn't need cleaning!");
+  return Result;
+}
+
+/// getSpelling - This method is used to get the spelling of a token into a
+/// preallocated buffer, instead of as an std::string.  The caller is required
+/// to allocate enough space for the token, which is guaranteed to be at least
+/// Tok.getLength() bytes long.  The actual length of the token is returned.
+///
+/// Note that this method may do two possible things: it may either fill in
+/// the buffer specified with characters, or it may *change the input pointer*
+/// to point to a constant buffer with the data already in it (avoiding a
+/// copy).  The caller is not allowed to modify the returned buffer pointer
+/// if an internal buffer is returned.
+unsigned Lexer::getSpelling(const Token &Tok, const char *&Buffer, 
+                            const SourceManager &SourceMgr,
+                            const LangOptions &Features, bool *Invalid) {
+  assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
+  
+  // If this token is an identifier, just return the string from the identifier
+  // table, which is very quick.
+  if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
+    Buffer = II->getNameStart();
+    return II->getLength();
+  }
+  
+  // Otherwise, compute the start of the token in the input lexer buffer.
+  const char *TokStart = 0;
+  
+  if (Tok.isLiteral())
+    TokStart = Tok.getLiteralData();
+  
+  if (TokStart == 0) {
+    bool CharDataInvalid = false;
+    TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid);
+    if (Invalid)
+      *Invalid = CharDataInvalid;
+    if (CharDataInvalid) {
+      Buffer = "";
+      return 0;
+    }
+  }
+  
+  // If this token contains nothing interesting, return it directly.
+  if (!Tok.needsCleaning()) {
+    Buffer = TokStart;
+    return Tok.getLength();
+  }
+  
+  // Otherwise, hard case, relex the characters into the string.
+  char *OutBuf = const_cast<char*>(Buffer);
+  for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
+       Ptr != End; ) {
+    unsigned CharSize;
+    *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features);
+    Ptr += CharSize;
+  }
+  assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
+         "NeedsCleaning flag set on something that didn't need cleaning!");
+  
+  return OutBuf-Buffer;
+}
+
+
+
 static bool isWhitespace(unsigned char c);

 /// MeasureTokenLength - Relex the token at the specified location and return
--- a/lib/Lex/LiteralSupport.cpp
+++ b/lib/Lex/LiteralSupport.cpp
@ -898,8 +898,8 @@ void StringLiteralParser::init(const Token *StringToks, unsigned NumStringToks){
    // and 'spelled' tokens can only shrink.
    bool StringInvalid = false;
    unsigned ThisTokLen = 
-      Preprocessor::getSpelling(StringToks[i], ThisTokBuf, SM, Features,
-                                &StringInvalid);
+      Lexer::getSpelling(StringToks[i], ThisTokBuf, SM, Features,
+                         &StringInvalid);
    if (StringInvalid) {
      hadError = 1;
      continue;
@ -1019,8 +1019,8 @@ unsigned StringLiteralParser::getOffsetOfStringByte(const Token &Tok,

  bool StringInvalid = false;
  const char *SpellingPtr = &SpellingBuffer[0];
-  unsigned TokLen = Preprocessor::getSpelling(Tok, SpellingPtr, SM, Features,
-                                              &StringInvalid);
+  unsigned TokLen = Lexer::getSpelling(Tok, SpellingPtr, SM, Features,
+                                       &StringInvalid);
  if (StringInvalid)
    return 0;

--- a/lib/Lex/Preprocessor.cpp
+++ b/lib/Lex/Preprocessor.cpp
@ -278,116 +278,6 @@ void Preprocessor::CodeCompleteNaturalLanguage() {
    CodeComplete->CodeCompleteNaturalLanguage();
 }

-//===----------------------------------------------------------------------===//
-// Token Spelling
-//===----------------------------------------------------------------------===//
-
-/// getSpelling() - Return the 'spelling' of this token.  The spelling of a
-/// token are the characters used to represent the token in the source file
-/// after trigraph expansion and escaped-newline folding.  In particular, this
-/// wants to get the true, uncanonicalized, spelling of things like digraphs
-/// UCNs, etc.
-std::string Preprocessor::getSpelling(const Token &Tok,
-                                      const SourceManager &SourceMgr,
-                                      const LangOptions &Features, 
-                                      bool *Invalid) {
-  assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
-
-  // If this token contains nothing interesting, return it directly.
-  bool CharDataInvalid = false;
-  const char* TokStart = SourceMgr.getCharacterData(Tok.getLocation(), 
-                                                    &CharDataInvalid);
-  if (Invalid)
-    *Invalid = CharDataInvalid;
-  if (CharDataInvalid)
-    return std::string();
-
-  if (!Tok.needsCleaning())
-    return std::string(TokStart, TokStart+Tok.getLength());
-
-  std::string Result;
-  Result.reserve(Tok.getLength());
-
-  // Otherwise, hard case, relex the characters into the string.
-  for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
-       Ptr != End; ) {
-    unsigned CharSize;
-    Result.push_back(Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features));
-    Ptr += CharSize;
-  }
-  assert(Result.size() != unsigned(Tok.getLength()) &&
-         "NeedsCleaning flag set on something that didn't need cleaning!");
-  return Result;
-}
-
-/// getSpelling() - Return the 'spelling' of this token.  The spelling of a
-/// token are the characters used to represent the token in the source file
-/// after trigraph expansion and escaped-newline folding.  In particular, this
-/// wants to get the true, uncanonicalized, spelling of things like digraphs
-/// UCNs, etc.
-std::string Preprocessor::getSpelling(const Token &Tok, bool *Invalid) const {
-  return getSpelling(Tok, SourceMgr, Features, Invalid);
-}
-
-/// getSpelling - This method is used to get the spelling of a token into a
-/// preallocated buffer, instead of as an std::string.  The caller is required
-/// to allocate enough space for the token, which is guaranteed to be at least
-/// Tok.getLength() bytes long.  The actual length of the token is returned.
-///
-/// Note that this method may do two possible things: it may either fill in
-/// the buffer specified with characters, or it may *change the input pointer*
-/// to point to a constant buffer with the data already in it (avoiding a
-/// copy).  The caller is not allowed to modify the returned buffer pointer
-/// if an internal buffer is returned.
-unsigned Preprocessor::getSpelling(const Token &Tok, const char *&Buffer, 
-                                   const SourceManager &SourceMgr,
-                                   const LangOptions &Features,
-                                   bool *Invalid) {
-  assert((int)Tok.getLength() >= 0 && "Token character range is bogus!");
-
-  // If this token is an identifier, just return the string from the identifier
-  // table, which is very quick.
-  if (const IdentifierInfo *II = Tok.getIdentifierInfo()) {
-    Buffer = II->getNameStart();
-    return II->getLength();
-  }
-
-  // Otherwise, compute the start of the token in the input lexer buffer.
-  const char *TokStart = 0;
-
-  if (Tok.isLiteral())
-    TokStart = Tok.getLiteralData();
-
-  if (TokStart == 0) {
-    bool CharDataInvalid = false;
-    TokStart = SourceMgr.getCharacterData(Tok.getLocation(), &CharDataInvalid);
-    if (Invalid)
-      *Invalid = CharDataInvalid;
-    if (CharDataInvalid) {
-      Buffer = "";
-      return 0;
-    }
-  }
-
-  // If this token contains nothing interesting, return it directly.
-  if (!Tok.needsCleaning()) {
-    Buffer = TokStart;
-    return Tok.getLength();
-  }
-
-  // Otherwise, hard case, relex the characters into the string.
-  char *OutBuf = const_cast<char*>(Buffer);
-  for (const char *Ptr = TokStart, *End = TokStart+Tok.getLength();
-       Ptr != End; ) {
-    unsigned CharSize;
-    *OutBuf++ = Lexer::getCharAndSizeNoWarn(Ptr, CharSize, Features);
-    Ptr += CharSize;
-  }
-  assert(unsigned(OutBuf-Buffer) != Tok.getLength() &&
-         "NeedsCleaning flag set on something that didn't need cleaning!");
-
-  return OutBuf-Buffer;
-}

 /// getSpelling - This method is used to get the spelling of a token into a
 /// SmallVector. Note that the returned StringRef may not point to the