Introduce a new lexer function to compute the "preamble" of a file,

which is the part of the file that contains all of the initial
comments, includes, and preprocessor directives that occur before any
of the actual code. Added a new -print-preamble cc1 action that is
only used for testing.


git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@108913 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Douglas Gregor 2010-07-20 20:18:03 +00:00
Родитель 4751a53c5e
Коммит f033f1da4a
10 изменённых файлов: 223 добавлений и 1 удалений

Просмотреть файл

@ -304,6 +304,9 @@ def fixit_EQ : Joined<"-fixit=">,
HelpText<"Apply fix-it advice creating a file with the given suffix">;
def parse_print_callbacks : Flag<"-parse-print-callbacks">,
HelpText<"Run parser and print each callback invoked">;
// -print-preamble: selects the frontend action that prints the leading
// "preamble" portion of the input file. Intended for testing only.
def print_preamble : Flag<"-print-preamble">,
HelpText<"Print the \"preamble\" of a file, which is a candidate for implicit"
" precompiled headers.">;
def emit_html : Flag<"-emit-html">,
HelpText<"Output input source as HTML">;
def ast_print : Flag<"-ast-print">,

Просмотреть файл

@ -134,6 +134,16 @@ public:
virtual bool hasCodeCompletionSupport() const;
};
/// \brief Frontend action that prints the preamble of the current input file.
///
/// The action works on the raw file contents: it never creates an AST
/// consumer and reports that it only needs the preprocessor.
class PrintPreambleAction : public FrontendAction {
protected:
  /// Performs the action; defined out of line.
  void ExecuteAction();

  /// This action does not consume an AST, so no consumer is created and a
  /// null pointer is returned.
  virtual ASTConsumer *CreateASTConsumer(CompilerInstance &,
                                         llvm::StringRef) {
    return 0;
  }

  /// Always true: this action only requires the preprocessor.
  virtual bool usesPreprocessorOnly() const {
    return true;
  }
};
//===----------------------------------------------------------------------===//
// Preprocessor Actions
//===----------------------------------------------------------------------===//
@ -174,7 +184,7 @@ protected:
virtual bool hasPCHSupport() const { return true; }
};
} // end namespace clang
#endif

Просмотреть файл

@ -44,6 +44,7 @@ namespace frontend {
ParseSyntaxOnly, ///< Parse and perform semantic analysis.
PluginAction, ///< Run a plugin action, \see ActionName.
PrintDeclContext, ///< Print DeclContext and their Decls.
PrintPreamble, ///< Print the "preamble" of the input file
PrintPreprocessedInput, ///< -E mode.
RewriteMacros, ///< Expand macros but not #includes.
RewriteObjC, ///< ObjC->C Rewriter.

Просмотреть файл

@ -219,6 +219,19 @@ public:
const SourceManager &SM,
const LangOptions &LangOpts);
/// \brief Compute the preamble of the given file.
///
/// The preamble of a file contains the initial comments, include directives,
/// and other preprocessor directives that occur before the code in this
/// particular file actually begins. The preamble of the main source file is
/// a potential prefix header.
///
/// The preamble ends at the first token that is neither a comment nor part
/// of a recognized preprocessor directive. An unrecognized directive, or an
/// #endif with no open conditional, ends the preamble at its '#'. If a
/// conditional block (#if / #ifdef / #ifndef) is still open when scanning
/// stops, the preamble instead ends at the '#' that opened the outermost
/// such block, so a conditional is never split across the boundary.
///
/// \param Buffer The memory buffer containing the file's contents. It is
/// dereferenced unconditionally and therefore must not be null.
///
/// \returns The offset into the file where the preamble ends and the rest
/// of the file begins.
static unsigned ComputePreamble(const llvm::MemoryBuffer *Buffer);
//===--------------------------------------------------------------------===//
// Internal implementation interfaces.
private:

Просмотреть файл

@ -331,6 +331,7 @@ static const char *getActionName(frontend::ActionKind Kind) {
case frontend::ParsePrintCallbacks: return "-parse-print-callbacks";
case frontend::ParseSyntaxOnly: return "-fsyntax-only";
case frontend::PrintDeclContext: return "-print-decl-contexts";
case frontend::PrintPreamble: return "-print-preamble";
case frontend::PrintPreprocessedInput: return "-E";
case frontend::RewriteMacros: return "-rewrite-macros";
case frontend::RewriteObjC: return "-rewrite-objc";
@ -989,6 +990,8 @@ static InputKind ParseFrontendArgs(FrontendOptions &Opts, ArgList &Args,
Opts.ProgramAction = frontend::ParseSyntaxOnly; break;
case OPT_print_decl_contexts:
Opts.ProgramAction = frontend::PrintDeclContext; break;
case OPT_print_preamble:
Opts.ProgramAction = frontend::PrintPreamble; break;
case OPT_E:
Opts.ProgramAction = frontend::PrintPreprocessedInput; break;
case OPT_rewrite_macros:

Просмотреть файл

@ -19,6 +19,7 @@
#include "clang/Frontend/FrontendDiagnostic.h"
#include "clang/Frontend/Utils.h"
#include "llvm/ADT/OwningPtr.h"
#include "llvm/Support/MemoryBuffer.h"
#include "llvm/Support/raw_ostream.h"
using namespace clang;
@ -192,3 +193,32 @@ void PrintPreprocessedAction::ExecuteAction() {
DoPrintPreprocessedInput(CI.getPreprocessor(), OS,
CI.getPreprocessorOutputOpts());
}
/// Print the preamble of the current input file to llvm::outs().
///
/// Only unpreprocessed source inputs (C, C++, Objective-C, Objective-C++ and
/// OpenCL) are handled. For every other input kind, or when
/// MemoryBuffer::getFile() yields no buffer for the current file, nothing is
/// printed.
void PrintPreambleAction::ExecuteAction() {
  switch (getCurrentFileKind()) {
  case IK_C:
  case IK_CXX:
  case IK_ObjC:
  case IK_ObjCXX:
  case IK_OpenCL:
    break;

  case IK_None:
  case IK_Asm:
  case IK_PreprocessedC:
  case IK_PreprocessedCXX:
  case IK_PreprocessedObjC:
  case IK_PreprocessedObjCXX:
  case IK_AST:
  case IK_LLVM_IR:
    // We can't do anything with these.
    return;
  }

  // Own the buffer through OwningPtr so that it is released on every exit
  // path instead of depending on a manually paired delete.
  llvm::OwningPtr<llvm::MemoryBuffer> Buffer(
      llvm::MemoryBuffer::getFile(getCurrentFile()));
  if (!Buffer)
    return;

  // Emit exactly the leading bytes of the file that make up the preamble.
  const unsigned Preamble = Lexer::ComputePreamble(Buffer.get());
  llvm::outs().write(Buffer->getBufferStart(), Preamble);
}

Просмотреть файл

@ -28,6 +28,7 @@
#include "clang/Lex/Preprocessor.h"
#include "clang/Lex/LexDiagnostic.h"
#include "clang/Basic/SourceManager.h"
#include "llvm/ADT/StringSwitch.h"
#include "llvm/Support/Compiler.h"
#include "llvm/Support/MemoryBuffer.h"
#include <cctype>
@ -247,6 +248,130 @@ unsigned Lexer::MeasureTokenLength(SourceLocation Loc,
return TheTok.getLength();
}
namespace {
  /// Classification of a preprocessor directive name encountered while
  /// scanning for the end of a file's preamble.
  enum PreambleDirectiveKind {
    /// A directive that may appear in the preamble and needs no bookkeeping.
    PDK_Skipped,
    /// A directive that opens a conditional block (if / ifdef / ifndef).
    PDK_StartIf,
    /// The directive that closes a conditional block (endif).
    PDK_EndIf,
    /// Any directive name that is not recognized.
    PDK_Unknown
  };
} // end anonymous namespace
/// Scan the start of \p Buffer with a raw lexer and return the offset at
/// which its preamble ends.
///
/// Comments and recognized preprocessor directives are skipped. Scanning
/// stops at the first other token, at an unrecognized directive, or at an
/// #endif that has no open conditional. If a conditional block is still open
/// when scanning stops, the returned offset is that of the '#' which opened
/// the outermost open block.
unsigned Lexer::ComputePreamble(const llvm::MemoryBuffer *Buffer) {
// Create a lexer starting at the beginning of the file. Note that we use a
// "fake" file source location at offset 1 so that the lexer will track our
// position within the file.
const unsigned StartOffset = 1;
SourceLocation StartLoc = SourceLocation::getFromRawEncoding(StartOffset);
LangOptions LangOpts;
Lexer TheLexer(StartLoc, LangOpts, Buffer->getBufferStart(),
Buffer->getBufferStart(), Buffer->getBufferEnd());
// True while we are skipping the remaining tokens of a directive line.
bool InPreprocessorDirective = false;
Token TheTok;
// The '#' token of the outermost conditional block that is currently open;
// only meaningful while IfCount is non-zero.
Token IfStartTok;
// Number of conditional blocks currently open.
unsigned IfCount = 0;
do {
TheLexer.LexFromRawLexer(TheTok);
if (InPreprocessorDirective) {
// If we've hit the end of the file, we're done.
if (TheTok.getKind() == tok::eof) {
InPreprocessorDirective = false;
break;
}
// If we haven't hit the end of the preprocessor directive, skip this
// token.
if (!TheTok.isAtStartOfLine())
continue;
// We've passed the end of the preprocessor directive, and will look
// at this token again below.
InPreprocessorDirective = false;
}
// Comments are okay; skip over them.
if (TheTok.getKind() == tok::comment)
continue;
if (TheTok.isAtStartOfLine() && TheTok.getKind() == tok::hash) {
// This is the start of a preprocessor directive.
Token HashTok = TheTok;
InPreprocessorDirective = true;
// Figure out which directive this is. Since we're lexing raw tokens,
// we don't have an identifier table available. Instead, just look at
// the raw identifier to recognize and categorize preprocessor directives.
TheLexer.LexFromRawLexer(TheTok);
// Only identifiers that do not need cleaning are classified, because their
// spelling is read straight from the buffer below. Anything else falls
// through to the rollback and ends the preamble at the '#'.
if (TheTok.getKind() == tok::identifier && !TheTok.needsCleaning()) {
// Token locations are relative to the fake start location (raw encoding
// 1), so subtracting 1 yields the identifier's offset within the buffer.
const char *IdStart = Buffer->getBufferStart()
+ TheTok.getLocation().getRawEncoding() - 1;
llvm::StringRef Keyword(IdStart, TheTok.getLength());
PreambleDirectiveKind PDK
= llvm::StringSwitch<PreambleDirectiveKind>(Keyword)
.Case("include", PDK_Skipped)
.Case("__include_macros", PDK_Skipped)
.Case("define", PDK_Skipped)
.Case("undef", PDK_Skipped)
.Case("line", PDK_Skipped)
.Case("error", PDK_Skipped)
.Case("pragma", PDK_Skipped)
.Case("import", PDK_Skipped)
.Case("include_next", PDK_Skipped)
.Case("warning", PDK_Skipped)
.Case("ident", PDK_Skipped)
.Case("sccs", PDK_Skipped)
.Case("assert", PDK_Skipped)
.Case("unassert", PDK_Skipped)
.Case("if", PDK_StartIf)
.Case("ifdef", PDK_StartIf)
.Case("ifndef", PDK_StartIf)
.Case("elif", PDK_Skipped)
.Case("else", PDK_Skipped)
.Case("endif", PDK_EndIf)
.Default(PDK_Unknown);
// In this switch, 'continue' resumes the enclosing do-while loop (the
// directive is accepted), whereas 'break' only leaves the switch and
// falls through to the rollback below (the preamble ends).
switch (PDK) {
case PDK_Skipped:
continue;
case PDK_StartIf:
// Remember where the outermost conditional block starts.
if (IfCount == 0)
IfStartTok = HashTok;
++IfCount;
continue;
case PDK_EndIf:
// Mismatched #endif. The preamble ends here.
if (IfCount == 0)
break;
--IfCount;
continue;
case PDK_Unknown:
// We don't know what this directive is; stop at the '#'.
break;
}
}
// We only end up here if we didn't recognize the preprocessor
// directive or it was one that can't occur in the preamble at this
// point. Roll back the current token to the location of the '#'.
InPreprocessorDirective = false;
TheTok = HashTok;
}
// We hit a token that cannot be part of the preamble (or rolled back to the
// '#' of a rejected directive); TheTok now marks where the preamble ends.
break;
} while (true);
// If a conditional block is still open, end the preamble at the '#' that
// opened the outermost one; otherwise end it at the token that stopped the
// scan. The result is expressed as an offset from the fake start location.
SourceLocation End = IfCount? IfStartTok.getLocation() : TheTok.getLocation();
return End.getRawEncoding() - StartLoc.getRawEncoding();
}
//===----------------------------------------------------------------------===//
// Character information.
//===----------------------------------------------------------------------===//

Просмотреть файл

@ -0,0 +1,11 @@
// Preamble detection test: see below for comments and test commands.
#include <blah>
#ifndef FOO
#else
#ifdef BAR
#elif WIBBLE
#endif
#pragma unknown
#endif

25
test/Lexer/preamble.c Normal file
Просмотреть файл

@ -0,0 +1,25 @@
// Preamble detection test: see below for comments and test commands.
#include <blah>
#ifndef FOO
#else
#ifdef BAR
#elif WIBBLE
#endif
#pragma unknown
#endif
#ifdef WIBBLE
#include "honk"
#else
int foo();
#endif
// This test checks for detection of the preamble of a file, which
// includes all of the starting comments and #includes. Note that any
// changes to the preamble part of this file must be mirrored in
// Inputs/preamble.txt, since we diff against it.
// RUN: %clang_cc1 -print-preamble %s > %t
// RUN: diff %t %S/Inputs/preamble.txt

Просмотреть файл

@ -102,6 +102,7 @@ static FrontendAction *CreateFrontendBaseAction(CompilerInstance &CI) {
}
case PrintDeclContext: return new DeclContextPrintAction();
case PrintPreamble: return new PrintPreambleAction();
case PrintPreprocessedInput: return new PrintPreprocessedAction();
case RewriteMacros: return new RewriteMacrosAction();
case RewriteObjC: return new RewriteObjCAction();