From 668c1a4fdcc56bdd050256b1688e116fe84b72db Mon Sep 17 00:00:00 2001 From: Douglas Gregor Date: Tue, 21 Apr 2009 22:25:48 +0000 Subject: [PATCH] Lazy deserialization of the declaration chains associated with identifiers from a precompiled header. This patch changes the primary name lookup method for entities within a precompiled header. Previously, we would load all of the names of declarations at translation unit scope into a large DenseMap (inside the TranslationUnitDecl's DeclContext), and then perform a special "last resort" lookup into this DeclContext when we knew there was a PCH file (see Sema::LookupName). Now, when we see an identifier named for the first time, we load all of the declarations with that name that are visible from the translation unit into the IdentifierInfo's chain of declarations. Thus, the explicit "look into the translation unit's DeclContext" code is gone, and Sema effectively uses the same IdentifierInfo-based name lookup mechanism whether we are using a PCH file or not. This approach should help PCH scale with the size of the input program rather than the size of the PCH file. The "Hello, World!" application with Carbon.h as a PCH file now loads 20% of the identifiers in the PCH file rather than 85% of the identifiers. 90% of the 20% of identifiers loaded are actually loaded when we deserialize the preprocessor state. The next step is to make the preprocessor load macros lazily, which should drastically reduce the number of types, declarations, and identifiers loaded for "Hello, World". 
git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@69737 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/AST/ExternalASTSource.h | 12 +- include/clang/Basic/IdentifierTable.h | 38 +++- include/clang/Basic/OnDiskHashTable.h | 29 ++- include/clang/Frontend/PCHReader.h | 60 +++++- include/clang/Sema/ExternalSemaSource.h | 45 ++++ include/clang/Sema/SemaConsumer.h | 2 +- lib/Frontend/PCHReader.cpp | 263 ++++++++++++++++++++---- lib/Frontend/PCHWriter.cpp | 32 +-- lib/Sema/IdentifierResolver.cpp | 22 ++ lib/Sema/IdentifierResolver.h | 8 + lib/Sema/ParseAST.cpp | 10 +- lib/Sema/SemaLookup.cpp | 11 - test/PCH/builtins.c | 10 + test/PCH/builtins.h | 2 + 14 files changed, 455 insertions(+), 89 deletions(-) create mode 100644 include/clang/Sema/ExternalSemaSource.h create mode 100644 test/PCH/builtins.c create mode 100644 test/PCH/builtins.h diff --git a/include/clang/AST/ExternalASTSource.h b/include/clang/AST/ExternalASTSource.h index 267b4838a4..ef09770933 100644 --- a/include/clang/AST/ExternalASTSource.h +++ b/include/clang/AST/ExternalASTSource.h @@ -7,7 +7,8 @@ // //===----------------------------------------------------------------------===// // -// This file defines the ExternalASTSource interface, +// This file defines the ExternalASTSource interface, which enables +// construction of AST nodes from some external source. // //===----------------------------------------------------------------------===// #ifndef LLVM_CLANG_AST_EXTERNAL_AST_SOURCE_H @@ -22,6 +23,7 @@ namespace clang { class ASTConsumer; class Decl; class DeclContext; +class ExternalSemaSource; // layering violation required for downcasting class Stmt; /// \brief The deserialized representation of a set of declarations @@ -44,7 +46,15 @@ struct VisibleDeclaration { /// actual type and declaration nodes, and read parts of declaration /// contexts. class ExternalASTSource { + /// \brief Whether this AST source also provides information for + /// semantic analysis. 
+ bool SemaSource; + + friend class ExternalSemaSource; + public: + ExternalASTSource() : SemaSource(false) { } + virtual ~ExternalASTSource(); /// \brief Resolve a type ID into a type, potentially building a new diff --git a/include/clang/Basic/IdentifierTable.h b/include/clang/Basic/IdentifierTable.h index d4f680494e..3156bbc4e9 100644 --- a/include/clang/Basic/IdentifierTable.h +++ b/include/clang/Basic/IdentifierTable.h @@ -229,7 +229,7 @@ private: }; /// IdentifierInfoLookup - An abstract class used by IdentifierTable that -/// provides an interface for for performing lookups from strings +/// provides an interface for performing lookups from strings /// (const char *) to IdentiferInfo objects. class IdentifierInfoLookup { public: @@ -260,6 +260,11 @@ public: IdentifierTable(const LangOptions &LangOpts, IdentifierInfoLookup* externalLookup = 0); + /// \brief Set the external identifier lookup mechanism. + void setExternalIdentifierLookup(IdentifierInfoLookup *IILookup) { + ExternalLookup = IILookup; + } + llvm::BumpPtrAllocator& getAllocator() { return HashTable.getAllocator(); } @@ -295,6 +300,34 @@ public: return *II; } + /// \brief Creates a new IdentifierInfo from the given string. + /// + /// This is a lower-level version of get() that requires that this + /// identifier not be known previously and that does not consult an + /// external source for identifiers. In particular, external + /// identifier sources can use this routine to build IdentifierInfo + /// nodes and then introduce additional information about those + /// identifiers. + IdentifierInfo &CreateIdentifierInfo(const char *NameStart, + const char *NameEnd) { + llvm::StringMapEntry &Entry = + HashTable.GetOrCreateValue(NameStart, NameEnd); + + IdentifierInfo *II = Entry.getValue(); + assert(!II && "IdentifierInfo already exists"); + + // Lookups failed, make a new IdentifierInfo. 
+ void *Mem = getAllocator().Allocate(); + II = new (Mem) IdentifierInfo(); + Entry.setValue(II); + + // Make sure getName() knows how to find the IdentifierInfo + // contents. + II->Entry = &Entry; + + return *II; + } + IdentifierInfo &get(const char *Name) { return get(Name, Name+strlen(Name)); } @@ -304,14 +337,11 @@ public: return get(NameBytes, NameBytes+Name.size()); } -private: typedef HashTableTy::const_iterator iterator; typedef HashTableTy::const_iterator const_iterator; iterator begin() const { return HashTable.begin(); } iterator end() const { return HashTable.end(); } -public: - unsigned size() const { return HashTable.size(); } /// PrintStats - Print some statistics to stderr that indicate how well the diff --git a/include/clang/Basic/OnDiskHashTable.h b/include/clang/Basic/OnDiskHashTable.h index 631d497f1a..3caeb9ffd8 100644 --- a/include/clang/Basic/OnDiskHashTable.h +++ b/include/clang/Basic/OnDiskHashTable.h @@ -242,6 +242,8 @@ class OnDiskChainedHashTable { const unsigned NumEntries; const unsigned char* const Buckets; const unsigned char* const Base; + Info InfoObj; + public: typedef typename Info::internal_key_type internal_key_type; typedef typename Info::external_key_type external_key_type; @@ -249,9 +251,10 @@ public: OnDiskChainedHashTable(unsigned numBuckets, unsigned numEntries, const unsigned char* buckets, - const unsigned char* base) + const unsigned char* base, + const Info &InfoObj = Info()) : NumBuckets(numBuckets), NumEntries(numEntries), - Buckets(buckets), Base(base) { + Buckets(buckets), Base(base), InfoObj(InfoObj) { assert((reinterpret_cast(buckets) & 0x3) == 0 && "'buckets' must have a 4-byte alignment"); } @@ -267,22 +270,27 @@ public: internal_key_type key; const unsigned char* const data; const unsigned len; + Info *InfoObj; public: iterator() : data(0), len(0) {} - iterator(const internal_key_type k, const unsigned char* d, unsigned l) - : key(k), data(d), len(l) {} + iterator(const internal_key_type k, const unsigned 
char* d, unsigned l, + Info *InfoObj) + : key(k), data(d), len(l), InfoObj(InfoObj) {} - data_type operator*() const { return Info::ReadData(key, data, len); } + data_type operator*() const { return InfoObj->ReadData(key, data, len); } bool operator==(const iterator& X) const { return X.data == data; } bool operator!=(const iterator& X) const { return X.data != data; } }; - iterator find(const external_key_type& eKey) { + iterator find(const external_key_type& eKey, Info *InfoPtr = 0) { + if (!InfoPtr) + InfoPtr = &InfoObj; + using namespace io; const internal_key_type& iKey = Info::GetInternalKey(eKey); unsigned key_hash = Info::ComputeHash(iKey); - // Each bucket is just a 32-bit offset into the PTH file. + // Each bucket is just a 32-bit offset into the hash table file. unsigned idx = key_hash & (NumBuckets - 1); const unsigned char* Bucket = Buckets + sizeof(uint32_t)*idx; @@ -319,7 +327,7 @@ public: } // The key matches! - return iterator(X, Items + L.first, L.second); + return iterator(X, Items + L.first, L.second, InfoPtr); } return iterator(); @@ -329,7 +337,8 @@ public: static OnDiskChainedHashTable* Create(const unsigned char* buckets, - const unsigned char* const base) { + const unsigned char* const base, + const Info &InfoObj = Info()) { using namespace io; assert(buckets > base); assert((reinterpret_cast(buckets) & 0x3) == 0 && @@ -338,7 +347,7 @@ public: unsigned numBuckets = ReadLE32(buckets); unsigned numEntries = ReadLE32(buckets); return new OnDiskChainedHashTable(numBuckets, numEntries, buckets, - base); + base, InfoObj); } }; diff --git a/include/clang/Frontend/PCHReader.h b/include/clang/Frontend/PCHReader.h index 6ee549abf5..3af147f7a9 100644 --- a/include/clang/Frontend/PCHReader.h +++ b/include/clang/Frontend/PCHReader.h @@ -15,9 +15,10 @@ #include "clang/Frontend/PCHBitCodes.h" #include "clang/AST/DeclarationName.h" -#include "clang/AST/ExternalASTSource.h" +#include "clang/Sema/ExternalSemaSource.h" #include "clang/AST/Type.h" #include 
"clang/Basic/Diagnostic.h" +#include "clang/Basic/IdentifierTable.h" #include "llvm/ADT/APFloat.h" #include "llvm/ADT/APInt.h" #include "llvm/ADT/APSInt.h" @@ -44,7 +45,9 @@ class Decl; class DeclContext; class GotoStmt; class LabelStmt; +class NamedDecl; class Preprocessor; +class Sema; class SwitchCase; /// \brief Reads a precompiled head containing the contents of a @@ -59,11 +62,15 @@ class SwitchCase; /// The PCH reader provides lazy de-serialization of declarations, as /// required when traversing the AST. Only those AST nodes that are /// actually required will be de-serialized. -class PCHReader : public ExternalASTSource { +class PCHReader : public ExternalSemaSource, public IdentifierInfoLookup { public: enum PCHReadResult { Success, Failure, IgnorePCH }; private: + /// \brief The semantic analysis object that will be processing the + /// PCH file and the translation unit that uses it. + Sema *SemaObj; + /// \brief The preprocessor that will be loading the source file. Preprocessor &PP; @@ -116,8 +123,14 @@ private: /// DeclContext. DeclContextOffsetsMap DeclContextOffsets; - /// \brief String data for the identifiers in the PCH file. - const char *IdentifierTable; + /// \brief Actual data for the on-disk hash table. + /// + /// FIXME: This will eventually go away. + const char *IdentifierTableData; + + /// \brief A pointer to an on-disk hash table of opaque type + /// IdentifierHashTable. + void *IdentifierLookupTable; /// \brief String data for identifiers, indexed by the identifier ID /// minus one. @@ -158,10 +171,10 @@ private: /// in the PCH file. unsigned TotalNumStatements; - /// \brief + /// \brief FIXME: document! 
llvm::SmallVector SpecialTypes; - PCHReadResult ReadPCHBlock(); + PCHReadResult ReadPCHBlock(uint64_t &PreprocessorBlockOffset); bool CheckPredefinesBuffer(const char *PCHPredef, unsigned PCHPredefLen, FileID PCHBufferID); @@ -179,8 +192,9 @@ private: public: typedef llvm::SmallVector RecordData; - PCHReader(Preprocessor &PP, ASTContext &Context) - : PP(PP), Context(Context), IdentifierTable(0), NumStatementsRead(0) { } + explicit PCHReader(Preprocessor &PP, ASTContext &Context) + : SemaObj(0), PP(PP), Context(Context), + IdentifierTableData(0), NumStatementsRead(0) { } ~PCHReader() {} @@ -246,6 +260,23 @@ public: /// \brief Print some statistics about PCH usage. virtual void PrintStats(); + /// \brief Initialize the semantic source with the Sema instance + /// being used to perform semantic analysis on the abstract syntax + /// tree. + virtual void InitializeSema(Sema &S); + + /// \brief Retrieve the IdentifierInfo for the named identifier. + /// + /// This routine builds a new IdentifierInfo for the given + /// identifier. If any declarations with this name are visible from + /// translation unit scope, their declarations will be deserialized + /// and introduced into the declaration chain of the + /// identifier. FIXME: if this identifier names a macro, deserialize + /// the macro. + virtual IdentifierInfo* get(const char *NameStart, const char *NameEnd); + + void SetIdentifierInfo(unsigned ID, const IdentifierInfo *II); + /// \brief Report a diagnostic. DiagnosticBuilder Diag(unsigned DiagID); @@ -284,9 +315,22 @@ public: /// supplements. ASTContext &getContext() { return Context; } + // FIXME: temporary hack to store declarations that we deserialized + // before we had access to the Sema object. + llvm::SmallVector TUDecls; + + /// \brief Retrieve the semantic analysis object used to analyze the + /// translation unit in which the precompiled header is being + /// imported. 
+ Sema *getSema() { return SemaObj; } + /// \brief Retrieve the stream that this PCH reader is reading from. llvm::BitstreamReader &getStream() { return Stream; } + /// \brief Retrieve the identifier table associated with the + /// preprocessor. + IdentifierTable &getIdentifierTable(); + /// \brief Record that the given ID maps to the given switch-case /// statement. void RecordSwitchCaseID(SwitchCase *SC, unsigned ID); diff --git a/include/clang/Sema/ExternalSemaSource.h b/include/clang/Sema/ExternalSemaSource.h new file mode 100644 index 0000000000..1c216e4b86 --- /dev/null +++ b/include/clang/Sema/ExternalSemaSource.h @@ -0,0 +1,45 @@ +//===--- ExternalSemaSource.h - External Sema Interface ---------*- C++ -*-===// +// +// The LLVM Compiler Infrastructure +// +// This file is distributed under the University of Illinois Open Source +// License. See LICENSE.TXT for details. +// +//===----------------------------------------------------------------------===// +// +// This file defines the ExternalSemaSource interface. +// +//===----------------------------------------------------------------------===// +#ifndef LLVM_CLANG_SEMA_EXTERNAL_SEMA_SOURCE_H +#define LLVM_CLANG_SEMA_EXTERNAL_SEMA_SOURCE_H + +#include "clang/AST/ExternalASTSource.h" + +namespace clang { + +class Sema; + +/// \brief An abstract interface that should be implemented by +/// external AST sources that also provide information for semantic +/// analysis. +class ExternalSemaSource : public ExternalASTSource { +public: + ExternalSemaSource() { + ExternalASTSource::SemaSource = true; + } + + /// \brief Initialize the semantic source with the Sema instance + /// being used to perform semantic analysis on the abstract syntax + /// tree. 
+ virtual void InitializeSema(Sema &S) {} + + // isa/cast/dyn_cast support + static bool classof(const ExternalASTSource *Source) { + return Source->SemaSource; + } + static bool classof(const ExternalSemaSource *) { return true; } +}; + +} // end namespace clang + +#endif diff --git a/include/clang/Sema/SemaConsumer.h b/include/clang/Sema/SemaConsumer.h index 25d4253390..e821947035 100644 --- a/include/clang/Sema/SemaConsumer.h +++ b/include/clang/Sema/SemaConsumer.h @@ -25,7 +25,7 @@ namespace clang { /// analysis of the entities in those ASTs. class SemaConsumer : public ASTConsumer { public: - explicit SemaConsumer() { + SemaConsumer() { ASTConsumer::SemaConsumer = true; } diff --git a/lib/Frontend/PCHReader.cpp b/lib/Frontend/PCHReader.cpp index abdb24f0ca..00075c88b6 100644 --- a/lib/Frontend/PCHReader.cpp +++ b/lib/Frontend/PCHReader.cpp @@ -12,6 +12,7 @@ //===----------------------------------------------------------------------===// #include "clang/Frontend/PCHReader.h" #include "clang/Frontend/FrontendDiagnostic.h" +#include "../Sema/Sema.h" // FIXME: move Sema headers elsewhere #include "clang/AST/ASTConsumer.h" #include "clang/AST/ASTContext.h" #include "clang/AST/Decl.h" @@ -22,6 +23,7 @@ #include "clang/AST/Type.h" #include "clang/Lex/MacroInfo.h" #include "clang/Lex/Preprocessor.h" +#include "clang/Basic/OnDiskHashTable.h" #include "clang/Basic/SourceManager.h" #include "clang/Basic/SourceManagerInternals.h" #include "clang/Basic/FileManager.h" @@ -994,6 +996,111 @@ unsigned PCHStmtReader::VisitBlockDeclRefExpr(BlockDeclRefExpr *E) { return 0; } +//===----------------------------------------------------------------------===// +// PCH reader implementation +//===----------------------------------------------------------------------===// + +namespace { +class VISIBILITY_HIDDEN PCHIdentifierLookupTrait { + PCHReader &Reader; + + // If we know the IdentifierInfo in advance, it is here and we will + // not build a new one. 
Used when deserializing information about an + // identifier that was constructed before the PCH file was read. + IdentifierInfo *KnownII; + +public: + typedef IdentifierInfo * data_type; + + typedef const std::pair external_key_type; + + typedef external_key_type internal_key_type; + + explicit PCHIdentifierLookupTrait(PCHReader &Reader, IdentifierInfo *II = 0) + : Reader(Reader), KnownII(II) { } + + static bool EqualKey(const internal_key_type& a, + const internal_key_type& b) { + return (a.second == b.second) ? memcmp(a.first, b.first, a.second) == 0 + : false; + } + + static unsigned ComputeHash(const internal_key_type& a) { + return BernsteinHash(a.first, a.second); + } + + // This hopefully will just get inlined and removed by the optimizer. + static const internal_key_type& + GetInternalKey(const external_key_type& x) { return x; } + + static std::pair + ReadKeyDataLength(const unsigned char*& d) { + using namespace clang::io; + unsigned KeyLen = ReadUnalignedLE16(d); + unsigned DataLen = ReadUnalignedLE16(d); + return std::make_pair(KeyLen, DataLen); + } + + static std::pair + ReadKey(const unsigned char* d, unsigned n) { + assert(n >= 2 && d[n-1] == '\0'); + return std::make_pair((const char*) d, n-1); + } + + IdentifierInfo *ReadData(const internal_key_type& k, + const unsigned char* d, + unsigned DataLen) { + using namespace clang::io; + uint32_t Bits = ReadUnalignedLE32(d); // FIXME: use these? + (void)Bits; + pch::IdentID ID = ReadUnalignedLE32(d); + DataLen -= 8; + + // Build the IdentifierInfo itself and link the identifier ID with + // the new IdentifierInfo. + IdentifierInfo *II = KnownII; + if (!II) + II = &Reader.getIdentifierTable().CreateIdentifierInfo( + k.first, k.first + k.second); + Reader.SetIdentifierInfo(ID, II); + + // FIXME: If this identifier is a macro, deserialize the macro + // definition now. + + // Read all of the declarations visible at global scope with this + // name. 
+ Sema *SemaObj = Reader.getSema(); + while (DataLen > 0) { + NamedDecl *D = cast(Reader.GetDecl(ReadUnalignedLE32(d))); + + if (SemaObj) { + // Introduce this declaration into the translation-unit scope + // and add it to the declaration chain for this identifier, so + // that (unqualified) name lookup will find it. + SemaObj->TUScope->AddDecl(Action::DeclPtrTy::make(D)); + SemaObj->IdResolver.AddDeclToIdentifierChain(II, D); + } else { + // Queue this declaration so that it will be added to the + // translation unit scope and identifier's declaration chain + // once a Sema object is known. + // FIXME: This is a temporary hack. It will go away once we have + // lazy deserialization of macros. + Reader.TUDecls.push_back(D); + } + + DataLen -= 4; + } + return II; + } +}; + +} // end anonymous namespace + +/// \brief The on-disk hash table used to contain information about +/// all of the identifiers in the program. +typedef OnDiskChainedHashTable + PCHIdentifierLookupTable; + // FIXME: use the diagnostics machinery static bool Error(const char *Str) { std::fprintf(stderr, "%s\n", Str); @@ -1314,30 +1421,18 @@ bool PCHReader::ReadPreprocessorBlock() { } } -PCHReader::PCHReadResult PCHReader::ReadPCHBlock() { +PCHReader::PCHReadResult +PCHReader::ReadPCHBlock(uint64_t &PreprocessorBlockOffset) { if (Stream.EnterSubBlock(pch::PCH_BLOCK_ID)) { Error("Malformed block record"); return Failure; } - uint64_t PreprocessorBlockBit = 0; - // Read all of the records and blocks for the PCH file. RecordData Record; while (!Stream.AtEndOfStream()) { unsigned Code = Stream.ReadCode(); if (Code == llvm::bitc::END_BLOCK) { - // If we saw the preprocessor block, read it now. 
- if (PreprocessorBlockBit) { - uint64_t SavedPos = Stream.GetCurrentBitNo(); - Stream.JumpToBit(PreprocessorBlockBit); - if (ReadPreprocessorBlock()) { - Error("Malformed preprocessor block"); - return Failure; - } - Stream.JumpToBit(SavedPos); - } - if (Stream.ReadBlockEnd()) { Error("Error at end of module block"); return Failure; @@ -1360,11 +1455,11 @@ PCHReader::PCHReadResult PCHReader::ReadPCHBlock() { case pch::PREPROCESSOR_BLOCK_ID: // Skip the preprocessor block for now, but remember where it is. We // want to read it in after the identifier table. - if (PreprocessorBlockBit) { + if (PreprocessorBlockOffset) { Error("Multiple preprocessor blocks found."); return Failure; } - PreprocessorBlockBit = Stream.GetCurrentBitNo(); + PreprocessorBlockOffset = Stream.GetCurrentBitNo(); if (Stream.SkipBlock()) { Error("Malformed block record"); return Failure; @@ -1437,7 +1532,15 @@ PCHReader::PCHReadResult PCHReader::ReadPCHBlock() { } case pch::IDENTIFIER_TABLE: - IdentifierTable = BlobStart; + IdentifierTableData = BlobStart; + IdentifierLookupTable + = PCHIdentifierLookupTable::Create( + (const unsigned char *)IdentifierTableData + Record[0], + (const unsigned char *)IdentifierTableData, + PCHIdentifierLookupTrait(*this)); + // FIXME: What about any identifiers already placed into the + // identifier table? Should we load decls with those names now? + PP.getIdentifierTable().setExternalIdentifierLookup(this); break; case pch::IDENTIFIER_OFFSET: @@ -1479,6 +1582,23 @@ PCHReader::PCHReadResult PCHReader::ReadPCHBlock() { return Failure; } +namespace { + /// \brief Helper class that saves the current stream position and + /// then restores it when destroyed. 
+ struct VISIBILITY_HIDDEN SavedStreamPosition { + explicit SavedStreamPosition(llvm::BitstreamReader &Stream) + : Stream(Stream), Offset(Stream.GetCurrentBitNo()) { } + + ~SavedStreamPosition() { + Stream.JumpToBit(Offset); + } + + private: + llvm::BitstreamReader &Stream; + uint64_t Offset; + }; +} + PCHReader::PCHReadResult PCHReader::ReadPCH(const std::string &FileName) { // Set the PCH file name. this->FileName = FileName; @@ -1506,6 +1626,7 @@ PCHReader::PCHReadResult PCHReader::ReadPCH(const std::string &FileName) { // We expect a number of well-defined blocks, though we don't necessarily // need to understand them all. + uint64_t PreprocessorBlockOffset = 0; while (!Stream.AtEndOfStream()) { unsigned Code = Stream.ReadCode(); @@ -1515,7 +1636,7 @@ PCHReader::PCHReadResult PCHReader::ReadPCH(const std::string &FileName) { } unsigned BlockID = Stream.ReadSubBlockID(); - + // We only know the PCH subblock ID. switch (BlockID) { case llvm::bitc::BLOCKINFO_BLOCK_ID: @@ -1525,7 +1646,7 @@ PCHReader::PCHReadResult PCHReader::ReadPCH(const std::string &FileName) { } break; case pch::PCH_BLOCK_ID: - switch (ReadPCHBlock()) { + switch (ReadPCHBlock(PreprocessorBlockOffset)) { case Success: break; @@ -1551,28 +1672,54 @@ PCHReader::PCHReadResult PCHReader::ReadPCH(const std::string &FileName) { // Load the translation unit declaration ReadDeclRecord(DeclOffsets[0], 0); + // Initialization of builtins and library builtins occurs before the + // PCH file is read, so there may be some identifiers that were + // loaded into the IdentifierTable before we intercepted the + // creation of identifiers. Iterate through the list of known + // identifiers and determine whether we have to establish + // preprocessor definitions or top-level identifier declaration + // chains for those identifiers. 
+ // + // We copy the IdentifierInfo pointers to a small vector first, + // since de-serializing declarations or macro definitions can add + // new entries into the identifier table, invalidating the + // iterators. + llvm::SmallVector Identifiers; + for (IdentifierTable::iterator Id = PP.getIdentifierTable().begin(), + IdEnd = PP.getIdentifierTable().end(); + Id != IdEnd; ++Id) + Identifiers.push_back(Id->second); + PCHIdentifierLookupTable *IdTable + = (PCHIdentifierLookupTable *)IdentifierLookupTable; + for (unsigned I = 0, N = Identifiers.size(); I != N; ++I) { + IdentifierInfo *II = Identifiers[I]; + // Look in the on-disk hash table for an entry for this identifier. + PCHIdentifierLookupTrait Info(*this, II); + std::pair Key(II->getName(), II->getLength()); + PCHIdentifierLookupTable::iterator Pos = IdTable->find(Key, &Info); + if (Pos == IdTable->end()) + continue; + + // Dereferencing the iterator has the effect of populating the + // IdentifierInfo node with the various declarations it needs. + (void)*Pos; + } + // Load the special types. Context.setBuiltinVaListType( GetType(SpecialTypes[pch::SPECIAL_TYPE_BUILTIN_VA_LIST])); - return Success; -} - -namespace { - /// \brief Helper class that saves the current stream position and - /// then restores it when destroyed. - struct VISIBILITY_HIDDEN SavedStreamPosition { - explicit SavedStreamPosition(llvm::BitstreamReader &Stream) - : Stream(Stream), Offset(Stream.GetCurrentBitNo()) { } - - ~SavedStreamPosition() { - Stream.JumpToBit(Offset); + // If we saw the preprocessor block, read it now. 
+ if (PreprocessorBlockOffset) { + SavedStreamPosition SavedPos(Stream); + Stream.JumpToBit(PreprocessorBlockOffset); + if (ReadPreprocessorBlock()) { + Error("Malformed preprocessor block"); + return Failure; } + } - private: - llvm::BitstreamReader &Stream; - uint64_t Offset; - }; + return Success; } /// \brief Parse the record that corresponds to a LangOptions data @@ -2021,7 +2168,7 @@ Decl *PCHReader::ReadDeclRecord(uint64_t Offset, unsigned Index) { } } - assert(D && "Unknown declaration creating PCH file"); + assert(D && "Unknown declaration reading PCH file"); if (D) { LoadedDecl(Index, D); Reader.Visit(D); @@ -2220,11 +2367,44 @@ void PCHReader::PrintStats() { std::fprintf(stderr, "\n"); } +void PCHReader::InitializeSema(Sema &S) { + SemaObj = &S; + + // FIXME: this makes sure any declarations that were deserialized + // "too early" still get added to the identifier's declaration + // chains. + for (unsigned I = 0, N = TUDecls.size(); I != N; ++I) { + SemaObj->TUScope->AddDecl(Action::DeclPtrTy::make(TUDecls[I])); + SemaObj->IdResolver.AddDecl(TUDecls[I]); + } + TUDecls.clear(); +} + +IdentifierInfo* PCHReader::get(const char *NameStart, const char *NameEnd) { + // Try to find this name within our on-disk hash table + PCHIdentifierLookupTable *IdTable + = (PCHIdentifierLookupTable *)IdentifierLookupTable; + std::pair Key(NameStart, NameEnd - NameStart); + PCHIdentifierLookupTable::iterator Pos = IdTable->find(Key); + if (Pos == IdTable->end()) + return 0; + + // Dereferencing the iterator has the effect of building the + // IdentifierInfo node and populating it with the various + // declarations it needs. 
+ return *Pos; +} + +void PCHReader::SetIdentifierInfo(unsigned ID, const IdentifierInfo *II) { + assert(ID && "Non-zero identifier ID required"); + IdentifierData[ID - 1] = reinterpret_cast(II); +} + IdentifierInfo *PCHReader::DecodeIdentifierInfo(unsigned ID) { if (ID == 0) return 0; - if (!IdentifierTable || IdentifierData.empty()) { + if (!IdentifierTableData || IdentifierData.empty()) { Error("No identifier table in PCH file"); return 0; } @@ -2232,8 +2412,7 @@ IdentifierInfo *PCHReader::DecodeIdentifierInfo(unsigned ID) { if (IdentifierData[ID - 1] & 0x01) { uint64_t Offset = IdentifierData[ID - 1] >> 1; IdentifierData[ID - 1] = reinterpret_cast( - &Context.Idents.get(IdentifierTable + Offset)); - // FIXME: also read the contents of the IdentifierInfo. + &Context.Idents.get(IdentifierTableData + Offset)); } return reinterpret_cast(IdentifierData[ID - 1]); @@ -2724,6 +2903,12 @@ DiagnosticBuilder PCHReader::Diag(SourceLocation Loc, unsigned DiagID) { DiagID); } +/// \brief Retrieve the identifier table associated with the +/// preprocessor. +IdentifierTable &PCHReader::getIdentifierTable() { + return PP.getIdentifierTable(); +} + /// \brief Record that the given ID maps to the given switch-case /// statement. void PCHReader::RecordSwitchCaseID(SwitchCase *SC, unsigned ID) { diff --git a/lib/Frontend/PCHWriter.cpp b/lib/Frontend/PCHWriter.cpp index 16eaf982fe..993ba9266f 100644 --- a/lib/Frontend/PCHWriter.cpp +++ b/lib/Frontend/PCHWriter.cpp @@ -1421,7 +1421,7 @@ void PCHWriter::WriteSourceManagerBlock(SourceManager &SourceMgr) { /// void PCHWriter::WritePreprocessor(const Preprocessor &PP) { // Enter the preprocessor block. - Stream.EnterSubblock(pch::PREPROCESSOR_BLOCK_ID, 3); + Stream.EnterSubblock(pch::PREPROCESSOR_BLOCK_ID, 2); // If the PCH file contains __DATE__ or __TIME__ emit a warning about this. // FIXME: use diagnostics subsystem for localization etc. 
@@ -1732,13 +1732,13 @@ public: pch::IdentID ID) { unsigned KeyLen = strlen(II->getName()) + 1; clang::io::Emit16(Out, KeyLen); - unsigned DataLen = 4 + 4 + 2; // 4 bytes for token ID, builtin, flags - // 4 bytes for the persistent ID - // 2 bytes for the length of the decl chain + unsigned DataLen = 4 + 4; // 4 bytes for token ID, builtin, flags + // 4 bytes for the persistent ID for (IdentifierResolver::iterator D = IdentifierResolver::begin(II), DEnd = IdentifierResolver::end(); D != DEnd; ++D) DataLen += sizeof(pch::DeclID); + clang::io::Emit16(Out, DataLen); return std::make_pair(KeyLen, DataLen); } @@ -1762,15 +1762,18 @@ public: clang::io::Emit32(Out, Bits); clang::io::Emit32(Out, ID); - llvm::SmallVector Decls; - for (IdentifierResolver::iterator D = IdentifierResolver::begin(II), - DEnd = IdentifierResolver::end(); + // Emit the declaration IDs in reverse order, because the + // IdentifierResolver provides the declarations as they would be + // visible (e.g., the function "stat" would come before the struct + // "stat"), but IdentifierResolver::AddDeclToIdentifierChain() + // adds declarations to the end of the list (so we need to see the + // struct "stat" before the function "stat"). + llvm::SmallVector Decls(IdentifierResolver::begin(II), + IdentifierResolver::end()); + for (llvm::SmallVector::reverse_iterator D = Decls.rbegin(), + DEnd = Decls.rend(); D != DEnd; ++D) - Decls.push_back(Writer.getDeclID(*D)); - - clang::io::Emit16(Out, Decls.size()); - for (unsigned I = 0; I < Decls.size(); ++I) - clang::io::Emit32(Out, Decls[I]); + clang::io::Emit32(Out, Writer.getDeclID(*D)); } }; } // end anonymous namespace @@ -1799,21 +1802,24 @@ void PCHWriter::WriteIdentifierTable() { // Create the on-disk hash table in a buffer. 
llvm::SmallVector IdentifierTable; + uint32_t BucketOffset; { PCHIdentifierTableTrait Trait(*this); llvm::raw_svector_ostream Out(IdentifierTable); - Generator.Emit(Out, Trait); + BucketOffset = Generator.Emit(Out, Trait); } // Create a blob abbreviation BitCodeAbbrev *Abbrev = new BitCodeAbbrev(); Abbrev->Add(BitCodeAbbrevOp(pch::IDENTIFIER_TABLE)); + Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Fixed, 32)); Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); unsigned IDTableAbbrev = Stream.EmitAbbrev(Abbrev); // Write the identifier table RecordData Record; Record.push_back(pch::IDENTIFIER_TABLE); + Record.push_back(BucketOffset); Stream.EmitRecordWithBlob(IDTableAbbrev, Record, &IdentifierTable.front(), IdentifierTable.size()); diff --git a/lib/Sema/IdentifierResolver.cpp b/lib/Sema/IdentifierResolver.cpp index c31435b677..ceab859c90 100644 --- a/lib/Sema/IdentifierResolver.cpp +++ b/lib/Sema/IdentifierResolver.cpp @@ -243,6 +243,28 @@ IdentifierResolver::begin(DeclarationName Name) { return end(); } +void IdentifierResolver::AddDeclToIdentifierChain(IdentifierInfo *II, + NamedDecl *D) { + void *Ptr = II->getFETokenInfo(); + + if (!Ptr) { + II->setFETokenInfo(D); + return; + } + + IdDeclInfo *IDI; + + if (isDeclPtr(Ptr)) { + II->setFETokenInfo(NULL); + IDI = &(*IdDeclInfos)[II]; + NamedDecl *PrevD = static_cast(Ptr); + IDI->AddDecl(PrevD); + } else + IDI = toIdDeclInfo(Ptr); + + IDI->AddDecl(D); +} + //===----------------------------------------------------------------------===// // IdDeclInfoMap Implementation //===----------------------------------------------------------------------===// diff --git a/lib/Sema/IdentifierResolver.h b/lib/Sema/IdentifierResolver.h index 1843f4ebca..0b0e6b388d 100644 --- a/lib/Sema/IdentifierResolver.h +++ b/lib/Sema/IdentifierResolver.h @@ -177,6 +177,14 @@ public: /// (and, therefore, replaced). 
bool ReplaceDecl(NamedDecl *Old, NamedDecl *New); + /// \brief Link the declaration into the chain of declarations for + /// the given identifier. + /// + /// This is a lower-level routine used by the PCH reader to link a + /// declaration into a specific IdentifierInfo before the + /// declaration actually has a name. + void AddDeclToIdentifierChain(IdentifierInfo *II, NamedDecl *D); + explicit IdentifierResolver(const LangOptions &LangOpt); ~IdentifierResolver(); diff --git a/lib/Sema/ParseAST.cpp b/lib/Sema/ParseAST.cpp index 448556092f..e2ee88ac86 100644 --- a/lib/Sema/ParseAST.cpp +++ b/lib/Sema/ParseAST.cpp @@ -14,6 +14,7 @@ #include "clang/Sema/ParseAST.h" #include "Sema.h" #include "clang/Sema/SemaConsumer.h" +#include "clang/Sema/ExternalSemaSource.h" #include "clang/AST/ASTConsumer.h" #include "clang/AST/ExternalASTSource.h" #include "clang/AST/Stmt.h" @@ -50,8 +51,13 @@ void clang::ParseAST(Preprocessor &PP, ASTConsumer *Consumer, if (SemaConsumer *SC = dyn_cast(Consumer)) SC->InitializeSema(S); - if (Ctx.getExternalSource()) - Ctx.getExternalSource()->StartTranslationUnit(Consumer); + if (ExternalASTSource *External = Ctx.getExternalSource()) { + if (ExternalSemaSource *ExternalSema = + dyn_cast(External)) + ExternalSema->InitializeSema(S); + + External->StartTranslationUnit(Consumer); + } Parser::DeclGroupPtrTy ADecl; diff --git a/lib/Sema/SemaLookup.cpp b/lib/Sema/SemaLookup.cpp index cd82507005..0b11d9cf68 100644 --- a/lib/Sema/SemaLookup.cpp +++ b/lib/Sema/SemaLookup.cpp @@ -878,17 +878,6 @@ Sema::LookupName(Scope *S, DeclarationName Name, LookupNameKind NameKind, // We have a single lookup result. return LookupResult::CreateLookupResult(Context, *I); } - - /// If the context has an external AST source attached, look at - /// translation unit scope. 
- if (Context.getExternalSource()) { - DeclContext::lookup_iterator I, E; - for (llvm::tie(I, E) - = Context.getTranslationUnitDecl()->lookup(Context, Name); - I != E; ++I) - if (isAcceptableLookupResult(*I, NameKind, IDNS)) - return LookupResult::CreateLookupResult(Context, I, E); - } } else { // Perform C++ unqualified name lookup. std::pair MaybeResult = diff --git a/test/PCH/builtins.c b/test/PCH/builtins.c new file mode 100644 index 0000000000..3d1786ba77 --- /dev/null +++ b/test/PCH/builtins.c @@ -0,0 +1,10 @@ +// Test this without pch. +// RUN: clang-cc -include %S/builtins.h -fsyntax-only -verify %s && + +// Test with pch. +// RUN: clang-cc -emit-pch -o %t %S/builtins.h && +// RUN: clang-cc -include-pch %t -fsyntax-only -verify %s + +void hello() { + printf("Hello, World!"); +} diff --git a/test/PCH/builtins.h b/test/PCH/builtins.h new file mode 100644 index 0000000000..56e4a53814 --- /dev/null +++ b/test/PCH/builtins.h @@ -0,0 +1,2 @@ +// Header for PCH test builtins.c +int printf(char const *, ...);