Write the identifier table into the PCH file as an on-disk hash table

that also includes the contents of the IdentifierInfo itself (the
various fields and flags, along with the chain of identifiers visible
at the top level that have that name).

We don't make any use of the hash table yet, except that our
identifier ID -> string mapping points into the hash table now.



git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@69625 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Douglas Gregor 2009-04-20 20:36:09 +00:00
Родитель 2e828f06e5
Коммит 3251ceb90b
5 изменённых файлов: 122 добавлений и 25 удалений

Просмотреть файл

@ -141,7 +141,7 @@ public:
return tok::objc_not_keyword;
}
/// setObjCKeywordID - Record the Objective-C keyword kind for this
/// identifier by storing it in the ObjCOrBuiltinID field. The previous
/// contents of that field are overwritten.
void setObjCKeywordID(tok::ObjCKeywordKind ID) { ObjCOrBuiltinID = ID; }
/// getBuiltinID - Return a value indicating whether this is a builtin
/// function. 0 is not-built-in. 1 is builtin-for-some-nonprimary-target.
/// 2+ are specific builtin functions.
@ -156,7 +156,10 @@ public:
assert(ObjCOrBuiltinID - unsigned(tok::NUM_OBJC_KEYWORDS) == ID
&& "ID too large for field!");
}
/// getObjCOrBuiltinID - Return the raw contents of the ObjCOrBuiltinID
/// field without interpreting it.
/// NOTE(review): the field appears to hold either an Objective-C keyword
/// kind or a builtin ID offset by tok::NUM_OBJC_KEYWORDS - confirm against
/// getBuiltinID/setBuiltinID.
unsigned getObjCOrBuiltinID() const { return ObjCOrBuiltinID; }
/// setObjCOrBuiltinID - Overwrite the raw ObjCOrBuiltinID field with ID.
/// No range check is performed here, so the caller must pass a value that
/// fits in the field.
void setObjCOrBuiltinID(unsigned ID) { ObjCOrBuiltinID = ID; }
/// get/setExtension - Initialize information about whether or not this
/// language token is an extension. This controls extension warnings, and is
/// only valid if a custom token ID is set.

Просмотреть файл

@ -124,7 +124,8 @@ private:
///
/// Each element in this array is either an offset into
/// IdentifierTable that contains the string data (if the lowest bit
/// is set) or is an IdentifierInfo* that has already been resolved.
/// is set, in which case the offset is shifted left by one) or is
/// an IdentifierInfo* that has already been resolved.
llvm::SmallVector<uint64_t, 16> IdentifierData;
/// \brief The set of external definitions stored in the PCH

Просмотреть файл

@ -94,6 +94,10 @@ private:
/// IdentifierInfo.
llvm::DenseMap<const IdentifierInfo *, pch::IdentID> IdentifierIDs;
/// \brief Offsets of each of the identifier IDs into the identifier
/// table, shifted left by one bit with the low bit set.
llvm::SmallVector<uint64_t, 16> IdentifierOffsets;
/// \brief Declarations encountered that might be external
/// definitions.
///
@ -162,14 +166,22 @@ public:
/// \brief Emit a reference to a declaration.
void AddDeclRef(const Decl *D, RecordData &Record);
/// \brief Determine the declaration ID of an already-emitted
/// declaration.
pch::DeclID getDeclID(const Decl *D);
/// \brief Emit a declaration name.
void AddDeclarationName(DeclarationName Name, RecordData &Record);
/// \brief Add a string to the given record.
void AddString(const std::string &Str, RecordData &Record);
/// \brief Add the given statement or expression to the queue of statements to
/// emit.
/// \brief Note that the identifier II occurs at the given offset
/// within the identifier table.
void SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset);
/// \brief Add the given statement or expression to the queue of
/// statements to emit.
///
/// This routine should be used when emitting types and declarations
/// that have expressions as part of their formulation. Once the

Просмотреть файл

@ -2089,9 +2089,10 @@ IdentifierInfo *PCHReader::DecodeIdentifierInfo(unsigned ID) {
}
if (IdentifierData[ID - 1] & 0x01) {
uint64_t Offset = IdentifierData[ID - 1];
uint64_t Offset = IdentifierData[ID - 1] >> 1;
IdentifierData[ID - 1] = reinterpret_cast<uint64_t>(
&Context.Idents.get(IdentifierTable + Offset));
&Context.Idents.get(IdentifierTable + Offset));
// FIXME: also read the contents of the IdentifierInfo.
}
return reinterpret_cast<IdentifierInfo *>(IdentifierData[ID - 1]);

Просмотреть файл

@ -13,6 +13,7 @@
#include "clang/Frontend/PCHWriter.h"
#include "../Sema/Sema.h" // FIXME: move header into include/clang/Sema
#include "../Sema/IdentifierResolver.h" // FIXME: move header
#include "clang/AST/ASTContext.h"
#include "clang/AST/Decl.h"
#include "clang/AST/DeclContextInternals.h"
@ -23,6 +24,7 @@
#include "clang/Lex/MacroInfo.h"
#include "clang/Lex/Preprocessor.h"
#include "clang/Basic/FileManager.h"
#include "clang/Basic/OnDiskHashTable.h"
#include "clang/Basic/SourceManager.h"
#include "clang/Basic/SourceManagerInternals.h"
#include "clang/Basic/TargetInfo.h"
@ -1616,6 +1618,71 @@ void PCHWriter::WriteDeclsBlock(ASTContext &Context) {
Stream.ExitBlock();
}
namespace {
/// \brief Trait class that tells OnDiskChainedHashTableGenerator how to
/// hash and serialize the entries of the PCH identifier table.
///
/// The key of an entry is an IdentifierInfo pointer and its data is the
/// identifier's persistent ID. On disk, each entry consists of:
///   - a 16-bit key length followed by a 16-bit data length,
///   - the NUL-terminated spelling of the identifier (the key),
///   - 32 bits of packed IdentifierInfo fields and flags,
///   - the 32-bit persistent identifier ID,
///   - a 16-bit declaration count followed by one 32-bit declaration ID
///     for each declaration IdentifierResolver reports for the name.
class VISIBILITY_HIDDEN PCHIdentifierTableTrait {
  PCHWriter &Writer;

public:
  typedef const IdentifierInfo* key_type;
  typedef key_type key_type_ref;

  typedef pch::IdentID data_type;
  typedef data_type data_type_ref;

  PCHIdentifierTableTrait(PCHWriter &Writer) : Writer(Writer) { }

  /// \brief Hash an identifier by its spelling.
  static unsigned ComputeHash(const IdentifierInfo* II) {
    return clang::BernsteinHash(II->getName());
  }

  /// \brief Compute the sizes of the key and data of an entry and write
  /// both to \p Out as 16-bit values (key length first).
  ///
  /// \returns the (key length, data length) pair in bytes. The key length
  /// includes the terminating NUL character.
  static std::pair<unsigned,unsigned>
  EmitKeyDataLength(llvm::raw_ostream& Out, const IdentifierInfo* II,
                    pch::IdentID ID) {
    unsigned KeyLen = strlen(II->getName()) + 1;
    clang::io::Emit16(Out, KeyLen);

    unsigned DataLen = 4 + 4 + 2; // 4 bytes for token ID, builtin, flags
                                  // 4 bytes for the persistent ID
                                  // 2 bytes for the length of the decl chain
    for (IdentifierResolver::iterator D = IdentifierResolver::begin(II),
                                   DEnd = IdentifierResolver::end();
         D != DEnd; ++D)
      DataLen += sizeof(pch::DeclID);

    // The data length has to be written as well: it varies with the
    // number of declarations, so without it a reader cannot tell where
    // the data of this entry ends.
    clang::io::Emit16(Out, DataLen);
    return std::make_pair(KeyLen, DataLen);
  }

  /// \brief Write the identifier's spelling, including the trailing NUL.
  void EmitKey(llvm::raw_ostream& Out, const IdentifierInfo* II,
               unsigned KeyLen) {
    // Record the location of the key data. This is used when generating
    // the mapping from persistent IDs to strings.
    Writer.SetIdentifierOffset(II, Out.tell());
    Out.write(II->getName(), KeyLen);
  }

  /// \brief Write the contents of the IdentifierInfo, its persistent ID
  /// and the IDs of the declarations found for it.
  void EmitData(llvm::raw_ostream& Out, const IdentifierInfo* II,
                pch::IdentID ID, unsigned) {
    // Pack the fields from the most significant end down:
    //   token ID | ObjC-or-builtin ID | macro | extension | poisoned |
    //   C++ operator keyword.
    // Each shift makes room for the field that is OR'ed in on the same
    // line, so the wide shift belongs to ObjCOrBuiltinID and the macro
    // flag needs a single bit.
    // NOTE(review): assumes ObjCOrBuiltinID fits in 10 bits - confirm
    // against the bit-field width declared in IdentifierInfo.
    uint32_t Bits = 0;
    Bits = Bits | static_cast<uint32_t>(II->getTokenID());
    Bits = (Bits << 10) | static_cast<uint32_t>(II->getObjCOrBuiltinID());
    Bits = (Bits << 1) | II->hasMacroDefinition();
    Bits = (Bits << 1) | II->isExtensionToken();
    Bits = (Bits << 1) | II->isPoisoned();
    Bits = (Bits << 1) | II->isCPlusPlusOperatorKeyword();
    clang::io::Emit32(Out, Bits);
    clang::io::Emit32(Out, ID);

    // Collect the declaration IDs first so that their count can be
    // written ahead of them.
    llvm::SmallVector<pch::DeclID, 8> Decls;
    for (IdentifierResolver::iterator D = IdentifierResolver::begin(II),
                                   DEnd = IdentifierResolver::end();
         D != DEnd; ++D)
      Decls.push_back(Writer.getDeclID(*D));

    clang::io::Emit16(Out, Decls.size());
    for (unsigned I = 0; I < Decls.size(); ++I)
      clang::io::Emit32(Out, Decls[I]);
  }
};
} // end anonymous namespace
/// \brief Write the identifier table into the PCH file.
///
/// The identifier table consists of a blob containing string data
@ -1626,43 +1693,42 @@ void PCHWriter::WriteIdentifierTable() {
// Create and write out the blob that contains the identifier
// strings.
RecordData IdentOffsets;
IdentOffsets.resize(IdentifierIDs.size());
IdentifierOffsets.resize(IdentifierIDs.size());
{
// Create the identifier string data.
std::vector<char> Data;
Data.push_back(0); // Data must not be empty.
OnDiskChainedHashTableGenerator<PCHIdentifierTableTrait> Generator;
// Create the on-disk hash table representation.
for (llvm::DenseMap<const IdentifierInfo *, pch::IdentID>::iterator
ID = IdentifierIDs.begin(), IDEnd = IdentifierIDs.end();
ID != IDEnd; ++ID) {
assert(ID->first && "NULL identifier in identifier table");
Generator.insert(ID->first, ID->second);
}
// Make sure we're starting on an odd byte. The PCH reader
// expects the low bit to be set on all of the offsets.
if ((Data.size() & 0x01) == 0)
Data.push_back((char)0);
IdentOffsets[ID->second - 1] = Data.size();
Data.insert(Data.end(),
ID->first->getName(),
ID->first->getName() + ID->first->getLength());
Data.push_back((char)0);
// Create the on-disk hash table in a buffer.
llvm::SmallVector<char, 4096> IdentifierTable;
{
PCHIdentifierTableTrait Trait(*this);
llvm::raw_svector_ostream Out(IdentifierTable);
Generator.Emit(Out, Trait);
}
// Create a blob abbreviation
BitCodeAbbrev *Abbrev = new BitCodeAbbrev();
Abbrev->Add(BitCodeAbbrevOp(pch::IDENTIFIER_TABLE));
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob)); // Triple name
Abbrev->Add(BitCodeAbbrevOp(BitCodeAbbrevOp::Blob));
unsigned IDTableAbbrev = Stream.EmitAbbrev(Abbrev);
// Write the identifier table
RecordData Record;
Record.push_back(pch::IDENTIFIER_TABLE);
Stream.EmitRecordWithBlob(IDTableAbbrev, Record, &Data.front(), Data.size());
Stream.EmitRecordWithBlob(IDTableAbbrev, Record,
&IdentifierTable.front(),
IdentifierTable.size());
}
// Write the offsets table for identifier IDs.
Stream.EmitRecord(pch::IDENTIFIER_OFFSET, IdentOffsets);
Stream.EmitRecord(pch::IDENTIFIER_OFFSET, IdentifierOffsets);
}
/// \brief Write a record containing the given attributes.
@ -1791,6 +1857,12 @@ void PCHWriter::AddString(const std::string &Str, RecordData &Record) {
Record.insert(Record.end(), Str.begin(), Str.end());
}
/// \brief Note that the identifier II occurs at the given offset
/// within the identifier table.
void PCHWriter::SetIdentifierOffset(const IdentifierInfo *II, uint32_t Offset) {
IdentifierOffsets[IdentifierIDs[II] - 1] = (Offset << 1) | 0x01;
}
/// \brief Construct a PCH writer that is bound to the given bitstream
/// writer.
///
/// NextTypeID is initialized to pch::NUM_PREDEF_TYPE_IDS and
/// NumStatements to zero.
PCHWriter::PCHWriter(llvm::BitstreamWriter &Stream)
: Stream(Stream), NextTypeID(pch::NUM_PREDEF_TYPE_IDS), NumStatements(0) { }
@ -1930,6 +2002,14 @@ void PCHWriter::AddDeclRef(const Decl *D, RecordData &Record) {
Record.push_back(ID);
}
/// \brief Look up the ID that was assigned to a declaration.
///
/// A null declaration yields ID 0. Any other declaration is expected to
/// have an entry in DeclIDs already; this is checked by an assertion.
pch::DeclID PCHWriter::getDeclID(const Decl *D) {
  if (!D)
    return 0;

  assert(DeclIDs.find(D) != DeclIDs.end() && "Declaration not emitted!");
  const pch::DeclID EmittedID = DeclIDs[D];
  return EmittedID;
}
void PCHWriter::AddDeclarationName(DeclarationName Name, RecordData &Record) {
Record.push_back(Name.getNameKind());
switch (Name.getNameKind()) {