зеркало из https://github.com/microsoft/clang-1.git
Add a new RewriteRope data structure, which is a smarter way to represent the text
backing a rewrite buffer than using a std::vector<char>. This class was hacked together very quickly and needs to be cleaned up, but it seems to work. It speeds up rewriting a 7M file from 6.43s to 0.24s on my machine. The impl could also be made to be a lot more algorithmically sound. This produces identical output to using vector on this testcase; if it causes problems or bugs are encountered, it can be disabled by changing the RewriteBuffer::Buffer typedef back. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@43884 91177308-0d34-0410-b5e6-96231b3b80d8
This commit is contained in:
Родитель
cdd808e2af
Коммит
8b0c2f659d
|
@ -120,6 +120,11 @@ void RewriteBuffer::ReplaceText(unsigned OrigOffset, unsigned OrigLength,
|
|||
unsigned RealOffset = getMappedOffset(OrigOffset, true);
|
||||
assert(RealOffset+OrigLength <= Buffer.size() && "Invalid location");
|
||||
|
||||
Buffer.erase(Buffer.begin()+RealOffset, Buffer.begin()+RealOffset+OrigLength);
|
||||
Buffer.insert(Buffer.begin()+RealOffset, NewStr, NewStr+NewLength);
|
||||
AddDelta(OrigOffset, NewLength-OrigLength);
|
||||
return;
|
||||
|
||||
// Overwrite the common piece.
|
||||
unsigned CommonLength = std::min(OrigLength, NewLength);
|
||||
std::copy(NewStr, NewStr+CommonLength, Buffer.begin()+RealOffset);
|
||||
|
|
|
@ -291,6 +291,7 @@
|
|||
DE46BF270AE0A82D00CC047C /* TargetInfo.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; path = TargetInfo.h; sourceTree = "<group>"; };
|
||||
DE4772F90C10EAE5002239E8 /* CGStmt.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = CGStmt.cpp; path = CodeGen/CGStmt.cpp; sourceTree = "<group>"; };
|
||||
DE4772FB0C10EAEC002239E8 /* CGExpr.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = CGExpr.cpp; path = CodeGen/CGExpr.cpp; sourceTree = "<group>"; };
|
||||
DE53370B0CE2D96F00D9A028 /* RewriteRope.h */ = {isa = PBXFileReference; fileEncoding = 4; lastKnownFileType = sourcecode.c.h; name = RewriteRope.h; path = clang/Rewrite/RewriteRope.h; sourceTree = "<group>"; };
|
||||
DE5932CD0AD60FF400BC794C /* clang.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = clang.cpp; path = Driver/clang.cpp; sourceTree = "<group>"; };
|
||||
DE5932CE0AD60FF400BC794C /* clang.h */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.c.h; name = clang.h; path = Driver/clang.h; sourceTree = "<group>"; };
|
||||
DE5932CF0AD60FF400BC794C /* PrintParserCallbacks.cpp */ = {isa = PBXFileReference; fileEncoding = 30; lastKnownFileType = sourcecode.cpp.cpp; name = PrintParserCallbacks.cpp; path = Driver/PrintParserCallbacks.cpp; sourceTree = "<group>"; };
|
||||
|
@ -723,6 +724,7 @@
|
|||
isa = PBXGroup;
|
||||
children = (
|
||||
DEF7D9F60C9C8B1A0001F598 /* Rewriter.h */,
|
||||
DE53370B0CE2D96F00D9A028 /* RewriteRope.h */,
|
||||
);
|
||||
name = Rewrite;
|
||||
sourceTree = "<group>";
|
||||
|
|
|
@ -0,0 +1,302 @@
|
|||
//===--- RewriteRope.h - Rope specialized for rewriter ----------*- C++ -*-===//
|
||||
//
|
||||
// The LLVM Compiler Infrastructure
|
||||
//
|
||||
// This file was developed by Chris Lattner and is distributed under
|
||||
// the University of Illinois Open Source License. See LICENSE.TXT for details.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
//
|
||||
// This file defines the RewriteRope class, which is a powerful string class.
|
||||
//
|
||||
//===----------------------------------------------------------------------===//
|
||||
|
||||
#ifndef LLVM_CLANG_REWRITEROPE_H
|
||||
#define LLVM_CLANG_REWRITEROPE_H
|
||||
|
||||
#include "llvm/ADT/iterator"
#include <cassert>
#include <cstddef>
#include <cstring>
#include <vector>
|
||||
|
||||
|
||||
namespace clang {
|
||||
|
||||
/// RopeRefCountString - Header of a reference-counted character buffer.  The
/// object is over-allocated so that Data extends beyond its single declared
/// element.  RopePiece releases it with delete[] through a char*, so it has to
/// be allocated as an array of char.
struct RopeRefCountString {
  unsigned RefCount;
  char Data[1];  // Variable sized.
};

/// RopePiece - A view of the half-open range [StartOffs, EndOffs) of a
/// RopeRefCountString.  Every live RopePiece holds exactly one reference on its
/// string; the string is freed when the last reference is dropped.
struct RopePiece {
  RopeRefCountString *StrData;
  unsigned StartOffs;
  unsigned EndOffs;

  /// Create a piece covering [Start, End) of Str and take a reference on Str.
  RopePiece(RopeRefCountString *Str, unsigned Start, unsigned End)
    : StrData(Str), StartOffs(Start), EndOffs(End) {
    StrData->RefCount++;
  }

  /// Copying a piece shares the string, so the copy takes its own reference.
  /// The implicitly generated copy would leave two pieces owning a single
  /// reference and the string would be released twice.
  RopePiece(const RopePiece &RHS)
    : StrData(RHS.StrData), StartOffs(RHS.StartOffs), EndOffs(RHS.EndOffs) {
    StrData->RefCount++;
  }

  /// Rebind this piece to the range described by RHS.
  RopePiece &operator=(const RopePiece &RHS) {
    // Nothing to do to the reference counts when both pieces already share the
    // same string (this also covers self-assignment).
    if (StrData != RHS.StrData) {
      // Take the new reference before dropping the old one.
      RHS.StrData->RefCount++;
      if (--StrData->RefCount == 0)
        delete [] reinterpret_cast<char*>(StrData);
      StrData = RHS.StrData;
    }
    StartOffs = RHS.StartOffs;
    EndOffs = RHS.EndOffs;
    return *this;
  }

  /// Drop this piece's reference, freeing the string if it was the last one.
  ~RopePiece() {
    if (--StrData->RefCount == 0)
      delete [] reinterpret_cast<char*>(StrData);
  }

  /// Character access; Offset is relative to the start of this piece.
  const char &operator[](unsigned Offset) const {
    return StrData->Data[Offset+StartOffs];
  }
  char &operator[](unsigned Offset) {
    return StrData->Data[Offset+StartOffs];
  }

  /// Number of characters covered by this piece.
  unsigned size() const { return EndOffs-StartOffs; }
};
|
||||
|
||||
class RewriteRope;
|
||||
|
||||
template <typename CharType, typename PieceType>
|
||||
class RewriteRopeIterator :
|
||||
public std::iterator<std::random_access_iterator_tag, CharType, ptrdiff_t> {
|
||||
PieceType *CurPiece;
|
||||
unsigned CurChar;
|
||||
friend class RewriteRope;
|
||||
public:
|
||||
RewriteRopeIterator(PieceType *curPiece, unsigned curChar)
|
||||
: CurPiece(curPiece), CurChar(curChar) {}
|
||||
|
||||
CharType &operator*() const {
|
||||
return (**CurPiece)[CurChar];
|
||||
}
|
||||
|
||||
bool operator==(const RewriteRopeIterator &RHS) const {
|
||||
return CurPiece == RHS.CurPiece && CurChar == RHS.CurChar;
|
||||
}
|
||||
bool operator!=(const RewriteRopeIterator &RHS) const {
|
||||
return !operator==(RHS);
|
||||
}
|
||||
|
||||
inline RewriteRopeIterator& operator++() { // Preincrement
|
||||
if (CurChar+1 < (*CurPiece)->size())
|
||||
++CurChar;
|
||||
else {
|
||||
CurChar = 0;
|
||||
++CurPiece;
|
||||
}
|
||||
return *this;
|
||||
}
|
||||
|
||||
inline RewriteRopeIterator operator++(int) { // Postincrement
|
||||
RewriteRopeIterator tmp = *this; ++*this; return tmp;
|
||||
}
|
||||
|
||||
RewriteRopeIterator operator+(int Offset) const {
|
||||
assert(Offset >= 0 && "FIXME: Only handle forward case so far!");
|
||||
|
||||
PieceType *Piece = CurPiece;
|
||||
unsigned Char = CurChar;
|
||||
while (Char+Offset >= (*Piece)->size()) {
|
||||
Offset -= (*Piece)->size()-Char;
|
||||
++Piece;
|
||||
Char = 0;
|
||||
}
|
||||
Char += Offset;
|
||||
return RewriteRopeIterator(Piece, Char);
|
||||
}
|
||||
|
||||
ptrdiff_t operator-(const RewriteRopeIterator &RHS) const {
|
||||
if (CurPiece < RHS.CurPiece ||
|
||||
(CurPiece == RHS.CurPiece && CurChar < RHS.CurChar))
|
||||
return -RHS.operator-(*this);
|
||||
|
||||
PieceType *Piece = RHS.CurPiece;
|
||||
unsigned Char = RHS.CurChar;
|
||||
|
||||
unsigned Offset = 0;
|
||||
while (Piece != CurPiece) {
|
||||
Offset += (*Piece)->size()-Char;
|
||||
Char = 0;
|
||||
++Piece;
|
||||
}
|
||||
|
||||
return Offset + CurChar-Char;
|
||||
}
|
||||
};
|
||||
|
||||
|
||||
|
||||
/// RewriteRope - A powerful string class, todo generalize this.
|
||||
class RewriteRope {
|
||||
std::vector<RopePiece*> Chunks;
|
||||
unsigned CurSize;
|
||||
public:
|
||||
RewriteRope() : CurSize(0) {}
|
||||
~RewriteRope() { clear(); }
|
||||
|
||||
typedef RewriteRopeIterator<char, RopePiece*> iterator;
|
||||
typedef RewriteRopeIterator<const char, RopePiece* const> const_iterator;
|
||||
iterator begin() {
|
||||
if (Chunks.empty()) return iterator(0,0);
|
||||
return iterator(&Chunks[0], 0);
|
||||
}
|
||||
iterator end() {
|
||||
if (Chunks.empty()) return iterator(0,0);
|
||||
return iterator(&Chunks[0]+Chunks.size(), 0);
|
||||
}
|
||||
|
||||
const_iterator begin() const {
|
||||
if (Chunks.empty()) return const_iterator(0,0);
|
||||
return const_iterator(&Chunks[0], 0);
|
||||
}
|
||||
const_iterator end() const {
|
||||
if (Chunks.empty()) return const_iterator(0,0);
|
||||
return const_iterator(&Chunks[0]+Chunks.size(), 0);
|
||||
}
|
||||
|
||||
|
||||
unsigned size() const { return CurSize; }
|
||||
|
||||
void clear() {
|
||||
for (unsigned i = 0, e = Chunks.size(); i != e; ++i)
|
||||
delete Chunks[i];
|
||||
Chunks.clear();
|
||||
CurSize = 0;
|
||||
}
|
||||
|
||||
void assign(const char *Start, const char *End) {
|
||||
clear();
|
||||
Chunks.push_back(new RopePiece(MakeRopeString(Start, End), 0,
|
||||
End-Start));
|
||||
CurSize = End-Start;
|
||||
}
|
||||
|
||||
void insert(iterator Loc, const char *Start, const char *End) {
|
||||
if (Start == End) return;
|
||||
|
||||
unsigned ChunkNo = SplitAt(Loc);
|
||||
|
||||
RopeRefCountString *Str = MakeRopeString(Start, End);
|
||||
Chunks.insert(Chunks.begin()+ChunkNo, new RopePiece(Str, 0, End-Start));
|
||||
CurSize += End-Start;
|
||||
}
|
||||
|
||||
void erase(iterator Start, iterator End) {
|
||||
if (Start == End) return;
|
||||
|
||||
unsigned StartChunkIdx = getChunkIdx(Start);
|
||||
unsigned EndChunkIdx = getChunkIdx(End);
|
||||
|
||||
// If erase is localized within the same chunk, this is a degenerate case.
|
||||
if (StartChunkIdx == EndChunkIdx) {
|
||||
RopePiece *Chunk = Chunks[StartChunkIdx];
|
||||
unsigned NumDel = End.CurChar-Start.CurChar;
|
||||
CurSize -= NumDel;
|
||||
|
||||
// If deleting from start of chunk, just adjust range.
|
||||
if (Start.CurChar == 0) {
|
||||
if (Chunk->EndOffs != End.CurChar) {
|
||||
Chunk->StartOffs += NumDel;
|
||||
} else {
|
||||
// Deleting entire chunk, remove it.
|
||||
delete Chunk;
|
||||
Chunks.erase(Chunks.begin()+StartChunkIdx);
|
||||
}
|
||||
return;
|
||||
}
|
||||
|
||||
// If deleting to the end of chunk, just adjust range.
|
||||
if (End.CurChar == Chunk->size()) {
|
||||
Chunk->EndOffs -= NumDel;
|
||||
return;
|
||||
}
|
||||
|
||||
// If deleting the middle of a chunk, split this chunk and adjust the end
|
||||
// piece.
|
||||
unsigned NewIdx = SplitAt(Start);
|
||||
Chunk = Chunks[NewIdx];
|
||||
Chunk->StartOffs += End.CurChar-Start.CurChar;
|
||||
|
||||
return;
|
||||
}
|
||||
|
||||
|
||||
// Otherwise, the start chunk and the end chunk are different.
|
||||
|
||||
// Delete the end of the start chunk. If it is the whole thing, remove it.
|
||||
{
|
||||
RopePiece *StartChunk = Chunks[StartChunkIdx];
|
||||
unsigned NumDel = StartChunk->size()-Start.CurChar;
|
||||
CurSize -= NumDel;
|
||||
if (Start.CurChar == 0) {
|
||||
// Delete the whole chunk.
|
||||
delete StartChunk;
|
||||
Chunks.erase(Chunks.begin()+StartChunkIdx);
|
||||
--EndChunkIdx;
|
||||
} else {
|
||||
// Otherwise, just move the end of chunk marker up.
|
||||
StartChunk->EndOffs -= NumDel;
|
||||
++StartChunkIdx;
|
||||
}
|
||||
}
|
||||
|
||||
// If deleting a span of chunks, nuke them all now.
|
||||
while (StartChunkIdx != EndChunkIdx) {
|
||||
CurSize -= Chunks[StartChunkIdx]->size();
|
||||
delete Chunks[StartChunkIdx];
|
||||
Chunks.erase(Chunks.begin()+StartChunkIdx);
|
||||
--EndChunkIdx;
|
||||
}
|
||||
|
||||
// Finally, erase the start of the end chunk if appropriate.
|
||||
if (End.CurChar != 0) {
|
||||
RopePiece *EndChunk = Chunks[EndChunkIdx];
|
||||
EndChunk->StartOffs += End.CurChar;
|
||||
CurSize -= End.CurChar;
|
||||
}
|
||||
}
|
||||
|
||||
private:
|
||||
RopeRefCountString *MakeRopeString(const char *Start, const char *End) {
|
||||
unsigned Size = End-Start+sizeof(RopeRefCountString)-1;
|
||||
RopeRefCountString *Res =
|
||||
reinterpret_cast<RopeRefCountString *>(new char[Size]);
|
||||
Res->RefCount = 0;
|
||||
memcpy(Res->Data, Start, End-Start);
|
||||
return Res;
|
||||
}
|
||||
|
||||
unsigned getChunkIdx(iterator Loc) const {
|
||||
// Return the loc idx of the specified chunk, handling empty ropes.
|
||||
return Loc.CurPiece == 0 ? 0 : Loc.CurPiece - &Chunks[0];
|
||||
}
|
||||
|
||||
/// SplitAt - If the specified iterator position has a non-zero character
|
||||
/// number, split the specified buffer up. This guarantees that the specified
|
||||
/// iterator is at the start of a chunk. Return the chunk it is at the start
|
||||
/// of.
|
||||
unsigned SplitAt(iterator Loc) {
|
||||
unsigned ChunkIdx = getChunkIdx(Loc);
|
||||
|
||||
// If the specified position is at the start of a piece, return it.
|
||||
if (Loc.CurChar == 0)
|
||||
return ChunkIdx;
|
||||
|
||||
// Otherwise, we have to split the specified piece in half, inserting the
|
||||
// new piece into the vector of pieces.
|
||||
RopePiece *CurPiece = *Loc.CurPiece;
|
||||
|
||||
// Make a new piece for the prefix part.
|
||||
RopePiece *NewPiece = new RopePiece(CurPiece->StrData, CurPiece->StartOffs,
|
||||
CurPiece->StartOffs+Loc.CurChar);
|
||||
|
||||
// Make the current piece refer the suffix part.
|
||||
CurPiece->StartOffs += Loc.CurChar;
|
||||
|
||||
// Insert the new piece.
|
||||
Chunks.insert(Chunks.begin()+ChunkIdx, NewPiece);
|
||||
|
||||
// Return the old chunk, which is the suffix.
|
||||
return ChunkIdx+1;
|
||||
}
|
||||
};
|
||||
|
||||
} // end namespace clang
|
||||
|
||||
#endif
|
|
@ -16,6 +16,7 @@
|
|||
#define LLVM_CLANG_REWRITER_H
|
||||
|
||||
#include "clang/Basic/SourceLocation.h"
|
||||
#include "clang/Rewrite/RewriteRope.h"
|
||||
#include <map>
|
||||
#include <vector>
|
||||
|
||||
|
@ -55,16 +56,14 @@ class RewriteBuffer {
|
|||
/// Buffer - This is the actual buffer itself. Note that using a vector or
|
||||
/// string is a horribly inefficient way to do this, we should use a rope
|
||||
/// instead.
|
||||
typedef std::vector<char> BufferTy;
|
||||
typedef RewriteRope BufferTy;
|
||||
//typedef std::vector<char> BufferTy;
|
||||
BufferTy Buffer;
|
||||
public:
|
||||
|
||||
|
||||
typedef BufferTy::const_iterator iterator;
|
||||
iterator begin() const { return Buffer.begin(); }
|
||||
iterator end() const { return Buffer.end(); }
|
||||
|
||||
|
||||
private: // Methods only usable by Rewriter.
|
||||
|
||||
/// Initialize - Start this rewrite buffer out with a copy of the unmodified
|
||||
|
|
Загрузка…
Ссылка в новой задаче