From febe719596ee68605944da5f2e03258e18e6df8c Mon Sep 17 00:00:00 2001 From: Chris Lattner Date: Mon, 14 Apr 2008 07:17:29 +0000 Subject: [PATCH] Change the RewriteRope::Chunks data structure from an std::list into a nice shiny B+ Tree variant. This fixes the last of the known algorithmic issues with the rewriter, allowing a significant speedup. For example, -emit-html on Ted's 500K .i file speeds up from 26.8s -> 0.64s in a debug build (41x!) and 5.475s -> 0.132s (41x!) in an optimized build. This code is functional but needs to be cleaned up, ifdefs removed, better commented, and moved to a .cpp file. I plan to do this tomorrow. git-svn-id: https://llvm.org/svn/llvm-project/cfe/trunk@49635 91177308-0d34-0410-b5e6-96231b3b80d8 --- include/clang/Rewrite/RewriteRope.h | 763 +++++++++++++++++++++++++++- lib/Rewrite/DeltaTree.cpp | 14 +- lib/Rewrite/Rewriter.cpp | 21 +- 3 files changed, 779 insertions(+), 19 deletions(-) diff --git a/include/clang/Rewrite/RewriteRope.h b/include/clang/Rewrite/RewriteRope.h index c10b13242c..5cc5b57b90 100644 --- a/include/clang/Rewrite/RewriteRope.h +++ b/include/clang/Rewrite/RewriteRope.h @@ -18,11 +18,24 @@ #include #include +#include "llvm/Support/Casting.h" + +//#define USE_ROPE_VECTOR + namespace clang { struct RopeRefCountString { unsigned RefCount; char Data[1]; // Variable sized. + + void addRef() { + if (this) ++RefCount; + } + + void dropRef() { + if (this && --RefCount == 0) + delete [] (char*)this; + } }; struct RopePiece { @@ -30,18 +43,29 @@ struct RopePiece { unsigned StartOffs; unsigned EndOffs; + RopePiece() : StrData(0), StartOffs(0), EndOffs(0) {} + RopePiece(RopeRefCountString *Str, unsigned Start, unsigned End) : StrData(Str), StartOffs(Start), EndOffs(End) { - ++StrData->RefCount; + StrData->addRef(); } RopePiece(const RopePiece &RP) : StrData(RP.StrData), StartOffs(RP.StartOffs), EndOffs(RP.EndOffs) { - ++StrData->RefCount; + StrData->addRef(); } ~RopePiece() { - if (--StrData->RefCount == 0) - delete [] (char*)StrData; + StrData->dropRef(); + } + + void operator=(const RopePiece &RHS) { + if (StrData != RHS.StrData) { + StrData->dropRef(); + StrData = RHS.StrData; + StrData->addRef(); + } + StartOffs = RHS.StartOffs; + EndOffs = RHS.EndOffs; } const char &operator[](unsigned Offset) const { @@ -53,8 +77,668 @@ struct RopePiece { unsigned size() const { return EndOffs-StartOffs; } }; + + + +#ifndef USE_ROPE_VECTOR + using llvm::dyn_cast; + using llvm::cast; -class RewriteRope; +/// This is an adapted B+ Tree, ... erases don't keep the tree balanced. + +class RopePieceBTreeNode; +struct InsertResult { + RopePieceBTreeNode *LHS, *RHS; +}; + +class RopePieceBTreeNode { +protected: + /// WidthFactor - This controls the number of K/V slots held in the BTree: + /// how wide it is. Each level of the BTree is guaranteed to have at least + /// 'WidthFactor' elements in it (either ropepieces or children), (except the + /// root, which may have less) and may have at most 2*WidthFactor elements. + enum { WidthFactor = 8 }; + + /// Size - This is the number of bytes of file this node (including any + /// potential children) covers. + unsigned Size; + + /// IsLeaf - True if this is an instance of RopePieceBTreeLeaf, false if it is + /// an instance of RopePieceBTreeInterior. + bool IsLeaf; + + RopePieceBTreeNode(bool isLeaf) : IsLeaf(isLeaf) {} + ~RopePieceBTreeNode() {} +public: + + bool isLeaf() const { return IsLeaf; } + unsigned size() const { return Size; } + + void Destroy(); + + /// split - Split the range containing the specified offset so that we are + /// guaranteed that there is a place to do an insertion at the specified + /// offset. The offset is relative, so "0" is the start of the node. This + /// returns true if the insertion could not be done in place, and returns + /// information in 'Res' about the piece that is percolated up. + bool split(unsigned Offset, InsertResult *Res); + + /// insert - Insert the specified ropepiece into this tree node at the + /// specified offset. The offset is relative, so "0" is the start of the + /// node. This returns true if the insertion could not be done in place, and + /// returns information in 'Res' about the piece that is percolated up. + bool insert(unsigned Offset, const RopePiece &R, InsertResult *Res); + + /// erase - Remove NumBytes from this node at the specified offset. We are + /// guaranteed that there is a split at Offset. + void erase(unsigned Offset, unsigned NumBytes); + + static inline bool classof(const RopePieceBTreeNode *) { return true; } + +}; + + + + +class RopePieceBTreeLeaf : public RopePieceBTreeNode { + /// NumPieces - This holds the number of rope pieces currently active in the + /// Pieces array. + unsigned char NumPieces; + + /// Pieces - This tracks the file chunks currently in this leaf. + /// + RopePiece Pieces[2*WidthFactor]; + + /// NextLeaf - This is a pointer to the next leaf in the tree, allowing + /// efficient in-order forward iteration of the tree without traversal. + const RopePieceBTreeLeaf *NextLeaf; +public: + RopePieceBTreeLeaf() : RopePieceBTreeNode(true), NextLeaf(0) {} + + bool isFull() const { return NumPieces == 2*WidthFactor; } + + /// clear - Remove all rope pieces from this leaf. + void clear() { + while (NumPieces) + Pieces[--NumPieces] = RopePiece(); + Size = 0; + } + + unsigned getNumPieces() const { return NumPieces; } + + const RopePiece &getPiece(unsigned i) const { + assert(i < getNumPieces() && "Invalid piece ID"); + return Pieces[i]; + } + + const RopePieceBTreeLeaf *getNextLeafInOrder() const { return NextLeaf; } + void setNextLeafInOrder(const RopePieceBTreeLeaf *NL) { NextLeaf = NL; } + + void FullRecomputeSizeLocally() { + Size = 0; + for (unsigned i = 0, e = getNumPieces(); i != e; ++i) + Size += getPiece(i).size(); + } + + /// split - Split the range containing the specified offset so that we are + /// guaranteed that there is a place to do an insertion at the specified + /// offset. The offset is relative, so "0" is the start of the node. This + /// returns true if the insertion could not be done in place, and returns + /// information in 'Res' about the piece that is percolated up. + bool split(unsigned Offset, InsertResult *Res); + + /// insert - Insert the specified ropepiece into this tree node at the + /// specified offset. The offset is relative, so "0" is the start of the + /// node. This returns true if the insertion could not be done in place, and + /// returns information in 'Res' about the piece that is percolated up. + bool insert(unsigned Offset, const RopePiece &R, InsertResult *Res); + + + /// erase - Remove NumBytes from this node at the specified offset. We are + /// guaranteed that there is a split at Offset. + void erase(unsigned Offset, unsigned NumBytes); + + static inline bool classof(const RopePieceBTreeLeaf *) { return true; } + static inline bool classof(const RopePieceBTreeNode *N) { + return N->isLeaf(); + } +}; + +/// split - Split the range containing the specified offset so that we are +/// guaranteed that there is a place to do an insertion at the specified +/// offset. The offset is relative, so "0" is the start of the node. This +/// returns true if the insertion could not be done in place, and returns +/// information in 'Res' about the piece that is percolated up. +inline bool RopePieceBTreeLeaf::split(unsigned Offset, InsertResult *Res) { + // Find the insertion point. We are guaranteed that there is a split at the + // specified offset so find it. + if (Offset == 0 || Offset == size()) { + // Fastpath for a common case. There is already a splitpoint at the end. + return false; + } + + // Find the piece that this offset lands in. + unsigned PieceOffs = 0; + unsigned i = 0; + while (Offset >= PieceOffs+Pieces[i].size()) { + PieceOffs += Pieces[i].size(); + ++i; + } + + // If there is already a split point at the specified offset, just return + // success. + if (PieceOffs == Offset) + return false; + + // Otherwise, we need to split piece 'i' at Offset-PieceOffs. Convert Offset + // to being Piece relative. + unsigned IntraPieceOffset = Offset-PieceOffs; + + // We do this by shrinking the RopePiece and then doing an insert of the tail. + RopePiece Tail(Pieces[i].StrData, Pieces[i].StartOffs+IntraPieceOffset, + Pieces[i].EndOffs); + Size -= Pieces[i].size(); + Pieces[i].EndOffs = Pieces[i].StartOffs+IntraPieceOffset; + Size += Pieces[i].size(); + + return insert(Offset, Tail, Res); +} + + +/// insert - Insert the specified RopePiece into this tree node at the +/// specified offset. The offset is relative, so "0" is the start of the +/// node. This returns true if the insertion could not be done in place, and +/// returns information in 'Res' about the piece that is percolated up. +inline bool RopePieceBTreeLeaf::insert(unsigned Offset, const RopePiece &R, + InsertResult *Res) { + // If this node is not full, insert the piece. + if (!isFull()) { + // Find the insertion point. We are guaranteed that there is a split at the + // specified offset so find it. + unsigned i = 0, e = getNumPieces(); + if (Offset == size()) { + // Fastpath for a common case. + i = e; + } else { + unsigned SlotOffs = 0; + for (; Offset > SlotOffs; ++i) + SlotOffs += getPiece(i).size(); + assert(SlotOffs == Offset && "Split didn't occur before insertion!"); + } + + // For an insertion into a non-full leaf node, just insert the value in + // its sorted position. This requires moving later values over. + for (; i != e; --e) + Pieces[e] = Pieces[e-1]; + Pieces[i] = R; + ++NumPieces; + Size += R.size(); + return false; + } + + // Otherwise, if this is leaf is full, split it in two halves. Since this + // node is full, it contains 2*WidthFactor values. We move the first + // 'WidthFactor' values to the LHS child (which we leave in this node) and + // move the last 'WidthFactor' values into the RHS child. + + // Create the new node. + RopePieceBTreeLeaf *NewNode = new RopePieceBTreeLeaf(); + + // Move over the last 'WidthFactor' values from here to NewNode. + std::copy(&Pieces[WidthFactor], &Pieces[2*WidthFactor], + &NewNode->Pieces[0]); + // Replace old pieces with null RopePieces to drop refcounts. + std::fill(&Pieces[WidthFactor], &Pieces[2*WidthFactor], RopePiece()); + + // Decrease the number of values in the two nodes. + NewNode->NumPieces = NumPieces = WidthFactor; + + // Recompute the two nodes' size. + NewNode->FullRecomputeSizeLocally(); + FullRecomputeSizeLocally(); + + // Update the list of leaves. + NewNode->setNextLeafInOrder(this->getNextLeafInOrder()); + this->setNextLeafInOrder(NewNode); + + assert(Res && "No result location specified"); + Res->LHS = this; + Res->RHS = NewNode; + + if (this->size() >= Offset) + this->insert(Offset, R, 0 /*can't fail*/); + else + NewNode->insert(Offset - this->size(), R, 0 /*can't fail*/); + return true; +} + +/// erase - Remove NumBytes from this node at the specified offset. We are +/// guaranteed that there is a split at Offset. +inline void RopePieceBTreeLeaf::erase(unsigned Offset, unsigned NumBytes) { + // Since we are guaranteed that there is a split at Offset, we start by + // finding the Piece that starts there. + unsigned PieceOffs = 0; + unsigned i = 0; + for (; Offset > PieceOffs; ++i) + PieceOffs += getPiece(i).size(); + assert(PieceOffs == Offset && "Split didn't occur before erase!"); + + unsigned StartPiece = i; + + // Figure out how many pieces completely cover 'NumBytes'. We want to remove + // all of them. + for (; Offset+NumBytes > PieceOffs+getPiece(i).size(); ++i) + PieceOffs += getPiece(i).size(); + + // If we exactly include the last one, include it in the region to delete. + if (Offset+NumBytes == PieceOffs+getPiece(i).size()) + PieceOffs += getPiece(i).size(), ++i; + + // If we completely cover some RopePieces, erase them now. + if (i != StartPiece) { + unsigned NumDeleted = i-StartPiece; + for (; i != getNumPieces(); ++i) + Pieces[i-NumDeleted] = Pieces[i]; + + // Drop references to dead rope pieces. + std::fill(&Pieces[getNumPieces()-NumDeleted], &Pieces[getNumPieces()], + RopePiece()); + NumPieces -= NumDeleted; + + unsigned CoverBytes = PieceOffs-Offset; + NumBytes -= CoverBytes; + Size -= CoverBytes; + } + + // If we completely removed some stuff, we could be done. + if (NumBytes == 0) return; + + // Okay, now might be erasing part of some Piece. If this is the case, then + // move the start point of the piece. + assert(getPiece(StartPiece).size() > NumBytes); + Pieces[StartPiece].StartOffs += NumBytes; + + // The size of this node just shrunk by NumBytes. + Size -= NumBytes; +} + +// Holds up to 2*WidthFactor children. +class RopePieceBTreeInterior : public RopePieceBTreeNode { + /// NumChildren - This holds the number of children currently active in the + /// Children array. + unsigned char NumChildren; + RopePieceBTreeNode *Children[2*WidthFactor]; +public: + RopePieceBTreeInterior() : RopePieceBTreeNode(false) {} + + RopePieceBTreeInterior(RopePieceBTreeNode *LHS, RopePieceBTreeNode *RHS) + : RopePieceBTreeNode(false) { + Children[0] = LHS; + Children[1] = RHS; + NumChildren = 2; + Size = LHS->size() + RHS->size(); + } + + bool isFull() const { return NumChildren == 2*WidthFactor; } + + unsigned getNumChildren() const { return NumChildren; } + const RopePieceBTreeNode *getChild(unsigned i) const { + assert(i < NumChildren && "invalid child #"); + return Children[i]; + } + RopePieceBTreeNode *getChild(unsigned i) { + assert(i < NumChildren && "invalid child #"); + return Children[i]; + } + + void FullRecomputeSizeLocally() { + Size = 0; + for (unsigned i = 0, e = getNumChildren(); i != e; ++i) + Size += getChild(i)->size(); + } + + + /// split - Split the range containing the specified offset so that we are + /// guaranteed that there is a place to do an insertion at the specified + /// offset. The offset is relative, so "0" is the start of the node. This + /// returns true if the insertion could not be done in place, and returns + /// information in 'Res' about the piece that is percolated up. + bool split(unsigned Offset, InsertResult *Res); + + + /// insert - Insert the specified ropepiece into this tree node at the + /// specified offset. The offset is relative, so "0" is the start of the + /// node. This returns true if the insertion could not be done in place, and + /// returns information in 'Res' about the piece that is percolated up. + bool insert(unsigned Offset, const RopePiece &R, InsertResult *Res); + + /// HandleChildPiece - A child propagated an insertion result up to us. + /// Insert the new child, and/or propagate the result further up the tree. + bool HandleChildPiece(unsigned i, InsertResult &Res); + + /// erase - Remove NumBytes from this node at the specified offset. We are + /// guaranteed that there is a split at Offset. + void erase(unsigned Offset, unsigned NumBytes); + + static inline bool classof(const RopePieceBTreeInterior *) { return true; } + static inline bool classof(const RopePieceBTreeNode *N) { + return !N->isLeaf(); + } +}; + +/// split - Split the range containing the specified offset so that we are +/// guaranteed that there is a place to do an insertion at the specified +/// offset. The offset is relative, so "0" is the start of the node. This +/// returns true if the insertion could not be done in place, and returns +/// information in 'Res' about the piece that is percolated up. +inline bool RopePieceBTreeInterior::split(unsigned Offset, InsertResult *Res) { + // Figure out which child to split. + if (Offset == 0 || Offset == size()) + return false; // If we have an exact offset, we're already split. + + unsigned ChildOffset = 0; + unsigned i = 0; + for (; Offset >= ChildOffset+getChild(i)->size(); ++i) + ChildOffset += getChild(i)->size(); + + // If already split there, we're done. + if (ChildOffset == Offset) + return false; + + // Otherwise, recursively split the child. + if (getChild(i)->split(Offset-ChildOffset, Res)) + return HandleChildPiece(i, *Res); + return false; // Done! +} + +/// insert - Insert the specified ropepiece into this tree node at the +/// specified offset. The offset is relative, so "0" is the start of the +/// node. This returns true if the insertion could not be done in place, and +/// returns information in 'Res' about the piece that is percolated up. +inline bool RopePieceBTreeInterior::insert(unsigned Offset, const RopePiece &R, + InsertResult *Res) { + // Find the insertion point. We are guaranteed that there is a split at the + // specified offset so find it. + unsigned i = 0, e = getNumChildren(); + + unsigned ChildOffs = 0; + if (Offset == size()) { + // Fastpath for a common case. Insert at end of last child. + i = e-1; + ChildOffs = size()-getChild(i)->size(); + } else { + for (; Offset > ChildOffs+getChild(i)->size(); ++i) + ChildOffs += getChild(i)->size(); + } + + Size += R.size(); + + // Insert at the end of this child. + if (getChild(i)->insert(Offset-ChildOffs, R, Res)) + return HandleChildPiece(i, *Res); + + return false; +} + +/// HandleChildPiece - A child propagated an insertion result up to us. +/// Insert the new child, and/or propagate the result further up the tree. +inline bool RopePieceBTreeInterior::HandleChildPiece(unsigned i, + InsertResult &Res) { + // Otherwise the child propagated a subtree up to us as a new child. See if + // we have space for it here. + if (!isFull()) { + // Replace child 'i' with the two children specified in Res. + if (i + 1 != getNumChildren()) + memmove(&Children[i+2], &Children[i+1], + (getNumChildren()-i-1)*sizeof(Children[0])); + Children[i] = Res.LHS; + Children[i+1] = Res.RHS; + ++NumChildren; + return false; + } + + // Okay, this node is full. Split it in half, moving WidthFactor children to + // a newly allocated interior node. + + // Create the new node. + RopePieceBTreeInterior *NewNode = new RopePieceBTreeInterior(); + + // Move over the last 'WidthFactor' values from here to NewNode. + memcpy(&NewNode->Children[0], &Children[WidthFactor], + WidthFactor*sizeof(Children[0])); + + // Decrease the number of values in the two nodes. + NewNode->NumChildren = NumChildren = WidthFactor; + + // Finally, insert the two new children in the side the can (now) hold them. + if (i < WidthFactor) + this->HandleChildPiece(i, Res); + else + NewNode->HandleChildPiece(i-WidthFactor, Res); + + // Recompute the two nodes' size. + NewNode->FullRecomputeSizeLocally(); + FullRecomputeSizeLocally(); + + Res.LHS = this; + Res.RHS = NewNode; + return true; +} + +/// erase - Remove NumBytes from this node at the specified offset. We are +/// guaranteed that there is a split at Offset. +inline void RopePieceBTreeInterior::erase(unsigned Offset, unsigned NumBytes) { + // This will shrink this node by NumBytes. + Size -= NumBytes; + + // Find the first child that overlaps with Offset. + unsigned i = 0; + for (; Offset >= getChild(i)->size(); ++i) + Offset -= getChild(i)->size(); + + // Propagate the delete request into overlapping children, or completely + // delete the children as appropriate. + while (NumBytes) { + RopePieceBTreeNode *CurChild = getChild(i); + + // If we are deleting something contained entirely in the child, pass on the + // request. + if (Offset+NumBytes < CurChild->size()) { + CurChild->erase(Offset, NumBytes); + return; + } + + // If this deletion request starts somewhere in the middle of the child, it + // must be deleting to the end of the child. + if (Offset) { + unsigned BytesFromChild = CurChild->size()-Offset; + CurChild->erase(Offset, BytesFromChild); + NumBytes -= BytesFromChild; + ++i; + continue; + } + + // If the deletion request completely covers the child, delete it and move + // the rest down. + NumBytes -= CurChild->size(); + CurChild->Destroy(); + --NumChildren; + if (i+1 != getNumChildren()) + memmove(&Children[i], &Children[i+1], + (getNumChildren()-i)*sizeof(Children[0])); + } +} + +inline void RopePieceBTreeNode::Destroy() { + if (RopePieceBTreeLeaf *Leaf = dyn_cast(this)) + delete Leaf; + else + delete cast(this); +} + +/// split - Split the range containing the specified offset so that we are +/// guaranteed that there is a place to do an insertion at the specified +/// offset. The offset is relative, so "0" is the start of the node. This +/// returns true if the insertion could not be done in place, and returns +/// information in 'Res' about the piece that is percolated up. +inline bool RopePieceBTreeNode::split(unsigned Offset, InsertResult *Res) { + assert(Offset <= size() && "Invalid offset to split!"); + if (RopePieceBTreeLeaf *Leaf = dyn_cast(this)) + return Leaf->split(Offset, Res); + return cast(this)->split(Offset, Res); +} + +/// insert - Insert the specified ropepiece into this tree node at the +/// specified offset. The offset is relative, so "0" is the start of the +/// node. +inline bool RopePieceBTreeNode::insert(unsigned Offset, const RopePiece &R, + InsertResult *Res) { + assert(Offset <= size() && "Invalid offset to insert!"); + if (RopePieceBTreeLeaf *Leaf = dyn_cast(this)) + return Leaf->insert(Offset, R, Res); + return cast(this)->insert(Offset, R, Res); +} + +/// erase - Remove NumBytes from this node at the specified offset. We are +/// guaranteed that there is a split at Offset. +inline void RopePieceBTreeNode::erase(unsigned Offset, unsigned NumBytes) { + assert(Offset+NumBytes <= size() && "Invalid offset to erase!"); + if (RopePieceBTreeLeaf *Leaf = dyn_cast(this)) + return Leaf->erase(Offset, NumBytes); + return cast(this)->erase(Offset, NumBytes); +} + + + +/// RewritePieceBTreeIterator - Provide read-only forward iteration. +class RewritePieceBTreeIterator : + public forward_iterator { + /// CurNode - The current B+Tree node that we are inspecting. + const RopePieceBTreeLeaf *CurNode; + /// CurPiece - The current RopePiece in the B+Tree node that we're inspecting. + const RopePiece *CurPiece; + /// CurChar - The current byte in the RopePiece we are pointing to. + unsigned CurChar; + friend class RewriteRope; +public: + RewritePieceBTreeIterator(const RopePieceBTreeNode *N) { // begin iterator. + // Walk down the left side of the tree until we get to a leaf. + while (const RopePieceBTreeInterior *IN = + dyn_cast(N)) + N = IN->getChild(0); + + // We must have at least one leaf. + CurNode = cast(N); + + // If we found a leaf that happens to be empty, skip over it until we get to + // something full. + while (CurNode && CurNode->getNumPieces() == 0) + CurNode = CurNode->getNextLeafInOrder(); + + if (CurNode != 0) + CurPiece = &CurNode->getPiece(0); + else // Empty tree, this is an end() iterator. + CurPiece = 0; + CurChar = 0; + } + // end iterator + RewritePieceBTreeIterator() : CurNode(0), CurPiece(0), CurChar(0) {} + + const char operator*() const { + return (*CurPiece)[CurChar]; + } + + bool operator==(const RewritePieceBTreeIterator &RHS) const { + return CurPiece == RHS.CurPiece && CurChar == RHS.CurChar; + } + bool operator!=(const RewritePieceBTreeIterator &RHS) const { + return !operator==(RHS); + } + + inline RewritePieceBTreeIterator& operator++() { // Preincrement + if (CurChar+1 < CurPiece->size()) + ++CurChar; + else if (CurPiece != &CurNode->getPiece(CurNode->getNumPieces()-1)) { + CurChar = 0; + ++CurPiece; + } else { + // Find the next non-empty leaf node. + do + CurNode = CurNode->getNextLeafInOrder(); + while (CurNode && CurNode->getNumPieces() == 0); + + if (CurNode != 0) + CurPiece = &CurNode->getPiece(0); + else // Hit end(). + CurPiece = 0; + CurChar = 0; + } + return *this; + } + + inline RewritePieceBTreeIterator operator++(int) { // Postincrement + RewritePieceBTreeIterator tmp = *this; ++*this; return tmp; + } +}; + + +class RopePieceBTree { + RopePieceBTreeNode *Root; + void operator=(const RopePieceBTree &); // DO NOT IMPLEMENT +public: + RopePieceBTree() { + Root = new RopePieceBTreeLeaf(); + } + RopePieceBTree(const RopePieceBTree &RHS) { + assert(RHS.empty() && "Can't copy non-empty tree yet"); + Root = new RopePieceBTreeLeaf(); + } + ~RopePieceBTree() { + Root->Destroy(); + } + + typedef RewritePieceBTreeIterator iterator; + iterator begin() const { return iterator(Root); } + iterator end() const { return iterator(); } + unsigned size() const { return Root->size(); } + unsigned empty() const { return size() == 0; } + + void clear() { + if (RopePieceBTreeLeaf *Leaf = dyn_cast(Root)) + Leaf->clear(); + else { + Root->Destroy(); + Root = new RopePieceBTreeLeaf(); + } + } + + void insert(unsigned Offset, const RopePiece &R) { + InsertResult Result; + // #1. Split at Offset. + if (Root->split(Offset, &Result)) + Root = new RopePieceBTreeInterior(Result.LHS, Result.RHS); + + // #2. Do the insertion. + if (Root->insert(Offset, R, &Result)) + Root = new RopePieceBTreeInterior(Result.LHS, Result.RHS); + } + + void erase(unsigned Offset, unsigned NumBytes) { + InsertResult Result; + // #1. Split at Offset. + if (Root->split(Offset, &Result)) + Root = new RopePieceBTreeInterior(Result.LHS, Result.RHS); + + // #2. Do the erasing. + Root->erase(Offset, NumBytes); + } +}; + + +#endif // ifndef USE_ROPE_VECTOR + +#ifdef USE_ROPE_VECTOR + class RewriteRope; template class RewriteRopeIterator : @@ -105,15 +789,19 @@ public: RewriteRopeIterator tmp = *this; ++*this; return tmp; } }; - +#endif /// RewriteRope - A powerful string class, todo generalize this. class RewriteRope { +#ifdef USE_ROPE_VECTOR // FIXME: This could be significantly faster by using a balanced binary tree // instead of a list. std::list Chunks; unsigned CurSize; +#else + RopePieceBTree Chunks; +#endif /// We allocate space for string data out of a buffer of size AllocChunkSize. /// This keeps track of how much space is left. @@ -122,9 +810,24 @@ class RewriteRope { enum { AllocChunkSize = 4080 }; public: - RewriteRope() : CurSize(0), AllocBuffer(0), AllocOffs(AllocChunkSize) {} - ~RewriteRope() { clear(); } + RewriteRope() : +#ifdef USE_ROPE_VECTOR + CurSize(0), +#endif + AllocBuffer(0), AllocOffs(AllocChunkSize) {} + RewriteRope(const RewriteRope &RHS) : Chunks(RHS.Chunks), +#ifdef USE_ROPE_VECTOR + CurSize(RHS.CurSize), +#endif + AllocBuffer(0), AllocOffs(AllocChunkSize) { + } + + ~RewriteRope() { + // If we had an allocation buffer, drop our reference to it. + AllocBuffer->dropRef(); + } +#ifdef USE_ROPE_VECTOR typedef RewriteRopeIterator::iterator> iterator; typedef RewriteRopeIterator::const_iterator> const_iterator; @@ -135,18 +838,45 @@ public: const_iterator end() const { return const_iterator(Chunks.end(), 0); } unsigned size() const { return CurSize; } +#else + typedef RopePieceBTree::iterator iterator; + typedef RopePieceBTree::iterator const_iterator; + iterator begin() const { return Chunks.begin(); } + iterator end() const { return Chunks.end(); } + unsigned size() const { return Chunks.size(); } +#endif void clear() { Chunks.clear(); - CurSize = 0; +#ifdef USE_ROPE_VECTOR + CurSize = 0; +#endif } +#ifndef USE_ROPE_VECTOR + void assign(const char *Start, const char *End) { + clear(); + Chunks.insert(0, MakeRopeString(Start, End)); + } + + void insert(unsigned Offset, const char *Start, const char *End) { + if (Start == End) return; + Chunks.insert(Offset, MakeRopeString(Start, End)); + } + + void erase(unsigned Offset, unsigned NumBytes) { + if (NumBytes == 0) return; + Chunks.erase(Offset, NumBytes); + } +#endif + +#ifdef USE_ROPE_VECTOR void assign(const char *Start, const char *End) { clear(); Chunks.push_back(MakeRopeString(Start, End)); CurSize = End-Start; } - + iterator getAtOffset(unsigned Offset) { assert(Offset <= CurSize && "Offset out of range!"); if (Offset == CurSize) return iterator(Chunks.end(), 0); @@ -236,6 +966,7 @@ public: CurSize -= End.CurChar; } } +#endif private: RopePiece MakeRopeString(const char *Start, const char *End) { @@ -261,14 +992,25 @@ private: // Otherwise, this was a small request but we just don't have space for it // Make a new chunk and share it with later allocations. + + // If we had an old allocation, drop our reference to it. + if (AllocBuffer && --AllocBuffer->RefCount == 0) + delete [] (char*)AllocBuffer; + unsigned AllocSize = sizeof(RopeRefCountString)-1+AllocChunkSize; AllocBuffer = reinterpret_cast(new char[AllocSize]); AllocBuffer->RefCount = 0; memcpy(AllocBuffer->Data, Start, Len); AllocOffs = Len; + + // Start out the new allocation with a refcount of 1, since we have an + // internal reference to it. + AllocBuffer->addRef(); return RopePiece(AllocBuffer, 0, Len); } +#ifdef USE_ROPE_VECTOR + /// SplitAt - If the specified iterator position has a non-zero character /// number, split the specified buffer up. This guarantees that the specified /// iterator is at the start of a chunk. Return the chunk it is at the start @@ -293,6 +1035,7 @@ private: // Return the old chunk, which is the suffix. return Chunk; } +#endif }; } // end namespace clang diff --git a/lib/Rewrite/DeltaTree.cpp b/lib/Rewrite/DeltaTree.cpp index dd096c2613..f7715312eb 100644 --- a/lib/Rewrite/DeltaTree.cpp +++ b/lib/Rewrite/DeltaTree.cpp @@ -58,10 +58,12 @@ namespace { } // end anonymous namespace -struct InsertResult { - DeltaTreeNode *LHS, *RHS; - SourceDelta Split; -}; +namespace { + struct InsertResult { + DeltaTreeNode *LHS, *RHS; + SourceDelta Split; + }; +} // end anonymous namespace namespace { @@ -72,8 +74,8 @@ namespace { /// WidthFactor - This controls the number of K/V slots held in the BTree: /// how wide it is. Each level of the BTree is guaranteed to have at least - /// WidthFactor-1 K/V pairs (unless the whole tree is less full than that) - /// and may have at most 2*WidthFactor-1 K/V pairs. + /// WidthFactor-1 K/V pairs (except the root) and may have at most + /// 2*WidthFactor-1 K/V pairs. enum { WidthFactor = 8 }; /// Values - This tracks the SourceDelta's currently in this node. diff --git a/lib/Rewrite/Rewriter.cpp b/lib/Rewrite/Rewriter.cpp index 1c1903cc40..f7dcdd23fc 100644 --- a/lib/Rewrite/Rewriter.cpp +++ b/lib/Rewrite/Rewriter.cpp @@ -27,8 +27,12 @@ void RewriteBuffer::RemoveText(unsigned OrigOffset, unsigned Size) { assert(RealOffset+Size < Buffer.size() && "Invalid location"); // Remove the dead characters. +#ifdef USE_ROPE_VECTOR RewriteRope::iterator I = Buffer.getAtOffset(RealOffset); Buffer.erase(I, I+Size); +#else + Buffer.erase(RealOffset, Size); +#endif // Add a delta so that future changes are offset correctly. AddDelta(OrigOffset, -Size); @@ -40,23 +44,29 @@ void RewriteBuffer::InsertText(unsigned OrigOffset, // Nothing to insert, exit early. if (StrLen == 0) return; - + unsigned RealOffset = getMappedOffset(OrigOffset, InsertAfter); + +#ifdef USE_ROPE_VECTOR assert(RealOffset <= Buffer.size() && "Invalid location"); // Insert the new characters. Buffer.insert(Buffer.getAtOffset(RealOffset), StrData, StrData+StrLen); +#else + Buffer.insert(RealOffset, StrData, StrData+StrLen); +#endif // Add a delta so that future changes are offset correctly. AddDelta(OrigOffset, StrLen); } /// ReplaceText - This method replaces a range of characters in the input -/// buffer with a new string. This is effectively a combined "remove/insert" +/// buffer with a new string. This is effectively a combined "remove+insert" /// operation. void RewriteBuffer::ReplaceText(unsigned OrigOffset, unsigned OrigLength, const char *NewStr, unsigned NewLength) { unsigned RealOffset = getMappedOffset(OrigOffset, true); +#ifdef USE_ROPE_VECTOR assert(RealOffset+OrigLength <= Buffer.size() && "Invalid location"); // Overwrite the common piece. @@ -76,7 +86,12 @@ void RewriteBuffer::ReplaceText(unsigned OrigOffset, unsigned OrigLength, RewriteRope::iterator I = Buffer.getAtOffset(RealOffset+NewLength); Buffer.erase(I, I+(OrigLength-NewLength)); } - AddDelta(OrigOffset, NewLength-OrigLength); +#else + Buffer.erase(RealOffset, OrigLength); + Buffer.insert(RealOffset, NewStr, NewStr+NewLength); +#endif + if (OrigLength != NewLength) + AddDelta(OrigOffset, NewLength-OrigLength); }