From beb1af2f34b5c538fc08d849b132355160b4c93f Mon Sep 17 00:00:00 2001 From: "robertphillips@google.com" Date: Wed, 7 May 2014 21:31:09 +0000 Subject: [PATCH] First pass at pre-rendering saveLayers for GPU https://codereview.chromium.org/261663003/ git-svn-id: http://skia.googlecode.com/svn/trunk@14632 2bbb7eff-a529-9590-31e7-b0007b416f81 --- include/core/SkPicture.h | 1 + src/core/SkPicturePlayback.cpp | 139 +++++++++++++++++++++++++--- src/core/SkPicturePlayback.h | 66 ++++++++++++++ src/core/SkPictureStateTree.cpp | 61 ++++++++++--- src/core/SkPictureStateTree.h | 25 +++++- src/gpu/GrPictureUtils.cpp | 9 ++ src/gpu/GrPictureUtils.h | 3 +- src/gpu/SkGpuDevice.cpp | 155 ++++++++++++++++++++++++++++---- 8 files changed, 413 insertions(+), 46 deletions(-) diff --git a/include/core/SkPicture.h b/include/core/SkPicture.h index bd723e89a..c0b776ce5 100644 --- a/include/core/SkPicture.h +++ b/include/core/SkPicture.h @@ -477,6 +477,7 @@ private: friend class SkPicturePlayback; friend class SkPictureRecorder; friend class SkGpuDevice; + friend class GrGatherCanvas; friend class GrGatherDevice; friend class SkDebugCanvas; diff --git a/src/core/SkPicturePlayback.cpp b/src/core/SkPicturePlayback.cpp index 291774b88..94fe42163 100644 --- a/src/core/SkPicturePlayback.cpp +++ b/src/core/SkPicturePlayback.cpp @@ -23,6 +23,33 @@ template int SafeCount(const T* obj) { */ #define SPEW_CLIP_SKIPPINGx +SkPicturePlayback::PlaybackReplacements::ReplacementInfo* +SkPicturePlayback::PlaybackReplacements::push() { + SkDEBUGCODE(this->validate()); + return fReplacements.push(); +} + +void SkPicturePlayback::PlaybackReplacements::freeAll() { + for (int i = 0; i < fReplacements.count(); ++i) { + SkDELETE(fReplacements[i].fBM); + } + fReplacements.reset(); +} + +#ifdef SK_DEBUG +void SkPicturePlayback::PlaybackReplacements::validate() const { + // Check that the ranges are monotonically increasing and non-overlapping + if (fReplacements.count() > 0) { + SkASSERT(fReplacements[0].fStart < 
fReplacements[0].fStop); + + for (int i = 1; i < fReplacements.count(); ++i) { + SkASSERT(fReplacements[i].fStart < fReplacements[i].fStop); + SkASSERT(fReplacements[i-1].fStop < fReplacements[i].fStart); + } + } +} +#endif + SkPicturePlayback::SkPicturePlayback(const SkPicture* picture, const SkPictInfo& info) : fPicture(picture) , fInfo(info) { @@ -205,6 +232,10 @@ void SkPicturePlayback::init() { fStateTree = NULL; fCachedActiveOps = NULL; fCurOffset = 0; + fUseBBH = true; + fStart = 0; + fStop = 0; + fReplacements = NULL; } SkPicturePlayback::~SkPicturePlayback() { @@ -744,6 +775,21 @@ private: SkPicturePlayback* fPlayback; }; +// TODO: Replace with hash or pass in "lastLookedUp" hint +SkPicturePlayback::PlaybackReplacements::ReplacementInfo* +SkPicturePlayback::PlaybackReplacements::lookupByStart(size_t start) { + SkDEBUGCODE(this->validate()); + for (int i = 0; i < fReplacements.count(); ++i) { + if (start == fReplacements[i].fStart) { + return &fReplacements[i]; + } else if (start < fReplacements[i].fStart) { + return NULL; // the ranges are monotonically increasing and non-overlapping + } + } + + return NULL; +} + void SkPicturePlayback::draw(SkCanvas& canvas, SkDrawPictureCallback* callback) { SkAutoResetOpID aroi(this); SkASSERT(0 == fCurOffset); @@ -769,20 +815,24 @@ void SkPicturePlayback::draw(SkCanvas& canvas, SkDrawPictureCallback* callback) TextContainer text; const SkTDArray* activeOps = NULL; - if (NULL != fStateTree && NULL != fBoundingHierarchy) { - SkRect clipBounds; - if (canvas.getClipBounds(&clipBounds)) { - SkIRect query; - clipBounds.roundOut(&query); + // When draw limits are enabled (i.e., 0 != fStart || 0 != fStop) the state + // tree isn't used to pick and choose the draw operations + if (0 == fStart && 0 == fStop) { + if (fUseBBH && NULL != fStateTree && NULL != fBoundingHierarchy) { + SkRect clipBounds; + if (canvas.getClipBounds(&clipBounds)) { + SkIRect query; + clipBounds.roundOut(&query); - const SkPicture::OperationList& 
activeOpsList = this->getActiveOps(query); - if (activeOpsList.valid()) { - if (0 == activeOpsList.numOps()) { - return; // nothing to draw + const SkPicture::OperationList& activeOpsList = this->getActiveOps(query); + if (activeOpsList.valid()) { + if (0 == activeOpsList.numOps()) { + return; // nothing to draw + } + + // Since the opList is valid we know it is our derived class + activeOps = &((const CachedOperationList&)activeOpsList).fOps; } - - // Since the opList is valid we know it is our derived class - activeOps = &((const CachedOperationList&)activeOpsList).fOps; } } } @@ -791,6 +841,14 @@ void SkPicturePlayback::draw(SkCanvas& canvas, SkDrawPictureCallback* callback) SkPictureStateTree::Iterator() : fStateTree->getIterator(*activeOps, &canvas); + if (0 != fStart || 0 != fStop) { + reader.setOffset(fStart); + uint32_t size; + SkDEBUGCODE(DrawType op =) read_op_and_size(&reader, &size); + SkASSERT(SAVE_LAYER == op); + reader.setOffset(fStart+size); + } + if (it.isValid()) { uint32_t skipTo = it.nextDraw(); if (kDrawComplete == skipTo) { @@ -821,6 +879,60 @@ void SkPicturePlayback::draw(SkCanvas& canvas, SkDrawPictureCallback* callback) return; } #endif + if (0 != fStart || 0 != fStop) { + size_t offset = reader.offset() ; + if (offset >= fStop) { + uint32_t size; + SkDEBUGCODE(DrawType op =) read_op_and_size(&reader, &size); + SkASSERT(RESTORE == op); + return; + } + } + + if (NULL != fReplacements) { + // Potentially replace a block of operations with a single drawBitmap call + SkPicturePlayback::PlaybackReplacements::ReplacementInfo* temp = + fReplacements->lookupByStart(reader.offset()); + if (NULL != temp) { + SkASSERT(NULL != temp->fBM); + SkASSERT(NULL != temp->fPaint); + canvas.drawBitmap(*temp->fBM, temp->fPos.fX, temp->fPos.fY, temp->fPaint); + + if (it.isValid()) { + // This save is needed since the BBH will automatically issue + // a restore to balance the saveLayer we're skipping + canvas.save(); + // Note: This skipping only works if the 
client only issues + // well behaved saveLayer calls (i.e., doesn't use + // kMatrix_SaveFlag or kClip_SaveFlag in isolation) + + // At this point we know that the PictureStateTree was aiming + // for some draw op within temp's saveLayer (although potentially + // in a separate saveLayer nested inside it). + // We need to skip all the operations inside temp's range + // along with all the associated state changes but update + // the state tree to the first operation outside temp's range. + SkASSERT(it.peekDraw() >= temp->fStart && it.peekDraw() <= temp->fStop); + + while (kDrawComplete != it.peekDraw() && it.peekDraw() <= temp->fStop) { + it.skipDraw(); + } + + if (kDrawComplete == it.peekDraw()) { + break; + } + + uint32_t skipTo = it.nextDraw(); + reader.setOffset(skipTo); + } else { + reader.setOffset(temp->fStop); + uint32_t size; + SkDEBUGCODE(DrawType op =) read_op_and_size(&reader, &size); + SkASSERT(RESTORE == op); + } + continue; + } + } #ifdef SPEW_CLIP_SKIPPING opCount++; @@ -915,8 +1027,7 @@ void SkPicturePlayback::draw(SkCanvas& canvas, SkDrawPictureCallback* callback) SkRegion::Op regionOp = ClipParams_unpackRegionOp(packed); bool doAA = ClipParams_unpackDoAA(packed); size_t offsetToRestore = reader.readInt(); - SkASSERT(!offsetToRestore || \ - offsetToRestore >= reader.offset()); + SkASSERT(!offsetToRestore || offsetToRestore >= reader.offset()); canvas.clipRRect(rrect, regionOp, doAA); if (canvas.isClipEmpty() && offsetToRestore) { #ifdef SPEW_CLIP_SKIPPING diff --git a/src/core/SkPicturePlayback.h b/src/core/SkPicturePlayback.h index 28fdd6327..7ac8dd8d6 100644 --- a/src/core/SkPicturePlayback.h +++ b/src/core/SkPicturePlayback.h @@ -5,6 +5,7 @@ * Use of this source code is governed by a BSD-style license that can be * found in the LICENSE file. 
*/ + #ifndef SkPicturePlayback_DEFINED #define SkPicturePlayback_DEFINED @@ -91,6 +92,8 @@ public: const SkPicture::OperationList& getActiveOps(const SkIRect& queryRect); + void setUseBBH(bool useBBH) { fUseBBH = useBBH; } + void draw(SkCanvas& canvas, SkDrawPictureCallback*); void serialize(SkWStream*, SkPicture::EncodeBitmap) const; @@ -227,6 +230,7 @@ private: // these help us with reading/writing private: friend class SkPicture; + friend class SkGpuDevice; // for access to setDrawLimits & setReplacements // The picture that owns this SkPicturePlayback object const SkPicture* fPicture; @@ -248,6 +252,68 @@ private: SkBBoxHierarchy* fBoundingHierarchy; SkPictureStateTree* fStateTree; + // Limit the opcode playback to be between the offsets 'start' and 'stop'. + // The opcode at 'start' should be a saveLayer while the opcode at + // 'stop' should be a restore. Neither of those commands will be issued. + // Set both start & stop to 0 to disable draw limiting + // Draw limiting cannot be enabled at the same time as draw replacing + void setDrawLimits(size_t start, size_t stop) { + SkASSERT(NULL == fReplacements); + fStart = start; + fStop = stop; + } + + // PlaybackReplacements collects op ranges that can be replaced with + // a single drawBitmap call (using a precomputed bitmap). + class PlaybackReplacements { + public: + // All the operations between fStart and fStop (inclusive) will be replaced with + // a single drawBitmap call using fPos, fBM and fPaint. + // fPaint will be NULL if the picture's paint wasn't copyable + struct ReplacementInfo { + size_t fStart; + size_t fStop; + SkPoint fPos; + SkBitmap* fBM; + const SkPaint* fPaint; // Note: this object doesn't own the paint + }; + + ~PlaybackReplacements() { this->freeAll(); } + + // Add a new replacement range. The replacement ranges should be + // sorted in increasing order and non-overlapping (esp. no nested + // saveLayers). 
+ ReplacementInfo* push(); + + private: + friend class SkPicturePlayback; // for access to lookupByStart + + // look up a replacement range by its start offset + ReplacementInfo* lookupByStart(size_t start); + + void freeAll(); + + #ifdef SK_DEBUG + void validate() const; + #endif + + SkTDArray fReplacements; + }; + + + // Replace all the draw ops in the replacement ranges in 'replacements' with + // the associated drawBitmap call + // Draw replacing cannot be enabled at the same time as draw limiting + void setReplacements(PlaybackReplacements* replacements) { + SkASSERT(fStart == 0 && fStop == 0); + fReplacements = replacements; + } + + bool fUseBBH; + size_t fStart; + size_t fStop; + PlaybackReplacements* fReplacements; + class CachedOperationList : public SkPicture::OperationList { public: CachedOperationList() { diff --git a/src/core/SkPictureStateTree.cpp b/src/core/SkPictureStateTree.cpp index f6d54ffed..39a1a47e1 100644 --- a/src/core/SkPictureStateTree.cpp +++ b/src/core/SkPictureStateTree.cpp @@ -114,27 +114,60 @@ void SkPictureStateTree::Iterator::setCurrentMatrix(const SkMatrix* matrix) { fCurrentMatrix = matrix; } -uint32_t SkPictureStateTree::Iterator::nextDraw() { +uint32_t SkPictureStateTree::Iterator::peekDraw() { SkASSERT(this->isValid()); if (fPlaybackIndex >= fDraws->count()) { + return kDrawComplete; + } + + Draw* draw = static_cast((*fDraws)[fPlaybackIndex]); + return draw->fOffset; +} + +uint32_t SkPictureStateTree::Iterator::skipDraw() { + SkASSERT(this->isValid()); + if (fPlaybackIndex >= fDraws->count()) { + return this->finish(); + } + + Draw* draw = static_cast((*fDraws)[fPlaybackIndex]); + + if (fSave) { + fCanvas->save(); + fSave = false; + } + + fNodes.rewind(); + + ++fPlaybackIndex; + return draw->fOffset; +} + +uint32_t SkPictureStateTree::Iterator::finish() { + if (fCurrentNode->fFlags & Node::kSaveLayer_Flag) { + fCanvas->restore(); + } + + for (fCurrentNode = fCurrentNode->fParent; fCurrentNode; + fCurrentNode = 
fCurrentNode->fParent) { + // Note: we call restore() twice when both flags are set. + if (fCurrentNode->fFlags & Node::kSave_Flag) { + fCanvas->restore(); + } if (fCurrentNode->fFlags & Node::kSaveLayer_Flag) { fCanvas->restore(); } + } - for (fCurrentNode = fCurrentNode->fParent; fCurrentNode; - fCurrentNode = fCurrentNode->fParent) { - // Note: we call restore() twice when both flags are set. - if (fCurrentNode->fFlags & Node::kSave_Flag) { - fCanvas->restore(); - } - if (fCurrentNode->fFlags & Node::kSaveLayer_Flag) { - fCanvas->restore(); - } - } + fCanvas->setMatrix(fPlaybackMatrix); + fCurrentMatrix = NULL; + return kDrawComplete; +} - fCanvas->setMatrix(fPlaybackMatrix); - fCurrentMatrix = NULL; - return kDrawComplete; +uint32_t SkPictureStateTree::Iterator::nextDraw() { + SkASSERT(this->isValid()); + if (fPlaybackIndex >= fDraws->count()) { + return this->finish(); } Draw* draw = static_cast((*fDraws)[fPlaybackIndex]); diff --git a/src/core/SkPictureStateTree.h b/src/core/SkPictureStateTree.h index d61bf032c..a77e09448 100644 --- a/src/core/SkPictureStateTree.h +++ b/src/core/SkPictureStateTree.h @@ -74,8 +74,29 @@ public: */ class Iterator { public: - /** Returns the next offset into the picture stream, or kDrawComplete if complete. */ + /** Returns the next op offset needed to create the drawing state + required by the queued up draw operation or the offset of the queued + up draw operation itself. In the latter case, the next draw operation + will move into the queued up slot. + It returns kDrawComplete when done. + TODO: this might be better named nextOp + */ uint32_t nextDraw(); + /** Peek at the currently queued up draw op's offset. Note that this can + be different than what 'nextDraw' would return because it is + the offset of the next _draw_ op while 'nextDraw' can return + the offsets to saveLayer and clip ops while it is creating the proper + drawing context for the queued up draw op. 
+ */ + uint32_t peekDraw(); + /** Stop trying to create the drawing context for the currently queued + up _draw_ operation and queue up the next one. This call returns + the offset of the skipped _draw_ operation. Obviously (since the + correct drawing context has not been established), the skipped + _draw_ operation should not be issued. Returns kDrawComplete if + the end of the draw operations is reached. + */ + uint32_t skipDraw(); static const uint32_t kDrawComplete = SK_MaxU32; Iterator() : fPlaybackMatrix(), fValid(false) { } bool isValid() const { return fValid; } @@ -111,6 +132,8 @@ public: // Whether or not this is a valid iterator (the default public constructor sets this false) bool fValid; + uint32_t finish(); + friend class SkPictureStateTree; }; diff --git a/src/gpu/GrPictureUtils.cpp b/src/gpu/GrPictureUtils.cpp index 996a32d9a..a66f34c0a 100644 --- a/src/gpu/GrPictureUtils.cpp +++ b/src/gpu/GrPictureUtils.cpp @@ -9,6 +9,7 @@ #include "SkDevice.h" #include "SkDraw.h" #include "SkPaintPriv.h" +#include "SkPicturePlayback.h" SkPicture::AccelData::Key GPUAccelData::ComputeAccelDataKey() { static const SkPicture::AccelData::Key gGPUID = SkPicture::AccelData::GenerateDomain(); @@ -249,7 +250,15 @@ public: } virtual void drawPicture(SkPicture& picture) SK_OVERRIDE { + // BBH-based rendering doesn't re-issue many of the operations the gather + // process cares about (e.g., saves and restores) so it must be disabled. 
+ if (NULL != picture.fPlayback) { + picture.fPlayback->setUseBBH(false); + } picture.draw(this); + if (NULL != picture.fPlayback) { + picture.fPlayback->setUseBBH(true); + } } protected: // disable aa for speed diff --git a/src/gpu/GrPictureUtils.h b/src/gpu/GrPictureUtils.h index c62529862..b8a7814cc 100644 --- a/src/gpu/GrPictureUtils.h +++ b/src/gpu/GrPictureUtils.h @@ -68,10 +68,9 @@ public: // incorporate the clip and matrix state into the key static SkPicture::AccelData::Key ComputeAccelDataKey(); -protected: +private: SkTDArray fSaveLayerInfo; -private: typedef SkPicture::AccelData INHERITED; }; diff --git a/src/gpu/SkGpuDevice.cpp b/src/gpu/SkGpuDevice.cpp index 517f082a4..1a6a3e4c4 100644 --- a/src/gpu/SkGpuDevice.cpp +++ b/src/gpu/SkGpuDevice.cpp @@ -28,6 +28,7 @@ #include "SkMaskFilter.h" #include "SkPathEffect.h" #include "SkPicture.h" +#include "SkPicturePlayback.h" #include "SkRRect.h" #include "SkStroke.h" #include "SkSurface.h" @@ -1920,6 +1921,12 @@ void SkGpuDevice::EXPERIMENTAL_optimize(SkPicture* picture) { GatherGPUInfo(picture, data); } +static void wrap_texture(GrTexture* texture, int width, int height, SkBitmap* result) { + SkImageInfo info = SkImageInfo::MakeN32Premul(width, height); + result->setConfig(info); + result->setPixelRef(SkNEW_ARGS(SkGrPixelRef, (info, texture)))->unref(); +} + void SkGpuDevice::EXPERIMENTAL_purge(SkPicture* picture) { } @@ -1940,30 +1947,148 @@ bool SkGpuDevice::EXPERIMENTAL_drawPicture(SkCanvas* canvas, SkPicture* picture) pullForward[i] = false; } - SkIRect clip; - - fClipData.getConservativeBounds(this->width(), this->height(), &clip, NULL); - - SkMatrix inv; - if (!fContext->getMatrix().invert(&inv)) { - return false; + SkRect clipBounds; + if (!canvas->getClipBounds(&clipBounds)) { + return true; } + SkIRect query; + clipBounds.roundOut(&query); - SkRect r = SkRect::Make(clip); - inv.mapRect(&r); - r.roundOut(&clip); + const SkPicture::OperationList& ops = picture->EXPERIMENTAL_getActiveOps(query); - const 
SkPicture::OperationList& ops = picture->EXPERIMENTAL_getActiveOps(clip); + // This code pre-renders the entire layer since it will be cached and potentially + // reused with different clips (e.g., in different tiles). Because of this the + // clip will not be limiting the size of the pre-rendered layer. kSaveLayerMaxSize + // is used to limit which clips are pre-rendered. + static const int kSaveLayerMaxSize = 256; - for (int i = 0; i < ops.numOps(); ++i) { + if (ops.valid()) { + // In this case the picture has been generated with a BBH so we use + // the BBH to limit the pre-rendering to just the layers needed to cover + // the region being drawn + for (int i = 0; i < ops.numOps(); ++i) { + uint32_t offset = ops.offset(i); + + // For now we're saving all the layers in the GPUAccelData so they + // can be nested. Additionally, the nested layers appear before + // their parent in the list. + for (int j = 0 ; j < gpuData->numSaveLayers(); ++j) { + const GPUAccelData::SaveLayerInfo& info = gpuData->saveLayerInfo(j); + + if (pullForward[j]) { + continue; // already pulling forward + } + + if (offset < info.fSaveLayerOpID || offset > info.fRestoreOpID) { + continue; // the op isn't in this range + } + + // TODO: once this code is more stable unsuitable layers can + // just be omitted during the optimization stage + if (!info.fValid || + kSaveLayerMaxSize < info.fSize.fWidth || + kSaveLayerMaxSize < info.fSize.fHeight || + info.fIsNested) { + continue; // this layer is unsuitable + } + + pullForward[j] = true; + } + } + } else { + // In this case there is no BBH associated with the picture. 
Pre-render + // all the layers + // TODO: intersect the bounds of each layer with the clip region to + // reduce the number of pre-rendered layers for (int j = 0; j < gpuData->numSaveLayers(); ++j) { const GPUAccelData::SaveLayerInfo& info = gpuData->saveLayerInfo(j); - if (ops.offset(i) > info.fSaveLayerOpID && ops.offset(i) < info.fRestoreOpID) { - pullForward[j] = true; + // TODO: once this code is more stable unsuitable layers can + // just be omitted during the optimization stage + if (!info.fValid || + kSaveLayerMaxSize < info.fSize.fWidth || + kSaveLayerMaxSize < info.fSize.fHeight || + info.fIsNested) { + continue; + } + + pullForward[j] = true; + } + } + + SkPicturePlayback::PlaybackReplacements replacements; + + for (int i = 0; i < gpuData->numSaveLayers(); ++i) { + if (pullForward[i]) { + GrCachedLayer* layer = fContext->getLayerCache()->findLayerOrCreate(picture, i); + + const GPUAccelData::SaveLayerInfo& info = gpuData->saveLayerInfo(i); + + if (NULL != picture->fPlayback) { + SkPicturePlayback::PlaybackReplacements::ReplacementInfo* layerInfo = + replacements.push(); + layerInfo->fStart = info.fSaveLayerOpID; + layerInfo->fStop = info.fRestoreOpID; + layerInfo->fPos = info.fOffset; + + GrTextureDesc desc; + desc.fFlags = kRenderTarget_GrTextureFlagBit; + desc.fWidth = info.fSize.fWidth; + desc.fHeight = info.fSize.fHeight; + desc.fConfig = kSkia8888_GrPixelConfig; + // TODO: need to deal with sample count + + bool bNeedsRendering = true; + + // This just uses scratch textures and doesn't cache the texture. 
+ // This can yield a lot of re-rendering + if (NULL == layer->getTexture()) { + layer->setTexture(fContext->lockAndRefScratchTexture(desc, + GrContext::kApprox_ScratchTexMatch)); + if (NULL == layer->getTexture()) { + continue; + } + } else { + bNeedsRendering = false; + } + + layerInfo->fBM = SkNEW(SkBitmap); + wrap_texture(layer->getTexture(), desc.fWidth, desc.fHeight, layerInfo->fBM); + + SkASSERT(info.fPaint); + layerInfo->fPaint = info.fPaint; + + if (bNeedsRendering) { + SkAutoTUnref surface(SkSurface::NewRenderTargetDirect( + layer->getTexture()->asRenderTarget())); + + SkCanvas* canvas = surface->getCanvas(); + + canvas->setMatrix(info.fCTM); + canvas->clear(SK_ColorTRANSPARENT); + + picture->fPlayback->setDrawLimits(info.fSaveLayerOpID, info.fRestoreOpID); + picture->fPlayback->draw(*canvas, NULL); + picture->fPlayback->setDrawLimits(0, 0); + canvas->flush(); + } } } } - return false; + // Playback using new layers + picture->fPlayback->setReplacements(&replacements); + picture->fPlayback->draw(*canvas, NULL); + picture->fPlayback->setReplacements(NULL); + + for (int i = 0; i < gpuData->numSaveLayers(); ++i) { + GrCachedLayer* layer = fContext->getLayerCache()->findLayerOrCreate(picture, i); + + if (NULL != layer->getTexture()) { + fContext->unlockScratchTexture(layer->getTexture()); + layer->setTexture(NULL); + } + } + + return true; }