diff --git a/src/features.xml b/src/features.xml
index 3f1c7bee5d..adb444b65f 100644
--- a/src/features.xml
+++ b/src/features.xml
@@ -86,6 +86,16 @@
     <stage>AlwaysEnabled</stage>
   </feature>
 
+  <feature>
+    <name>Feature_AtlasEnginePresentFallback</name>
+    <description>We don't feel super confident in our usage of the Present1 API, so this setting adds a fallback to Present on error</description>
+    <stage>AlwaysDisabled</stage>
+    <alwaysEnabledBrandingTokens>
+      <brandingToken>Release</brandingToken>
+      <brandingToken>WindowsInbox</brandingToken>
+    </alwaysEnabledBrandingTokens>
+  </feature>
+
   <feature>
     <name>Feature_NearbyFontLoading</name>
     <description>Controls whether fonts in the same directory as the binary are used during rendering. Disabled for conhost so that it doesn't iterate the entire system32 directory.</description>
diff --git a/src/renderer/atlas/AtlasEngine.cpp b/src/renderer/atlas/AtlasEngine.cpp
index fc6fdfbccf..e571fac97a 100644
--- a/src/renderer/atlas/AtlasEngine.cpp
+++ b/src/renderer/atlas/AtlasEngine.cpp
@@ -87,89 +87,25 @@ try
         _api.invalidatedRows.start = std::min(_api.invalidatedRows.start, _p.s->cellCount.y);
         _api.invalidatedRows.end = clamp(_api.invalidatedRows.end, _api.invalidatedRows.start, _p.s->cellCount.y);
     }
-
-    const auto allInvalid = _api.invalidatedRows == range{ 0, _p.s->cellCount.y };
-
-    // Avoid scrolling if everything's invalid anyways. This isn't here for performance or correctness
-    // (the code also works without this), but rather because it helps me reason about the way this works.
-    // For instance it ensures we don't pass a scroll rect to Present1() when effectively nothing is scrolling.
-    if (allInvalid)
-    {
-        _api.scrollOffset = 0;
-    }
-    else
     {
         const auto limit = gsl::narrow_cast<i16>(_p.s->cellCount.y & 0x7fff);
         const auto offset = gsl::narrow_cast<i16>(clamp<int>(_api.scrollOffset, -limit, limit));
+        const auto nothingInvalid = _api.invalidatedRows.start == _api.invalidatedRows.end;
 
         _api.scrollOffset = offset;
 
-        // Scroll the buffer by the given offset and mark the newly uncovered rows as "invalid".
-        if (offset)
+        // Mark the newly scrolled-in rows as invalidated
+        if (offset < 0)
         {
-            const auto nothingInvalid = _api.invalidatedRows.start == _api.invalidatedRows.end;
-
-            if (offset < 0)
-            {
-                // scrollOffset/offset = -1
-                // +----------+    +----------+
-                // |          |    | xxxxxxxxx|
-                // | xxxxxxxxx| -> |xxxxxxx   |
-                // |xxxxxxx   |    |          |
-                // +----------+    +----------+
-                const u16 begRow = _p.s->cellCount.y + offset;
-                _api.invalidatedRows.start = nothingInvalid ? begRow : std::min(_api.invalidatedRows.start, begRow);
-                _api.invalidatedRows.end = _p.s->cellCount.y;
-
-                const auto dst = std::copy_n(_p.rows.begin() - offset, _p.rows.size() + offset, _p.rowsScratch.begin());
-                std::copy_n(_p.rows.begin(), -offset, dst);
-            }
-            else
-            {
-                // scrollOffset/offset = 1
-                // +----------+    +----------+
-                // | xxxxxxxxx|    |          |
-                // |xxxxxxx   | -> | xxxxxxxxx|
-                // |          |    |xxxxxxx   |
-                // +----------+    +----------+
-                const u16 endRow = offset;
-                _api.invalidatedRows.start = 0;
-                _api.invalidatedRows.end = nothingInvalid ? endRow : std::max(_api.invalidatedRows.end, endRow);
-
-                const auto dst = std::copy_n(_p.rows.end() - offset, offset, _p.rowsScratch.begin());
-                std::copy_n(_p.rows.begin(), _p.rows.size() - offset, dst);
-            }
-
-            std::swap(_p.rows, _p.rowsScratch);
-
-            // Scrolling the background bitmap is a lot easier because we can rely on memmove which works
-            // with both forwards and backwards copying. It's a mystery why the STL doesn't have this.
-            {
-                const auto srcOffset = std::max<ptrdiff_t>(0, -offset) * gsl::narrow_cast<ptrdiff_t>(_p.colorBitmapRowStride);
-                const auto dstOffset = std::max<ptrdiff_t>(0, offset) * gsl::narrow_cast<ptrdiff_t>(_p.colorBitmapRowStride);
-                const auto count = _p.colorBitmapDepthStride - std::max(srcOffset, dstOffset);
-                assert(dstOffset >= 0 && dstOffset + count <= _p.colorBitmapDepthStride);
-                assert(srcOffset >= 0 && srcOffset + count <= _p.colorBitmapDepthStride);
-
-                auto src = _p.colorBitmap.data() + srcOffset;
-                auto dst = _p.colorBitmap.data() + dstOffset;
-                const auto bytes = count * sizeof(u32);
-
-                for (size_t i = 0; i < 2; ++i)
-                {
-                    // Avoid bumping the colorBitmapGeneration unless necessary. This approx. further halves
-                    // the (already small) GPU load. This could easily be replaced with some custom SIMD
-                    // to avoid going over the memory twice, but... that's a story for another day.
-                    if (memcmp(dst, src, bytes) != 0)
-                    {
-                        memmove(dst, src, bytes);
-                        _p.colorBitmapGenerations[i].bump();
-                    }
-
-                    src += _p.colorBitmapDepthStride;
-                    dst += _p.colorBitmapDepthStride;
-                }
-            }
+            const u16 begRow = _p.s->cellCount.y + offset;
+            _api.invalidatedRows.start = nothingInvalid ? begRow : std::min(_api.invalidatedRows.start, begRow);
+            _api.invalidatedRows.end = _p.s->cellCount.y;
+        }
+        else
+        {
+            const u16 endRow = offset;
+            _api.invalidatedRows.start = 0;
+            _api.invalidatedRows.end = nothingInvalid ? endRow : std::max(_api.invalidatedRows.end, endRow);
         }
     }
 
@@ -190,46 +126,115 @@
     _p.cursorRect = {};
     _p.scrollOffset = _api.scrollOffset;
 
-    if (_api.invalidatedRows.non_empty())
+    // This if condition serves two purposes:
+    // * By setting top/bottom to the full height we ensure that we call Present() without
+    //   any dirty rects and not Present1() on the first frame after the settings change.
+    // * If the scrollOffset is so large that it scrolls the entire viewport, invalidatedRows will span
+    //   the entire viewport as well. We need to set scrollOffset to 0 then, not just because scrolling
+    //   the contents of the entire swap chain is redundant, but more importantly because the scroll rect
+    //   is the subset of the contents that are being scrolled into. If you scroll the entire viewport
+    //   then the scroll rect is empty, which Present1() will loudly complain about.
+    if (_api.invalidatedRows == range{ 0, _p.s->cellCount.y })
     {
-        const auto deltaPx = _api.scrollOffset * _p.s->font->cellSize.y;
-        const til::CoordType targetSizeX = _p.s->targetSize.x;
-        const til::CoordType targetSizeY = _p.s->targetSize.y;
-        u16 y = 0;
+        _p.MarkAllAsDirty();
+    }
 
-        _p.dirtyRectInPx.left = 0;
-        _p.dirtyRectInPx.top = _api.invalidatedRows.start * _p.s->font->cellSize.y;
-        _p.dirtyRectInPx.right = targetSizeX;
-        _p.dirtyRectInPx.bottom = _api.invalidatedRows.end * _p.s->font->cellSize.y;
-
-        for (const auto r : _p.rows)
+    if (const auto offset = _api.scrollOffset)
+    {
+        if (offset < 0)
         {
-            r->dirtyTop += deltaPx;
-            r->dirtyBottom += deltaPx;
-
-            if (_api.invalidatedRows.contains(y))
-            {
-                const auto clampedTop = clamp(r->dirtyTop, 0, targetSizeY);
-                const auto clampedBottom = clamp(r->dirtyBottom, 0, targetSizeY);
-                if (clampedTop != clampedBottom)
-                {
-                    _p.dirtyRectInPx.top = std::min(_p.dirtyRectInPx.top, clampedTop);
-                    _p.dirtyRectInPx.bottom = std::max(_p.dirtyRectInPx.bottom, clampedBottom);
-                }
-
-                r->clear(y, _p.s->font->cellSize.y);
-            }
-
-            ++y;
+            // scrollOffset/offset = -1
+            // +----------+    +----------+
+            // |          |    | xxxxxxxxx|
+            // | xxxxxxxxx| -> |xxxxxxx   |
+            // |xxxxxxx   |    |          |
+            // +----------+    +----------+
+            const auto dst = std::copy_n(_p.rows.begin() - offset, _p.rows.size() + offset, _p.rowsScratch.begin());
+            std::copy_n(_p.rows.begin(), -offset, dst);
+        }
+        else
+        {
+            // scrollOffset/offset = 1
+            // +----------+    +----------+
+            // | xxxxxxxxx|    |          |
+            // |xxxxxxx   | -> | xxxxxxxxx|
+            // |          |    |xxxxxxx   |
+            // +----------+    +----------+
+            const auto dst = std::copy_n(_p.rows.end() - offset, offset, _p.rowsScratch.begin());
+            std::copy_n(_p.rows.begin(), _p.rows.size() - offset, dst);
         }
 
-        // I feel a little bit like this is a hack, but I'm not sure how to better express this.
-        // This ensures that we end up calling Present1() without dirty rects if the swap chain is
-        // recreated/resized, because DXGI requires you to then call Present1() without dirty rects.
-        if (allInvalid)
+        std::swap(_p.rows, _p.rowsScratch);
+
+        // Now that the rows have scrolled, their cached dirty rects naturally also need to do the same.
+        // It doesn't really matter that some of these will end up being out of bounds,
+        // because we'll call ShapedRow::Clear() later on which resets them.
         {
-            _p.dirtyRectInPx.top = 0;
-            _p.dirtyRectInPx.bottom = targetSizeY;
+            const auto deltaPx = offset * _p.s->font->cellSize.y;
+            for (const auto r : _p.rows)
+            {
+                r->dirtyTop += deltaPx;
+                r->dirtyBottom += deltaPx;
+            }
+        }
+
+        // Scrolling the background bitmap is a lot easier because we can rely on memmove which works
+        // with both forwards and backwards copying. It's a mystery why the STL doesn't have this.
+        {
+            const auto srcOffset = std::max<ptrdiff_t>(0, -offset) * gsl::narrow_cast<ptrdiff_t>(_p.colorBitmapRowStride);
+            const auto dstOffset = std::max<ptrdiff_t>(0, offset) * gsl::narrow_cast<ptrdiff_t>(_p.colorBitmapRowStride);
+            const auto count = _p.colorBitmapDepthStride - std::max(srcOffset, dstOffset);
+            assert(dstOffset >= 0 && dstOffset + count <= _p.colorBitmapDepthStride);
+            assert(srcOffset >= 0 && srcOffset + count <= _p.colorBitmapDepthStride);
+
+            auto src = _p.colorBitmap.data() + srcOffset;
+            auto dst = _p.colorBitmap.data() + dstOffset;
+            const auto bytes = count * sizeof(u32);
+
+            for (size_t i = 0; i < 2; ++i)
+            {
+                // Avoid bumping the colorBitmapGeneration unless necessary. This approx. further halves
+                // the (already small) GPU load. This could easily be replaced with some custom SIMD
+                // to avoid going over the memory twice, but... that's a story for another day.
+                if (memcmp(dst, src, bytes) != 0)
+                {
+                    memmove(dst, src, bytes);
+                    _p.colorBitmapGenerations[i].bump();
+                }
+
+                src += _p.colorBitmapDepthStride;
+                dst += _p.colorBitmapDepthStride;
+            }
+        }
+    }
+
+    // This serves two purposes. For each invalidated row, this will:
+    // * Get the old dirty rect and mark that region as needing invalidation during the upcoming Present1(),
+    //   because it'll now be replaced with something else (for instance nothing/whitespace).
+    // * Clear() them to prepare them for the new incoming content from the TextBuffer.
+    if (_api.invalidatedRows.non_empty())
+    {
+        const til::CoordType targetSizeX = _p.s->targetSize.x;
+        const til::CoordType targetSizeY = _p.s->targetSize.y;
+
+        _p.dirtyRectInPx.left = 0;
+        _p.dirtyRectInPx.top = std::min(_p.dirtyRectInPx.top, _api.invalidatedRows.start * _p.s->font->cellSize.y);
+        _p.dirtyRectInPx.right = targetSizeX;
+        _p.dirtyRectInPx.bottom = std::max(_p.dirtyRectInPx.bottom, _api.invalidatedRows.end * _p.s->font->cellSize.y);
+
+        for (auto y = _api.invalidatedRows.start; y < _api.invalidatedRows.end; ++y)
+        {
+            const auto r = _p.rows[y];
+            const auto clampedTop = clamp(r->dirtyTop, 0, targetSizeY);
+            const auto clampedBottom = clamp(r->dirtyBottom, 0, targetSizeY);
+
+            if (clampedTop != clampedBottom)
+            {
+                _p.dirtyRectInPx.top = std::min(_p.dirtyRectInPx.top, clampedTop);
+                _p.dirtyRectInPx.bottom = std::max(_p.dirtyRectInPx.bottom, clampedBottom);
+            }
+
+            r->Clear(y, _p.s->font->cellSize.y);
         }
     }
 
@@ -515,17 +520,7 @@ void AtlasEngine::_handleSettingsUpdate()
         _recreateCellCountDependentResources();
     }
 
-    // !!! NOTE !!!
-    // This will indirectly mark the entire viewport as dirty, but AtlasEngine::_recreateBackend()
-    // may do the same, so make sure that that function stays in sync with what this code does.
     _api.invalidatedRows = invalidatedRowsAll;
-
-    u16 y = 0;
-    for (const auto r : _p.rows)
-    {
-        r->clear(y, _p.s->font->cellSize.y);
-        ++y;
-    }
 }
 
 void AtlasEngine::_recreateFontDependentResources()
diff --git a/src/renderer/atlas/AtlasEngine.r.cpp b/src/renderer/atlas/AtlasEngine.r.cpp
index 717516c9cf..543a103f7d 100644
--- a/src/renderer/atlas/AtlasEngine.r.cpp
+++ b/src/renderer/atlas/AtlasEngine.r.cpp
@@ -457,6 +457,17 @@ void AtlasEngine::_present()
         }
     }
 
-    THROW_IF_FAILED(_p.swapChain.swapChain->Present1(1, 0, &params));
+    if constexpr (Feature_AtlasEnginePresentFallback::IsEnabled())
+    {
+        if (FAILED_LOG(_p.swapChain.swapChain->Present1(1, 0, &params)))
+        {
+            THROW_IF_FAILED(_p.swapChain.swapChain->Present(1, 0));
+        }
+    }
+    else
+    {
+        THROW_IF_FAILED(_p.swapChain.swapChain->Present1(1, 0, &params));
+    }
+
     _p.swapChain.waitForPresentation = true;
 }
diff --git a/src/renderer/atlas/common.h b/src/renderer/atlas/common.h
index 42a02894fa..d038365e6d 100644
--- a/src/renderer/atlas/common.h
+++ b/src/renderer/atlas/common.h
@@ -429,7 +429,7 @@ namespace Microsoft::Console::Render::Atlas
 
     struct ShapedRow
     {
-        void clear(u16 y, u16 cellHeight) noexcept
+        void Clear(u16 y, u16 cellHeight) noexcept
        {
             mappings.clear();
             glyphIndices.clear();
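
The features.xml entry is consumed through the generated feature-flag header: the build turns each Feature_* element into a small class with a constexpr IsEnabled(), which is what the `if constexpr` in AtlasEngine.r.cpp dispatches on. A hand-written sketch of what that generated class amounts to; the TIL_FEATURE_* macro name is an assumption for illustration, only the IsEnabled() shape is taken from the diff above:

// Illustrative only: the real class is emitted by the feature-staging codegen,
// and the macro name below is a guess at its convention, not a real definition.
struct Feature_AtlasEnginePresentFallback
{
    static constexpr bool IsEnabled() noexcept
    {
#if defined(TIL_FEATURE_ATLASENGINEPRESENTFALLBACK_ENABLED)
        return true; // Release/WindowsInbox branding: prefer the quiet Present() fallback
#else
        return false; // dev builds: let Present1() failures surface loudly so they get fixed
#endif
    }
};

Read together with the XML, stage AlwaysDisabled plus Release/WindowsInbox under alwaysEnabledBrandingTokens presumably means stable and inbox builds get the fallback while development builds keep crashing on Present1() errors.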
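The StartPaint() changes scroll _p.rows by rotating pointers through _p.rowsScratch with two forward copies and a swap, so no row contents ever move. A self-contained sketch of that pattern under illustrative names (Row, scrollRows); the real code stores ShapedRow* and relies on the offset being pre-clamped and non-zero:

#include <algorithm>
#include <cstddef>
#include <utility>
#include <vector>

struct Row; // stand-in for ShapedRow; pointers to an incomplete type are fine here

void scrollRows(std::vector<Row*>& rows, std::vector<Row*>& scratch, ptrdiff_t offset)
{
    if (offset == 0)
    {
        return; // the engine only enters this path for a non-zero offset
    }

    scratch.resize(rows.size());

    if (offset < 0)
    {
        // Content moves up: rows[-offset..N) shift to the top,
        // the first -offset rows wrap to the bottom (and get invalidated).
        const auto dst = std::copy_n(rows.begin() - offset, rows.size() + offset, scratch.begin());
        std::copy_n(rows.begin(), -offset, dst);
    }
    else
    {
        // Content moves down: the last `offset` rows wrap to the top.
        const auto dst = std::copy_n(rows.end() - offset, offset, scratch.begin());
        std::copy_n(rows.begin(), rows.size() - offset, dst);
    }

    std::swap(rows, scratch);
}

The effect is the same as an in-place std::rotate, but the scratch-buffer form keeps both copies forward-only and leaves the wrapped-around rows intact for the later Clear()/repopulate pass.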
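The color-bitmap half of the scroll leans on memmove's well-defined handling of overlapping source and destination ranges, one plane at a time (foreground and background colors). A reduced sketch with hypothetical names, assuming |offset| <= rowCount and strides measured in u32 cells like colorBitmapRowStride:

#include <algorithm>
#include <cstdint>
#include <cstring>

void scrollBitmap(uint32_t* bitmap, ptrdiff_t rowStride, ptrdiff_t planeStride, ptrdiff_t rowCount, ptrdiff_t offset)
{
    // A negative offset scrolls up (copy toward lower addresses), a positive one
    // scrolls down; memmove handles the overlap in either direction.
    const auto srcOffset = std::max<ptrdiff_t>(0, -offset) * rowStride;
    const auto dstOffset = std::max<ptrdiff_t>(0, offset) * rowStride;
    const auto cells = rowCount * rowStride - std::max(srcOffset, dstOffset);

    for (ptrdiff_t plane = 0; plane < 2; ++plane)
    {
        auto base = bitmap + plane * planeStride;
        memmove(base + dstOffset, base + srcOffset, cells * sizeof(uint32_t));
    }
}

The patch additionally memcmp()s each plane first and only bumps the corresponding generation counter when the bytes actually changed, which is what keeps the GPU upload off the hot path.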
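The Present1()/Present() split exists because Present1() takes its dirty and scroll rects through DXGI_PRESENT_PARAMETERS and rejects degenerate input such as an empty scroll rect, while DirtyRectsCount = 0 with null pointers presents the whole frame, which is what the first frame after a settings change needs. A minimal sketch of that call pattern with the fallback; presentWithFallback and its parameters are hypothetical stand-ins, not code from this change:

#include <dxgi1_2.h>

HRESULT presentWithFallback(IDXGISwapChain1* swapChain, RECT dirtyRect, RECT scrollRect, POINT scrollOffset, bool scrolled)
{
    DXGI_PRESENT_PARAMETERS params{};
    params.DirtyRectsCount = 1;
    params.pDirtyRects = &dirtyRect;
    // Present1() fails on an empty scroll rect, which is why the patch zeroes the
    // scroll offset whenever the entire viewport is invalidated.
    params.pScrollRect = scrolled ? &scrollRect : nullptr;
    params.pScrollOffset = scrolled ? &scrollOffset : nullptr;

    const auto hr = swapChain->Present1(1, 0, &params);
    if (FAILED(hr))
    {
        // Fall back to a plain full-frame Present(), mirroring the
        // Feature_AtlasEnginePresentFallback branch in AtlasEngine::_present().
        return swapChain->Present(1, 0);
    }
    return hr;
}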