From b5e41f1e7efaaa558b4df831a350f4a6a04eb757 Mon Sep 17 00:00:00 2001 From: Nicolas Silva Date: Mon, 19 Oct 2020 20:07:54 +0000 Subject: [PATCH] Bug 1671289 - Improve WebRender's integrated profiler. r=gw In a (large-ish) nutshell: - Consolidate all counters under a single type. - Counters are all arranged in an array and referred to via index. - All counters can be displayed as average+max (float/int), graph, and change indicator. - Specify what to show and in what form via a pref. - All counters and visualizations support not having values every frame. - GPU time queries visualization is easier to read relative to the frame budget: - If the maximum value is under 16ms, the right side of the graph is fixed at 16ms. - If the maximum value is above 16ms, draw a vertical bar at 16ms. - Added a few new profile counters: - Total frame CPU time (from API send to the end of GPU command submission). - Visibility, Prepare, Batching and Glyph resolve times. The main change is how profile counters are represented. Instead of having different types for different visualizations, every counter is represented the same way, tracking average/max values over half a second and optionally recording a graph over a number of frames. Counters are stored in a vector and referred to via index (See constants at the top of profiler.rs). The main motivation for this storage is to facilitate adding counters without having to think too much about where to store them and how to pass them to the renderer. The profiler's UI is defined by a string with a single syntax: - Comma separated list of tokens (leading and trailing spaces ignored), which can be: - A counter name: - If prefixed with a '#' character, the counter is shown as a graph. - If prefixed with a '*' character, the counter is shown as a change indicator. - By default (counter name without prefix), the counter is shown as average and max over half a second. 
- A preset name: - A preset is a builtin UI string in the same syntax that can be nested in the main UI string. - Presets are defined towards the top of profiler.rs and can also refer to other presets. - An empty token adds a bit of vertical space. - A '|' token begins a new column. - A '_' token begins a new row. Differential Revision: https://phabricator.services.mozilla.com/D93603 --- gfx/config/gfxVars.h | 1 + gfx/layers/ipc/CompositorBridgeParent.cpp | 12 + gfx/layers/ipc/CompositorBridgeParent.h | 5 + gfx/layers/wr/WebRenderBridgeParent.cpp | 7 +- gfx/layers/wr/WebRenderBridgeParent.h | 1 + gfx/thebes/gfxPlatform.cpp | 20 +- gfx/webrender_bindings/RenderThread.cpp | 18 + gfx/webrender_bindings/RenderThread.h | 3 + gfx/webrender_bindings/RendererOGL.cpp | 5 + gfx/webrender_bindings/RendererOGL.h | 2 + gfx/webrender_bindings/WebRenderAPI.cpp | 4 + gfx/webrender_bindings/WebRenderAPI.h | 1 + gfx/webrender_bindings/src/bindings.rs | 8 + gfx/wr/examples/common/boilerplate.rs | 4 - gfx/wr/examples/yuv.rs | 1 - gfx/wr/webrender/src/api_resources.rs | 3 + gfx/wr/webrender/src/clip.rs | 1 + gfx/wr/webrender/src/filterdata.rs | 1 + gfx/wr/webrender/src/frame_builder.rs | 47 +- gfx/wr/webrender/src/glyph_rasterizer/mod.rs | 23 +- gfx/wr/webrender/src/gpu_cache.rs | 16 +- gfx/wr/webrender/src/intern.rs | 12 +- gfx/wr/webrender/src/internal_types.rs | 4 +- gfx/wr/webrender/src/prepare.rs | 2 +- gfx/wr/webrender/src/prim_store/backdrop.rs | 1 + gfx/wr/webrender/src/prim_store/borders.rs | 2 + gfx/wr/webrender/src/prim_store/gradient.rs | 3 + gfx/wr/webrender/src/prim_store/image.rs | 2 + gfx/wr/webrender/src/prim_store/line_dec.rs | 1 + gfx/wr/webrender/src/prim_store/mod.rs | 1 + gfx/wr/webrender/src/prim_store/picture.rs | 1 + gfx/wr/webrender/src/prim_store/text_run.rs | 1 + gfx/wr/webrender/src/profiler.rs | 3041 ++++++++---------- gfx/wr/webrender/src/render_api.rs | 14 + gfx/wr/webrender/src/render_backend.rs | 93 +- gfx/wr/webrender/src/renderer.rs | 803 +++-- 
gfx/wr/webrender/src/resource_cache.rs | 41 +- gfx/wr/webrender/src/scene_builder_thread.rs | 69 +- gfx/wr/webrender/src/texture_cache.rs | 50 +- gfx/wr/webrender_api/src/lib.rs | 47 +- gfx/wr/wrench/src/args.yaml | 4 + gfx/wr/wrench/src/main.rs | 12 +- modules/libpref/init/all.js | 5 +- 43 files changed, 2052 insertions(+), 2340 deletions(-) diff --git a/gfx/config/gfxVars.h b/gfx/config/gfxVars.h index 22c659f5fbd6..ed93eeb2fa84 100644 --- a/gfx/config/gfxVars.h +++ b/gfx/config/gfxVars.h @@ -53,6 +53,7 @@ class gfxVarReceiver; _(UseWebRenderOptimizedShaders, bool, false) \ _(UseWebRenderMultithreading, bool, false) \ _(UseWebRenderScissoredCacheClears, bool, true) \ + _(WebRenderProfilerUI, nsCString, nsCString()) \ _(WebglAllowCoreProfile, bool, true) \ _(WebglAllowWindowsNativeGl, bool, false) \ _(WebRenderMaxPartialPresentRects, int32_t, 0) \ diff --git a/gfx/layers/ipc/CompositorBridgeParent.cpp b/gfx/layers/ipc/CompositorBridgeParent.cpp index 9fc96d0335fe..536c4ea515b8 100644 --- a/gfx/layers/ipc/CompositorBridgeParent.cpp +++ b/gfx/layers/ipc/CompositorBridgeParent.cpp @@ -2044,6 +2044,7 @@ void CompositorBridgeParent::InitializeStatics() { &UpdateWebRenderMultithreading); gfxVars::SetWebRenderBatchingLookbackListener( &UpdateWebRenderBatchingParameters); + gfxVars::SetWebRenderProfilerUIListener(&UpdateWebRenderProfilerUI); } /*static*/ @@ -2122,6 +2123,17 @@ void CompositorBridgeParent::UpdateWebRenderBatchingParameters() { }); } +/*static*/ +void CompositorBridgeParent::UpdateWebRenderProfilerUI() { + if (!sIndirectLayerTreesLock) { + return; + } + MonitorAutoLock lock(*sIndirectLayerTreesLock); + ForEachWebRenderBridgeParent([&](WebRenderBridgeParent* wrBridge) -> void { + wrBridge->UpdateProfilerUI(); + }); +} + RefPtr CompositorBridgeParent::GetWebRenderBridgeParent() const { return mWrBridge; diff --git a/gfx/layers/ipc/CompositorBridgeParent.h b/gfx/layers/ipc/CompositorBridgeParent.h index 392392c28e35..e35c532545ba 100644 --- 
a/gfx/layers/ipc/CompositorBridgeParent.h +++ b/gfx/layers/ipc/CompositorBridgeParent.h @@ -735,6 +735,11 @@ class CompositorBridgeParent final : public CompositorBridgeParentBase, */ static void UpdateWebRenderBatchingParameters(); + /** + * Notify the compositor webrender profiler UI string has been updated. + */ + static void UpdateWebRenderProfilerUI(); + /** * Wrap the data structure to be sent over IPC. */ diff --git a/gfx/layers/wr/WebRenderBridgeParent.cpp b/gfx/layers/wr/WebRenderBridgeParent.cpp index eed2b7a88f95..ec15d49b9526 100644 --- a/gfx/layers/wr/WebRenderBridgeParent.cpp +++ b/gfx/layers/wr/WebRenderBridgeParent.cpp @@ -362,9 +362,9 @@ WebRenderBridgeParent::WebRenderBridgeParent( MOZ_ASSERT(!mCompositorScheduler); mCompositorScheduler = new CompositorVsyncScheduler(this, mWidget); } - UpdateDebugFlags(); UpdateQualitySettings(); + UpdateProfilerUI(); } WebRenderBridgeParent::WebRenderBridgeParent(const wr::PipelineId& aPipelineId, @@ -1500,6 +1500,11 @@ void WebRenderBridgeParent::UpdateDebugFlags() { mApi->UpdateDebugFlags(gfxVars::WebRenderDebugFlags()); } +void WebRenderBridgeParent::UpdateProfilerUI() { + nsCString uiString = gfxVars::GetWebRenderProfilerUIOrDefault(); + mApi->SetProfilerUI(uiString); +} + void WebRenderBridgeParent::UpdateMultithreading() { mApi->EnableMultithreading(gfxVars::UseWebRenderMultithreading()); } diff --git a/gfx/layers/wr/WebRenderBridgeParent.h b/gfx/layers/wr/WebRenderBridgeParent.h index bac646e15beb..bdc3562a9ad3 100644 --- a/gfx/layers/wr/WebRenderBridgeParent.h +++ b/gfx/layers/wr/WebRenderBridgeParent.h @@ -130,6 +130,7 @@ class WebRenderBridgeParent final : public PWebRenderBridgeParent, void UpdateDebugFlags(); void UpdateMultithreading(); void UpdateBatchingParameters(); + void UpdateProfilerUI(); mozilla::ipc::IPCResult RecvEnsureConnected( TextureFactoryIdentifier* aTextureFactoryIdentifier, diff --git a/gfx/thebes/gfxPlatform.cpp b/gfx/thebes/gfxPlatform.cpp index cab3568c2691..39945bc7962d 100644 
--- a/gfx/thebes/gfxPlatform.cpp +++ b/gfx/thebes/gfxPlatform.cpp @@ -570,6 +570,14 @@ void RecordingPrefChanged(const char* aPrefName, void* aClosure) { #define WR_DEBUG_PREF "gfx.webrender.debug" +static void WebRendeProfilerUIPrefChangeCallback(const char* aPrefName, void*) { + nsCString uiString; + if (NS_SUCCEEDED(Preferences::GetCString("gfx.webrender.debug.profiler-ui", + uiString))) { + gfxVars::SetWebRenderProfilerUI(uiString); + } +} + static void WebRenderDebugPrefChangeCallback(const char* aPrefName, void*) { wr::DebugFlags flags{0}; #define GFX_WEBRENDER_DEBUG(suffix, bit) \ @@ -584,18 +592,11 @@ static void WebRenderDebugPrefChangeCallback(const char* aPrefName, void*) { GFX_WEBRENDER_DEBUG(".gpu-sample-queries", wr::DebugFlags::GPU_SAMPLE_QUERIES) GFX_WEBRENDER_DEBUG(".disable-batching", wr::DebugFlags::DISABLE_BATCHING) GFX_WEBRENDER_DEBUG(".epochs", wr::DebugFlags::EPOCHS) - GFX_WEBRENDER_DEBUG(".compact-profiler", wr::DebugFlags::COMPACT_PROFILER) GFX_WEBRENDER_DEBUG(".smart-profiler", wr::DebugFlags::SMART_PROFILER) GFX_WEBRENDER_DEBUG(".echo-driver-messages", wr::DebugFlags::ECHO_DRIVER_MESSAGES) - GFX_WEBRENDER_DEBUG(".new-frame-indicator", - wr::DebugFlags::NEW_FRAME_INDICATOR) - GFX_WEBRENDER_DEBUG(".new-scene-indicator", - wr::DebugFlags::NEW_SCENE_INDICATOR) GFX_WEBRENDER_DEBUG(".show-overdraw", wr::DebugFlags::SHOW_OVERDRAW) GFX_WEBRENDER_DEBUG(".gpu-cache", wr::DebugFlags::GPU_CACHE_DBG) - GFX_WEBRENDER_DEBUG(".slow-frame-indicator", - wr::DebugFlags::SLOW_FRAME_INDICATOR) GFX_WEBRENDER_DEBUG(".texture-cache.clear-evicted", wr::DebugFlags::TEXTURE_CACHE_DBG_CLEAR_EVICTED) GFX_WEBRENDER_DEBUG(".picture-caching", wr::DebugFlags::PICTURE_CACHING_DBG) @@ -1385,6 +1386,8 @@ void gfxPlatform::ShutdownLayersIPC() { Preferences::UnregisterCallback(WebRenderDebugPrefChangeCallback, WR_DEBUG_PREF); + Preferences::UnregisterCallback(WebRendeProfilerUIPrefChangeCallback, + "gfx.webrender.debug.profiler-ui"); } } else { @@ -2796,6 +2799,9 @@ void 
gfxPlatform::InitWebRenderConfig() { Preferences::RegisterPrefixCallbackAndCall(WebRenderDebugPrefChangeCallback, WR_DEBUG_PREF); + Preferences::RegisterPrefixCallbackAndCall( + WebRendeProfilerUIPrefChangeCallback, + "gfx.webrender.debug.profiler-ui"); Preferences::RegisterCallback( WebRenderQualityPrefChangeCallback, nsDependentCString( diff --git a/gfx/webrender_bindings/RenderThread.cpp b/gfx/webrender_bindings/RenderThread.cpp index 113e29cbde4c..4aedf8fb608f 100644 --- a/gfx/webrender_bindings/RenderThread.cpp +++ b/gfx/webrender_bindings/RenderThread.cpp @@ -394,6 +394,24 @@ void RenderThread::SetClearColor(wr::WindowId aWindowId, wr::ColorF aColor) { } } +void RenderThread::SetProfilerUI(wr::WindowId aWindowId, nsCString aUI) { + if (mHasShutdown) { + return; + } + + if (!IsInRenderThread()) { + Loop()->PostTask(NewRunnableMethod( + "wr::RenderThread::SetProfilerUI", this, &RenderThread::SetProfilerUI, + aWindowId, aUI)); + return; + } + + auto it = mRenderers.find(aWindowId); + if (it != mRenderers.end()) { + it->second->SetProfilerUI(aUI); + } +} + void RenderThread::RunEvent(wr::WindowId aWindowId, UniquePtr aEvent) { if (!IsInRenderThread()) { diff --git a/gfx/webrender_bindings/RenderThread.h b/gfx/webrender_bindings/RenderThread.h index 9eeb4fb50fa2..a04c5d1fa2ac 100644 --- a/gfx/webrender_bindings/RenderThread.h +++ b/gfx/webrender_bindings/RenderThread.h @@ -179,6 +179,9 @@ class RenderThread final { /// Automatically forwarded to the render thread. void SetClearColor(wr::WindowId aWindowId, wr::ColorF aColor); + /// Automatically forwarded to the render thread. + void SetProfilerUI(wr::WindowId aWindowId, nsCString aUI); + /// Automatically forwarded to the render thread. 
void PipelineSizeChanged(wr::WindowId aWindowId, uint64_t aPipelineId, float aWidth, float aHeight); diff --git a/gfx/webrender_bindings/RendererOGL.cpp b/gfx/webrender_bindings/RendererOGL.cpp index 11f14f2554bd..e3cde8067294 100644 --- a/gfx/webrender_bindings/RendererOGL.cpp +++ b/gfx/webrender_bindings/RendererOGL.cpp @@ -436,5 +436,10 @@ void RendererOGL::AccumulateMemoryReport(MemoryReport* aReport) { aReport->swap_chain += swapChainSize; } +void RendererOGL::SetProfilerUI(const nsCString& aUI) { + wr_renderer_set_profiler_ui(GetRenderer(), (const uint8_t*)aUI.get(), + aUI.Length()); +} + } // namespace wr } // namespace mozilla diff --git a/gfx/webrender_bindings/RendererOGL.h b/gfx/webrender_bindings/RendererOGL.h index 66a9ab629298..48ce04846924 100644 --- a/gfx/webrender_bindings/RendererOGL.h +++ b/gfx/webrender_bindings/RendererOGL.h @@ -115,6 +115,8 @@ class RendererOGL { void AccumulateMemoryReport(MemoryReport* aReport); + void SetProfilerUI(const nsCString& aUI); + wr::Renderer* GetRenderer() { return mRenderer; } gl::GLContext* gl() const; diff --git a/gfx/webrender_bindings/WebRenderAPI.cpp b/gfx/webrender_bindings/WebRenderAPI.cpp index 7e3d1a300377..a40ef38ae4f1 100644 --- a/gfx/webrender_bindings/WebRenderAPI.cpp +++ b/gfx/webrender_bindings/WebRenderAPI.cpp @@ -542,6 +542,10 @@ void WebRenderAPI::SetClearColor(const gfx::DeviceColor& aColor) { RenderThread::Get()->SetClearColor(mId, ToColorF(aColor)); } +void WebRenderAPI::SetProfilerUI(const nsCString& aUIString) { + RenderThread::Get()->SetProfilerUI(mId, aUIString); +} + void WebRenderAPI::Pause() { class PauseEvent : public RendererEvent { public: diff --git a/gfx/webrender_bindings/WebRenderAPI.h b/gfx/webrender_bindings/WebRenderAPI.h index 7315c535a840..182e51e05f62 100644 --- a/gfx/webrender_bindings/WebRenderAPI.h +++ b/gfx/webrender_bindings/WebRenderAPI.h @@ -258,6 +258,7 @@ class WebRenderAPI final { void SetBatchingLookback(uint32_t aCount); void SetClearColor(const 
gfx::DeviceColor& aColor); + void SetProfilerUI(const nsCString& aUIString); void Pause(); bool Resume(); diff --git a/gfx/webrender_bindings/src/bindings.rs b/gfx/webrender_bindings/src/bindings.rs index 780df9ee50bc..c6334643285b 100644 --- a/gfx/webrender_bindings/src/bindings.rs +++ b/gfx/webrender_bindings/src/bindings.rs @@ -743,6 +743,14 @@ pub unsafe extern "C" fn wr_renderer_readback( renderer.read_pixels_into(FramebufferIntSize::new(width, height).into(), format, &mut slice); } +#[no_mangle] +pub unsafe extern "C" fn wr_renderer_set_profiler_ui(renderer: &mut Renderer, ui_str: *const u8, ui_str_len: usize) { + let slice = std::slice::from_raw_parts(ui_str, ui_str_len); + if let Ok(ui_str) = std::str::from_utf8(slice) { + renderer.set_profiler_ui(ui_str); + } +} + #[no_mangle] pub unsafe extern "C" fn wr_renderer_delete(renderer: *mut Renderer) { let renderer = Box::from_raw(renderer); diff --git a/gfx/wr/examples/common/boilerplate.rs b/gfx/wr/examples/common/boilerplate.rs index d5208ad9cda7..1693cceb7760 100644 --- a/gfx/wr/examples/common/boilerplate.rs +++ b/gfx/wr/examples/common/boilerplate.rs @@ -254,14 +254,10 @@ pub fn main_wrapper( winit::VirtualKeyCode::P => debug_flags.toggle(DebugFlags::PROFILER_DBG), winit::VirtualKeyCode::O => debug_flags.toggle(DebugFlags::RENDER_TARGET_DBG), winit::VirtualKeyCode::I => debug_flags.toggle(DebugFlags::TEXTURE_CACHE_DBG), - winit::VirtualKeyCode::S => debug_flags.toggle(DebugFlags::COMPACT_PROFILER), winit::VirtualKeyCode::T => debug_flags.toggle(DebugFlags::PICTURE_CACHING_DBG), winit::VirtualKeyCode::Q => debug_flags.toggle( DebugFlags::GPU_TIME_QUERIES | DebugFlags::GPU_SAMPLE_QUERIES ), - winit::VirtualKeyCode::F => debug_flags.toggle( - DebugFlags::NEW_FRAME_INDICATOR | DebugFlags::NEW_SCENE_INDICATOR - ), winit::VirtualKeyCode::G => debug_flags.toggle(DebugFlags::GPU_CACHE_DBG), winit::VirtualKeyCode::Key1 => txn.set_document_view( device_size.into(), diff --git a/gfx/wr/examples/yuv.rs 
b/gfx/wr/examples/yuv.rs index dabc1de467cb..e2b8d1efd61a 100644 --- a/gfx/wr/examples/yuv.rs +++ b/gfx/wr/examples/yuv.rs @@ -218,7 +218,6 @@ fn main() { }; let opts = webrender::RendererOptions { - debug_flags: webrender::DebugFlags::NEW_FRAME_INDICATOR | webrender::DebugFlags::NEW_SCENE_INDICATOR, ..Default::default() }; diff --git a/gfx/wr/webrender/src/api_resources.rs b/gfx/wr/webrender/src/api_resources.rs index 3c61207aa927..0a48858fc422 100644 --- a/gfx/wr/webrender/src/api_resources.rs +++ b/gfx/wr/webrender/src/api_resources.rs @@ -10,6 +10,7 @@ use crate::api::SharedFontInstanceMap; use crate::api::units::*; use crate::render_api::{ResourceUpdate, TransactionMsg, AddFont}; use crate::image_tiling::*; +use crate::profiler; use std::collections::HashMap; use std::sync::Arc; @@ -164,6 +165,8 @@ impl ApiResources { } let (rasterizer, requests) = self.create_blob_scene_builder_requests(&blobs_to_rasterize); + transaction.profile.set(profiler::RASTERIZED_BLOBS, blobs_to_rasterize.len()); + transaction.profile.set(profiler::RASTERIZED_BLOB_TILES, requests.len()); transaction.use_scene_builder_thread |= !requests.is_empty(); transaction.use_scene_builder_thread |= !transaction.scene_ops.is_empty(); transaction.blob_rasterizer = rasterizer; diff --git a/gfx/wr/webrender/src/clip.rs b/gfx/wr/webrender/src/clip.rs index d71992e1efb7..c10cd969a478 100644 --- a/gfx/wr/webrender/src/clip.rs +++ b/gfx/wr/webrender/src/clip.rs @@ -1400,6 +1400,7 @@ impl intern::Internable for ClipIntern { type Key = ClipItemKey; type StoreData = ClipNode; type InternData = ClipInternData; + const PROFILE_COUNTER: usize = crate::profiler::INTERNED_CLIPS; } #[derive(Debug, MallocSizeOf)] diff --git a/gfx/wr/webrender/src/filterdata.rs b/gfx/wr/webrender/src/filterdata.rs index 9907d3d73475..d399b2252e22 100644 --- a/gfx/wr/webrender/src/filterdata.rs +++ b/gfx/wr/webrender/src/filterdata.rs @@ -162,6 +162,7 @@ impl intern::Internable for FilterDataIntern { type Key = SFilterDataKey; type 
StoreData = SFilterDataTemplate; type InternData = (); + const PROFILE_COUNTER: usize = crate::profiler::INTERNED_FILTER_DATA; } fn push_component_transfer_data( diff --git a/gfx/wr/webrender/src/frame_builder.rs b/gfx/wr/webrender/src/frame_builder.rs index 2d26a29d4f21..fb99f864bca5 100644 --- a/gfx/wr/webrender/src/frame_builder.rs +++ b/gfx/wr/webrender/src/frame_builder.rs @@ -19,7 +19,7 @@ use crate::picture::{BackdropKind, SubpixelMode, TileCacheLogger, RasterConfig, use crate::prepare::prepare_primitives; use crate::prim_store::{PictureIndex, PrimitiveDebugId}; use crate::prim_store::{DeferredResolve}; -use crate::profiler::{FrameProfileCounters, TextureCacheProfileCounters, ResourceProfileCounters}; +use crate::profiler::{self, TransactionProfile}; use crate::render_backend::{DataStores, FrameStamp, FrameId, ScratchBuffer}; use crate::render_target::{RenderTarget, PictureCacheTarget, TextureCacheRenderTarget}; use crate::render_target::{RenderTargetContext, RenderTargetKind}; @@ -176,7 +176,6 @@ pub struct FrameBuildingContext<'a> { pub struct FrameBuildingState<'a> { pub render_tasks: &'a mut RenderTaskGraph, - pub profile_counters: &'a mut FrameProfileCounters, pub clip_store: &'a mut ClipStore, pub resource_cache: &'a mut ResourceCache, pub gpu_cache: &'a mut GpuCache, @@ -185,6 +184,7 @@ pub struct FrameBuildingState<'a> { pub surfaces: &'a mut Vec, pub dirty_region_stack: Vec, pub composite_state: &'a mut CompositeState, + pub num_visible_primitives: u32, } impl<'a> FrameBuildingState<'a> { @@ -248,17 +248,16 @@ impl FrameBuilder { resource_cache: &mut ResourceCache, gpu_cache: &mut GpuCache, render_tasks: &mut RenderTaskGraph, - profile_counters: &mut FrameProfileCounters, global_device_pixel_scale: DevicePixelScale, scene_properties: &SceneProperties, transform_palette: &mut TransformPalette, data_stores: &mut DataStores, scratch: &mut ScratchBuffer, debug_flags: DebugFlags, - texture_cache_profile: &mut TextureCacheProfileCounters, composite_state: 
&mut CompositeState, tile_cache_logger: &mut TileCacheLogger, tile_caches: &mut FastHashMap>, + profile: &mut TransactionProfile, ) -> Option { profile_scope!("build_layer_screen_rects_and_cull_layers"); @@ -335,6 +334,7 @@ impl FrameBuilder { { profile_scope!("UpdateVisibility"); profile_marker!("UpdateVisibility"); + profile.start_time(profiler::FRAME_VISIBILITY_TIME); let visibility_context = FrameVisibilityContext { global_device_pixel_scale, @@ -371,11 +371,14 @@ impl FrameBuilder { visibility_state.scratch.frame.clip_chain_stack = visibility_state.clip_chain_stack.take(); visibility_state.scratch.frame.surface_stack = visibility_state.surface_stack.take(); + + profile.end_time(profiler::FRAME_VISIBILITY_TIME); } + profile.start_time(profiler::FRAME_PREPARE_TIME); + let mut frame_state = FrameBuildingState { render_tasks, - profile_counters, clip_store: &mut scene.clip_store, resource_cache, gpu_cache, @@ -384,6 +387,7 @@ impl FrameBuilder { surfaces: &mut surfaces, dirty_region_stack: scratch.frame.dirty_region_stack.take(), composite_state, + num_visible_primitives: 0, }; frame_state @@ -453,6 +457,8 @@ impl FrameBuilder { ); frame_state.pop_dirty_region(); + profile.end_time(profiler::FRAME_PREPARE_TIME); + profile.set(profiler::VISIBLE_PRIMITIVES, frame_state.num_visible_primitives); scratch.frame.dirty_region_stack = frame_state.dirty_region_stack.take(); scratch.frame.surfaces = surfaces.take(); @@ -460,9 +466,11 @@ impl FrameBuilder { { profile_marker!("BlockOnResources"); - resource_cache.block_until_all_resources_added(gpu_cache, - render_tasks, - texture_cache_profile); + resource_cache.block_until_all_resources_added( + gpu_cache, + render_tasks, + profile, + ); } Some(root_render_task_id) @@ -478,7 +486,6 @@ impl FrameBuilder { layer: DocumentLayer, device_origin: DeviceIntPoint, pan: WorldPoint, - resource_profile: &mut ResourceProfileCounters, scene_properties: &SceneProperties, data_stores: &mut DataStores, scratch: &mut ScratchBuffer, @@ 
-487,15 +494,13 @@ impl FrameBuilder { tile_cache_logger: &mut TileCacheLogger, tile_caches: &mut FastHashMap>, dirty_rects_are_valid: bool, + profile: &mut TransactionProfile, ) -> Frame { profile_scope!("build"); profile_marker!("BuildFrame"); - let mut profile_counters = FrameProfileCounters::new(); - profile_counters - .total_primitives - .set(scene.prim_store.prim_count()); - resource_profile.picture_cache_slices.set(scene.tile_cache_config.picture_cache_slice_count); + profile.set(profiler::PRIMITIVES, scene.prim_store.prim_count()); + profile.set(profiler::PICTURE_CACHE_SLICES, scene.tile_cache_config.picture_cache_slice_count); resource_cache.begin_frame(stamp); gpu_cache.begin_frame(stamp); @@ -532,19 +537,20 @@ impl FrameBuilder { resource_cache, gpu_cache, &mut render_tasks, - &mut profile_counters, global_device_pixel_scale, scene_properties, &mut transform_palette, data_stores, scratch, debug_flags, - &mut resource_profile.texture_cache, &mut composite_state, tile_cache_logger, tile_caches, + profile, ); + profile.start_time(profiler::FRAME_BATCHING_TIME); + let mut passes; let mut deferred_resolves = vec![]; let mut has_texture_cache_tasks = false; @@ -611,11 +617,13 @@ impl FrameBuilder { } } - let gpu_cache_frame_id = gpu_cache.end_frame(&mut resource_profile.gpu_cache).frame_id(); + profile.end_time(profiler::FRAME_BATCHING_TIME); + + let gpu_cache_frame_id = gpu_cache.end_frame(profile).frame_id(); render_tasks.write_task_data(); *render_task_counters = render_tasks.counters(); - resource_cache.end_frame(&mut resource_profile.texture_cache); + resource_cache.end_frame(profile); self.prim_headers_prealloc.record_vec(&mut prim_headers.headers_int); self.composite_state_prealloc.record(&composite_state); @@ -627,7 +635,6 @@ impl FrameBuilder { scene.output_rect.size, ), layer, - profile_counters, passes, transform_palette: transform_palette.finish(), render_tasks, @@ -988,8 +995,6 @@ pub struct Frame { pub device_rect: DeviceIntRect, pub layer: 
DocumentLayer, pub passes: Vec, - #[cfg_attr(any(feature = "capture", feature = "replay"), serde(default = "FrameProfileCounters::new", skip))] - pub profile_counters: FrameProfileCounters, pub transform_palette: Vec, pub render_tasks: RenderTaskGraph, diff --git a/gfx/wr/webrender/src/glyph_rasterizer/mod.rs b/gfx/wr/webrender/src/glyph_rasterizer/mod.rs index 2eacd8657fe7..a9c6981c85d8 100644 --- a/gfx/wr/webrender/src/glyph_rasterizer/mod.rs +++ b/gfx/wr/webrender/src/glyph_rasterizer/mod.rs @@ -19,7 +19,7 @@ use crate::texture_cache::{TextureCache, TextureCacheHandle, Eviction}; use crate::gpu_cache::GpuCache; use crate::render_task_graph::RenderTaskGraph; use crate::render_task_cache::RenderTaskCache; -use crate::profiler::TextureCacheProfileCounters; +use crate::profiler::{self, TransactionProfile}; use malloc_size_of::{MallocSizeOf, MallocSizeOfOps}; use rayon::ThreadPool; use rayon::prelude::*; @@ -100,6 +100,7 @@ impl GlyphRasterizer { // Increment the total number of glyphs that are pending. This is used to determine // later whether to use worker threads for the remaining glyphs during resolve time. self.pending_glyph_count += 1; + self.glyph_request_count += 1; // Find a batch container for the font instance for this glyph. Use get_mut to avoid // cloning the font instance, since this is the common path. 
@@ -219,8 +220,10 @@ impl GlyphRasterizer { gpu_cache: &mut GpuCache, _: &mut RenderTaskCache, _: &mut RenderTaskGraph, - _: &mut TextureCacheProfileCounters, + profile: &mut TransactionProfile, ) { + profile.start_time(profiler::GLYPH_RESOLVE_TIME); + // Work around the borrow checker, since we call flush_glyph_requests below let mut pending_glyph_requests = mem::replace( &mut self.pending_glyph_requests, @@ -241,6 +244,11 @@ impl GlyphRasterizer { debug_assert_eq!(self.pending_glyph_count, 0); debug_assert!(self.pending_glyph_requests.is_empty()); + if self.glyph_request_count > 0 { + profile.set(profiler::RASTERIZED_GLYPHS, self.glyph_request_count); + self.glyph_request_count = 0; + } + profile_scope!("resolve_glyphs"); // Pull rasterized glyphs from the queue and update the caches. while self.pending_glyph_jobs > 0 { @@ -307,6 +315,8 @@ impl GlyphRasterizer { // Now that we are done with the critical path (rendering the glyphs), // we can schedule removing the fonts if needed. self.remove_dead_fonts(); + + profile.end_time(profiler::GLYPH_RESOLVE_TIME); } } @@ -936,6 +946,9 @@ pub struct GlyphRasterizer { /// The current number of glyph request jobs that have been kicked to worker threads. pending_glyph_jobs: usize, + /// The number of glyphs requested this frame. + glyph_request_count: usize, + /// A map of current glyph request batches. pending_glyph_requests: FastHashMap>, @@ -980,6 +993,7 @@ impl GlyphRasterizer { font_contexts: Arc::new(font_context), pending_glyph_jobs: 0, pending_glyph_count: 0, + glyph_request_count: 0, glyph_rx, glyph_tx, workers, @@ -1060,6 +1074,7 @@ impl GlyphRasterizer { //TODO: any signals need to be sent to the workers? 
self.pending_glyph_jobs = 0; self.pending_glyph_count = 0; + self.glyph_request_count = 0; self.fonts_to_remove.clear(); self.font_instances_to_remove.clear(); } @@ -1122,7 +1137,7 @@ mod test_glyph_rasterizer { use crate::gpu_cache::GpuCache; use crate::render_task_cache::RenderTaskCache; use crate::render_task_graph::{RenderTaskGraph, RenderTaskGraphCounters}; - use crate::profiler::TextureCacheProfileCounters; + use crate::profiler::TransactionProfile; use api::{FontKey, FontInstanceKey, FontSize, FontTemplate, FontRenderMode, IdNamespace, ColorU}; use api::units::DevicePoint; @@ -1193,7 +1208,7 @@ mod test_glyph_rasterizer { &mut gpu_cache, &mut render_task_cache, &mut render_task_tree, - &mut TextureCacheProfileCounters::new(), + &mut TransactionProfile::new(), ); } diff --git a/gfx/wr/webrender/src/gpu_cache.rs b/gfx/wr/webrender/src/gpu_cache.rs index bac61d976c72..da67f9df64c9 100644 --- a/gfx/wr/webrender/src/gpu_cache.rs +++ b/gfx/wr/webrender/src/gpu_cache.rs @@ -30,7 +30,7 @@ use api::IdNamespace; use api::units::*; use euclid::{HomogeneousVector, Rect}; use crate::internal_types::{FastHashMap, FastHashSet}; -use crate::profiler::GpuCacheProfileCounters; +use crate::profiler::{self, TransactionProfile}; use crate::render_backend::{FrameStamp, FrameId}; use crate::prim_store::VECS_PER_SEGMENT; use crate::renderer::MAX_VERTEX_TEXTURE_WIDTH; @@ -865,18 +865,12 @@ impl GpuCache { /// device specific cache texture. 
pub fn end_frame( &mut self, - profile_counters: &mut GpuCacheProfileCounters, + profile: &mut TransactionProfile, ) -> FrameStamp { profile_scope!("end_frame"); - profile_counters - .allocated_rows - .set(self.texture.rows.len()); - profile_counters - .allocated_blocks - .set(self.texture.allocated_block_count); - profile_counters - .saved_blocks - .set(self.saved_block_count); + profile.set(profiler::GPU_CACHE_ROWS_TOTAL, self.texture.rows.len()); + profile.set(profiler::GPU_CACHE_BLOCKS_TOTAL, self.texture.allocated_block_count); + profile.set(profiler::GPU_CACHE_BLOCKS_SAVED, self.saved_block_count); let reached_threshold = self.texture.rows.len() > (GPU_CACHE_INITIAL_HEIGHT as usize) && diff --git a/gfx/wr/webrender/src/intern.rs b/gfx/wr/webrender/src/intern.rs index 7633233ac496..5dd9c90600b5 100644 --- a/gfx/wr/webrender/src/intern.rs +++ b/gfx/wr/webrender/src/intern.rs @@ -35,12 +35,12 @@ use crate::internal_types::FastHashMap; use malloc_size_of::MallocSizeOf; -use crate::profiler::ResourceProfileCounter; use std::fmt::Debug; use std::hash::Hash; use std::marker::PhantomData; -use std::{mem, ops, u64}; +use std::{ops, u64}; use crate::util::VecHelper; +use crate::profiler::TransactionProfile; #[cfg_attr(feature = "capture", derive(Serialize))] #[cfg_attr(feature = "replay", derive(Deserialize))] @@ -166,7 +166,7 @@ impl DataStore { pub fn apply_updates( &mut self, update_list: UpdateList, - profile_counter: &mut ResourceProfileCounter, + profile: &mut TransactionProfile, ) { for insertion in update_list.insertions { self.items @@ -178,8 +178,7 @@ impl DataStore { self.items[removal.index] = None; } - let per_item_size = mem::size_of::() + mem::size_of::(); - profile_counter.set(self.items.len(), per_item_size * self.items.len()); + profile.set(I::PROFILE_COUNTER, self.items.len()); } } @@ -460,4 +459,7 @@ pub trait Internable: MallocSizeOf { type Key: Eq + Hash + Clone + Debug + MallocSizeOf + InternDebug + InternSerialize + for<'a> 
InternDeserialize<'a>; type StoreData: From + MallocSizeOf + InternSerialize + for<'a> InternDeserialize<'a>; type InternData: MallocSizeOf + InternSerialize + for<'a> InternDeserialize<'a>; + + // Profile counter indices, see the list in profiler.rs + const PROFILE_COUNTER: usize; } diff --git a/gfx/wr/webrender/src/internal_types.rs b/gfx/wr/webrender/src/internal_types.rs index 2bd8cccee4e5..e9da910f9d88 100644 --- a/gfx/wr/webrender/src/internal_types.rs +++ b/gfx/wr/webrender/src/internal_types.rs @@ -12,9 +12,9 @@ use crate::device::TextureFilter; use crate::renderer::PipelineInfo; use crate::gpu_cache::GpuCacheUpdateList; use crate::frame_builder::Frame; +use crate::profiler::TransactionProfile; use fxhash::FxHasher; use plane_split::BspSplitter; -use crate::profiler::BackendProfileCounters; use smallvec::SmallVec; use std::{usize, i32}; use std::collections::{HashMap, HashSet}; @@ -539,6 +539,7 @@ impl ResourceUpdateList { pub struct RenderedDocument { pub frame: Frame, pub is_new_scene: bool, + pub profile: TransactionProfile, } pub enum DebugOutput { @@ -565,7 +566,6 @@ pub enum ResultMsg { DocumentId, RenderedDocument, ResourceUpdateList, - BackendProfileCounters, ), AppendNotificationRequests(Vec), ForceRedraw, diff --git a/gfx/wr/webrender/src/prepare.rs b/gfx/wr/webrender/src/prepare.rs index 0a83dab9de83..ab6c4e151418 100644 --- a/gfx/wr/webrender/src/prepare.rs +++ b/gfx/wr/webrender/src/prepare.rs @@ -128,7 +128,7 @@ pub fn prepare_primitives( tile_cache_log, tile_caches, ) { - frame_state.profile_counters.visible_primitives.inc(); + frame_state.num_visible_primitives += 1; } else { prim_instance.clear_visibility(); } diff --git a/gfx/wr/webrender/src/prim_store/backdrop.rs b/gfx/wr/webrender/src/prim_store/backdrop.rs index ea033574fb21..c45bf78eef44 100644 --- a/gfx/wr/webrender/src/prim_store/backdrop.rs +++ b/gfx/wr/webrender/src/prim_store/backdrop.rs @@ -74,6 +74,7 @@ impl Internable for Backdrop { type Key = BackdropKey; type StoreData = 
BackdropTemplate; type InternData = (); + const PROFILE_COUNTER: usize = crate::profiler::INTERNED_BACKDROPS; } impl InternablePrimitive for Backdrop { diff --git a/gfx/wr/webrender/src/prim_store/borders.rs b/gfx/wr/webrender/src/prim_store/borders.rs index 084350c3357e..ad7bb6a239c6 100644 --- a/gfx/wr/webrender/src/prim_store/borders.rs +++ b/gfx/wr/webrender/src/prim_store/borders.rs @@ -145,6 +145,7 @@ impl intern::Internable for NormalBorderPrim { type Key = NormalBorderKey; type StoreData = NormalBorderTemplate; type InternData = (); + const PROFILE_COUNTER: usize = crate::profiler::INTERNED_NORMAL_BORDERS; } impl InternablePrimitive for NormalBorderPrim { @@ -318,6 +319,7 @@ impl intern::Internable for ImageBorder { type Key = ImageBorderKey; type StoreData = ImageBorderTemplate; type InternData = (); + const PROFILE_COUNTER: usize = crate::profiler::INTERNED_IMAGE_BORDERS; } impl InternablePrimitive for ImageBorder { diff --git a/gfx/wr/webrender/src/prim_store/gradient.rs b/gfx/wr/webrender/src/prim_store/gradient.rs index b7901d48b82f..da623cf21288 100644 --- a/gfx/wr/webrender/src/prim_store/gradient.rs +++ b/gfx/wr/webrender/src/prim_store/gradient.rs @@ -321,6 +321,7 @@ impl Internable for LinearGradient { type Key = LinearGradientKey; type StoreData = LinearGradientTemplate; type InternData = (); + const PROFILE_COUNTER: usize = crate::profiler::INTERNED_LINEAR_GRADIENTS; } impl InternablePrimitive for LinearGradient { @@ -550,6 +551,7 @@ impl Internable for RadialGradient { type Key = RadialGradientKey; type StoreData = RadialGradientTemplate; type InternData = (); + const PROFILE_COUNTER: usize = crate::profiler::INTERNED_RADIAL_GRADIENTS; } impl InternablePrimitive for RadialGradient { @@ -769,6 +771,7 @@ impl Internable for ConicGradient { type Key = ConicGradientKey; type StoreData = ConicGradientTemplate; type InternData = (); + const PROFILE_COUNTER: usize = crate::profiler::INTERNED_CONIC_GRADIENTS; } impl InternablePrimitive for 
ConicGradient { diff --git a/gfx/wr/webrender/src/prim_store/image.rs b/gfx/wr/webrender/src/prim_store/image.rs index de7b2220e444..ce3db2e7a577 100644 --- a/gfx/wr/webrender/src/prim_store/image.rs +++ b/gfx/wr/webrender/src/prim_store/image.rs @@ -288,6 +288,7 @@ impl Internable for Image { type Key = ImageKey; type StoreData = ImageTemplate; type InternData = (); + const PROFILE_COUNTER: usize = crate::profiler::INTERNED_IMAGES; } impl InternablePrimitive for Image { @@ -460,6 +461,7 @@ impl Internable for YuvImage { type Key = YuvImageKey; type StoreData = YuvImageTemplate; type InternData = (); + const PROFILE_COUNTER: usize = crate::profiler::INTERNED_YUV_IMAGES; } impl InternablePrimitive for YuvImage { diff --git a/gfx/wr/webrender/src/prim_store/line_dec.rs b/gfx/wr/webrender/src/prim_store/line_dec.rs index 517fb2200f26..1889da1eee24 100644 --- a/gfx/wr/webrender/src/prim_store/line_dec.rs +++ b/gfx/wr/webrender/src/prim_store/line_dec.rs @@ -126,6 +126,7 @@ impl intern::Internable for LineDecoration { type Key = LineDecorationKey; type StoreData = LineDecorationTemplate; type InternData = (); + const PROFILE_COUNTER: usize = crate::profiler::INTERNED_LINE_DECORATIONS; } impl InternablePrimitive for LineDecoration { diff --git a/gfx/wr/webrender/src/prim_store/mod.rs b/gfx/wr/webrender/src/prim_store/mod.rs index 2a9f3da17699..d6f9472c3a2e 100644 --- a/gfx/wr/webrender/src/prim_store/mod.rs +++ b/gfx/wr/webrender/src/prim_store/mod.rs @@ -577,6 +577,7 @@ impl intern::Internable for PrimitiveKeyKind { type Key = PrimitiveKey; type StoreData = PrimitiveTemplate; type InternData = (); + const PROFILE_COUNTER: usize = crate::profiler::INTERNED_PRIMITIVES; } impl InternablePrimitive for PrimitiveKeyKind { diff --git a/gfx/wr/webrender/src/prim_store/picture.rs b/gfx/wr/webrender/src/prim_store/picture.rs index 2cc77fe05b9e..d0815cdac877 100644 --- a/gfx/wr/webrender/src/prim_store/picture.rs +++ b/gfx/wr/webrender/src/prim_store/picture.rs @@ -277,6 +277,7 @@ 
impl Internable for Picture { type Key = PictureKey; type StoreData = PictureTemplate; type InternData = (); + const PROFILE_COUNTER: usize = crate::profiler::INTERNED_PICTURES; } impl InternablePrimitive for Picture { diff --git a/gfx/wr/webrender/src/prim_store/text_run.rs b/gfx/wr/webrender/src/prim_store/text_run.rs index 1360b42e2af5..a20cd1d27671 100644 --- a/gfx/wr/webrender/src/prim_store/text_run.rs +++ b/gfx/wr/webrender/src/prim_store/text_run.rs @@ -151,6 +151,7 @@ impl intern::Internable for TextRun { type Key = TextRunKey; type StoreData = TextRunTemplate; type InternData = (); + const PROFILE_COUNTER: usize = crate::profiler::INTERNED_TEXT_RUNS; } impl InternablePrimitive for TextRun { diff --git a/gfx/wr/webrender/src/profiler.rs b/gfx/wr/webrender/src/profiler.rs index e39947ceba33..e54e7153bc4f 100644 --- a/gfx/wr/webrender/src/profiler.rs +++ b/gfx/wr/webrender/src/profiler.rs @@ -2,74 +2,1058 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ +//! # Overlay profiler +//! +//! ## Profiler UI string syntax +//! +//! Comma-separated list of tokens with trailing and leading spaces trimmed. +//! Each token can be: +//! - A counter name with an optional prefix. The name corresponds to the displayed name (see the +//! counters vector below). +//! - By default (no prefix) the counter is shown as average + max over half a second. +//! - With a '#' prefix the counter is shown as a graph. +//! - With a '*' prefix the counter is shown as a change indicator. +//! - Some special counters such as GPU time queries have specific visualizations ignoring prefixes. +//! - A preset name to append the preset to the UI (see PROFILER_PRESETS). +//! - An empty token to insert a bit of vertical space. +//! - A '|' token to start a new column. +//! - A '_' token to start a new row.
+ use api::{ColorF, ColorU}; use crate::debug_render::DebugRenderer; -use crate::device::query::{GpuSampler, GpuTimer}; +use crate::device::query::GpuTimer; use euclid::{Point2D, Rect, Size2D, vec2, default}; use crate::internal_types::FastHashMap; use crate::renderer::{MAX_VERTEX_TEXTURE_WIDTH, wr_has_been_initialized}; +use api::units::DeviceIntSize; use std::collections::vec_deque::VecDeque; -use std::{f32, mem}; +use std::fmt::{Write, Debug}; +use std::f32; use std::ffi::CStr; use std::ops::Range; use std::time::Duration; use time::precise_time_ns; -pub mod expected { - use std::ops::Range; - pub const AVG_BACKEND_CPU_TIME: Range = 0.0..3.0; - pub const MAX_BACKEND_CPU_TIME: Range = 0.0..6.0; - pub const AVG_RENDERER_CPU_TIME: Range = 0.0..5.0; - pub const MAX_RENDERER_CPU_TIME: Range = 0.0..10.0; - pub const AVG_IPC_TIME: Range = 0.0..2.0; - pub const MAX_IPC_TIME: Range = 0.0..4.0; - pub const AVG_GPU_TIME: Range = 0.0..8.0; - pub const MAX_GPU_TIME: Range = 0.0..15.0; - pub const DRAW_CALLS: Range = 1..100; - pub const VERTICES: Range = 10..25_000; - pub const TOTAL_PRIMITIVES: Range = 1..5000; - pub const VISIBLE_PRIMITIVES: Range = 1..5000; - pub const USED_TARGETS: Range = 1..4; - pub const COLOR_PASSES: Range = 1..4; - pub const ALPHA_PASSES: Range = 0..3; - pub const RENDERED_PICTURE_CACHE_TILES: Range = 0..5; - pub const TOTAL_PICTURE_CACHE_TILES: Range = 0..15; - pub const CREATED_TARGETS: Range = 0..3; - pub const CHANGED_TARGETS: Range = 0..3; - pub const TEXTURE_DATA_UPLOADED: Range = 0..10; - pub const GPU_CACHE_ROWS_TOTAL: Range = 1..50; - pub const GPU_CACHE_ROWS_UPDATED: Range = 0..25; - pub const GPU_CACHE_BLOCKS_TOTAL: Range = 1..65_000; - pub const GPU_CACHE_BLOCKS_UPDATED: Range = 0..1000; - pub const GPU_CACHE_BLOCKS_SAVED: Range = 0..50_000; - pub const DISPLAY_LIST_BUILD_TIME: Range = 0.0..3.0; - pub const MAX_SCENE_BUILD_TIME: Range = 0.0..3.0; - pub const DISPLAY_LIST_SEND_TIME: Range = 0.0..1.0; - pub const DISPLAY_LIST_TOTAL_TIME: 
Range = 0.0..4.0; - pub const NUM_FONT_TEMPLATES: Range = 0..50; - pub const FONT_TEMPLATES_MB: Range = 0.0..40.0; - pub const NUM_IMAGE_TEMPLATES: Range = 0..20; - pub const IMAGE_TEMPLATES_MB: Range = 0.0..10.0; - pub const DISPLAY_LIST_MB: Range = 0.0..0.2; - pub const NUM_RASTERIZED_BLOBS: Range = 0..25; // in tiles - pub const RASTERIZED_BLOBS_MB: Range = 0.0..4.0; +macro_rules! set_text { + ($dst:expr, $($arg:tt)*) => { + $dst.clear(); + write!($dst, $($arg)*).unwrap(); + }; } const GRAPH_WIDTH: f32 = 1024.0; const GRAPH_HEIGHT: f32 = 320.0; const GRAPH_PADDING: f32 = 8.0; const GRAPH_FRAME_HEIGHT: f32 = 16.0; -const PROFILE_PADDING: f32 = 8.0; +const PROFILE_SPACING: f32 = 15.0; +const PROFILE_PADDING: f32 = 10.0; +const BACKGROUND_COLOR: ColorU = ColorU { r: 20, g: 20, b: 20, a: 220 }; -const ONE_SECOND_NS: u64 = 1000000000; -const AVERAGE_OVER_NS: u64 = ONE_SECOND_NS / 2; +const ONE_SECOND_NS: u64 = 1_000_000_000; -#[derive(Copy, Clone, Debug, PartialEq)] -pub enum ProfileStyle { - Full, - Compact, - Smart, - NoDraw, +const fn float(name: &'static str, unit: &'static str, index: usize, expected: Expected) -> CounterDescriptor { + CounterDescriptor { name, unit, show_as: ShowAs::Float, index, expected } +} + +const fn int(name: &'static str, unit: &'static str, index: usize, expected: Expected) -> CounterDescriptor { + CounterDescriptor { name, unit, show_as: ShowAs::Int, index, expected: expected.into_float() } +} + +// Not in the list below: +// - "GPU time queries" shows the details of the GPU time queries if selected as a graph. +// - "GPU cache bars" shows some info about the GPU cache. 
+pub static PORFILE_COUNTERS: &'static[CounterDescriptor] = &[ + float("Frame building", "ms", FRAME_BUILDING_TIME, expected(0.0..6.0).avg(0.0..3.0)), + + float("Visibility", "ms", FRAME_VISIBILITY_TIME, expected(0.0..3.0).avg(0.0..2.0)), + float("Prepare", "ms", FRAME_PREPARE_TIME, expected(0.0..3.0).avg(0.0..2.0)), + float("Batching", "ms", FRAME_BATCHING_TIME, expected(0.0..3.0).avg(0.0..2.0)), + + float("Renderer", "ms", RENDERER_TIME, expected(0.0..8.0).avg(0.0..5.0)), + float("Frame CPU total", "ms", TOTAL_FRAME_CPU_TIME, expected(0.0..15.0).avg(0.0..6.0)), + float("GPU", "ms", GPU_TIME, expected(0.0..15.0).avg(0.0..8.0)), + + float("Content send", "ms", CONTENT_SEND_TIME, expected(0.0..1.0).avg(0.0..1.0)), + float("API send", "ms", API_SEND_TIME, expected(0.0..1.0).avg(0.0..0.4)), + float("DisplayList", "ms", DISPLAY_LIST_BUILD_TIME, expected(0.0..5.0).avg(0.0..3.0)), + float("DisplayList mem", "MB", DISPLAY_LIST_MEM, expected(0.0..20.0)), + float("Scene building", "ms", SCENE_BUILD_TIME, expected(0.0..4.0).avg(0.0..3.0)), + + int("Rasterized blobs", "", RASTERIZED_BLOBS, expected(0..15)), + int("Rasterized blob tiles", "", RASTERIZED_BLOB_TILES, expected(0..15)), + int("Rasterized blob pixels", "px", RASTERIZED_BLOBS_PX, expected(0..300_000)), + float("Blob rasterization", "ms", BLOB_RASTERIZATION_TIME, expected(0.0..8.0)), + + int("Rasterized glyphs", "", RASTERIZED_GLYPHS, expected(0..15)), + float("Glyph resolve", "ms", GLYPH_RESOLVE_TIME, expected(0.0..4.0)), + + int("Draw calls", "", DRAW_CALLS, expected(1..120).avg(1..90)), + int("Vertices", "", VERTICES, expected(10..5000)), + int("Primitives", "", PRIMITIVES, expected(10..5000)), + int("Visible primitives", "", VISIBLE_PRIMITIVES, expected(1..5000)), + + int("Used targets", "", USED_TARGETS, expected(1..4)), + int("Created targets", "", CREATED_TARGETS, expected(0..3)), + int("Picture cache slices", "", PICTURE_CACHE_SLICES, expected(0..5)), + + int("Color passes", "", COLOR_PASSES, expected(1..4)), 
+ int("Alpha passes", "", ALPHA_PASSES, expected(0..3)), + int("Picture tiles", "", PICTURE_TILES, expected(0..15)), + float("Picture tiles mem", "MB", PICTURE_TILES_MEM, expected(0.0..150.0)), + int("Rendered picture tiles", "", RENDERED_PICTURE_TILES, expected(0..5)), + int("Texture uploads", "", TEXTURE_UPLOADS, expected(0..10)), + float("Texture uploads mem", "MB", TEXTURE_UPLOADS_MEM, expected(0.0..10.0)), + + int("Font templates", "", FONT_TEMPLATES, expected(0..40)), + float("Font templates mem", "MB", FONT_TEMPLATES_MEM, expected(0.0..20.0)), + int("Image templates", "", IMAGE_TEMPLATES, expected(0..100)), + float("Image templates mem", "MB", IMAGE_TEMPLATES_MEM, expected(0.0..50.0)), + + int("GPU cache rows total", "", GPU_CACHE_ROWS_TOTAL, expected(1..50)), + int("GPU cache rows updated", "", GPU_CACHE_ROWS_UPDATED, expected(0..25)), + int("GPU blocks total", "", GPU_CACHE_BLOCKS_TOTAL, expected(1..65_000)), + int("GPU blocks updated", "", GPU_CACHE_BLOCKS_UPDATED, expected(0..1000)), + int("GPU blocks saved", "", GPU_CACHE_BLOCKS_SAVED, expected(0..50_000)), + + int("Texture cache A8 regions", "", TEXTURE_CACHE_A8_REGIONS, expected(0..100)), + float("Texture cache A8 mem", "MB", TEXTURE_CACHE_A8_MEM, expected(0.0..100.0)), + int("Texture cache A16 regions", "", TEXTURE_CACHE_A16_REGIONS, expected(0..100)), + float("Texture cache A16 mem", "MB", TEXTURE_CACHE_A16_MEM, expected(0.0..100.0)), + int("Texture cache RGBA8 linear regions", "", TEXTURE_CACHE_RGBA8_LINEAR_REGIONS, expected(0..100)), + float("Texture cache RGBA8 linear mem", "MB", TEXTURE_CACHE_RGBA8_LINEAR_MEM, expected(0.0..100.0)), + int("Texture cache RGBA8 nearest regions", "", TEXTURE_CACHE_RGBA8_NEAREST_REGIONS, expected(0..100)), + float("Texture cache RGBA8 nearest mem", "MB", TEXTURE_CACHE_RGBA8_NEAREST_MEM, expected(0.0..100.0)), + float("Texture cache shared mem", "", TEXTURE_CACHE_SHARED_MEM, expected(0.0..100.0)), + float("Texture cache standalone mem", "MB", 
TEXTURE_CACHE_STANDALONE_MEM, expected(0.0..100.0)), + + + float("Slow frame", "", SLOW_FRAME, expected(0.0..0.0)), + float("Slow transaction", "", SLOW_TXN, expected(0.0..0.0)), + + float("GPU cache upload", "ms", GPU_CACHE_UPLOAD_TIME, expected(0.0..2.0)), + float("Texture cache upload", "ms", TEXTURE_CACHE_UPLOAD_TIME, expected(0.0..3.0)), + + float("Frame", "ms", FRAME_TIME, Expected::none()), + + float("Alpha targets samplers", "%", ALPHA_TARGETS_SAMPLERS, Expected::none()), + float("Transparent pass samplers", "%", TRANSPARENT_PASS_SAMPLERS, Expected::none()), + float("Opaque pass samplers", "%", OPAQUE_PASS_SAMPLERS, Expected::none()), + float("Total samplers", "%", TOTAL_SAMPLERS, Expected::none()), + + int("Interned primitives", "", INTERNED_PRIMITIVES, Expected::none()), + int("Interned clips", "", INTERNED_CLIPS, Expected::none()), + int("Interned text runs", "", INTERNED_TEXT_RUNS, Expected::none()), + int("Interned normal borders", "", INTERNED_NORMAL_BORDERS, Expected::none()), + int("Interned image borders", "", INTERNED_IMAGE_BORDERS, Expected::none()), + int("Interned images", "", INTERNED_IMAGES, Expected::none()), + int("Interned YUV images", "", INTERNED_YUV_IMAGES, Expected::none()), + int("Interned line decorations", "", INTERNED_LINE_DECORATIONS, Expected::none()), + int("Interned linear gradients", "", INTERNED_LINEAR_GRADIENTS, Expected::none()), + int("Interned radial gradients", "", INTERNED_RADIAL_GRADIENTS, Expected::none()), + int("Interned conic gradients", "", INTERNED_CONIC_GRADIENTS, Expected::none()), + int("Interned pictures", "", INTERNED_PICTURES, Expected::none()), + int("Interned filter data", "", INTERNED_FILTER_DATA, Expected::none()), + int("Interned backdrops", "", INTERNED_BACKDROPS, Expected::none()), +]; + +/// Profiler UI string presets. Defined in the profiler UI string syntax, can contain other presets. 
+static PROFILER_PRESETS: &'static[(&'static str, &'static str)] = &[ + (&"Transaction times", &"DisplayList,Scene building,Content send,API send"), + (&"Frame times", &"Frame CPU total,Frame building,Visibility,Prepare,Batching,Glyph resolve,Renderer,GPU"), + (&"Frame stats", &"Primitives,Visible primitives,Draw calls,Vertices,Color passes,Alpha passes,Rendered picture tiles,Rasterized glyphs"), + (&"Time graphs", &"#DisplayList,#Scene building,#Blob rasterization, ,#Frame CPU total,#Frame building,#Renderer,#Texture cache upload, ,#GPU"), + (&"Memory", &"Image templates,Image templates mem,Font templates,Font templates mem,DisplayList mem,Picture tiles mem"), + (&"GPU samplers", &"Alpha targets samplers,Transparent pass samplers,Opaque pass samplers,Total samplers"), + (&"Interners", "Interned primitives,Interned clips,Interned pictures,Interned text runs,Interned normal borders,Interned image borders,Interned images,Interned YUV images,Interned line decorations,Interned linear gradients,Interned radial gradients,Interned conic gradients,Interned filter data,Interned backdrops"), + (&"Slow indicators", &"*Slow transaction,*Slow frame"), + (&"Compact", &"FPS, ,Frame times, ,Frame stats"), + (&"Default", &"FPS,|,Slow indicators,_,Time graphs,|,Frame times, ,Transaction times, ,Frame stats, ,Memory, ,Interners,_,GPU time queries"), +]; + +fn find_preset(name: &str) -> Option<&'static str> { + for preset in PROFILER_PRESETS { + if preset.0 == name { + return Some(preset.1); + } + } + + None +} + +// The indices here must match the order of the `PORFILE_COUNTERS` array (checked by a debug assertion in `Profiler::new`).
+pub const FRAME_BUILDING_TIME: usize = 0; +pub const FRAME_VISIBILITY_TIME: usize = 1; +pub const FRAME_PREPARE_TIME: usize = 2; +pub const FRAME_BATCHING_TIME: usize = 3; + +pub const RENDERER_TIME: usize = 4; +pub const TOTAL_FRAME_CPU_TIME: usize = 5; +pub const GPU_TIME: usize = 6; + +pub const CONTENT_SEND_TIME: usize = 7; +pub const API_SEND_TIME: usize = 8; + +pub const DISPLAY_LIST_BUILD_TIME: usize = 9; +pub const DISPLAY_LIST_MEM: usize = 10; + +pub const SCENE_BUILD_TIME: usize = 11; + +pub const RASTERIZED_BLOBS: usize = 12; +pub const RASTERIZED_BLOB_TILES: usize = 13; +pub const RASTERIZED_BLOBS_PX: usize = 14; +pub const BLOB_RASTERIZATION_TIME: usize = 15; + +pub const RASTERIZED_GLYPHS: usize = 16; +pub const GLYPH_RESOLVE_TIME: usize = 17; + +pub const DRAW_CALLS: usize = 18; +pub const VERTICES: usize = 19; +pub const PRIMITIVES: usize = 20; +pub const VISIBLE_PRIMITIVES: usize = 21; + +pub const USED_TARGETS: usize = 22; +pub const CREATED_TARGETS: usize = 23; +pub const PICTURE_CACHE_SLICES: usize = 24; + +pub const COLOR_PASSES: usize = 25; +pub const ALPHA_PASSES: usize = 26; +pub const PICTURE_TILES: usize = 27; +pub const PICTURE_TILES_MEM: usize = 28; +pub const RENDERED_PICTURE_TILES: usize = 29; +pub const TEXTURE_UPLOADS: usize = 30; +pub const TEXTURE_UPLOADS_MEM: usize = 31; + +pub const FONT_TEMPLATES: usize = 32; +pub const FONT_TEMPLATES_MEM: usize = 33; +pub const IMAGE_TEMPLATES: usize = 34; +pub const IMAGE_TEMPLATES_MEM: usize = 35; + +pub const GPU_CACHE_ROWS_TOTAL: usize = 36; +pub const GPU_CACHE_ROWS_UPDATED: usize = 37; +pub const GPU_CACHE_BLOCKS_TOTAL: usize = 38; +pub const GPU_CACHE_BLOCKS_UPDATED: usize = 39; +pub const GPU_CACHE_BLOCKS_SAVED: usize = 40; + +pub const TEXTURE_CACHE_A8_REGIONS: usize = 41; +pub const TEXTURE_CACHE_A8_MEM: usize = 42; +pub const TEXTURE_CACHE_A16_REGIONS: usize = 43; +pub const TEXTURE_CACHE_A16_MEM: usize = 44; +pub const TEXTURE_CACHE_RGBA8_LINEAR_REGIONS: usize = 45; +pub const 
TEXTURE_CACHE_RGBA8_LINEAR_MEM: usize = 46; +pub const TEXTURE_CACHE_RGBA8_NEAREST_REGIONS: usize = 47; +pub const TEXTURE_CACHE_RGBA8_NEAREST_MEM: usize = 48; +pub const TEXTURE_CACHE_SHARED_MEM: usize = 49; +pub const TEXTURE_CACHE_STANDALONE_MEM: usize = 50; + +pub const SLOW_FRAME: usize = 51; +pub const SLOW_TXN: usize = 52; + +pub const GPU_CACHE_UPLOAD_TIME: usize = 53; +pub const TEXTURE_CACHE_UPLOAD_TIME: usize = 54; + +pub const FRAME_TIME: usize = 55; + +pub const ALPHA_TARGETS_SAMPLERS: usize = 56; +pub const TRANSPARENT_PASS_SAMPLERS: usize = 57; +pub const OPAQUE_PASS_SAMPLERS: usize = 58; +pub const TOTAL_SAMPLERS: usize = 59; + +pub const INTERNED_PRIMITIVES: usize = 60; +pub const INTERNED_CLIPS: usize = 61; +pub const INTERNED_TEXT_RUNS: usize = 62; +pub const INTERNED_NORMAL_BORDERS: usize = 63; +pub const INTERNED_IMAGE_BORDERS: usize = 64; +pub const INTERNED_IMAGES: usize = 65; +pub const INTERNED_YUV_IMAGES: usize = 66; +pub const INTERNED_LINE_DECORATIONS: usize = 67; +pub const INTERNED_LINEAR_GRADIENTS: usize = 68; +pub const INTERNED_RADIAL_GRADIENTS: usize = 69; +pub const INTERNED_CONIC_GRADIENTS: usize = 70; +pub const INTERNED_PICTURES: usize = 71; +pub const INTERNED_FILTER_DATA: usize = 72; +pub const INTERNED_BACKDROPS: usize = 73; + +pub const NUM_PROFILER_EVENTS: usize = 74; + +pub struct Profiler { + counters: Vec, + gpu_frames: GpuFrameCollection, + + start: u64, + avg_over_period: u64, + num_graph_samples: usize, + + ui: Vec, +} + +impl Profiler { + pub fn new() -> Self { + + let mut counters = Vec::with_capacity(PORFILE_COUNTERS.len()); + + for (idx, descriptor) in PORFILE_COUNTERS.iter().enumerate() { + debug_assert_eq!(descriptor.index, idx); + counters.push(Counter::new(descriptor)); + } + + Profiler { + gpu_frames: GpuFrameCollection::new(), + + counters, + start: precise_time_ns(), + avg_over_period: ONE_SECOND_NS / 2, + + num_graph_samples: 500, // Would it be useful to control this via a pref? 
+ ui: Vec::new(), + } + } + + /// Sum a few counters and if the total amount is larger than a threshold, update + /// a specific counter. + /// + /// This is useful to monitor slow frame and slow transactions. + fn update_slow_event(&mut self, dst_counter: usize, counters: &[usize], threshold: f64) { + let mut total = 0.0; + for &counter in counters { + if self.counters[counter].value.is_finite() { + total += self.counters[counter].value; + } + } + + if total > threshold { + self.counters[dst_counter].set(total); + } + } + + // Call at the end of every frame, after setting the counter values and before drawing the counters. + pub fn update(&mut self) { + let now = precise_time_ns(); + let update_avg = (now - self.start) > self.avg_over_period; + if update_avg { + self.start = now; + } + + self.update_slow_event( + SLOW_FRAME, + &[TOTAL_FRAME_CPU_TIME], + 15.0, + ); + self.update_slow_event( + SLOW_TXN, + &[DISPLAY_LIST_BUILD_TIME, CONTENT_SEND_TIME, SCENE_BUILD_TIME], + 80.0 + ); + + for counter in &mut self.counters { + counter.update(update_avg); + } + } + + pub fn set_gpu_time_queries(&mut self, gpu_queries: Vec) { + let mut gpu_time_ns = 0; + for sample in &gpu_queries { + gpu_time_ns += sample.time_ns; + } + + self.gpu_frames.push(gpu_time_ns, gpu_queries); + + self.counters[GPU_TIME].set_f64(ns_to_ms(gpu_time_ns)); + } + + // Find the index of a counter by its name. + pub fn index_of(&self, name: &str) -> Option { + self.counters.iter().position(|counter| counter.name == name) + } + + // Define the profiler UI, see comment about the syntax at the top of this file. 
+ pub fn set_ui(&mut self, names: &str) { + let mut selection = Vec::new(); + + self.append_to_ui(&mut selection, names); + + if selection == self.ui { + return; + } + + for counter in &mut self.counters { + counter.disable_graph(); + } + + for item in &selection { + if let Item::Graph(idx) = item { + self.counters[*idx].enable_graph(self.num_graph_samples); + } + } + + self.ui = selection; + } + + fn append_to_ui(&mut self, selection: &mut Vec, names: &str) { + // Group successive counters together. + fn flush_counters(counters: &mut Vec, selection: &mut Vec) { + if !counters.is_empty() { + selection.push(Item::Counters(std::mem::take(counters))) + } + } + + let mut counters = Vec::new(); + + for name in names.split(",") { + let name = name.trim(); + let is_graph = name.starts_with("#"); + let is_indicator = name.starts_with("*"); + let name = if is_graph || is_indicator { + &name[1..] + } else { + name + }; + // See comment about the ui string syntax at the top of this file. + match name { + "" => { + flush_counters(&mut counters, selection); + selection.push(Item::Space); + } + "|" => { + flush_counters(&mut counters, selection); + selection.push(Item::Column); + } + "_" => { + flush_counters(&mut counters, selection); + selection.push(Item::Row); + } + "FPS" => { + flush_counters(&mut counters, selection); + selection.push(Item::Fps); + } + "GPU time queries" => { + flush_counters(&mut counters, selection); + selection.push(Item::GpuTimeQueries); + } + "GPU cache bars" => { + flush_counters(&mut counters, selection); + selection.push(Item::GpuCacheBars); + } + _ => { + if let Some(idx) = self.index_of(name) { + if is_graph { + flush_counters(&mut counters, selection); + selection.push(Item::Graph(idx)); + } else if is_indicator { + flush_counters(&mut counters, selection); + selection.push(Item::ChangeIndicator(idx)); + } else { + counters.push(idx); + } + } else if let Some(preset_str) = find_preset(name) { + flush_counters(&mut counters, selection); + 
self.append_to_ui(selection, preset_str); + } else { + selection.push(Item::Text(format!("Unknown counter: {}", name))); + } + } + } + } + + flush_counters(&mut counters, selection); + } + + pub fn set_counters(&mut self, counters: &mut TransactionProfile) { + for (id, evt) in counters.events.iter_mut().enumerate() { + if let Event::Value(val) = *evt { + self.counters[id].set(val); + } + *evt = Event::None; + } + } + + pub fn get(&self, id: usize) -> Option { + self.counters[id].get() + } + + fn draw_counters( + counters: &[Counter], + selected: &[usize], + mut x: f32, mut y: f32, + text_buffer: &mut String, + debug_renderer: &mut DebugRenderer, + ) -> default::Rect { + let line_height = debug_renderer.line_height(); + + x += PROFILE_PADDING; + y += PROFILE_PADDING; + let origin = default::Point2D::new(x, y); + y += line_height * 0.5; + + let mut total_rect = Rect::zero(); + + let mut color_index = 0; + let colors = [ + // Regular values, + ColorU::new(255, 255, 255, 255), + ColorU::new(255, 255, 0, 255), + // Unexpected values, + ColorU::new(255, 80, 0, 255), + ColorU::new(255, 0, 0, 255), + ]; + + for idx in selected { + // Indexes directly into `counters`; an out-of-range index would panic.
+ let counter = &counters[*idx]; + + let rect = debug_renderer.add_text( + x, y, + counter.name, + colors[color_index], + None, + ); + color_index = (color_index + 1) % 2; + + total_rect = total_rect.union(&rect); + y += line_height; + } + + color_index = 0; + x = total_rect.max_x() + 60.0; + y = origin.y + line_height * 0.5; + + for idx in selected { + let counter = &counters[*idx]; + let expected_offset = if counter.has_unexpected_avg_max() { 2 } else { 0 }; + + counter.write_value(text_buffer); + + let rect = debug_renderer.add_text( + x, + y, + &text_buffer, + colors[color_index + expected_offset], + None, + ); + color_index = (color_index + 1) % 2; + + total_rect = total_rect.union(&rect); + y += line_height; + } + + total_rect = total_rect + .union(&Rect { origin, size: Size2D::new(1.0, 1.0) }) + .inflate(PROFILE_PADDING, PROFILE_PADDING); + + debug_renderer.add_quad( + total_rect.min_x(), + total_rect.min_y(), + total_rect.max_x(), + total_rect.max_y(), + BACKGROUND_COLOR, + BACKGROUND_COLOR, + ); + + total_rect + } + + fn draw_graph( + counter: &Counter, + x: f32, + y: f32, + text_buffer: &mut String, + debug_renderer: &mut DebugRenderer, + ) -> default::Rect { + let graph = counter.graph.as_ref().unwrap(); + + let max_samples = graph.values.capacity() as f32; + + let size = Size2D::new(max_samples, 100.0); + let line_height = debug_renderer.line_height(); + let graph_rect = Rect::new(Point2D::new(x + PROFILE_PADDING, y + PROFILE_PADDING), size); + let mut rect = graph_rect.inflate(PROFILE_PADDING, PROFILE_PADDING); + + let stats = graph.stats(); + + let text_color = ColorU::new(255, 255, 0, 255); + let text_origin = rect.origin + vec2(rect.size.width, 25.0); + set_text!(text_buffer, "{} ({})", counter.name, counter.unit); + debug_renderer.add_text( + text_origin.x, + text_origin.y, + if counter.unit == "" { counter.name } else { text_buffer }, + ColorU::new(0, 255, 0, 255), + None, + ); + + set_text!(text_buffer, "Samples: {}", stats.samples); + + 
debug_renderer.add_text( + text_origin.x, + text_origin.y + line_height, + text_buffer, + text_color, + None, + ); + + if stats.samples > 0 { + set_text!(text_buffer, "Min: {:.2} {}", stats.min, counter.unit); + debug_renderer.add_text( + text_origin.x, + text_origin.y + line_height * 2.0, + text_buffer, + text_color, + None, + ); + + set_text!(text_buffer, "Avg: {:.2} {}", stats.avg, counter.unit); + debug_renderer.add_text( + text_origin.x, + text_origin.y + line_height * 3.0, + text_buffer, + text_color, + None, + ); + + set_text!(text_buffer, "Max: {:.2} {}", stats.max, counter.unit); + debug_renderer.add_text( + text_origin.x, + text_origin.y + line_height * 4.0, + text_buffer, + text_color, + None, + ); + } + + rect.size.width += 200.0; + debug_renderer.add_quad( + rect.min_x(), + rect.min_y(), + rect.max_x(), + rect.max_y(), + BACKGROUND_COLOR, + BACKGROUND_COLOR, + ); + + let bx1 = graph_rect.max_x(); + let by1 = graph_rect.max_y(); + + let w = graph_rect.size.width / max_samples; + let h = graph_rect.size.height; + + let color_t0 = ColorU::new(0, 255, 0, 255); + let color_b0 = ColorU::new(0, 180, 0, 255); + + let color_t2 = ColorU::new(255, 0, 0, 255); + let color_b2 = ColorU::new(180, 0, 0, 255); + + for (index, sample) in graph.values.iter().enumerate() { + if !sample.is_finite() { + // NAN means no sample this frame. 
continue; + } + let sample = *sample as f32; + let x1 = bx1 - index as f32 * w; + let x0 = x1 - w; + + let y0 = by1 - (sample / stats.max as f32) as f32 * h; + let y1 = by1; + + let (color_top, color_bottom) = if counter.is_unexpected_value(sample as f64) { + (color_t2, color_b2) + } else { + (color_t0, color_b0) + }; + + debug_renderer.add_quad(x0, y0, x1, y1, color_top, color_bottom); + } + + rect + } + + + fn draw_change_indicator( + counter: &Counter, + x: f32, y: f32, + debug_renderer: &mut DebugRenderer + ) -> default::Rect { + let height = 10.0; + let width = 20.0; + + // Draw the indicator red instead of blue if it is not within expected ranges. + let color = if counter.has_unexpected_value() || counter.has_unexpected_avg_max() { + ColorU::new(255, 20, 20, 255) + } else { + ColorU::new(0, 100, 250, 255) + }; + + let tx = counter.change_indicator as f32 * width; + debug_renderer.add_quad( + x, + y, + x + 15.0 * width, + y + height, + ColorU::new(0, 0, 0, 150), + ColorU::new(0, 0, 0, 150), + ); + + debug_renderer.add_quad( + x + tx, + y, + x + tx + width, + y + height, + color, + ColorU::new(25, 25, 25, 255), + ); + + Rect { + origin: Point2D::new(x, y), + size: Size2D::new(15.0 * width + 20.0, height), + } + } + + fn draw_bar( + label: &str, + label_color: ColorU, + counters: &[(ColorU, usize)], + x: f32, y: f32, + debug_renderer: &mut DebugRenderer, + ) -> default::Rect { + let x = x + 8.0; + let y = y + 24.0; + let text_rect = debug_renderer.add_text( + x, y, + label, + label_color, + None, + ); + + let x_base = text_rect.max_x() + 10.0; + let width = 300.0; + let total_value = counters.last().unwrap().1; + let scale = width / total_value.max(1) as f32; // Clamp to 1: a total of 0 would give an infinite scale and NaN quad coordinates. + let mut x_current = x_base; + + for &(color, counter) in counters { + let x_stop = x_base + counter as f32 * scale; + debug_renderer.add_quad( + x_current, + text_rect.origin.y, + x_stop, + text_rect.max_y(), + color, + color, + ); + x_current = x_stop; + + } + + let mut total_rect = text_rect; +
total_rect.size.width += width + 10.0; + + total_rect + } + + fn draw_gpu_cache_bars(&self, x: f32, mut y: f32, text_buffer: &mut String, debug_renderer: &mut DebugRenderer) -> default::Rect { + let color_updated = ColorU::new(0xFF, 0, 0, 0xFF); + let color_free = ColorU::new(0, 0, 0xFF, 0xFF); + let color_saved = ColorU::new(0, 0xFF, 0, 0xFF); + + let updated_blocks = self.get(GPU_CACHE_BLOCKS_UPDATED).unwrap_or(0.0) as usize; + let saved_blocks = self.get(GPU_CACHE_BLOCKS_SAVED).unwrap_or(0.0) as usize; + let allocated_blocks = self.get(GPU_CACHE_BLOCKS_TOTAL).unwrap_or(0.0) as usize; + let allocated_rows = self.get(GPU_CACHE_ROWS_TOTAL).unwrap_or(0.0) as usize; + let updated_rows = self.get(GPU_CACHE_ROWS_UPDATED).unwrap_or(0.0) as usize; + let requested_blocks = updated_blocks + saved_blocks; + let total_blocks = allocated_rows * MAX_VERTEX_TEXTURE_WIDTH; + + set_text!(text_buffer, "GPU cache rows ({}):", allocated_rows); + + let rect0 = Profiler::draw_bar( + text_buffer, + ColorU::new(0xFF, 0xFF, 0xFF, 0xFF), + &[ + (color_updated, updated_rows), + (color_free, allocated_rows), + ], + x, y, + debug_renderer, + ); + + y = rect0.max_y(); + + let rect1 = Profiler::draw_bar( + "GPU cache blocks", + ColorU::new(0xFF, 0xFF, 0, 0xFF), + &[ + (color_updated, updated_blocks), + (color_saved, requested_blocks), + (color_free, allocated_blocks), + (ColorU::new(0, 0, 0, 0xFF), total_blocks), + ], + x, y, + debug_renderer, + ); + + let total_rect = rect0.union(&rect1).inflate(10.0, 10.0); + debug_renderer.add_quad( + total_rect.origin.x, + total_rect.origin.y, + total_rect.origin.x + total_rect.size.width, + total_rect.origin.y + total_rect.size.height, + ColorF::new(0.1, 0.1, 0.1, 0.8).into(), + ColorF::new(0.2, 0.2, 0.2, 0.8).into(), + ); + + total_rect + } + + fn draw_gpu_time_queries( + time_queries: &GpuFrameCollection, + x: f32, y: f32, + debug_renderer: &mut DebugRenderer, + ) -> default::Rect { + let mut has_data = false; + for frame in &time_queries.frames { + if 
!frame.samples.is_empty() { + has_data = true; + break; + } + } + + if !has_data { + return Rect::zero(); + } + + let graph_rect = Rect::new( + Point2D::new(x + GRAPH_PADDING, y + GRAPH_PADDING), + Size2D::new(GRAPH_WIDTH, GRAPH_HEIGHT), + ); + let bounding_rect = graph_rect.inflate(GRAPH_PADDING, GRAPH_PADDING); + + debug_renderer.add_quad( + bounding_rect.origin.x, + bounding_rect.origin.y, + bounding_rect.origin.x + bounding_rect.size.width, + bounding_rect.origin.y + bounding_rect.size.height, + BACKGROUND_COLOR, + BACKGROUND_COLOR, + ); + + let w = graph_rect.size.width; + let mut y0 = graph_rect.origin.y; + + let mut max_time = time_queries.frames + .iter() + .max_by_key(|f| f.total_time) + .unwrap() + .total_time as f32; + + // If the max time is lower than 16ms, fix the scale + // at 16ms so that the graph is easier to interpret. + let baseline_ns = 16_000_000.0; // 16ms + max_time = max_time.max(baseline_ns); + + let mut tags_present = FastHashMap::default(); + + for frame in &time_queries.frames { + let y1 = y0 + GRAPH_FRAME_HEIGHT; + + let mut current_ns = 0; + for sample in &frame.samples { + let x0 = graph_rect.origin.x + w * current_ns as f32 / max_time; + current_ns += sample.time_ns; + let x1 = graph_rect.origin.x + w * current_ns as f32 / max_time; + let mut bottom_color = sample.tag.color; + bottom_color.a *= 0.5; + + debug_renderer.add_quad( + x0, + y0, + x1, + y1, + sample.tag.color.into(), + bottom_color.into(), + ); + + tags_present.insert(sample.tag.label, sample.tag.color); + } + + y0 = y1; + } + + // If the max time is higher than 16ms, show a vertical line at the + // 16ms mark. 
+ if max_time > baseline_ns { + let x = graph_rect.origin.x + w * baseline_ns as f32 / max_time; + let height = time_queries.frames.len() as f32 * GRAPH_FRAME_HEIGHT; + + debug_renderer.add_quad( + x, + graph_rect.origin.y, + x + 4.0, + graph_rect.origin.y + height, + ColorU::new(120, 00, 00, 150), + ColorU::new(120, 00, 00, 100), + ); + } + + + // Add a legend to see which color correspond to what primitive. + const LEGEND_SIZE: f32 = 20.0; + const PADDED_LEGEND_SIZE: f32 = 25.0; + if !tags_present.is_empty() { + debug_renderer.add_quad( + bounding_rect.max_x() + GRAPH_PADDING, + bounding_rect.origin.y, + bounding_rect.max_x() + GRAPH_PADDING + 200.0, + bounding_rect.origin.y + tags_present.len() as f32 * PADDED_LEGEND_SIZE + GRAPH_PADDING, + BACKGROUND_COLOR, + BACKGROUND_COLOR, + ); + } + + for (i, (label, &color)) in tags_present.iter().enumerate() { + let x0 = bounding_rect.origin.x + bounding_rect.size.width + GRAPH_PADDING * 2.0; + let y0 = bounding_rect.origin.y + GRAPH_PADDING + i as f32 * PADDED_LEGEND_SIZE; + + debug_renderer.add_quad( + x0, y0, x0 + LEGEND_SIZE, y0 + LEGEND_SIZE, + color.into(), + color.into(), + ); + + debug_renderer.add_text( + x0 + PADDED_LEGEND_SIZE, + y0 + LEGEND_SIZE * 0.75, + label, + ColorU::new(255, 255, 0, 255), + None, + ); + } + + bounding_rect + } + + pub fn draw_profile( + &mut self, + _frame_index: u64, + debug_renderer: &mut DebugRenderer, + device_size: DeviceIntSize, + ) { + let x_start = 20.0; + let mut y_start = 150.0; + let default_column_width = 400.0; + + // set_text!(..) into this string instead of using format!(..) to avoid + // unnecessary allocations. 
+ let mut text_buffer = String::with_capacity(32); + + let mut column_width = default_column_width; + let mut max_y = y_start; + + let mut x = x_start; + let mut y = y_start; + + for elt in &self.ui { + let rect = match elt { + Item::Counters(indices) => { + Profiler::draw_counters(&self.counters, &indices, x, y, &mut text_buffer, debug_renderer) + } + Item::Graph(idx) => { + Profiler::draw_graph(&self.counters[*idx], x, y, &mut text_buffer, debug_renderer) + } + Item::ChangeIndicator(idx) => { + Profiler::draw_change_indicator(&self.counters[*idx], x, y, debug_renderer) + } + Item::GpuTimeQueries => { + Profiler::draw_gpu_time_queries(&self.gpu_frames, x, y, debug_renderer) + } + Item::GpuCacheBars => { + self.draw_gpu_cache_bars(x, y, &mut text_buffer, debug_renderer) + } + Item::Text(text) => { + let p = 10.0; + let mut rect = debug_renderer.add_text( + x + p, + y + p, + &text, + ColorU::new(255, 255, 255, 255), + None, + ); + rect = rect.inflate(p, p); + + debug_renderer.add_quad( + rect.origin.x, + rect.origin.y, + rect.max_x(), + rect.max_y(), + BACKGROUND_COLOR, + BACKGROUND_COLOR, + ); + + rect + } + Item::Fps => { + set_text!(&mut text_buffer, "{:.2} fps", 1000.0 / self.counters[FRAME_TIME].max); + let mut rect = debug_renderer.add_text( + x + PROFILE_PADDING, + y + PROFILE_PADDING + 5.0, + &text_buffer, + ColorU::new(255, 255, 255, 255), + None, + ); + rect = rect.inflate(PROFILE_PADDING, PROFILE_PADDING); + + debug_renderer.add_quad( + rect.min_x(), + rect.min_y(), + rect.max_x(), + rect.max_y(), + BACKGROUND_COLOR, + BACKGROUND_COLOR, + ); + + rect + } + Item::Space => { + Rect { origin: Point2D::new(x, y), size: Size2D::new(0.0, PROFILE_SPACING) } + } + Item::Column => { + max_y = max_y.max(y); + x += column_width + PROFILE_SPACING; + y = y_start; + column_width = default_column_width; + + continue; + } + Item::Row => { + max_y = max_y.max(y); + y_start = max_y + PROFILE_SPACING; + y = y_start; + x = x_start; + column_width = default_column_width; + + 
continue; + } + }; + + column_width = column_width.max(rect.size.width); + y = rect.max_y(); + + if y > device_size.height as f32 - 100.0 { + max_y = max_y.max(y); + x += column_width + PROFILE_SPACING; + y = y_start; + column_width = default_column_width; + } + } + } + + #[cfg(feature = "capture")] + pub fn dump_stats(&self, sink: &mut dyn std::io::Write) -> std::io::Result<()> { + for counter in &self.counters { + if counter.value.is_finite() { + writeln!(sink, "{} {:?}{}", counter.name, counter.value, counter.unit)?; + } + } + + Ok(()) + } } /// Defines the interface for hooking up an external profiler to WR. @@ -181,939 +1165,398 @@ pub struct GpuProfileTag { pub color: ColorF, } -trait ProfileCounter { - fn description(&self) -> &'static str; - fn value(&self) -> String; - fn is_expected(&self) -> bool; +/// Ranges of expected value for a profile counter. +#[derive(Clone, Debug)] +pub struct Expected { + pub range: Option>, + pub avg: Option>, } -#[derive(Clone)] -pub struct IntProfileCounter { - description: &'static str, - value: usize, - expect: Option>, -} - -impl IntProfileCounter { - fn new(description: &'static str, expect: Option>) -> Self { - IntProfileCounter { - description, - value: 0, - expect, +impl Expected { + const fn none() -> Self { + Expected { + range: None, + avg: None, } } +} - #[inline(always)] - pub fn inc(&mut self) { - self.value += 1; - } - - pub fn set(&mut self, value: usize) { - self.value = value; +const fn expected(range: Range) -> Expected { + Expected { + range: Some(range), + avg: None, } } -impl ProfileCounter for IntProfileCounter { - fn description(&self) -> &'static str { - self.description - } - - fn value(&self) -> String { - format!("{}", self.value) - } - - fn is_expected(&self) -> bool { - self.expect.as_ref().map(|range| range.contains(&(self.value as u64))).unwrap_or(true) +impl Expected { + const fn avg(mut self, avg: Range) -> Self { + self.avg = Some(avg); + self } } -/// A profile counter recording average and 
maximum integer values over time slices -/// of half a second. -#[derive(Clone)] -pub struct AverageIntProfileCounter { - description: &'static str, - /// Start of the current time slice. - start_ns: u64, - /// Sum of the values recorded during the current time slice. - sum: u64, +impl Expected { + const fn avg(mut self, avg: Range) -> Self { + self.avg = Some(avg); + self + } + + const fn into_float(self) -> Expected { + Expected { + range: match self.range { + Some(r) => Some(r.start as f64 .. r.end as f64), + None => None, + }, + avg: match self.avg { + Some(r) => Some(r.start as f64 .. r.end as f64), + None => None, + }, + } + } +} + +pub struct CounterDescriptor { + pub name: &'static str, + pub unit: &'static str, + pub index: usize, + pub show_as: ShowAs, + pub expected: Expected, +} + +#[derive(Debug)] +pub struct Counter { + pub name: &'static str, + pub unit: &'static str, + pub show_as: ShowAs, + pub expected: Expected, + + /// + value: f64, /// Number of samples in the current time slice. num_samples: u64, + /// Sum of the values recorded during the current time slice. + sum: f64, /// The max value in in-progress time slice. - next_max: u64, + next_max: f64, /// The max value of the previous time slice (displayed). - max: u64, - /// The average value of the previous time slice (displayed). - avg: u64, - /// Intermediate accumulator for `add` and `inc`. - accum: u64, - /// Expected average range of values, if any. - expect_avg: Option>, - /// Expected maximum range of values, if any. - expect_max: Option>, + max: f64, + /// The average value of the previous time slice (displayed). + avg: f64, + /// Incremented when the counter changes. + change_indicator: u8, + + /// Only used to check that the constants match the real index. 
+ index: usize, + + graph: Option, } -impl AverageIntProfileCounter { - pub fn new( - description: &'static str, - expect_avg: Option>, - expect_max: Option>, - ) -> Self { - AverageIntProfileCounter { - description, - start_ns: precise_time_ns(), - sum: 0, +impl Counter { + pub fn new(descriptor: &CounterDescriptor) -> Self { + Counter { + name: descriptor.name, + unit: descriptor.unit, + show_as: descriptor.show_as, + expected: descriptor.expected.clone(), + index: descriptor.index, + value: std::f64::NAN, num_samples: 0, - next_max: 0, - max: 0, - avg: 0, - accum: 0, - expect_avg, - expect_max, + sum: 0.0, + next_max: 0.0, + max: 0.0, + avg: 0.0, + change_indicator: 0, + graph: None, } } - - pub fn reset(&mut self) { - if self.accum > 0 { - self.set_u64(self.accum); - self.accum = 0; - } + pub fn set_f64(&mut self, val: f64) { + self.value = val; } - pub fn set(&mut self, val: usize) { - self.set_u64(val as u64); + pub fn set(&mut self, val: T) where T: Into { + self.set_f64(val.into()); } - pub fn set_u64(&mut self, val: u64) { - let now = precise_time_ns(); - if (now - self.start_ns) > AVERAGE_OVER_NS && self.num_samples > 0 { - self.avg = self.sum / self.num_samples; - self.max = self.next_max; - self.start_ns = now; - self.sum = 0; - self.num_samples = 0; - self.next_max = 0; - } - self.next_max = self.next_max.max(val); - self.sum += val; - self.num_samples += 1; - self.accum = 0; - } - - pub fn add(&mut self, val: usize) { - self.accum += val as u64; - } - - pub fn inc(&mut self) { - self.accum += 1; - } - - pub fn get_accum(&mut self) -> u64{ - self.accum - } - - /// Returns either the most up to date value if the counter is updated - /// with add add inc, or the average over the previous time slice. 
- pub fn get(&self) -> usize { - let result = if self.accum != 0 { - self.accum + pub fn get(&self) -> Option { + if self.value.is_finite() { + Some(self.value) } else { - self.avg - }; - - result as usize - } -} - -impl ProfileCounter for AverageIntProfileCounter { - fn description(&self) -> &'static str { - self.description - } - - fn value(&self) -> String { - format!("{:.2} (max {:.2})", self.avg, self.max) - } - - fn is_expected(&self) -> bool { - self.expect_avg.as_ref().map(|range| range.contains(&self.avg)).unwrap_or(true) - && self.expect_max.as_ref().map(|range| range.contains(&self.max)).unwrap_or(true) - } -} - -pub struct PercentageProfileCounter { - description: &'static str, - value: f32, -} - -impl ProfileCounter for PercentageProfileCounter { - fn description(&self) -> &'static str { - self.description - } - - fn value(&self) -> String { - format!("{:.2}%", self.value * 100.0) - } - - fn is_expected(&self) -> bool { true } -} - -#[derive(Clone)] -pub struct ResourceProfileCounter { - description: &'static str, - value: usize, - // in bytes. 
- size: usize, - expected_count: Option>, - // in MB - expected_size: Option>, -} - -impl ResourceProfileCounter { - fn new( - description: &'static str, - expected_count: Option>, - expected_size: Option> - ) -> Self { - ResourceProfileCounter { - description, - value: 0, - size: 0, - expected_count, - expected_size, + None } } - #[allow(dead_code)] - fn reset(&mut self) { - self.value = 0; - self.size = 0; - } - - #[inline(always)] - pub fn inc(&mut self, size: usize) { - self.value += 1; - self.size += size; - } - - pub fn set(&mut self, count: usize, size: usize) { - self.value = count; - self.size = size; - } - - pub fn size_mb(&self) -> f32 { - self.size as f32 / (1024.0 * 1024.0) - } -} - -impl ProfileCounter for ResourceProfileCounter { - fn description(&self) -> &'static str { - self.description - } - - fn value(&self) -> String { - format!("{} ({:.2} MB)", self.value, self.size_mb()) - } - - fn is_expected(&self) -> bool { - self.expected_count.as_ref().map(|range| range.contains(&self.value)).unwrap_or(true) - && self.expected_size.as_ref().map(|range| range.contains(&self.size_mb())).unwrap_or(true) - } -} - -#[derive(Clone)] -pub struct TimeProfileCounter { - description: &'static str, - nanoseconds: u64, - invert: bool, - expect_ms: Option>, -} - -pub struct Timer<'a> { - start: u64, - result: &'a mut u64, -} - -impl<'a> Drop for Timer<'a> { - fn drop(&mut self) { - let end = precise_time_ns(); - *self.result += end - self.start; - } -} - -impl TimeProfileCounter { - pub fn new(description: &'static str, invert: bool, expect_ms: Option>) -> Self { - TimeProfileCounter { - description, - nanoseconds: 0, - invert, - expect_ms, - } - } - - fn reset(&mut self) { - self.nanoseconds = 0; - } - - #[allow(dead_code)] - pub fn set(&mut self, ns: u64) { - self.nanoseconds = ns; - } - - pub fn profile(&mut self, callback: F) -> T - where - F: FnOnce() -> T, - { - let t0 = precise_time_ns(); - let val = callback(); - let t1 = precise_time_ns(); - let ns = t1 - 
t0; - self.nanoseconds += ns; - val - } - - pub fn timer(&mut self) -> Timer { - Timer { - start: precise_time_ns(), - result: &mut self.nanoseconds, - } - } - - pub fn inc(&mut self, ns: u64) { - self.nanoseconds += ns; - } - - pub fn get(&self) -> u64 { - self.nanoseconds - } - - pub fn get_ms(&self) -> f64 { - self.nanoseconds as f64 / 1000000.0 - } -} - -impl ProfileCounter for TimeProfileCounter { - fn description(&self) -> &'static str { - self.description - } - - fn value(&self) -> String { - if self.invert { - format!("{:.2} fps", 1000000000.0 / self.nanoseconds as f64) - } else { - format!("{:.2} ms", self.get_ms()) - } - } - - fn is_expected(&self) -> bool { - self.expect_ms.as_ref() - .map(|range| range.contains(&(self.nanoseconds as f64 / 1000000.0))) - .unwrap_or(true) - } -} - -#[derive(Clone)] -pub struct AverageTimeProfileCounter { - counter: AverageIntProfileCounter, - invert: bool, -} - -impl AverageTimeProfileCounter { - pub fn new( - description: &'static str, - invert: bool, - expect_avg: Option>, - expect_max: Option>, - ) -> Self { - let expect_avg_ns = expect_avg.map( - |range| (range.start * 1000000.0) as u64 .. (range.end * 1000000.0) as u64 - ); - let expect_max_ns = expect_max.map( - |range| (range.start * 1000000.0) as u64 .. 
(range.end * 1000000.0) as u64 - ); - - AverageTimeProfileCounter { - counter: AverageIntProfileCounter::new( - description, - expect_avg_ns, - expect_max_ns, - ), - invert, - } - } - - pub fn set(&mut self, ns: u64) { - self.counter.set_u64(ns); - } - - #[allow(dead_code)] - pub fn profile(&mut self, callback: F) -> T - where - F: FnOnce() -> T, - { - let t0 = precise_time_ns(); - let val = callback(); - let t1 = precise_time_ns(); - self.counter.set_u64(t1 - t0); - val - } - - pub fn avg_ms(&self) -> f64 { self.counter.avg as f64 / 1000000.0 } - - pub fn max_ms(&self) -> f64 { self.counter.max as f64 / 1000000.0 } -} - -impl ProfileCounter for AverageTimeProfileCounter { - fn description(&self) -> &'static str { - self.counter.description - } - - fn value(&self) -> String { - if self.invert { - format!("{:.2} fps", 1000000000.0 / self.counter.avg as f64) - } else { - format!("{:.2} ms (max {:.2} ms)", self.avg_ms(), self.max_ms()) - } - } - - fn is_expected(&self) -> bool { - self.counter.is_expected() - } -} - - -#[derive(Clone)] -pub struct FrameProfileCounters { - pub total_primitives: AverageIntProfileCounter, - pub visible_primitives: AverageIntProfileCounter, - pub targets_used: AverageIntProfileCounter, - pub targets_changed: AverageIntProfileCounter, - pub targets_created: AverageIntProfileCounter, -} - -impl FrameProfileCounters { - pub fn new() -> Self { - FrameProfileCounters { - total_primitives: AverageIntProfileCounter::new( - "Total Primitives", - None, Some(expected::TOTAL_PRIMITIVES), - ), - visible_primitives: AverageIntProfileCounter::new( - "Visible Primitives", - None, Some(expected::VISIBLE_PRIMITIVES), - ), - targets_used: AverageIntProfileCounter::new( - "Used targets", - None, Some(expected::USED_TARGETS), - ), - targets_changed: AverageIntProfileCounter::new( - "Changed targets", - None, Some(expected::CHANGED_TARGETS), - ), - targets_created: AverageIntProfileCounter::new( - "Created targets", - None, Some(expected::CREATED_TARGETS), - 
), - } - } - - pub fn reset_targets(&mut self) { - self.targets_used.reset(); - self.targets_changed.reset(); - self.targets_created.reset(); - } -} - -#[derive(Clone)] -pub struct TextureCacheProfileCounters { - pub pages_alpha8_linear: ResourceProfileCounter, - pub pages_alpha16_linear: ResourceProfileCounter, - pub pages_color8_linear: ResourceProfileCounter, - pub pages_color8_nearest: ResourceProfileCounter, - pub pages_picture: ResourceProfileCounter, - pub rasterized_blob_pixels: ResourceProfileCounter, - pub standalone_bytes: IntProfileCounter, - pub shared_bytes: IntProfileCounter, -} - -impl TextureCacheProfileCounters { - pub fn new() -> Self { - TextureCacheProfileCounters { - pages_alpha8_linear: ResourceProfileCounter::new("Texture A8 cached pages", None, None), - pages_alpha16_linear: ResourceProfileCounter::new("Texture A16 cached pages", None, None), - pages_color8_linear: ResourceProfileCounter::new("Texture RGBA8 cached pages (L)", None, None), - pages_color8_nearest: ResourceProfileCounter::new("Texture RGBA8 cached pages (N)", None, None), - pages_picture: ResourceProfileCounter::new("Picture cached pages", None, None), - rasterized_blob_pixels: ResourceProfileCounter::new( - "Rasterized Blob Pixels", - Some(expected::NUM_RASTERIZED_BLOBS), - Some(expected::RASTERIZED_BLOBS_MB), - ), - standalone_bytes: IntProfileCounter::new("Standalone", None), - shared_bytes: IntProfileCounter::new("Shared", None), - } - } -} - -#[derive(Clone)] -pub struct GpuCacheProfileCounters { - pub allocated_rows: AverageIntProfileCounter, - pub allocated_blocks: AverageIntProfileCounter, - pub updated_rows: AverageIntProfileCounter, - pub updated_blocks: AverageIntProfileCounter, - pub saved_blocks: AverageIntProfileCounter, -} - -impl GpuCacheProfileCounters { - pub fn new() -> Self { - GpuCacheProfileCounters { - allocated_rows: AverageIntProfileCounter::new( - "GPU cache rows: total", - None, Some(expected::GPU_CACHE_ROWS_TOTAL), - ), - updated_rows: 
AverageIntProfileCounter::new( - "GPU cache rows: updated", - None, Some(expected::GPU_CACHE_ROWS_UPDATED), - ), - allocated_blocks: AverageIntProfileCounter::new( - "GPU cache blocks: total", - None, Some(expected::GPU_CACHE_BLOCKS_TOTAL), - ), - updated_blocks: AverageIntProfileCounter::new( - "GPU cache blocks: updated", - None, Some(expected::GPU_CACHE_BLOCKS_UPDATED), - ), - saved_blocks: AverageIntProfileCounter::new( - "GPU cache blocks: saved", - None, Some(expected::GPU_CACHE_BLOCKS_SAVED), - ), - } - } -} - -#[derive(Clone)] -pub struct BackendProfileCounters { - pub total_time: TimeProfileCounter, - pub resources: ResourceProfileCounters, - pub txn: TransactionProfileCounters, - pub intern: InternProfileCounters, - pub scene_changed: bool, -} - -#[derive(Clone)] -pub struct ResourceProfileCounters { - pub font_templates: ResourceProfileCounter, - pub image_templates: ResourceProfileCounter, - pub texture_cache: TextureCacheProfileCounters, - pub gpu_cache: GpuCacheProfileCounters, - pub picture_cache_slices: IntProfileCounter, -} - -#[derive(Clone)] -pub struct TransactionProfileCounters { - pub display_list_build_time: TimeProfileCounter, - pub scene_build_time: TimeProfileCounter, - /// Time between when the display list is built and when it is sent by the API. - pub content_send_time: TimeProfileCounter, - /// Time between sending the SetDisplayList from the API and picking it up on - /// the render scene builder thread. - pub api_send_time: TimeProfileCounter, - /// Sum of content_send_time and api_send_time. - pub total_send_time: TimeProfileCounter, - pub display_lists: ResourceProfileCounter, -} - -macro_rules! 
declare_intern_profile_counters { - ( $( $name:ident : $ty:ty, )+ ) => { - #[derive(Clone)] - pub struct InternProfileCounters { - $( - pub $name: ResourceProfileCounter, - )+ - } - - impl InternProfileCounters { - fn draw( - &self, - debug_renderer: &mut DebugRenderer, - draw_state: &mut DrawState, - ) { - Profiler::draw_counters( - &[ - $( - &self.$name, - )+ - ], - None, - debug_renderer, - false, - draw_state, - ); + pub fn write_value(&self, output: &mut String) { + match self.show_as { + ShowAs::Float => { + set_text!(output, "{:.2} {} (max: {:.2})", self.avg, self.unit, self.max); + } + ShowAs::Int => { + set_text!(output, "{:.0} {} (max: {:.0})", self.avg.round(), self.unit, self.max.round()); } } } -} -crate::enumerate_interners!(declare_intern_profile_counters); + pub fn enable_graph(&mut self, max_samples: usize) { + if self.graph.is_some() { + return; + } -impl TransactionProfileCounters { - pub fn set( - &mut self, - dl_build_start: u64, - dl_build_end: u64, - send_start: u64, - scene_build_start: u64, - scene_build_end: u64, - display_len: usize, - ) { - self.display_list_build_time.reset(); - self.content_send_time.reset(); - self.api_send_time.reset(); - self.total_send_time.reset(); - self.scene_build_time.reset(); - self.display_lists.reset(); + self.graph = Some(Graph::new(max_samples)); + } - let dl_build_time = dl_build_end - dl_build_start; - let scene_build_time = scene_build_end - scene_build_start; - let content_send_time = send_start - dl_build_end; - let api_send_time = scene_build_start - send_start; - self.display_list_build_time.inc(dl_build_time); - self.scene_build_time.inc(scene_build_time); - self.content_send_time.inc(content_send_time); - self.api_send_time.inc(api_send_time); - self.total_send_time.inc(content_send_time + api_send_time); - self.display_lists.inc(display_len); + pub fn disable_graph(&mut self) { + self.graph = None; + } + + pub fn is_unexpected_value(&self, value: f64) -> bool { + if let Some(range) = 
&self.expected.range { + return value.is_finite() && value >= range.end; + } + + false + } + + pub fn has_unexpected_value(&self) -> bool { + self.is_unexpected_value(self.value) + } + + pub fn has_unexpected_avg_max(&self) -> bool { + if let Some(range) = &self.expected.range { + if self.max.is_finite() && self.max >= range.end { + return true; + } + } + + if let Some(range) = &self.expected.avg { + if self.avg < range.start || self.avg >= range.end { + return true; + } + } + + false + } + + fn update(&mut self, update_avg: bool) { + let updated = self.value.is_finite(); + if updated { + self.next_max = self.next_max.max(self.value); + self.sum += self.value; + self.num_samples += 1; + self.change_indicator = (self.change_indicator + 1) % 15; + } + + if let Some(graph) = &mut self.graph { + graph.set(self.value); + } + + self.value = std::f64::NAN; + + if update_avg && self.num_samples > 0 { + self.avg = self.sum / self.num_samples as f64; + self.max = self.next_max; + self.sum = 0.0; + self.num_samples = 0; + self.next_max = std::f64::MIN; + } } } -impl BackendProfileCounters { +#[derive(Copy, Clone, Debug)] +pub enum Event { + Start(f64), + Value(f64), + None, +} + +// std::convert::From/TryFrom can't deal with integer to f64 so we roll our own... +pub trait EventValue { + fn into_f64(self) -> f64; +} + +impl EventValue for f64 { fn into_f64(self) -> f64 { self } } +impl EventValue for f32 { fn into_f64(self) -> f64 { self as f64 } } +impl EventValue for u32 { fn into_f64(self) -> f64 { self as f64 } } +impl EventValue for i32 { fn into_f64(self) -> f64 { self as f64 } } +impl EventValue for u64 { fn into_f64(self) -> f64 { self as f64 } } +impl EventValue for usize { fn into_f64(self) -> f64 { self as f64 } } + +/// A container for profiling information that moves along the rendering pipeline +/// and is handed off to the profiler at the end. 
+pub struct TransactionProfile { + pub events: Vec, +} + +impl TransactionProfile { pub fn new() -> Self { - BackendProfileCounters { - total_time: TimeProfileCounter::new( - "Backend CPU Time", false, - Some(expected::MAX_BACKEND_CPU_TIME), - ), - resources: ResourceProfileCounters { - font_templates: ResourceProfileCounter::new( - "Font Templates", - Some(expected::NUM_FONT_TEMPLATES), - Some(expected::FONT_TEMPLATES_MB), - ), - image_templates: ResourceProfileCounter::new( - "Image Templates", - Some(expected::NUM_IMAGE_TEMPLATES), - Some(expected::IMAGE_TEMPLATES_MB), - ), - picture_cache_slices: IntProfileCounter::new( - "Picture Cache Slices", - None, - ), - texture_cache: TextureCacheProfileCounters::new(), - gpu_cache: GpuCacheProfileCounters::new(), - }, - txn: TransactionProfileCounters { - display_list_build_time: TimeProfileCounter::new( - "DisplayList Build Time", false, - Some(expected::DISPLAY_LIST_BUILD_TIME) - ), - scene_build_time: TimeProfileCounter::new( - "Scene build time", false, - Some(expected::MAX_SCENE_BUILD_TIME), - ), - content_send_time: TimeProfileCounter::new( - "Content Send Time", false, - Some(expected::DISPLAY_LIST_SEND_TIME), - ), - api_send_time: TimeProfileCounter::new( - "API Send Time", false, - Some(expected::DISPLAY_LIST_SEND_TIME), - ), - total_send_time: TimeProfileCounter::new( - "Total IPC Time", false, - Some(expected::DISPLAY_LIST_TOTAL_TIME), - ), - display_lists: ResourceProfileCounter::new( - "DisplayLists Sent", - None, Some(expected::DISPLAY_LIST_MB), - ), - }, - //TODO: generate this by a macro - intern: InternProfileCounters { - prim: ResourceProfileCounter::new("Interned primitives", None, None), - conic_grad: ResourceProfileCounter::new("Interned conic gradients", None, None), - image: ResourceProfileCounter::new("Interned images", None, None), - image_border: ResourceProfileCounter::new("Interned image borders", None, None), - line_decoration: ResourceProfileCounter::new("Interned line decorations", None, 
None), - linear_grad: ResourceProfileCounter::new("Interned linear gradients", None, None), - normal_border: ResourceProfileCounter::new("Interned normal borders", None, None), - picture: ResourceProfileCounter::new("Interned pictures", None, None), - radial_grad: ResourceProfileCounter::new("Interned radial gradients", None, None), - text_run: ResourceProfileCounter::new("Interned text runs", None, None), - yuv_image: ResourceProfileCounter::new("Interned YUV images", None, None), - clip: ResourceProfileCounter::new("Interned clips", None, None), - filter_data: ResourceProfileCounter::new("Interned filter data", None, None), - backdrop: ResourceProfileCounter::new("Interned backdrops", None, None), - }, - scene_changed: false, + TransactionProfile { + events: vec![Event::None; NUM_PROFILER_EVENTS], } } - pub fn reset(&mut self) { - self.total_time.reset(); - self.resources.texture_cache.rasterized_blob_pixels.reset(); - self.scene_changed = false; + pub fn start_time(&mut self, id: usize) { + let ms = ns_to_ms(precise_time_ns()); + self.events[id] = Event::Start(ms); } -} -pub struct RendererProfileCounters { - pub frame_counter: IntProfileCounter, - pub frame_time: AverageTimeProfileCounter, - pub draw_calls: AverageIntProfileCounter, - pub vertices: AverageIntProfileCounter, - pub vao_count_and_size: ResourceProfileCounter, - pub color_passes: AverageIntProfileCounter, - pub alpha_passes: AverageIntProfileCounter, - pub texture_data_uploaded: AverageIntProfileCounter, - pub rendered_picture_cache_tiles: AverageIntProfileCounter, - pub total_picture_cache_tiles: AverageIntProfileCounter, -} + pub fn end_time(&mut self, id: usize) -> f64 { + self.end_time_if_started(id).unwrap() + } -pub struct RendererProfileTimers { - pub cpu_time: TimeProfileCounter, - pub gpu_graph: TimeProfileCounter, - pub gpu_samples: Vec, -} + /// Similar to end_time, but doesn't panic if not matched with start_time. 
+ pub fn end_time_if_started(&mut self, id: usize) -> Option { + if let Event::Start(start) = self.events[id] { + let time = ns_to_ms(precise_time_ns()) - start; + self.events[id] = Event::Value(time); -impl RendererProfileCounters { - pub fn new() -> Self { - RendererProfileCounters { - frame_counter: IntProfileCounter::new("Frame", None), - frame_time: AverageTimeProfileCounter::new( - "FPS", true, None, None, - ), - draw_calls: AverageIntProfileCounter::new( - "Draw Calls", - None, Some(expected::DRAW_CALLS), - ), - vertices: AverageIntProfileCounter::new( - "Vertices", - None, Some(expected::VERTICES), - ), - vao_count_and_size: ResourceProfileCounter::new("VAO", None, None), - color_passes: AverageIntProfileCounter::new( - "Color passes", - None, Some(expected::COLOR_PASSES), - ), - alpha_passes: AverageIntProfileCounter::new( - "Alpha passes", - None, Some(expected::ALPHA_PASSES), - ), - texture_data_uploaded: AverageIntProfileCounter::new( - "Texture data, kb", - None, Some(expected::TEXTURE_DATA_UPLOADED), - ), - rendered_picture_cache_tiles: AverageIntProfileCounter::new( - "Rendered tiles", - None, Some(expected::RENDERED_PICTURE_CACHE_TILES), - ), - total_picture_cache_tiles: AverageIntProfileCounter::new( - "Total tiles", - None, Some(expected::TOTAL_PICTURE_CACHE_TILES), - ), + Some(time) + } else { + None } } - pub fn get_draw_calls(&mut self) -> u64 { - self.draw_calls.accum + pub fn set(&mut self, id: usize, value: T) where T: EventValue { + self.set_f64(id, value.into_f64()); } - pub fn reset(&mut self) { - self.draw_calls.reset(); - self.vertices.reset(); - self.color_passes.reset(); - self.alpha_passes.reset(); - self.texture_data_uploaded.reset(); - self.rendered_picture_cache_tiles.reset(); - self.total_picture_cache_tiles.reset(); - } -} -impl RendererProfileTimers { - pub fn new() -> Self { - RendererProfileTimers { - cpu_time: TimeProfileCounter::new("Renderer CPU Time", false, None), - gpu_samples: Vec::new(), - gpu_graph: 
TimeProfileCounter::new("GPU Time", false, None), + pub fn set_f64(&mut self, id: usize, value: f64) { + self.events[id] = Event::Value(value); + } + + pub fn get(&self, id: usize) -> Option { + if let Event::Value(val) = self.events[id] { + Some(val) + } else { + None + } + } + + pub fn get_or(&self, id: usize, or: f64) -> f64 { + self.get(id).unwrap_or(or) + } + + pub fn add(&mut self, id: usize, n: T) where T: EventValue { + let n = n.into_f64(); + + let evt = &mut self.events[id]; + + let val = match *evt { + Event::Value(v) => v + n, + Event::None => n, + Event::Start(..) => { panic!(); } + }; + + *evt = Event::Value(val); + } + + pub fn inc(&mut self, id: usize) { + self.add(id, 1.0); + } + + pub fn take(&mut self) -> Self { + TransactionProfile { + events: std::mem::take(&mut self.events), + } + } + + pub fn take_and_reset(&mut self) -> Self { + let events = std::mem::take(&mut self.events); + + *self = TransactionProfile::new(); + + TransactionProfile { events } + } + + pub fn merge(&mut self, other: &mut Self) { + for i in 0..self.events.len() { + match (self.events[i], other.events[i]) { + (Event::Value(v1), Event::Value(v2)) => { + self.events[i] = Event::Value(v1.max(v2)); + } + (Event::Value(_), _) => {} + (_, Event::Value(v2)) => { + self.events[i] = Event::Value(v2); + } + (Event::None, evt) => { + self.events[i] = evt; + } + (Event::Start(..), Event::Start(s)) => { + self.events[i] = Event::Start(s); + } + _=> {} + } + other.events[i] = Event::None; + } + } + + pub fn clear(&mut self) { + for evt in &mut self.events { + *evt = Event::None; } } } #[derive(Debug)] -struct GraphStats { - min_value: f32, - mean_value: f32, - max_value: f32, - sum: f32, +pub struct GraphStats { + pub min: f64, + pub avg: f64, + pub max: f64, + pub sum: f64, + pub samples: usize, } -struct ProfileGraph { - max_samples: usize, - scale: f32, - values: VecDeque, - short_description: &'static str, - unit_description: &'static str, +#[derive(Debug)] +pub struct Graph { + 
values: VecDeque, } -impl ProfileGraph { - fn new( - max_samples: usize, - scale: f32, - short_description: &'static str, - unit_description: &'static str, - ) -> Self { - ProfileGraph { - max_samples, - scale, - values: VecDeque::new(), - short_description, - unit_description, - } +impl Graph { + fn new(max_samples: usize) -> Self { + let mut values = VecDeque::new(); + values.reserve(max_samples); + + Graph { values } } - fn push(&mut self, ns: u64) { - let val = ns as f64 * self.scale as f64; - if self.values.len() == self.max_samples { + fn set(&mut self, val: f64) { + if self.values.len() == self.values.capacity() { self.values.pop_back(); } - self.values.push_front(val as f32); + self.values.push_front(val); } - fn stats(&self) -> GraphStats { + pub fn stats(&self) -> GraphStats { let mut stats = GraphStats { - min_value: f32::MAX, - mean_value: 0.0, - max_value: -f32::MAX, + min: f64::MAX, + avg: 0.0, + max: -f64::MAX, sum: 0.0, + samples: 0, }; + let mut samples = 0; for value in &self.values { - stats.min_value = stats.min_value.min(*value); - stats.max_value = stats.max_value.max(*value); - stats.sum += *value; + if value.is_finite() { + stats.min = stats.min.min(*value); + stats.max = stats.max.max(*value); + stats.sum += *value; + samples += 1; + } } - if !self.values.is_empty() { - stats.mean_value = stats.sum / self.values.len() as f32; + if samples > 0 { + stats.avg = stats.sum / samples as f64; + stats.samples = samples; } stats } - - fn draw_graph( - &self, - x: f32, - y: f32, - description: &'static str, - debug_renderer: &mut DebugRenderer, - ) -> default::Rect { - let size = Size2D::new(600.0, 100.0); - let line_height = debug_renderer.line_height(); - let graph_rect = Rect::new(Point2D::new(x, y), size); - let mut rect = graph_rect.inflate(10.0, 10.0); - - let stats = self.stats(); - - let text_color = ColorU::new(255, 255, 0, 255); - let text_origin = rect.origin + vec2(rect.size.width, 20.0); - debug_renderer.add_text( - text_origin.x, - 
text_origin.y, - description, - ColorU::new(0, 255, 0, 255), - None, - ); - debug_renderer.add_text( - text_origin.x, - text_origin.y + line_height, - &format!("Min: {:.2} {}", stats.min_value, self.unit_description), - text_color, - None, - ); - debug_renderer.add_text( - text_origin.x, - text_origin.y + line_height * 2.0, - &format!("Mean: {:.2} {}", stats.mean_value, self.unit_description), - text_color, - None, - ); - debug_renderer.add_text( - text_origin.x, - text_origin.y + line_height * 3.0, - &format!("Max: {:.2} {}", stats.max_value, self.unit_description), - text_color, - None, - ); - - rect.size.width += 140.0; - debug_renderer.add_quad( - rect.origin.x, - rect.origin.y, - rect.origin.x + rect.size.width + 10.0, - rect.origin.y + rect.size.height, - ColorU::new(25, 25, 25, 200), - ColorU::new(51, 51, 51, 200), - ); - - let bx1 = graph_rect.max_x(); - let by1 = graph_rect.max_y(); - - let w = graph_rect.size.width / self.max_samples as f32; - let h = graph_rect.size.height; - - let color_t0 = ColorU::new(0, 255, 0, 255); - let color_b0 = ColorU::new(0, 180, 0, 255); - - let color_t1 = ColorU::new(0, 255, 0, 255); - let color_b1 = ColorU::new(0, 180, 0, 255); - - let color_t2 = ColorU::new(255, 0, 0, 255); - let color_b2 = ColorU::new(180, 0, 0, 255); - - for (index, sample) in self.values.iter().enumerate() { - let sample = *sample; - let x1 = bx1 - index as f32 * w; - let x0 = x1 - w; - - let y0 = if stats.max_value != 0.0 { - by1 - (sample / stats.max_value) as f32 * h - } else { - by1 - }; - let y1 = by1; - - let (color_top, color_bottom) = if sample < 1000.0 / 60.0 { - (color_t0, color_b0) - } else if sample < 1000.0 / 30.0 { - (color_t1, color_b1) - } else { - (color_t2, color_b2) - }; - - debug_renderer.add_quad(x0, y0, x1, y1, color_top, color_bottom); - } - - rect - } } -impl ProfileCounter for ProfileGraph { - fn description(&self) -> &'static str { - self.short_description - } - - fn value(&self) -> String { - format!("{:.2}ms", 
self.stats().mean_value) - } - - fn is_expected(&self) -> bool { true } +#[derive(Copy, Clone, Debug, PartialEq, Eq)] +pub enum ShowAs { + Float, + Int, } struct GpuFrame { @@ -1143,785 +1586,25 @@ impl GpuFrameCollection { } } -impl GpuFrameCollection { - fn draw(&self, x: f32, y: f32, debug_renderer: &mut DebugRenderer) -> default::Rect { - let graph_rect = Rect::new( - Point2D::new(x, y), - Size2D::new(GRAPH_WIDTH, GRAPH_HEIGHT), - ); - let bounding_rect = graph_rect.inflate(GRAPH_PADDING, GRAPH_PADDING); - - debug_renderer.add_quad( - bounding_rect.origin.x, - bounding_rect.origin.y, - bounding_rect.origin.x + bounding_rect.size.width, - bounding_rect.origin.y + bounding_rect.size.height, - ColorU::new(25, 25, 25, 200), - ColorU::new(51, 51, 51, 200), - ); - - let w = graph_rect.size.width; - let mut y0 = graph_rect.origin.y; - - let max_time = self.frames - .iter() - .max_by_key(|f| f.total_time) - .unwrap() - .total_time as f32; - - let mut tags_present = FastHashMap::default(); - - for frame in &self.frames { - let y1 = y0 + GRAPH_FRAME_HEIGHT; - - let mut current_ns = 0; - for sample in &frame.samples { - let x0 = graph_rect.origin.x + w * current_ns as f32 / max_time; - current_ns += sample.time_ns; - let x1 = graph_rect.origin.x + w * current_ns as f32 / max_time; - let mut bottom_color = sample.tag.color; - bottom_color.a *= 0.5; - - debug_renderer.add_quad( - x0, - y0, - x1, - y1, - sample.tag.color.into(), - bottom_color.into(), - ); - - tags_present.insert(sample.tag.label, sample.tag.color); - } - - y0 = y1; - } - - // Add a legend to see which color correspond to what primitive. 
- const LEGEND_SIZE: f32 = 20.0; - const PADDED_LEGEND_SIZE: f32 = 25.0; - if !tags_present.is_empty() { - debug_renderer.add_quad( - bounding_rect.max_x() + GRAPH_PADDING, - bounding_rect.origin.y, - bounding_rect.max_x() + GRAPH_PADDING + 200.0, - bounding_rect.origin.y + tags_present.len() as f32 * PADDED_LEGEND_SIZE + GRAPH_PADDING, - ColorU::new(25, 25, 25, 200), - ColorU::new(51, 51, 51, 200), - ); - } - - for (i, (label, &color)) in tags_present.iter().enumerate() { - let x0 = bounding_rect.origin.x + bounding_rect.size.width + GRAPH_PADDING * 2.0; - let y0 = bounding_rect.origin.y + GRAPH_PADDING + i as f32 * PADDED_LEGEND_SIZE; - - debug_renderer.add_quad( - x0, y0, x0 + LEGEND_SIZE, y0 + LEGEND_SIZE, - color.into(), - color.into(), - ); - - debug_renderer.add_text( - x0 + PADDED_LEGEND_SIZE, - y0 + LEGEND_SIZE * 0.75, - label, - ColorU::new(255, 255, 0, 255), - None, - ); - } - - bounding_rect - } +pub fn ns_to_ms(ns: u64) -> f64 { + ns as f64 / 1_000_000.0 } -struct DrawState { - x_left: f32, - y_left: f32, - x_right: f32, - y_right: f32, +pub fn bytes_to_mb(bytes: usize) -> f64 { + bytes as f64 / 1_000_000.0 } -pub struct Profiler { - draw_state: DrawState, - backend_graph: ProfileGraph, - renderer_graph: ProfileGraph, - gpu_graph: ProfileGraph, - ipc_graph: ProfileGraph, - display_list_build_graph: ProfileGraph, - scene_build_graph: ProfileGraph, - blob_raster_graph: ProfileGraph, - backend_time: AverageTimeProfileCounter, - renderer_time: AverageTimeProfileCounter, - gpu_time: AverageTimeProfileCounter, - ipc_time: AverageTimeProfileCounter, - gpu_frames: GpuFrameCollection, - cooldowns: Vec, +#[derive(Debug, PartialEq)] +enum Item { + Counters(Vec), + Graph(usize), + ChangeIndicator(usize), + Fps, + GpuTimeQueries, + GpuCacheBars, + Text(String), + Space, + Column, + Row, } -impl Profiler { - pub fn new() -> Self { - let to_ms_scale = 1.0 / 1000000.0; - Profiler { - draw_state: DrawState { - x_left: 0.0, - y_left: 0.0, - x_right: 0.0, - y_right: 0.0, 
- }, - backend_graph: ProfileGraph::new(600, to_ms_scale, "Backend:", "ms"), - renderer_graph: ProfileGraph::new(600, to_ms_scale, "Renderer:", "ms"), - gpu_graph: ProfileGraph::new(600, to_ms_scale, "GPU:", "ms"), - ipc_graph: ProfileGraph::new(600, to_ms_scale, "IPC:", "ms"), - display_list_build_graph: ProfileGraph::new(600, to_ms_scale, "DisplayList build", "ms"), - scene_build_graph: ProfileGraph::new(600, to_ms_scale, "Scene build:", "ms"), - blob_raster_graph: ProfileGraph::new(600, 1.0, "Rasterized blob pixels:", "px"), - gpu_frames: GpuFrameCollection::new(), - backend_time: AverageTimeProfileCounter::new( - "Backend:", false, - Some(expected::AVG_BACKEND_CPU_TIME), - Some(expected::MAX_BACKEND_CPU_TIME), - ), - renderer_time: AverageTimeProfileCounter::new( - "Renderer:", false, - Some(expected::AVG_RENDERER_CPU_TIME), - Some(expected::MAX_RENDERER_CPU_TIME), - ), - ipc_time: AverageTimeProfileCounter::new( - "IPC:", false, - Some(expected::AVG_IPC_TIME), - Some(expected::MAX_IPC_TIME), - ), - gpu_time: AverageTimeProfileCounter::new( - "GPU:", false, - Some(expected::AVG_GPU_TIME), - Some(expected::MAX_GPU_TIME), - ), - cooldowns: Vec::new(), - } - } - - // If we have an array of "cooldown" counters, then only display profiles that - // are out of the ordinary and keep displaying them until the cooldown is over. 
- fn draw_counters( - counters: &[&T], - mut cooldowns: Option<&mut [i32]>, - debug_renderer: &mut DebugRenderer, - left: bool, - draw_state: &mut DrawState, - ) { - let mut label_rect = Rect::zero(); - let mut value_rect = Rect::zero(); - let (mut current_x, mut current_y) = if left { - (draw_state.x_left, draw_state.y_left) - } else { - (draw_state.x_right, draw_state.y_right) - }; - let mut color_index = 0; - let line_height = debug_renderer.line_height(); - - let colors = [ - // Regular values, - ColorU::new(255, 255, 255, 255), - ColorU::new(255, 255, 0, 255), - // Unexpected values, - ColorU::new(255, 80, 0, 255), - ColorU::new(255, 0, 0, 255), - ]; - - for (idx, counter) in counters.iter().enumerate() { - if let Some(cooldowns) = cooldowns.as_mut() { - if !counter.is_expected() { - cooldowns[idx] = 40; - } - if cooldowns[idx] == 0 { - continue; - } - } - let rect = debug_renderer.add_text( - current_x, - current_y, - counter.description(), - colors[color_index], - None, - ); - color_index = (color_index + 1) % 2; - - label_rect = label_rect.union(&rect); - current_y += line_height; - } - - color_index = 0; - current_x = label_rect.origin.x + label_rect.size.width + 60.0; - current_y = if left { draw_state.y_left } else { draw_state.y_right }; - - for (idx, counter) in counters.iter().enumerate() { - let expected_offset = if counter.is_expected() || cooldowns.is_some() { 0 } else { 2 }; - if let Some(cooldowns) = cooldowns.as_mut() { - if cooldowns[idx] > 0 { - cooldowns[idx] -= 1; - } else { - continue; - } - } - let rect = debug_renderer.add_text( - current_x, - current_y, - &counter.value(), - colors[color_index + expected_offset], - None, - ); - color_index = (color_index + 1) % 2; - - value_rect = value_rect.union(&rect); - current_y += line_height; - } - - let total_rect = label_rect.union(&value_rect).inflate(10.0, 10.0); - debug_renderer.add_quad( - total_rect.origin.x, - total_rect.origin.y, - total_rect.origin.x + total_rect.size.width, - 
total_rect.origin.y + total_rect.size.height, - ColorF::new(0.1, 0.1, 0.1, 0.8).into(), - ColorF::new(0.2, 0.2, 0.2, 0.8).into(), - ); - let new_y = total_rect.origin.y + total_rect.size.height + 30.0; - if left { - draw_state.y_left = new_y; - } else { - draw_state.y_right = new_y; - } - } - - fn draw_bar( - &mut self, - label: &str, - label_color: ColorU, - counters: &[(ColorU, &AverageIntProfileCounter)], - debug_renderer: &mut DebugRenderer, - ) -> default::Rect { - let mut rect = debug_renderer.add_text( - self.draw_state.x_left, - self.draw_state.y_left, - label, - label_color, - None, - ); - - let x_base = rect.origin.x + rect.size.width + 10.0; - let height = debug_renderer.line_height(); - let width = (self.draw_state.x_right - 30.0 - x_base).max(0.0); - let total_value = counters.last().unwrap().1.get(); - let scale = if total_value != 0 { - width / total_value as f32 - } else { - 0.0 - }; - - let mut x_current = x_base; - - for &(color, counter) in counters { - let x_stop = x_base + counter.get() as f32 * scale; - debug_renderer.add_quad( - x_current, - rect.origin.y, - x_stop, - rect.origin.y + height, - color, - color, - ); - x_current = x_stop; - } - - self.draw_state.y_left += height; - - rect.size.width += width + 10.0; - rect - } - - fn draw_gpu_cache_bars( - &mut self, - counters: &GpuCacheProfileCounters, - debug_renderer: &mut DebugRenderer, - ) { - let color_updated = ColorU::new(0xFF, 0, 0, 0xFF); - let color_free = ColorU::new(0, 0, 0xFF, 0xFF); - let color_saved = ColorU::new(0, 0xFF, 0, 0xFF); - - let mut requested_blocks = AverageIntProfileCounter::new("", None, None); - requested_blocks.set(counters.updated_blocks.get() + counters.saved_blocks.get()); - - let mut total_blocks = AverageIntProfileCounter::new("", None, None); - total_blocks.set(counters.allocated_rows.get() * MAX_VERTEX_TEXTURE_WIDTH); - - let rect0 = self.draw_bar( - &format!("GPU cache rows ({}):", counters.allocated_rows.get()), - ColorU::new(0xFF, 0xFF, 0xFF, 0xFF), - 
&[ - (color_updated, &counters.updated_rows), - (color_free, &counters.allocated_rows), - ], - debug_renderer, - ); - - let rect1 = self.draw_bar( - "GPU cache blocks", - ColorU::new(0xFF, 0xFF, 0, 0xFF), - &[ - (color_updated, &counters.updated_blocks), - (color_saved, &requested_blocks), - (color_free, &counters.allocated_blocks), - (ColorU::new(0, 0, 0, 0xFF), &total_blocks), - ], - debug_renderer, - ); - - let total_rect = rect0.union(&rect1).inflate(10.0, 10.0); - debug_renderer.add_quad( - total_rect.origin.x, - total_rect.origin.y, - total_rect.origin.x + total_rect.size.width, - total_rect.origin.y + total_rect.size.height, - ColorF::new(0.1, 0.1, 0.1, 0.8).into(), - ColorF::new(0.2, 0.2, 0.2, 0.8).into(), - ); - - self.draw_state.y_left = total_rect.origin.y + total_rect.size.height + 30.0; - } - - fn draw_frame_bars( - &mut self, - counters: &FrameProfileCounters, - debug_renderer: &mut DebugRenderer, - ) { - let rect0 = self.draw_bar( - &format!("primitives ({}):", counters.total_primitives.get()), - ColorU::new(0xFF, 0xFF, 0xFF, 0xFF), - &[ - (ColorU::new(0, 0, 0xFF, 0xFF), &counters.visible_primitives), - (ColorU::new(0, 0, 0, 0xFF), &counters.total_primitives), - ], - debug_renderer, - ); - - let rect1 = self.draw_bar( - &format!("GPU targets ({}):", &counters.targets_used.get()), - ColorU::new(0xFF, 0xFF, 0, 0xFF), - &[ - (ColorU::new(0, 0, 0xFF, 0xFF), &counters.targets_created), - (ColorU::new(0xFF, 0, 0, 0xFF), &counters.targets_changed), - (ColorU::new(0, 0xFF, 0, 0xFF), &counters.targets_used), - ], - debug_renderer, - ); - - let total_rect = rect0.union(&rect1).inflate(10.0, 10.0); - debug_renderer.add_quad( - total_rect.origin.x, - total_rect.origin.y, - total_rect.origin.x + total_rect.size.width, - total_rect.origin.y + total_rect.size.height, - ColorF::new(0.1, 0.1, 0.1, 0.8).into(), - ColorF::new(0.2, 0.2, 0.2, 0.8).into(), - ); - - self.draw_state.y_left = total_rect.origin.y + total_rect.size.height + 30.0; - } - - fn 
draw_compact_profile( - &mut self, - backend_profile: &BackendProfileCounters, - renderer_profile: &RendererProfileCounters, - debug_renderer: &mut DebugRenderer, - ) { - Profiler::draw_counters( - &[ - &renderer_profile.frame_time as &dyn ProfileCounter, - &renderer_profile.color_passes, - &renderer_profile.alpha_passes, - &renderer_profile.draw_calls, - &renderer_profile.vertices, - &renderer_profile.rendered_picture_cache_tiles, - &renderer_profile.texture_data_uploaded, - &backend_profile.resources.picture_cache_slices, - &self.ipc_time, - &self.backend_time, - &self.renderer_time, - &self.gpu_time, - ], - None, - debug_renderer, - true, - &mut self.draw_state, - ); - } - - fn draw_full_profile( - &mut self, - frame_profiles: &[FrameProfileCounters], - backend_profile: &BackendProfileCounters, - renderer_profile: &RendererProfileCounters, - renderer_timers: &mut RendererProfileTimers, - gpu_samplers: &[GpuSampler], - screen_fraction: f32, - debug_renderer: &mut DebugRenderer, - ) { - Profiler::draw_counters( - &[ - &renderer_profile.frame_time as &dyn ProfileCounter, - &renderer_profile.frame_counter, - &renderer_profile.color_passes, - &renderer_profile.alpha_passes, - &renderer_profile.rendered_picture_cache_tiles, - &renderer_profile.total_picture_cache_tiles, - &renderer_profile.texture_data_uploaded, - &backend_profile.resources.picture_cache_slices, - &backend_profile.resources.texture_cache.shared_bytes, - &backend_profile.resources.texture_cache.standalone_bytes, - ], - None, - debug_renderer, - true, - &mut self.draw_state - ); - - self.draw_gpu_cache_bars( - &backend_profile.resources.gpu_cache, - debug_renderer, - ); - - Profiler::draw_counters( - &[ - &backend_profile.resources.font_templates, - &backend_profile.resources.image_templates, - ], - None, - debug_renderer, - true, - &mut self.draw_state - ); - - backend_profile.intern.draw(debug_renderer, &mut self.draw_state); - - Profiler::draw_counters( - &[ - 
&backend_profile.resources.texture_cache.pages_alpha8_linear, - &backend_profile.resources.texture_cache.pages_color8_linear, - &backend_profile.resources.texture_cache.pages_color8_nearest, - &backend_profile.txn.display_lists, - ], - None, - debug_renderer, - true, - &mut self.draw_state - ); - - Profiler::draw_counters( - &[ - &backend_profile.txn.display_list_build_time, - &backend_profile.txn.scene_build_time, - &backend_profile.txn.content_send_time, - &backend_profile.txn.api_send_time, - &backend_profile.txn.total_send_time, - ], - None, - debug_renderer, - true, - &mut self.draw_state - ); - - for frame_profile in frame_profiles { - self.draw_frame_bars(frame_profile, debug_renderer); - } - - Profiler::draw_counters( - &[&renderer_profile.draw_calls, &renderer_profile.vertices], - None, - debug_renderer, - true, - &mut self.draw_state - ); - - Profiler::draw_counters( - &[ - &backend_profile.total_time, - &renderer_timers.cpu_time, - &renderer_timers.gpu_graph, - ], - None, - debug_renderer, - false, - &mut self.draw_state - ); - - if !gpu_samplers.is_empty() { - let mut samplers = Vec::::new(); - // Gathering unique GPU samplers. This has O(N^2) complexity, - // but we only have a few samplers per target. 
- let mut total = 0.0; - for sampler in gpu_samplers { - let value = sampler.count as f32 * screen_fraction; - total += value; - match samplers.iter().position(|s| { - s.description as *const _ == sampler.tag.label as *const _ - }) { - Some(pos) => samplers[pos].value += value, - None => samplers.push(PercentageProfileCounter { - description: sampler.tag.label, - value, - }), - } - } - samplers.push(PercentageProfileCounter { - description: "Total", - value: total, - }); - let samplers: Vec<&dyn ProfileCounter> = samplers.iter().map(|sampler| { - sampler as &dyn ProfileCounter - }).collect(); - Profiler::draw_counters( - &samplers, - None, - debug_renderer, - false, - &mut self.draw_state, - ); - } - - let rect = - self.backend_graph - .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "CPU (backend)", debug_renderer); - self.draw_state.y_right += rect.size.height + PROFILE_PADDING; - let rect = self.renderer_graph.draw_graph( - self.draw_state.x_right, - self.draw_state.y_right, - "CPU (renderer)", - debug_renderer, - ); - self.draw_state.y_right += rect.size.height + PROFILE_PADDING; - let rect = - self.ipc_graph - .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "DisplayList IPC", debug_renderer); - self.draw_state.y_right += rect.size.height + PROFILE_PADDING; - - let rect = self.display_list_build_graph - .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "DisplayList build", debug_renderer); - self.draw_state.y_right += rect.size.height + PROFILE_PADDING; - - let rect = self.scene_build_graph - .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "Scene build", debug_renderer); - self.draw_state.y_right += rect.size.height + PROFILE_PADDING; - - let rect = self.gpu_graph - .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "GPU", debug_renderer); - self.draw_state.y_right += rect.size.height + PROFILE_PADDING; - - let rect = self.blob_raster_graph - .draw_graph(self.draw_state.x_right, 
self.draw_state.y_right, "Blob pixels", debug_renderer); - self.draw_state.y_right += rect.size.height + PROFILE_PADDING; - - let rect = self.gpu_frames - .draw(self.draw_state.x_left, f32::max(self.draw_state.y_left, self.draw_state.y_right), debug_renderer); - self.draw_state.y_right += rect.size.height + PROFILE_PADDING; - } - - fn draw_smart_profile( - &mut self, - backend_profile: &BackendProfileCounters, - renderer_profile: &RendererProfileCounters, - debug_renderer: &mut DebugRenderer, - ) { - while self.cooldowns.len() < 18 { - self.cooldowns.push(0); - } - - // Always show the fps counter. - Profiler::draw_counters( - &[ - &renderer_profile.frame_time, - ], - None, - debug_renderer, - true, - &mut self.draw_state, - ); - - let mut start = 0; - let counters: &[&[&dyn ProfileCounter]] = &[ - &[ - &self.backend_time, - &self.renderer_time, - &self.gpu_time, - ], - &[ - &renderer_profile.color_passes, - &renderer_profile.alpha_passes, - &renderer_profile.draw_calls, - &renderer_profile.vertices, - &renderer_profile.rendered_picture_cache_tiles, - &renderer_profile.total_picture_cache_tiles, - ], - &[ - &backend_profile.resources.gpu_cache.allocated_rows, - &backend_profile.resources.gpu_cache.updated_rows, - &backend_profile.resources.gpu_cache.allocated_blocks, - &backend_profile.resources.gpu_cache.updated_blocks, - &backend_profile.resources.gpu_cache.saved_blocks, - ], - &[ - &backend_profile.resources.image_templates, - &backend_profile.resources.font_templates, - &backend_profile.resources.texture_cache.rasterized_blob_pixels, - &backend_profile.txn.display_lists, - ], - ]; - - for group in counters { - let end = start + group.len(); - Profiler::draw_counters( - &group[..], - Some(&mut self.cooldowns[start..end]), - debug_renderer, - true, - &mut self.draw_state, - ); - start = end; - } - } - - pub fn draw_profile( - &mut self, - frame_profiles: &[FrameProfileCounters], - backend_profile: &BackendProfileCounters, - renderer_profile: 
&RendererProfileCounters, - renderer_timers: &mut RendererProfileTimers, - gpu_samplers: &[GpuSampler], - screen_fraction: f32, - debug_renderer: &mut DebugRenderer, - style: ProfileStyle, - ) { - self.draw_state.x_left = 20.0; - self.draw_state.y_left = 50.0; - self.draw_state.x_right = 450.0; - self.draw_state.y_right = 40.0; - - let mut gpu_graph = 0; - let gpu_graphrs = mem::replace(&mut renderer_timers.gpu_samples, Vec::new()); - for sample in &gpu_graphrs { - gpu_graph += sample.time_ns; - } - renderer_timers.gpu_graph.set(gpu_graph); - - self.backend_graph - .push(backend_profile.total_time.nanoseconds); - self.backend_time.set(backend_profile.total_time.nanoseconds); - self.renderer_graph - .push(renderer_timers.cpu_time.nanoseconds); - self.renderer_time.set(renderer_timers.cpu_time.nanoseconds); - self.ipc_graph - .push(backend_profile.txn.total_send_time.nanoseconds); - self.display_list_build_graph - .push(backend_profile.txn.display_list_build_time.nanoseconds); - self.scene_build_graph - .push(backend_profile.txn.scene_build_time.nanoseconds); - self.blob_raster_graph - .push(backend_profile.resources.texture_cache.rasterized_blob_pixels.size as u64); - self.ipc_time.set(backend_profile.txn.total_send_time.nanoseconds); - self.gpu_graph.push(gpu_graph); - self.gpu_time.set(gpu_graph); - self.gpu_frames.push(gpu_graph, gpu_graphrs); - - match style { - ProfileStyle::Full => { - self.draw_full_profile( - frame_profiles, - backend_profile, - renderer_profile, - renderer_timers, - gpu_samplers, - screen_fraction, - debug_renderer, - ); - } - ProfileStyle::Compact => { - self.draw_compact_profile( - backend_profile, - renderer_profile, - debug_renderer, - ); - } - ProfileStyle::Smart => { - self.draw_smart_profile( - backend_profile, - renderer_profile, - debug_renderer, - ); - } - ProfileStyle::NoDraw => { - // Don't draw anything. We just care about collecting samples. 
- } - } - } - - #[cfg(feature = "capture")] - pub fn dump_stats(&self, sink: &mut dyn std::io::Write) -> std::io::Result<()> { - writeln!(sink, "Backend (ms) {:?}", self.backend_graph.stats())?; - writeln!(sink, "Renderer (ms) {:?}", self.renderer_graph.stats())?; - writeln!(sink, "GPU (ms) {:?}", self.gpu_graph.stats())?; - writeln!(sink, "IPC (ms) {:?}", self.ipc_graph.stats())?; - writeln!(sink, "DisplayList builder (ms) {:?}", self.display_list_build_graph.stats())?; - writeln!(sink, "Scene build (ms) {:?}", self.scene_build_graph.stats())?; - writeln!(sink, "Rasterized blob (px) {:?}", self.blob_raster_graph.stats())?; - Ok(()) - } -} - -pub struct ChangeIndicator { - counter: u32, -} - -impl ChangeIndicator { - pub fn new() -> Self { - ChangeIndicator { - counter: 0 - } - } - - pub fn changed(&mut self) { - self.counter = (self.counter + 1) % 15; - } - - const WIDTH : f32 = 20.0; - const HEIGHT: f32 = 10.0; - - pub fn width() -> f32 { - ChangeIndicator::WIDTH * 16.0 - } - - pub fn draw( - &self, - x: f32, y: f32, - color: ColorU, - debug_renderer: &mut DebugRenderer - ) { - let margin = 0.0; - let tx = self.counter as f32 * ChangeIndicator::WIDTH; - debug_renderer.add_quad( - x - margin, - y - margin, - x + 15.0 * ChangeIndicator::WIDTH + margin, - y + ChangeIndicator::HEIGHT + margin, - ColorU::new(0, 0, 0, 150), - ColorU::new(0, 0, 0, 150), - ); - - debug_renderer.add_quad( - x + tx, - y, - x + tx + ChangeIndicator::WIDTH, - y + ChangeIndicator::HEIGHT, - color, - ColorU::new(25, 25, 25, 255), - ); - } -} diff --git a/gfx/wr/webrender/src/render_api.rs b/gfx/wr/webrender/src/render_api.rs index 81b6216e56ec..75c0825bf4ab 100644 --- a/gfx/wr/webrender/src/render_api.rs +++ b/gfx/wr/webrender/src/render_api.rs @@ -27,6 +27,7 @@ use crate::api::units::*; use crate::api_resources::ApiResources; use crate::scene_builder_thread::{SceneBuilderRequest, SceneBuilderResult}; use crate::intern::InterningMemoryReport; +use crate::profiler::{self, TransactionProfile}; 
#[repr(C)] #[derive(Clone, Copy, Debug)] @@ -388,6 +389,7 @@ impl Transaction { blob_rasterizer: None, blob_requests: Vec::new(), rasterized_blobs: Vec::new(), + profile: TransactionProfile::new(), }) } @@ -571,6 +573,8 @@ pub struct TransactionMsg { pub blob_requests: Vec, /// pub rasterized_blobs: Vec<(BlobImageRequest, BlobImageResult)>, + /// Collect various data along the rendering pipeline to display it in the embedded profiler. + pub profile: TransactionProfile, } impl fmt::Debug for TransactionMsg { @@ -1232,6 +1236,7 @@ impl RenderApi { blob_rasterizer: None, blob_requests: Vec::new(), rasterized_blobs: Vec::new(), + profile: TransactionProfile::new(), }) } @@ -1250,6 +1255,7 @@ impl RenderApi { blob_rasterizer: None, blob_requests: Vec::new(), rasterized_blobs: Vec::new(), + profile: TransactionProfile::new(), }) } @@ -1280,6 +1286,10 @@ impl RenderApi { self.resources.update(&mut transaction); transaction.use_scene_builder_thread |= !transaction.scene_ops.is_empty(); + if transaction.generate_frame { + transaction.profile.start_time(profiler::API_SEND_TIME); + transaction.profile.start_time(profiler::TOTAL_FRAME_CPU_TIME); + } if transaction.use_scene_builder_thread { let sender = if transaction.low_priority { @@ -1301,6 +1311,10 @@ impl RenderApi { .map(|(txn, id)| { let mut txn = txn.finalize(id); self.resources.update(&mut txn); + if txn.generate_frame { + txn.profile.start_time(profiler::API_SEND_TIME); + txn.profile.start_time(profiler::TOTAL_FRAME_CPU_TIME); + } txn }) diff --git a/gfx/wr/webrender/src/render_backend.rs b/gfx/wr/webrender/src/render_backend.rs index 561fe8d906c3..53533eaa399e 100644 --- a/gfx/wr/webrender/src/render_backend.rs +++ b/gfx/wr/webrender/src/render_backend.rs @@ -38,7 +38,7 @@ use crate::picture::{TileCacheLogger, PictureScratchBuffer, SliceId, TileCacheIn use crate::prim_store::{PrimitiveScratchBuffer, PrimitiveInstance}; use crate::prim_store::{PrimitiveInstanceKind, PrimTemplateCommonData, PrimitiveStore}; use 
crate::prim_store::interned::*; -use crate::profiler::{BackendProfileCounters, ResourceProfileCounters}; +use crate::profiler::{self, TransactionProfile}; use crate::render_task_graph::RenderTaskGraphCounters; use crate::renderer::{AsyncPropertySampler, PipelineInfo}; use crate::resource_cache::ResourceCache; @@ -274,12 +274,12 @@ macro_rules! declare_data_stores { fn apply_updates( &mut self, updates: InternerUpdates, - profile_counters: &mut BackendProfileCounters, + profile: &mut TransactionProfile, ) { $( self.$name.apply_updates( updates.$name, - &mut profile_counters.intern.$name, + profile, ); )+ } @@ -470,6 +470,8 @@ struct Document { /// Tracks if we need to invalidate dirty rects for this document, due to the picture /// cache slice configuration having changed when a new scene is swapped in. dirty_rects_are_valid: bool, + + profile: TransactionProfile, } impl Document { @@ -512,6 +514,7 @@ impl Document { loaded_scene: Scene::new(), prev_composite_descriptor: CompositeDescriptor::empty(), dirty_rects_are_valid: true, + profile: TransactionProfile::new(), } } @@ -603,11 +606,12 @@ impl Document { &mut self, resource_cache: &mut ResourceCache, gpu_cache: &mut GpuCache, - resource_profile: &mut ResourceProfileCounters, debug_flags: DebugFlags, tile_cache_logger: &mut TileCacheLogger, tile_caches: &mut FastHashMap>, ) -> RenderedDocument { + self.profile.start_time(profiler::FRAME_BUILDING_TIME); + let accumulated_scale_factor = self.view.accumulated_scale_factor(); let pan = self.view.frame.pan.to_f32() / accumulated_scale_factor; @@ -627,7 +631,6 @@ impl Document { self.view.scene.layer, self.view.scene.device_rect.origin, pan, - resource_profile, &self.dynamic_properties, &mut self.data_stores, &mut self.scratch, @@ -636,6 +639,7 @@ impl Document { tile_cache_logger, tile_caches, self.dirty_rects_are_valid, + &mut self.profile, ); frame @@ -647,9 +651,12 @@ impl Document { let is_new_scene = self.has_built_scene; self.has_built_scene = false; + 
self.profile.end_time(profiler::FRAME_BUILDING_TIME); + RenderedDocument { frame, is_new_scene, + profile: self.profile.take_and_reset(), } } @@ -875,7 +882,7 @@ impl RenderBackend { IdNamespace(NEXT_NAMESPACE_ID.fetch_add(1, Ordering::Relaxed) as u32) } - pub fn run(&mut self, mut profile_counters: BackendProfileCounters) { + pub fn run(&mut self) { let mut frame_counter: u32 = 0; let mut status = RenderBackendStatus::Continue; @@ -886,7 +893,7 @@ impl RenderBackend { while let RenderBackendStatus::Continue = status { status = match self.api_rx.recv() { Ok(msg) => { - self.process_api_msg(msg, &mut profile_counters, &mut frame_counter) + self.process_api_msg(msg, &mut frame_counter) } Err(..) => { RenderBackendStatus::ShutDown(None) } }; @@ -926,36 +933,21 @@ impl RenderBackend { mut txns: Vec>, result_tx: Option>, frame_counter: &mut u32, - profile_counters: &mut BackendProfileCounters, ) -> bool { self.prepare_for_frames(); self.maybe_force_nop_documents( frame_counter, - profile_counters, |document_id| txns.iter().any(|txn| txn.document_id == document_id)); let mut built_frame = false; for mut txn in txns.drain(..) 
{ let has_built_scene = txn.built_scene.is_some(); - if let Some(timings) = txn.timings { - if has_built_scene { - profile_counters.scene_changed = true; - } - - profile_counters.txn.set( - timings.builder_start_time_ns, - timings.builder_end_time_ns, - timings.send_time_ns, - timings.scene_build_start_time_ns, - timings.scene_build_end_time_ns, - timings.display_list_len, - ); - } - if let Some(doc) = self.documents.get_mut(&txn.document_id) { + doc.removed_pipelines.append(&mut txn.removed_pipelines); doc.view.scene = txn.view; + doc.profile.merge(&mut txn.profile); if let Some(built_scene) = txn.built_scene.take() { doc.new_async_scene_ready( @@ -976,7 +968,7 @@ impl RenderBackend { self.tile_cache_logger.serialize_updates(&updates); } } - doc.data_stores.apply_updates(updates, profile_counters); + doc.data_stores.apply_updates(updates, &mut doc.profile); } // Build the hit tester while the APZ lock is held so that its content @@ -1000,6 +992,12 @@ impl RenderBackend { .spatial_tree .discard_frame_state_for_pipeline(*pipeline_id); } + + self.resource_cache.add_rasterized_blob_images( + txn.rasterized_blobs.take(), + &mut doc.profile, + ); + } else { // The document was removed while we were building it, skip it. 
// TODO: we might want to just ensure that removed documents are @@ -1010,11 +1008,6 @@ impl RenderBackend { continue; } - self.resource_cache.add_rasterized_blob_images( - txn.rasterized_blobs.take(), - &mut profile_counters.resources.texture_cache, - ); - built_frame |= self.update_document( txn.document_id, txn.resource_updates.take(), @@ -1023,7 +1016,6 @@ impl RenderBackend { txn.render_frame, txn.invalidate_rendered_frame, frame_counter, - profile_counters, has_built_scene, ); } @@ -1034,7 +1026,6 @@ impl RenderBackend { fn process_api_msg( &mut self, msg: ApiMsg, - profile_counters: &mut BackendProfileCounters, frame_counter: &mut u32, ) -> RenderBackendStatus { match msg { @@ -1115,7 +1106,7 @@ impl RenderBackend { } #[cfg(feature = "capture")] DebugCommand::SaveCapture(root, bits) => { - let output = self.save_capture(root, bits, profile_counters); + let output = self.save_capture(root, bits); ResultMsg::DebugOutput(output) }, #[cfg(feature = "capture")] @@ -1139,7 +1130,7 @@ impl RenderBackend { config.frame_id = frame_id; } - self.load_capture(config, profile_counters); + self.load_capture(config); for (id, doc) in &self.documents { let captured = CapturedDocument { @@ -1230,11 +1221,10 @@ impl RenderBackend { self.prepare_transactions( transaction_msgs, frame_counter, - profile_counters, ); } ApiMsg::SceneBuilderResult(msg) => { - return self.process_scene_builder_result(msg, profile_counters, frame_counter); + return self.process_scene_builder_result(msg, frame_counter); } } @@ -1244,7 +1234,6 @@ impl RenderBackend { fn process_scene_builder_result( &mut self, msg: SceneBuilderResult, - profile_counters: &mut BackendProfileCounters, frame_counter: &mut u32, ) -> RenderBackendStatus { profile_scope!("sb_msg"); @@ -1255,7 +1244,6 @@ impl RenderBackend { txns, result_tx, frame_counter, - profile_counters, ); self.bookkeep_after_frames(); }, @@ -1275,7 +1263,6 @@ impl RenderBackend { txns, result_tx, frame_counter, - profile_counters, ); if built_frame { 
@@ -1357,16 +1344,20 @@ impl RenderBackend { &mut self, txns: Vec>, frame_counter: &mut u32, - profile_counters: &mut BackendProfileCounters, ) { self.prepare_for_frames(); self.maybe_force_nop_documents( frame_counter, - profile_counters, |document_id| txns.iter().any(|txn| txn.document_id == document_id)); let mut built_frame = false; for mut txn in txns { + if txn.generate_frame { + txn.profile.end_time(profiler::API_SEND_TIME); + } + + self.documents.get_mut(&txn.document_id).unwrap().profile.merge(&mut txn.profile); + built_frame |= self.update_document( txn.document_id, txn.resource_updates.take(), @@ -1375,7 +1366,6 @@ impl RenderBackend { txn.generate_frame, txn.invalidate_rendered_frame, frame_counter, - profile_counters, false ); } @@ -1394,7 +1384,6 @@ impl RenderBackend { /// to force a frame build. fn maybe_force_nop_documents(&mut self, frame_counter: &mut u32, - profile_counters: &mut BackendProfileCounters, document_already_present: F) where F: Fn(DocumentId) -> bool { if self.requires_frame_build() { @@ -1413,7 +1402,6 @@ impl RenderBackend { false, false, frame_counter, - profile_counters, false); } #[cfg(feature = "capture")] @@ -1433,13 +1421,13 @@ impl RenderBackend { mut render_frame: bool, invalidate_rendered_frame: bool, frame_counter: &mut u32, - profile_counters: &mut BackendProfileCounters, has_built_scene: bool, ) -> bool { let requested_frame = render_frame; let requires_frame_build = self.requires_frame_build(); let doc = self.documents.get_mut(&document_id).unwrap(); + // If we have a sampler, get more frame ops from it and add them // to the transaction. This is a hook to allow the WR user code to // fiddle with things after a potentially long scene build, but just @@ -1458,7 +1446,6 @@ impl RenderBackend { // for something wrench specific and we should remove it. 
let mut scroll = false; for frame_msg in frame_ops { - let _timer = profile_counters.total_time.timer(); let op = doc.process_frame_msg(frame_msg); scroll |= op.scroll; } @@ -1471,7 +1458,7 @@ impl RenderBackend { self.resource_cache.post_scene_building_update( resource_updates, - &mut profile_counters.resources, + &mut doc.profile, ); if doc.dynamic_properties.flush_pending_updates() { @@ -1517,13 +1504,11 @@ impl RenderBackend { // borrow ck hack for profile_counters let (pending_update, rendered_document) = { - let _timer = profile_counters.total_time.timer(); let frame_build_start_time = precise_time_ns(); let rendered_document = doc.build_frame( &mut self.resource_cache, &mut self.gpu_cache, - &mut profile_counters.resources, self.debug_flags, &mut self.tile_cache_logger, &mut self.tile_caches, @@ -1586,10 +1571,8 @@ impl RenderBackend { document_id, rendered_document, pending_update, - profile_counters.clone() ); self.result_tx.send(msg).unwrap(); - profile_counters.reset(); } else if requested_frame { // WR-internal optimization to avoid doing a bunch of render work if // there's no pixels. 
We still want to pretend to render and request @@ -1712,7 +1695,6 @@ impl RenderBackend { &mut self, root: PathBuf, bits: CaptureBits, - profile_counters: &mut BackendProfileCounters, ) -> DebugOutput { use std::fs; use crate::render_task_graph::dump_render_tasks_as_svg; @@ -1735,7 +1717,6 @@ impl RenderBackend { let rendered_document = doc.build_frame( &mut self.resource_cache, &mut self.gpu_cache, - &mut profile_counters.resources, self.debug_flags, &mut self.tile_cache_logger, &mut self.tile_caches, @@ -1851,7 +1832,6 @@ impl RenderBackend { fn load_capture( &mut self, mut config: CaptureConfig, - profile_counters: &mut BackendProfileCounters, ) { debug!("capture: loading {:?}", config.frame_root()); let backend = config.deserialize_for_frame::("backend") @@ -1962,6 +1942,7 @@ impl RenderBackend { loaded_scene: scene.clone(), prev_composite_descriptor: CompositeDescriptor::empty(), dirty_rects_are_valid: false, + profile: TransactionProfile::new(), }; entry.insert(doc); } @@ -1978,12 +1959,10 @@ impl RenderBackend { let msg_publish = ResultMsg::PublishDocument( id, - RenderedDocument { frame, is_new_scene: true }, + RenderedDocument { frame, is_new_scene: true, profile: TransactionProfile::new() }, self.resource_cache.pending_updates(), - profile_counters.clone(), ); self.result_tx.send(msg_publish).unwrap(); - profile_counters.reset(); self.notifier.new_frame_ready(id, false, true, None); diff --git a/gfx/wr/webrender/src/renderer.rs b/gfx/wr/webrender/src/renderer.rs index e8b2954d39c8..b1a4ec01efb6 100644 --- a/gfx/wr/webrender/src/renderer.rs +++ b/gfx/wr/webrender/src/renderer.rs @@ -61,7 +61,7 @@ use crate::device::{DrawTarget, ExternalTexture, ReadTarget, TextureSlot}; use crate::device::{ShaderError, TextureFilter, TextureFlags, VertexUsageHint, VAO, VBO, CustomVAO}; use crate::device::ProgramCache; -use crate::device::query::GpuTimer; +use crate::device::query::{GpuSampler, GpuTimer}; #[cfg(feature = "capture")] use crate::device::FBOId; use 
euclid::{rect, Transform3D, Scale, default}; @@ -80,10 +80,8 @@ use crate::internal_types::{RenderTargetInfo, SavedTargetIndex, Swizzle}; use malloc_size_of::MallocSizeOfOps; use crate::picture::{self, RecordedDirtyRegion, ResolvedSurfaceTexture}; use crate::prim_store::DeferredResolve; -use crate::profiler::{BackendProfileCounters, FrameProfileCounters, TimeProfileCounter, - GpuProfileTag, RendererProfileCounters, RendererProfileTimers}; -use crate::profiler::{Profiler, ChangeIndicator, ProfileStyle, add_event_marker, - add_text_marker, thread_is_being_profiled}; +use crate::profiler::{self, GpuProfileTag, TransactionProfile}; +use crate::profiler::{Profiler, add_event_marker, add_text_marker, thread_is_being_profiled}; use crate::device::query::{GpuProfiler, GpuDebugMethod}; use rayon::{ThreadPool, ThreadPoolBuilder}; use crate::render_backend::{FrameId, RenderBackend}; @@ -242,15 +240,15 @@ const GPU_TAG_SCALE: GpuProfileTag = GpuProfileTag { color: debug_colors::GHOSTWHITE, }; const GPU_SAMPLER_TAG_ALPHA: GpuProfileTag = GpuProfileTag { - label: "Alpha Targets", + label: "Alpha targets", color: debug_colors::BLACK, }; const GPU_SAMPLER_TAG_OPAQUE: GpuProfileTag = GpuProfileTag { - label: "Opaque Pass", + label: "Opaque pass", color: debug_colors::BLACK, }; const GPU_SAMPLER_TAG_TRANSPARENT: GpuProfileTag = GpuProfileTag { - label: "Transparent Pass", + label: "Transparent pass", color: debug_colors::BLACK, }; const GPU_TAG_SVG_FILTER: GpuProfileTag = GpuProfileTag { @@ -2256,15 +2254,11 @@ pub struct Renderer { debug: LazyInitializedDebugRenderer, debug_flags: DebugFlags, - backend_profile_counters: BackendProfileCounters, - profile_counters: RendererProfileCounters, - resource_upload_time: u64, - gpu_cache_upload_time: u64, + profile: TransactionProfile, + frame_counter: u64, + resource_upload_time: f64, + gpu_cache_upload_time: f64, profiler: Profiler, - new_frame_indicator: ChangeIndicator, - new_scene_indicator: ChangeIndicator, - slow_frame_indicator: 
ChangeIndicator, - slow_txn_indicator: ChangeIndicator, last_time: u64, @@ -2486,8 +2480,6 @@ impl Renderer { None => Rc::new(RefCell::new(Shaders::new(&mut device, gl_type, &options)?)), }; - let backend_profile_counters = BackendProfileCounters::new(); - let dither_matrix_texture = if options.enable_dithering { let dither_matrix: [u8; 64] = [ 0, @@ -2828,7 +2820,7 @@ impl Renderer { debug_flags, namespace_alloc_by_client, ); - backend.run(backend_profile_counters); + backend.run(); if let Some(ref thread_listener) = *thread_listener_for_render_backend { thread_listener.thread_stopped(&rb_thread_name); } @@ -2866,15 +2858,11 @@ impl Renderer { shaders, debug: LazyInitializedDebugRenderer::new(), debug_flags: DebugFlags::empty(), - backend_profile_counters: BackendProfileCounters::new(), - profile_counters: RendererProfileCounters::new(), - resource_upload_time: 0, - gpu_cache_upload_time: 0, + profile: TransactionProfile::new(), + frame_counter: 0, + resource_upload_time: 0.0, + gpu_cache_upload_time: 0.0, profiler: Profiler::new(), - new_frame_indicator: ChangeIndicator::new(), - new_scene_indicator: ChangeIndicator::new(), - slow_frame_indicator: ChangeIndicator::new(), - slow_txn_indicator: ChangeIndicator::new(), max_recorded_profiles: options.max_recorded_profiles, clear_color: options.clear_color, enable_clear_scissor: options.enable_clear_scissor, @@ -3009,13 +2997,9 @@ impl Renderer { } ResultMsg::PublishDocument( document_id, - doc, + mut doc, resource_update_list, - profile_counters, ) => { - if doc.is_new_scene { - self.new_scene_indicator.changed(); - } // Add a new document to the active set, expressed as a `Vec` in order // to re-order based on `DocumentLayer` during rendering. 
@@ -3031,6 +3015,7 @@ impl Renderer { self.render_impl(None).ok(); } + doc.profile.merge(&mut self.active_documents[pos].1.profile); self.active_documents[pos].1 = doc; } None => self.active_documents.push((document_id, doc)), @@ -3050,7 +3035,6 @@ impl Renderer { self.pending_texture_cache_updates |= !resource_update_list.texture_updates.updates.is_empty(); self.pending_texture_updates.push(resource_update_list.texture_updates); self.pending_native_surface_updates.extend(resource_update_list.native_surface_updates); - self.backend_profile_counters = profile_counters; self.documents_seen.insert(document_id); } ResultMsg::UpdateGpuCache(mut list) => { @@ -3486,10 +3470,7 @@ impl Renderer { DebugFlags::RENDER_TARGET_DBG | DebugFlags::TEXTURE_CACHE_DBG | DebugFlags::EPOCHS | - DebugFlags::NEW_FRAME_INDICATOR | - DebugFlags::NEW_SCENE_INDICATOR | DebugFlags::GPU_CACHE_DBG | - DebugFlags::SLOW_FRAME_INDICATOR | DebugFlags::PICTURE_CACHING_DBG | DebugFlags::PRIMITIVE_DBG | DebugFlags::ZOOM_DBG @@ -3600,6 +3581,8 @@ impl Renderer { return Ok(results); } + self.profile.start_time(profiler::RENDERER_TIME); + let compositor_kind = self.active_documents[0].1.frame.composite_state.compositor_kind; // CompositorKind is updated if self.current_compositor_kind != compositor_kind { @@ -3629,9 +3612,6 @@ impl Renderer { self.current_compositor_kind = compositor_kind; } - let mut frame_profiles = Vec::new(); - let mut profile_timers = RendererProfileTimers::new(); - // The texture resolver scope should be outside of any rendering, including // debug rendering. This ensures that when we return render targets to the // pool via glInvalidateFramebuffer, we don't do any debug rendering after @@ -3640,25 +3620,11 @@ impl Renderer { // resolve step when the debug overlay is enabled. self.texture_resolver.begin_frame(); - let profile_samplers = { - let _gm = self.gpu_profiler.start_marker("build samples"); - // Block CPU waiting for last frame's GPU profiles to arrive. 
- // In general this shouldn't block unless heavily GPU limited. - let (gpu_frame_id, timers, samplers) = self.gpu_profiler.build_samples(); + if let Some(device_size) = device_size { + self.update_gpu_profile(device_size); + } - if self.max_recorded_profiles > 0 { - while self.gpu_profiles.len() >= self.max_recorded_profiles { - self.gpu_profiles.pop_front(); - } - self.gpu_profiles - .push_back(GpuProfile::new(gpu_frame_id, &timers)); - } - profile_timers.gpu_samples = timers; - samplers - }; - - - let cpu_frame_id = profile_timers.cpu_time.profile(|| { + let cpu_frame_id = { let _gm = self.gpu_profiler.start_marker("begin frame"); let frame_id = self.device.begin_frame(); self.gpu_profiler.begin_frame(frame_id); @@ -3672,7 +3638,7 @@ impl Renderer { self.update_native_surfaces(); frame_id - }); + }; // Inform the client that we are starting a composition transaction if native // compositing is enabled. This needs to be done early in the frame, so that @@ -3688,80 +3654,78 @@ impl Renderer { self.update_debug_overlay(device_size); } - profile_timers.cpu_time.profile(|| { - //Note: another borrowck dance - let mut active_documents = mem::replace(&mut self.active_documents, Vec::default()); - // sort by the document layer id - active_documents.sort_by_key(|&(_, ref render_doc)| render_doc.frame.layer); + //Note: another borrowck dance + let mut active_documents = mem::replace(&mut self.active_documents, Vec::default()); + // sort by the document layer id + active_documents.sort_by_key(|&(_, ref render_doc)| render_doc.frame.layer); - #[cfg(feature = "replay")] - self.texture_resolver.external_images.extend( - self.owned_external_images.iter().map(|(key, value)| (*key, value.clone())) + #[cfg(feature = "replay")] + self.texture_resolver.external_images.extend( + self.owned_external_images.iter().map(|(key, value)| (*key, value.clone())) + ); + + let last_document_index = active_documents.len() - 1; + for (doc_index, (document_id, RenderedDocument { ref mut frame, 
ref mut profile, .. })) in active_documents.iter_mut().enumerate() { + assert!(self.current_compositor_kind == frame.composite_state.compositor_kind); + + if self.shared_texture_cache_cleared { + assert!(self.documents_seen.contains(&document_id), + "Cleared texture cache without sending new document frame."); + } + + if let Err(e) = self.prepare_gpu_cache(frame) { + self.renderer_errors.push(e); + continue; + } + assert!(frame.gpu_cache_frame_id <= self.gpu_cache_frame_id, + "Received frame depends on a later GPU cache epoch ({:?}) than one we received last via `UpdateGpuCache` ({:?})", + frame.gpu_cache_frame_id, self.gpu_cache_frame_id); + + { + profile_scope!("gl.flush"); + self.device.gl().flush(); // early start on gpu cache updates + } + + self.draw_frame( + frame, + device_size, + &mut results, + doc_index == 0, ); - let last_document_index = active_documents.len() - 1; - for (doc_index, (document_id, RenderedDocument { ref mut frame, .. })) in active_documents.iter_mut().enumerate() { - assert!(self.current_compositor_kind == frame.composite_state.compositor_kind); - - if self.shared_texture_cache_cleared { - assert!(self.documents_seen.contains(&document_id), - "Cleared texture cache without sending new document frame."); - } - - frame.profile_counters.reset_targets(); - if let Err(e) = self.prepare_gpu_cache(frame) { - self.renderer_errors.push(e); - continue; - } - assert!(frame.gpu_cache_frame_id <= self.gpu_cache_frame_id, - "Received frame depends on a later GPU cache epoch ({:?}) than one we received last via `UpdateGpuCache` ({:?})", - frame.gpu_cache_frame_id, self.gpu_cache_frame_id); - - { - profile_scope!("gl.flush"); - self.device.gl().flush(); // early start on gpu cache updates - } - - self.draw_frame( - frame, - device_size, - &mut results, - doc_index == 0, - ); - - // Profile marker for the number of invalidated picture cache - if thread_is_being_profiled() { - let duration = Duration::new(0,0); - let message = 
self.profile_counters.rendered_picture_cache_tiles.get_accum().to_string(); + // TODO(nical): do this automatically by selecting counters in the wr profiler + // Profile marker for the number of invalidated picture cache + if thread_is_being_profiled() { + let duration = Duration::new(0,0); + if let Some(n) = self.profiler.get(profiler::RENDERED_PICTURE_TILES) { + let message = (n as usize).to_string(); add_text_marker(cstr!("NumPictureCacheInvalidated"), &message, duration); } - - if device_size.is_some() { - self.draw_frame_debug_items(&frame.debug_items); - } - if self.debug_flags.contains(DebugFlags::PROFILER_DBG) { - frame_profiles.push(frame.profile_counters.clone()); - } - - let dirty_regions = - mem::replace(&mut frame.recorded_dirty_regions, Vec::new()); - results.recorded_dirty_regions.extend(dirty_regions); - - // If we're the last document, don't call end_pass here, because we'll - // be moving on to drawing the debug overlays. See the comment above - // the end_pass call in draw_frame about debug draw overlays - // for a bit more context. - if doc_index != last_document_index { - self.texture_resolver.end_pass(&mut self.device, None, None); - } } - self.unlock_external_images(); - self.active_documents = active_documents; + if device_size.is_some() { + self.draw_frame_debug_items(&frame.debug_items); + } + let dirty_regions = + mem::replace(&mut frame.recorded_dirty_regions, Vec::new()); + results.recorded_dirty_regions.extend(dirty_regions); - let _gm = self.gpu_profiler.start_marker("end frame"); - self.gpu_profiler.end_frame(); - }); + // If we're the last document, don't call end_pass here, because we'll + // be moving on to drawing the debug overlays. See the comment above + // the end_pass call in draw_frame about debug draw overlays + // for a bit more context. 
+ if doc_index != last_document_index { + self.texture_resolver.end_pass(&mut self.device, None, None); + } + + self.profile.merge(profile); + } + + self.unlock_external_images(); + self.active_documents = active_documents; + + let _gm = self.gpu_profiler.start_marker("end frame"); + self.gpu_profiler.end_frame(); if let Some(device_size) = device_size { // Bind a surface to draw the debug / profiler information to. @@ -3774,27 +3738,13 @@ impl Renderer { self.draw_epoch_debug(); } + self.profile.end_time(profiler::RENDERER_TIME); + self.profile.end_time_if_started(profiler::TOTAL_FRAME_CPU_TIME); + let current_time = precise_time_ns(); if device_size.is_some() { - let ns = current_time - self.last_time; - self.profile_counters.frame_time.set(ns); - } - - let frame_cpu_time_ns = self.backend_profile_counters.total_time.get() - + profile_timers.cpu_time.get(); - let frame_cpu_time_ms = frame_cpu_time_ns as f64 / 1000000.0; - if frame_cpu_time_ms > 16.0 { - self.slow_frame_indicator.changed(); - } - - if self.backend_profile_counters.scene_changed { - let txn_time_ns = self.backend_profile_counters.txn.total_send_time.get() - + self.backend_profile_counters.txn.display_list_build_time.get() - + self.backend_profile_counters.txn.scene_build_time.get(); - let txn_time_ms = txn_time_ns as f64 / 1000000.0; - if txn_time_ms > 100.0 { - self.slow_txn_indicator.changed(); - } + let time = profiler::ns_to_ms(current_time - self.last_time); + self.profile.set(profiler::FRAME_TIME, time); } if self.max_recorded_profiles > 0 { @@ -3803,127 +3753,71 @@ impl Renderer { } let cpu_profile = CpuProfile::new( cpu_frame_id, - self.backend_profile_counters.total_time.get(), - profile_timers.cpu_time.get(), - self.profile_counters.draw_calls.get(), + (self.profile.get_or(profiler::FRAME_BUILDING_TIME, 0.0) * 1000000.0) as u64, + (self.profile.get_or(profiler::RENDERER_TIME, 0.0) * 1000000.0) as u64, + self.profile.get_or(profiler::DRAW_CALLS, 0.0) as usize, ); 
self.cpu_profiles.push_back(cpu_profile); } + self.profiler.set_counters(&mut self.profile); + + // Note: profile counters must be set before this or they will count for next frame. + self.profiler.update(); + if self.debug_flags.intersects(DebugFlags::PROFILER_DBG | DebugFlags::PROFILER_CAPTURE) { if let Some(device_size) = device_size { //TODO: take device/pixel ratio into equation? if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) { - let style = if !self.debug_flags.contains(DebugFlags::PROFILER_DBG) { - // Don't draw the profiler, but collect samples for captures - assert!(self.debug_flags.contains(DebugFlags::PROFILER_CAPTURE)); - ProfileStyle::NoDraw - } else if self.debug_flags.contains(DebugFlags::SMART_PROFILER) { - ProfileStyle::Smart - } else if self.debug_flags.contains(DebugFlags::COMPACT_PROFILER) { - ProfileStyle::Compact - } else { - ProfileStyle::Full - }; - - let screen_fraction = 1.0 / device_size.to_f32().area(); self.profiler.draw_profile( - &frame_profiles, - &self.backend_profile_counters, - &self.profile_counters, - &mut profile_timers, - &profile_samplers, - screen_fraction, + self.frame_counter, debug_renderer, - style, + device_size, ); } } } - let mut x = 0.0; - if self.debug_flags.contains(DebugFlags::NEW_FRAME_INDICATOR) { - if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) { - self.new_frame_indicator.changed(); - self.new_frame_indicator.draw( - x, 0.0, - ColorU::new(0, 110, 220, 255), - debug_renderer, - ); - x += ChangeIndicator::width(); - } - } - - if self.debug_flags.contains(DebugFlags::NEW_SCENE_INDICATOR) { - if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) { - self.new_scene_indicator.draw( - x, 0.0, - ColorU::new(0, 220, 110, 255), - debug_renderer, - ); - x += ChangeIndicator::width(); - } - } - - if self.debug_flags.contains(DebugFlags::SLOW_FRAME_INDICATOR) { - if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) { - self.slow_txn_indicator.draw( - x, 
0.0, - ColorU::new(250, 80, 80, 255), - debug_renderer, - ); - self.slow_frame_indicator.draw( - x, 10.0, - ColorU::new(220, 30, 10, 255), - debug_renderer, - ); - } - } - if self.debug_flags.contains(DebugFlags::ECHO_DRIVER_MESSAGES) { self.device.echo_driver_messages(); } if thread_is_being_profiled() { let duration = Duration::new(0,0); - let message = self.profile_counters.get_draw_calls().to_string(); + let message = (self.profiler.get(profiler::DRAW_CALLS).unwrap_or(0.0) as usize).to_string(); add_text_marker(cstr!("NumDrawCalls"), &message, duration); } - results.stats.texture_upload_kb = self.profile_counters.texture_data_uploaded.get(); - self.backend_profile_counters.reset(); - self.profile_counters.reset(); - self.profile_counters.frame_counter.inc(); + results.stats.texture_upload_kb = self.profile.get(profiler::TEXTURE_UPLOADS_MEM).unwrap_or(0.0) as usize; + self.frame_counter += 1; results.stats.resource_upload_time = self.resource_upload_time; - self.resource_upload_time = 0; + self.resource_upload_time = 0.0; results.stats.gpu_cache_upload_time = self.gpu_cache_upload_time; - self.gpu_cache_upload_time = 0; + self.gpu_cache_upload_time = 0.0; - profile_timers.cpu_time.profile(|| { - if let Some(debug_renderer) = self.debug.try_get_mut() { - let small_screen = self.debug_flags.contains(DebugFlags::SMALL_SCREEN); - let scale = if small_screen { 1.6 } else { 1.0 }; - // TODO(gw): Tidy this up so that compositor config integrates better - // with the (non-compositor) surface y-flip options. - let surface_origin_is_top_left = match self.current_compositor_kind { - CompositorKind::Native { .. } => true, - CompositorKind::Draw { .. } => self.device.surface_origin_is_top_left(), - }; - debug_renderer.render( - &mut self.device, - device_size, - scale, - surface_origin_is_top_left, - ); - } - // See comment for texture_resolver.begin_frame() for explanation - // of why this must be done after all rendering, including debug - // overlays. 
The end_frame() call implicitly calls end_pass(), which - // should ensure any left over render targets get invalidated and - // returned to the pool correctly. - self.texture_resolver.end_frame(&mut self.device, cpu_frame_id); - self.device.end_frame(); - }); + if let Some(debug_renderer) = self.debug.try_get_mut() { + let small_screen = self.debug_flags.contains(DebugFlags::SMALL_SCREEN); + let scale = if small_screen { 1.6 } else { 1.0 }; + // TODO(gw): Tidy this up so that compositor config integrates better + // with the (non-compositor) surface y-flip options. + let surface_origin_is_top_left = match self.current_compositor_kind { + CompositorKind::Native { .. } => true, + CompositorKind::Draw { .. } => self.device.surface_origin_is_top_left(), + }; + debug_renderer.render( + &mut self.device, + device_size, + scale, + surface_origin_is_top_left, + ); + } + // See comment for texture_resolver.begin_frame() for explanation + // of why this must be done after all rendering, including debug + // overlays. The end_frame() call implicitly calls end_pass(), which + // should ensure any left over render targets get invalidated and + // returned to the pool correctly. + self.texture_resolver.end_frame(&mut self.device, cpu_frame_id); + self.device.end_frame(); if device_size.is_some() { self.last_time = current_time; @@ -3952,6 +3846,48 @@ impl Renderer { } } + fn update_gpu_profile(&mut self, device_size: DeviceIntSize) { + let _gm = self.gpu_profiler.start_marker("build samples"); + // Block CPU waiting for last frame's GPU profiles to arrive. + // In general this shouldn't block unless heavily GPU limited. 
+ let (gpu_frame_id, timers, samplers) = self.gpu_profiler.build_samples(); + + if self.max_recorded_profiles > 0 { + while self.gpu_profiles.len() >= self.max_recorded_profiles { + self.gpu_profiles.pop_front(); + } + + self.gpu_profiles.push_back(GpuProfile::new(gpu_frame_id, &timers)); + } + + self.profiler.set_gpu_time_queries(timers); + + if !samplers.is_empty() { + let screen_fraction = 1.0 / device_size.to_f32().area(); + + fn accumulate_sampler_value(description: &str, samplers: &[GpuSampler]) -> f32 { + let mut accum = 0.0; + for sampler in samplers { + if sampler.tag.label != description { + continue; + } + + accum += sampler.count as f32; + } + + accum + } + + let alpha_targets = accumulate_sampler_value(&"Alpha targets", &samplers) * screen_fraction; + let transparent_pass = accumulate_sampler_value(&"Transparent pass", &samplers) * screen_fraction; + let opaque_pass = accumulate_sampler_value(&"Opaque pass", &samplers) * screen_fraction; + self.profile.set(profiler::ALPHA_TARGETS_SAMPLERS, alpha_targets); + self.profile.set(profiler::TRANSPARENT_PASS_SAMPLERS, transparent_pass); + self.profile.set(profiler::OPAQUE_PASS_SAMPLERS, opaque_pass); + self.profile.set(profiler::TOTAL_SAMPLERS, alpha_targets + transparent_pass + opaque_pass); + } + } + fn update_gpu_cache(&mut self) { let _gm = self.gpu_profiler.start_marker("gpu cache update"); @@ -3999,15 +3935,12 @@ impl Renderer { .update(&mut self.device, &update_list); } - let mut upload_time = TimeProfileCounter::new("GPU cache upload time", false, Some(0.0..2.0)); - let updated_rows = upload_time.profile(|| { - self.gpu_cache_texture.flush(&mut self.device) - }); - self.gpu_cache_upload_time += upload_time.get(); + self.profile.start_time(profiler::GPU_CACHE_UPLOAD_TIME); + let updated_rows = self.gpu_cache_texture.flush(&mut self.device); + self.gpu_cache_upload_time += self.profile.end_time(profiler::GPU_CACHE_UPLOAD_TIME); - let counters = &mut self.backend_profile_counters.resources.gpu_cache; - 
counters.updated_rows.set(updated_rows); - counters.updated_blocks.set(updated_blocks); + self.profile.set(profiler::GPU_CACHE_ROWS_UPDATED, updated_rows); + self.profile.set(profiler::GPU_CACHE_BLOCKS_UPDATED, updated_blocks); } fn prepare_gpu_cache(&mut self, frame: &Frame) -> Result<(), RendererError> { @@ -4043,191 +3976,192 @@ impl Renderer { let mut pending_texture_updates = mem::replace(&mut self.pending_texture_updates, vec![]); self.pending_texture_cache_updates = false; - let mut upload_time = TimeProfileCounter::new("Resource upload time", false, Some(0.0..2.0)); - upload_time.profile(|| { - for update_list in pending_texture_updates.drain(..) { - for allocation in update_list.allocations { - match allocation.kind { - TextureCacheAllocationKind::Alloc(_) => add_event_marker(c_str!("TextureCacheAlloc")), - TextureCacheAllocationKind::Realloc(_) => add_event_marker(c_str!("TextureCacheRealloc")), - TextureCacheAllocationKind::Reset(_) => add_event_marker(c_str!("TextureCacheReset")), - TextureCacheAllocationKind::Free => add_event_marker(c_str!("TextureCacheFree")), - }; - let old = match allocation.kind { - TextureCacheAllocationKind::Alloc(ref info) | - TextureCacheAllocationKind::Realloc(ref info) | - TextureCacheAllocationKind::Reset(ref info) => { - // Create a new native texture, as requested by the texture cache. - // - // Ensure no PBO is bound when creating the texture storage, - // or GL will attempt to read data from there. - let mut texture = self.device.create_texture( - TextureTarget::Array, - info.format, - info.width, - info.height, - info.filter, - // This needs to be a render target because some render - // tasks get rendered into the texture cache. 
- Some(RenderTargetInfo { has_depth: info.has_depth }), - info.layer_count, - ); + self.profile.start_time(profiler::TEXTURE_CACHE_UPLOAD_TIME); - if info.is_shared_cache { - texture.flags_mut() - .insert(TextureFlags::IS_SHARED_TEXTURE_CACHE); + for update_list in pending_texture_updates.drain(..) { + for allocation in update_list.allocations { + match allocation.kind { + TextureCacheAllocationKind::Alloc(_) => add_event_marker(c_str!("TextureCacheAlloc")), + TextureCacheAllocationKind::Realloc(_) => add_event_marker(c_str!("TextureCacheRealloc")), + TextureCacheAllocationKind::Reset(_) => add_event_marker(c_str!("TextureCacheReset")), + TextureCacheAllocationKind::Free => add_event_marker(c_str!("TextureCacheFree")), + }; + let old = match allocation.kind { + TextureCacheAllocationKind::Alloc(ref info) | + TextureCacheAllocationKind::Realloc(ref info) | + TextureCacheAllocationKind::Reset(ref info) => { + // Create a new native texture, as requested by the texture cache. + // + // Ensure no PBO is bound when creating the texture storage, + // or GL will attempt to read data from there. + let mut texture = self.device.create_texture( + TextureTarget::Array, + info.format, + info.width, + info.height, + info.filter, + // This needs to be a render target because some render + // tasks get rendered into the texture cache. + Some(RenderTargetInfo { has_depth: info.has_depth }), + info.layer_count, + ); - // Textures in the cache generally don't need to be cleared, - // but we do so if the debug display is active to make it - // easier to identify unallocated regions. - if self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG) { - self.clear_texture(&texture, TEXTURE_CACHE_DBG_CLEAR_COLOR); - } + if info.is_shared_cache { + texture.flags_mut() + .insert(TextureFlags::IS_SHARED_TEXTURE_CACHE); + + // Textures in the cache generally don't need to be cleared, + // but we do so if the debug display is active to make it + // easier to identify unallocated regions. 
+ if self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG) { + self.clear_texture(&texture, TEXTURE_CACHE_DBG_CLEAR_COLOR); } + } - self.texture_resolver.texture_cache_map.insert(allocation.id, texture) - } - TextureCacheAllocationKind::Free => { - self.texture_resolver.texture_cache_map.remove(&allocation.id) - } - }; - - match allocation.kind { - TextureCacheAllocationKind::Alloc(_) => { - assert!(old.is_none(), "Renderer and backend disagree!"); - } - TextureCacheAllocationKind::Realloc(_) => { - self.device.blit_renderable_texture( - self.texture_resolver.texture_cache_map.get_mut(&allocation.id).unwrap(), - old.as_ref().unwrap(), - ); - } - TextureCacheAllocationKind::Reset(_) | - TextureCacheAllocationKind::Free => { - assert!(old.is_some(), "Renderer and backend disagree!"); - } + self.texture_resolver.texture_cache_map.insert(allocation.id, texture) } + TextureCacheAllocationKind::Free => { + self.texture_resolver.texture_cache_map.remove(&allocation.id) + } + }; - if let Some(old) = old { - self.device.delete_texture(old); + match allocation.kind { + TextureCacheAllocationKind::Alloc(_) => { + assert!(old.is_none(), "Renderer and backend disagree!"); + } + TextureCacheAllocationKind::Realloc(_) => { + self.device.blit_renderable_texture( + self.texture_resolver.texture_cache_map.get_mut(&allocation.id).unwrap(), + old.as_ref().unwrap(), + ); + } + TextureCacheAllocationKind::Reset(_) | + TextureCacheAllocationKind::Free => { + assert!(old.is_some(), "Renderer and backend disagree!"); } } - for (texture_id, updates) in update_list.updates { - let texture = &self.texture_resolver.texture_cache_map[&texture_id]; - let device = &mut self.device; - - // Calculate the total size of buffer required to upload all updates. - let required_size = updates.iter().map(|update| { - // Perform any debug clears now. As this requires a mutable borrow of device, - // it must be done before all the updates which require a TextureUploader. 
- if let TextureUpdateSource::DebugClear = update.source { - let draw_target = DrawTarget::from_texture( - texture, - update.layer_index as usize, - false, - ); - device.bind_draw_target(draw_target); - device.clear_target( - Some(TEXTURE_CACHE_DBG_CLEAR_COLOR), - None, - Some(draw_target.to_framebuffer_rect(update.rect.to_i32())) - ); - - 0 - } else { - let (upload_size, _) = device.required_upload_size_and_stride( - update.rect.size, - texture.get_format(), - ); - upload_size - } - }).sum(); - - if required_size == 0 { - continue; - } - - // For best performance we use a single TextureUploader for all uploads. - // Using individual TextureUploaders was causing performance issues on some drivers - // due to allocating too many PBOs. - let mut uploader = device.upload_texture( - texture, - &self.texture_cache_upload_pbo, - required_size - ); - - for update in updates { - let TextureCacheUpdate { rect, stride, offset, layer_index, format_override, source } = update; - - let bytes_uploaded = match source { - TextureUpdateSource::Bytes { data } => { - let data = &data[offset as usize ..]; - uploader.upload( - rect, - layer_index, - stride, - format_override, - data.as_ptr(), - data.len(), - ) - } - TextureUpdateSource::External { id, channel_index } => { - let handler = self.external_image_handler - .as_mut() - .expect("Found external image, but no handler set!"); - // The filter is only relevant for NativeTexture external images. - let dummy_data; - let data = match handler.lock(id, channel_index, ImageRendering::Auto).source { - ExternalImageSource::RawData(data) => { - &data[offset as usize ..] - } - ExternalImageSource::Invalid => { - // Create a local buffer to fill the pbo. - let bpp = texture.get_format().bytes_per_pixel(); - let width = stride.unwrap_or(rect.size.width * bpp); - let total_size = width * rect.size.height; - // WR haven't support RGBAF32 format in texture_cache, so - // we use u8 type here. 
- dummy_data = vec![0xFFu8; total_size as usize]; - &dummy_data - } - ExternalImageSource::NativeTexture(eid) => { - panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id); - } - }; - let size = uploader.upload( - rect, - layer_index, - stride, - format_override, - data.as_ptr(), - data.len() - ); - handler.unlock(id, channel_index); - size - } - TextureUpdateSource::DebugClear => { - // DebugClear updates are handled separately. - 0 - } - }; - self.profile_counters.texture_data_uploaded.add(bytes_uploaded >> 10); - } - } - - if update_list.clears_shared_cache { - self.shared_texture_cache_cleared = true; + if let Some(old) = old { + self.device.delete_texture(old); } } - drain_filter( - &mut self.notifications, - |n| { n.when() == Checkpoint::FrameTexturesUpdated }, - |n| { n.notify(); }, - ); - }); - self.resource_upload_time += upload_time.get(); + for (texture_id, updates) in update_list.updates { + let texture = &self.texture_resolver.texture_cache_map[&texture_id]; + let device = &mut self.device; + + // Calculate the total size of buffer required to upload all updates. + let required_size = updates.iter().map(|update| { + // Perform any debug clears now. As this requires a mutable borrow of device, + // it must be done before all the updates which require a TextureUploader. + if let TextureUpdateSource::DebugClear = update.source { + let draw_target = DrawTarget::from_texture( + texture, + update.layer_index as usize, + false, + ); + device.bind_draw_target(draw_target); + device.clear_target( + Some(TEXTURE_CACHE_DBG_CLEAR_COLOR), + None, + Some(draw_target.to_framebuffer_rect(update.rect.to_i32())) + ); + + 0 + } else { + let (upload_size, _) = device.required_upload_size_and_stride( + update.rect.size, + texture.get_format(), + ); + upload_size + } + }).sum(); + + if required_size == 0 { + continue; + } + + // For best performance we use a single TextureUploader for all uploads. 
+ // Using individual TextureUploaders was causing performance issues on some drivers + // due to allocating too many PBOs. + let mut uploader = device.upload_texture( + texture, + &self.texture_cache_upload_pbo, + required_size + ); + + for update in updates { + let TextureCacheUpdate { rect, stride, offset, layer_index, format_override, source } = update; + + let bytes_uploaded = match source { + TextureUpdateSource::Bytes { data } => { + let data = &data[offset as usize ..]; + uploader.upload( + rect, + layer_index, + stride, + format_override, + data.as_ptr(), + data.len(), + ) + } + TextureUpdateSource::External { id, channel_index } => { + let handler = self.external_image_handler + .as_mut() + .expect("Found external image, but no handler set!"); + // The filter is only relevant for NativeTexture external images. + let dummy_data; + let data = match handler.lock(id, channel_index, ImageRendering::Auto).source { + ExternalImageSource::RawData(data) => { + &data[offset as usize ..] + } + ExternalImageSource::Invalid => { + // Create a local buffer to fill the pbo. + let bpp = texture.get_format().bytes_per_pixel(); + let width = stride.unwrap_or(rect.size.width * bpp); + let total_size = width * rect.size.height; + // WR doesn't support the RGBAF32 format in texture_cache yet, so + // we use u8 type here. + dummy_data = vec![0xFFu8; total_size as usize]; + &dummy_data + } + ExternalImageSource::NativeTexture(eid) => { + panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id); + } + }; + let size = uploader.upload( + rect, + layer_index, + stride, + format_override, + data.as_ptr(), + data.len() + ); + handler.unlock(id, channel_index); + size + } + TextureUpdateSource::DebugClear => { + // DebugClear updates are handled separately. 
+ 0 + } + }; + self.profile.add(profiler::TEXTURE_UPLOADS, bytes_uploaded as f64 * 1.0e-6); + } + } + + if update_list.clears_shared_cache { + self.shared_texture_cache_cleared = true; + } + } + + drain_filter( + &mut self.notifications, + |n| { n.when() == Checkpoint::FrameTexturesUpdated }, + |n| { n.notify(); }, + ); + + let t = self.profile.end_time(profiler::TEXTURE_CACHE_UPLOAD_TIME); + self.resource_upload_time += t; } fn bind_textures(&mut self, textures: &BatchTextures) { @@ -4285,11 +4219,11 @@ impl Renderer { .update_vao_instances(vao, chunk, ONE_TIME_USAGE_HINT); self.device .draw_indexed_triangles_instanced_u16(6, chunk.len() as i32); - self.profile_counters.draw_calls.inc(); + self.profile.inc(profiler::DRAW_CALLS); stats.total_draw_calls += 1; } - self.profile_counters.vertices.add(6 * data.len()); + self.profile.add(profiler::VERTICES, 6 * data.len()); } fn handle_readback_composite( @@ -4509,7 +4443,7 @@ impl Renderer { ) { profile_scope!("draw_picture_cache_target"); - self.profile_counters.rendered_picture_cache_tiles.inc(); + self.profile.inc(profiler::RENDERED_PICTURE_TILES); let _gm = self.gpu_profiler.start_marker("picture cache target"); let framebuffer_kind = FramebufferKind::Other; @@ -5311,7 +5245,7 @@ impl Renderer { // count clear tiles here. let num_tiles = composite_state.opaque_tiles.len() + composite_state.alpha_tiles.len(); - self.profile_counters.total_picture_cache_tiles.set(num_tiles); + self.profile.set(profiler::PICTURE_TILES, num_tiles); // Draw opaque tiles first, front-to-back to get maxmum // z-reject efficiency. 
@@ -5374,7 +5308,7 @@ impl Renderer { ) { profile_scope!("draw_color_target"); - self.profile_counters.color_passes.inc(); + self.profile.inc(profiler::COLOR_PASSES); let _gm = self.gpu_profiler.start_marker("color target"); // sanity check for the depth buffer @@ -5598,7 +5532,7 @@ impl Renderer { ) { profile_scope!("draw_alpha_target"); - self.profile_counters.alpha_passes.inc(); + self.profile.inc(profiler::ALPHA_PASSES); let _gm = self.gpu_profiler.start_marker("alpha target"); let alpha_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_ALPHA); @@ -5998,7 +5932,7 @@ impl Renderer { /// Allocates a texture to be used as the output for a rendering pass. /// - /// We make an effort to reuse render targe textures across passes and + /// We make an effort to reuse render target textures across passes and /// across frames when the format and dimensions match. Because we use /// immutable storage, we can't resize textures. /// @@ -6012,7 +5946,6 @@ impl Renderer { fn allocate_target_texture( &mut self, list: &mut RenderTargetList, - counters: &mut FrameProfileCounters, ) -> Option { if list.targets.is_empty() { return None @@ -6032,7 +5965,7 @@ impl Renderer { (bounding_rect.size.height + 255) & !255, ); - counters.targets_used.inc(); + self.profile.inc(profiler::USED_TARGETS); // Try finding a match in the existing pool. If there's no match, we'll // create a new texture. 
@@ -6057,7 +5990,7 @@ impl Renderer { self.device.reuse_render_target::(&mut t, rt_info); t } else { - counters.targets_created.inc(); + self.profile.inc(profiler::CREATED_TARGETS); self.device.create_texture( TextureTarget::Array, list.format, @@ -6294,8 +6227,8 @@ impl Renderer { } => { profile_scope!("offscreen target"); - let alpha_tex = self.allocate_target_texture(alpha, &mut frame.profile_counters); - let color_tex = self.allocate_target_texture(color, &mut frame.profile_counters); + let alpha_tex = self.allocate_target_texture(alpha); + let color_tex = self.allocate_target_texture(color); // If this frame has already been drawn, then any texture // cache targets have already been updated and can be @@ -6312,7 +6245,7 @@ impl Renderer { } if !picture_cache.is_empty() { - self.profile_counters.color_passes.inc(); + self.profile.inc(profiler::COLOR_PASSES); } // Draw picture caching tiles for this pass. @@ -6569,6 +6502,10 @@ impl Renderer { self.debug_flags = flags; } + pub fn set_profiler_ui(&mut self, ui_str: &str) { + self.profiler.set_ui(ui_str); + } + fn draw_frame_debug_items(&mut self, items: &[DebugItem]) { if items.is_empty() { return; @@ -7366,8 +7303,8 @@ pub struct RendererStats { pub alpha_target_count: usize, pub color_target_count: usize, pub texture_upload_kb: usize, - pub resource_upload_time: u64, - pub gpu_cache_upload_time: u64, + pub resource_upload_time: f64, + pub gpu_cache_upload_time: f64, } /// Return type from render(), which contains some repr(C) statistics as well as diff --git a/gfx/wr/webrender/src/resource_cache.rs b/gfx/wr/webrender/src/resource_cache.rs index 1f0eb258084f..d21ae87fd7bb 100644 --- a/gfx/wr/webrender/src/resource_cache.rs +++ b/gfx/wr/webrender/src/resource_cache.rs @@ -26,7 +26,7 @@ use crate::glyph_rasterizer::{GLYPH_FLASHING, FontInstance, GlyphFormat, GlyphKe use crate::gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle}; use crate::gpu_types::UvRectKind; use crate::internal_types::{FastHashMap, 
FastHashSet, TextureSource, ResourceUpdateList}; -use crate::profiler::{ResourceProfileCounters, TextureCacheProfileCounters}; +use crate::profiler::{self, TransactionProfile, bytes_to_mb}; use crate::render_backend::{FrameId, FrameStamp}; use crate::render_task_graph::{RenderTaskGraph, RenderTaskId}; use crate::render_task_cache::{RenderTaskCache, RenderTaskCacheKey}; @@ -461,6 +461,9 @@ pub struct ResourceCache { /// A list of queued compositor surface updates to apply next frame. pending_native_surface_updates: Vec, + + image_templates_memory: usize, + font_templates_memory: usize, } impl ResourceCache { @@ -492,6 +495,8 @@ impl ResourceCache { pending_native_surface_updates: Vec::new(), #[cfg(feature = "capture")] capture_dirty: true, + image_templates_memory: 0, + font_templates_memory: 0, } } @@ -546,7 +551,7 @@ impl ResourceCache { pub fn post_scene_building_update( &mut self, updates: Vec, - profile_counters: &mut ResourceProfileCounters, + profile: &mut TransactionProfile, ) { // TODO, there is potential for optimization here, by processing updates in // bulk rather than one by one (for example by sorting allocations by size or @@ -561,7 +566,8 @@ impl ResourceCache { match update { ResourceUpdate::AddImage(img) => { if let ImageData::Raw(ref bytes) = img.data { - profile_counters.image_templates.inc(bytes.len()); + self.image_templates_memory += bytes.len(); + profile.set(profiler::IMAGE_TEMPLATES_MEM, bytes_to_mb(self.image_templates_memory)); } self.add_image_template( img.key, @@ -570,6 +576,7 @@ impl ResourceCache { &img.descriptor.size.into(), img.tiling, ); + profile.set(profiler::IMAGE_TEMPLATES, self.resources.image_templates.images.len()); } ResourceUpdate::UpdateImage(img) => { self.update_image_template(img.key, img.descriptor, img.data.into(), &img.dirty_rect); @@ -597,12 +604,16 @@ impl ResourceCache { } ResourceUpdate::DeleteImage(img) => { self.delete_image_template(img); + profile.set(profiler::IMAGE_TEMPLATES, 
self.resources.image_templates.images.len()); + profile.set(profiler::IMAGE_TEMPLATES_MEM, bytes_to_mb(self.image_templates_memory)); } ResourceUpdate::DeleteBlobImage(img) => { self.delete_image_template(img.as_image()); } ResourceUpdate::DeleteFont(font) => { self.delete_font_template(font); + profile.set(profiler::FONT_TEMPLATES, self.resources.font_templates.len()); + profile.set(profiler::FONT_TEMPLATES_MEM, bytes_to_mb(self.font_templates_memory)); } ResourceUpdate::DeleteFontInstance(font) => { self.delete_font_instance(font); @@ -614,13 +625,15 @@ impl ResourceCache { ResourceUpdate::AddFont(font) => { match font { AddFont::Raw(id, bytes, index) => { - profile_counters.font_templates.inc(bytes.len()); + self.font_templates_memory += bytes.len(); + profile.set(profiler::FONT_TEMPLATES_MEM, bytes_to_mb(self.font_templates_memory)); self.add_font_template(id, FontTemplate::Raw(bytes, index)); } AddFont::Native(id, native_font_handle) => { self.add_font_template(id, FontTemplate::Native(native_font_handle)); } } + profile.set(profiler::FONT_TEMPLATES, self.resources.font_templates.len()); } ResourceUpdate::AddFontInstance(..) => { // Already added in ApiResources. @@ -632,7 +645,7 @@ impl ResourceCache { pub fn add_rasterized_blob_images( &mut self, images: Vec<(BlobImageRequest, BlobImageResult)>, - texture_cache_profile: &mut TextureCacheProfileCounters, + profile: &mut TransactionProfile, ) { for (request, result) in images { let data = match result { @@ -643,7 +656,7 @@ impl ResourceCache { } }; - texture_cache_profile.rasterized_blob_pixels.inc(data.rasterized_rect.area() as usize); + profile.add(profiler::RASTERIZED_BLOBS_PX, data.rasterized_rect.area()); // First make sure we have an entry for this key (using a placeholder // if need be). 
@@ -680,7 +693,9 @@ impl ResourceCache { pub fn delete_font_template(&mut self, font_key: FontKey) { self.glyph_rasterizer.delete_font(font_key); - self.resources.font_templates.remove(&font_key); + if let Some(FontTemplate::Raw(data, _)) = self.resources.font_templates.remove(&font_key) { + self.font_templates_memory -= data.len(); + } self.cached_glyphs .clear_fonts(|font| font.font_key == font_key); } @@ -800,6 +815,10 @@ impl ResourceCache { match value { Some(image) => if image.data.is_blob() { + if let CachedImageData::Raw(data) = image.data { + self.image_templates_memory -= data.len(); + } + let blob_key = BlobImageKey(image_key); self.deleted_blob_keys.back_mut().unwrap().push(blob_key); self.rasterized_blob_images.remove(&blob_key); @@ -1150,7 +1169,7 @@ impl ResourceCache { &mut self, gpu_cache: &mut GpuCache, render_tasks: &mut RenderTaskGraph, - texture_cache_profile: &mut TextureCacheProfileCounters, + profile: &mut TransactionProfile, ) { profile_scope!("block_until_all_resources_added"); @@ -1163,7 +1182,7 @@ impl ResourceCache { gpu_cache, &mut self.cached_render_tasks, render_tasks, - texture_cache_profile, + profile, ); // Apply any updates of new / updated images (incl. blobs) to the texture cache. 
@@ -1384,11 +1403,11 @@ impl ResourceCache { } - pub fn end_frame(&mut self, texture_cache_profile: &mut TextureCacheProfileCounters) { + pub fn end_frame(&mut self, profile: &mut TransactionProfile) { debug_assert_eq!(self.state, State::QueryResources); profile_scope!("end_frame"); self.state = State::Idle; - self.texture_cache.end_frame(texture_cache_profile); + self.texture_cache.end_frame(profile); } pub fn set_debug_flags(&mut self, flags: DebugFlags) { diff --git a/gfx/wr/webrender/src/scene_builder_thread.rs b/gfx/wr/webrender/src/scene_builder_thread.rs index 639894000aa1..37f174b840b9 100644 --- a/gfx/wr/webrender/src/scene_builder_thread.rs +++ b/gfx/wr/webrender/src/scene_builder_thread.rs @@ -26,6 +26,7 @@ use crate::prim_store::image::{Image, YuvImage}; use crate::prim_store::line_dec::LineDecoration; use crate::prim_store::picture::Picture; use crate::prim_store::text_run::TextRun; +use crate::profiler::{self, TransactionProfile}; use crate::render_backend::SceneView; use crate::renderer::{PipelineInfo, SceneBuilderHooks}; use crate::scene::{Scene, BuiltScene, SceneStats}; @@ -41,19 +42,6 @@ use crate::debug_server; #[cfg(feature = "debugger")] use api::{BuiltDisplayListIter, DisplayItem}; -/// Various timing information that will be turned into -/// TransactionProfileCounters later down the pipeline. 
-#[derive(Clone, Debug)] -pub struct TransactionTimings { - pub builder_start_time_ns: u64, - pub builder_end_time_ns: u64, - pub send_time_ns: u64, - pub scene_build_start_time_ns: u64, - pub scene_build_end_time_ns: u64, - pub blob_rasterization_end_time_ns: u64, - pub display_list_len: usize, -} - fn rasterize_blobs(txn: &mut TransactionMsg, is_low_priority: bool) { profile_scope!("rasterize_blobs"); @@ -81,12 +69,10 @@ pub struct BuiltTransaction { pub removed_pipelines: Vec<(PipelineId, DocumentId)>, pub notifications: Vec, pub interner_updates: Option, - pub scene_build_start_time: u64, - pub scene_build_end_time: u64, pub render_frame: bool, pub invalidate_rendered_frame: bool, pub discard_frame_state_for_pipelines: Vec, - pub timings: Option, + pub profile: TransactionProfile, } #[cfg(feature = "replay")] @@ -427,8 +413,6 @@ impl SceneBuilderThread { for mut item in scenes { self.config = item.config; - let scene_build_start_time = precise_time_ns(); - let mut built_scene = None; let mut interner_updates = None; @@ -470,10 +454,8 @@ impl SceneBuilderThread { removed_pipelines: Vec::new(), discard_frame_state_for_pipelines: Vec::new(), notifications: Vec::new(), - scene_build_start_time, - scene_build_end_time: precise_time_ns(), interner_updates, - timings: None, + profile: TransactionProfile::new(), })]; self.forward_built_transactions(txns); @@ -580,12 +562,12 @@ impl SceneBuilderThread { hooks.pre_scene_build(); } - let scene_build_start_time = precise_time_ns(); - let doc = self.documents.get_mut(&txn.document_id).unwrap(); let scene = &mut doc.scene; - let mut timings = None; + let mut profile = txn.profile.take(); + + profile.start_time(profiler::SCENE_BUILD_TIME); let mut discard_frame_state_for_pipelines = Vec::new(); let mut removed_pipelines = Vec::new(); @@ -613,11 +595,15 @@ impl SceneBuilderThread { display_list, preserve_frame_state, } => { - let display_list_len = display_list.data().len(); - let (builder_start_time_ns, builder_end_time_ns, 
send_time_ns) = display_list.times(); + let content_send_time = profiler::ns_to_ms(precise_time_ns() - send_time_ns); + let dl_build_time = profiler::ns_to_ms(builder_end_time_ns - builder_start_time_ns); + profile.set(profiler::CONTENT_SEND_TIME, content_send_time); + profile.set(profiler::DISPLAY_LIST_BUILD_TIME, dl_build_time); + profile.set(profiler::DISPLAY_LIST_MEM, profiler::bytes_to_mb(display_list.data().len())); + if self.removed_pipelines.contains(&pipeline_id) { continue; } @@ -635,16 +621,6 @@ impl SceneBuilderThread { viewport_size, ); - timings = Some(TransactionTimings { - builder_start_time_ns, - builder_end_time_ns, - send_time_ns, - scene_build_start_time_ns: 0, - scene_build_end_time_ns: 0, - blob_rasterization_end_time_ns: 0, - display_list_len, - }); - if !preserve_frame_state { discard_frame_state_for_pipelines.push(pipeline_id); } @@ -689,15 +665,16 @@ impl SceneBuilderThread { built_scene = Some(built); } - let scene_build_end_time = precise_time_ns(); + profile.end_time(profiler::SCENE_BUILD_TIME); - let is_low_priority = false; - rasterize_blobs(txn, is_low_priority); - if let Some(timings) = timings.as_mut() { - timings.blob_rasterization_end_time_ns = precise_time_ns(); - timings.scene_build_start_time_ns = scene_build_start_time; - timings.scene_build_end_time_ns = scene_build_end_time; + if !txn.blob_requests.is_empty() { + profile.start_time(profiler::BLOB_RASTERIZATION_TIME); + + let is_low_priority = false; + rasterize_blobs(txn, is_low_priority); + + profile.end_time(profiler::BLOB_RASTERIZATION_TIME); } drain_filter( @@ -724,9 +701,7 @@ impl SceneBuilderThread { discard_frame_state_for_pipelines, notifications: replace(&mut txn.notifications, Vec::new()), interner_updates, - scene_build_start_time, - scene_build_end_time, - timings, + profile, }) } @@ -751,7 +726,7 @@ impl SceneBuilderThread { let (tx, rx) = single_msg_channel(); let txn = txns.iter().find(|txn| txn.built_scene.is_some()).unwrap(); - 
hooks.pre_scene_swap(txn.scene_build_end_time - txn.scene_build_start_time); + hooks.pre_scene_swap((txn.profile.get(profiler::SCENE_BUILD_TIME).unwrap() * 1000000.0) as u64); (Some(info), Some(tx), Some(rx)) } else { diff --git a/gfx/wr/webrender/src/texture_cache.rs b/gfx/wr/webrender/src/texture_cache.rs index f77eb954c7a0..e1ee983aafb8 100644 --- a/gfx/wr/webrender/src/texture_cache.rs +++ b/gfx/wr/webrender/src/texture_cache.rs @@ -17,7 +17,7 @@ use crate::internal_types::{ TextureCacheAllocInfo, TextureCacheUpdate, }; use crate::lru_cache::LRUCache; -use crate::profiler::{ResourceProfileCounter, TextureCacheProfileCounters}; +use crate::profiler::{self, TransactionProfile}; use crate::render_backend::FrameStamp; use crate::resource_cache::{CacheItem, CachedImageData}; use smallvec::SmallVec; @@ -376,7 +376,7 @@ impl PictureTextures { } } - fn update_profile(&self, profile: &mut ResourceProfileCounter) { + fn update_profile(&self, profile: &mut TransactionProfile) { // For now, this profile counter just accumulates the slices and bytes // from all picture cache texture arrays. 
let mut picture_slices = 0; @@ -385,7 +385,8 @@ impl PictureTextures { picture_slices += texture.slices.len(); picture_bytes += texture.size_in_bytes(); } - profile.set(picture_slices, picture_bytes); + profile.set(profiler::PICTURE_TILES, picture_slices); + profile.set(profiler::PICTURE_TILES_MEM, profiler::bytes_to_mb(picture_bytes)); } } @@ -615,7 +616,7 @@ impl TextureCache { self.evict_items_from_cache_if_required(); } - pub fn end_frame(&mut self, texture_cache_profile: &mut TextureCacheProfileCounters) { + pub fn end_frame(&mut self, profile: &mut TransactionProfile) { debug_assert!(self.now.is_valid()); self.expire_old_picture_cache_tiles(); @@ -627,18 +628,30 @@ impl TextureCache { self.shared_textures.array_color8_linear.release_empty_textures(&mut self.pending_updates); self.shared_textures.array_color8_nearest.release_empty_textures(&mut self.pending_updates); - self.shared_textures.array_alpha8_linear - .update_profile(&mut texture_cache_profile.pages_alpha8_linear); - self.shared_textures.array_alpha16_linear - .update_profile(&mut texture_cache_profile.pages_alpha16_linear); - self.shared_textures.array_color8_linear - .update_profile(&mut texture_cache_profile.pages_color8_linear); - self.shared_textures.array_color8_nearest - .update_profile(&mut texture_cache_profile.pages_color8_nearest); - self.picture_textures - .update_profile(&mut texture_cache_profile.pages_picture); - texture_cache_profile.shared_bytes.set(self.shared_bytes_allocated); - texture_cache_profile.standalone_bytes.set(self.standalone_bytes_allocated); + self.shared_textures.array_alpha8_linear.update_profile( + profiler::TEXTURE_CACHE_A8_REGIONS, + profiler::TEXTURE_CACHE_A8_MEM, + profile, + ); + self.shared_textures.array_alpha16_linear.update_profile( + profiler::TEXTURE_CACHE_A16_REGIONS, + profiler::TEXTURE_CACHE_A16_MEM, + profile, + ); + self.shared_textures.array_color8_linear.update_profile( + profiler::TEXTURE_CACHE_RGBA8_LINEAR_REGIONS, + 
profiler::TEXTURE_CACHE_RGBA8_LINEAR_MEM, + profile, + ); + self.shared_textures.array_color8_nearest.update_profile( + profiler::TEXTURE_CACHE_RGBA8_NEAREST_REGIONS, + profiler::TEXTURE_CACHE_RGBA8_NEAREST_MEM, + profile, + ); + self.picture_textures.update_profile(profile); + + profile.set(profiler::TEXTURE_CACHE_SHARED_MEM, self.shared_bytes_allocated); + profile.set(profiler::TEXTURE_CACHE_STANDALONE_MEM, self.standalone_bytes_allocated); self.now = FrameStamp::INVALID; } @@ -1461,9 +1474,10 @@ impl TextureArray { }); } - fn update_profile(&self, counter: &mut ResourceProfileCounter) { + fn update_profile(&self, count_idx: usize, mem_idx: usize, profile: &mut TransactionProfile) { let num_regions: usize = self.units.iter().map(|u| u.regions.len()).sum(); - counter.set(num_regions, self.size_in_bytes()); + profile.set(count_idx, num_regions); + profile.set(mem_idx, profiler::bytes_to_mb(self.size_in_bytes())); } /// Allocate space in this texture array. diff --git a/gfx/wr/webrender_api/src/lib.rs b/gfx/wr/webrender_api/src/lib.rs index a7d59608a7fd..f1824212bd95 100644 --- a/gfx/wr/webrender_api/src/lib.rs +++ b/gfx/wr/webrender_api/src/lib.rs @@ -513,57 +513,48 @@ bitflags! { const DISABLE_BATCHING = 1 << 5; /// Display the pipeline epochs. const EPOCHS = 1 << 6; - /// Reduce the amount of information displayed by the profiler so that - /// it occupies less screen real-estate. - const COMPACT_PROFILER = 1 << 7; /// Print driver messages to stdout. - const ECHO_DRIVER_MESSAGES = 1 << 8; - /// Show an indicator that moves every time a frame is rendered. - const NEW_FRAME_INDICATOR = 1 << 9; - /// Show an indicator that moves every time a scene is built. - const NEW_SCENE_INDICATOR = 1 << 10; + const ECHO_DRIVER_MESSAGES = 1 << 7; /// Show an overlay displaying overdraw amount. - const SHOW_OVERDRAW = 1 << 11; + const SHOW_OVERDRAW = 1 << 8; /// Display the contents of GPU cache. 
- const GPU_CACHE_DBG = 1 << 12; - /// Show a red bar that moves each time a slow frame is detected. - const SLOW_FRAME_INDICATOR = 1 << 13; + const GPU_CACHE_DBG = 1 << 9; /// Clear evicted parts of the texture cache for debugging purposes. - const TEXTURE_CACHE_DBG_CLEAR_EVICTED = 1 << 14; + const TEXTURE_CACHE_DBG_CLEAR_EVICTED = 1 << 10; /// Show picture caching debug overlay - const PICTURE_CACHING_DBG = 1 << 15; + const PICTURE_CACHING_DBG = 1 << 11; /// Highlight all primitives with colors based on kind. - const PRIMITIVE_DBG = 1 << 16; + const PRIMITIVE_DBG = 1 << 12; /// Draw a zoom widget showing part of the framebuffer zoomed in. - const ZOOM_DBG = 1 << 17; + const ZOOM_DBG = 1 << 13; /// Scale the debug renderer down for a smaller screen. This will disrupt /// any mapping between debug display items and page content, so shouldn't /// be used with overlays like the picture caching or primitive display. - const SMALL_SCREEN = 1 << 18; + const SMALL_SCREEN = 1 << 14; /// Disable various bits of the WebRender pipeline, to help narrow /// down where slowness might be coming from. - const DISABLE_OPAQUE_PASS = 1 << 19; + const DISABLE_OPAQUE_PASS = 1 << 15; /// - const DISABLE_ALPHA_PASS = 1 << 20; + const DISABLE_ALPHA_PASS = 1 << 16; /// - const DISABLE_CLIP_MASKS = 1 << 21; + const DISABLE_CLIP_MASKS = 1 << 17; /// - const DISABLE_TEXT_PRIMS = 1 << 22; + const DISABLE_TEXT_PRIMS = 1 << 18; /// - const DISABLE_GRADIENT_PRIMS = 1 << 23; + const DISABLE_GRADIENT_PRIMS = 1 << 19; /// - const OBSCURE_IMAGES = 1 << 24; + const OBSCURE_IMAGES = 1 << 20; /// Taint the transparent area of the glyphs with a random opacity to easily /// see when glyphs are re-rasterized. - const GLYPH_FLASHING = 1 << 25; + const GLYPH_FLASHING = 1 << 21; /// The profiler only displays information that is out of the ordinary. - const SMART_PROFILER = 1 << 26; + const SMART_PROFILER = 1 << 22; /// If set, dump picture cache invalidation debug to console. 
- const INVALIDATION_DBG = 1 << 27; + const INVALIDATION_DBG = 1 << 23; /// Log tile cache to memory for later saving as part of wr-capture - const TILE_CACHE_LOGGING_DBG = 1 << 28; + const TILE_CACHE_LOGGING_DBG = 1 << 24; /// Collect and dump profiler statistics to captures. - const PROFILER_CAPTURE = (1 as u32) << 31; // need "as u32" until we have cbindgen#556 + const PROFILER_CAPTURE = (1 as u32) << 25; // need "as u32" until we have cbindgen#556 } } diff --git a/gfx/wr/wrench/src/args.yaml b/gfx/wr/wrench/src/args.yaml index e19c5057c369..ac9fed9f600e 100644 --- a/gfx/wr/wrench/src/args.yaml +++ b/gfx/wr/wrench/src/args.yaml @@ -80,6 +80,10 @@ args: - no_block: long: no-block help: Don't block on UI events - run event loop as fast as possible. + - profiler_ui: + long: profiler-ui + takes_value: true + help: A string describing what to show in the profiler HUD (See https://github.com/servo/webrender/wiki/Debugging-WebRender#anchor_6). subcommands: - png: diff --git a/gfx/wr/wrench/src/main.rs b/gfx/wr/wrench/src/main.rs index b77c98b9cd41..267bdacab93f 100644 --- a/gfx/wr/wrench/src/main.rs +++ b/gfx/wr/wrench/src/main.rs @@ -666,6 +666,11 @@ fn main() { dump_shader_source, notifier, ); + + if let Some(ui_str) = args.value_of("profiler_ui") { + wrench.renderer.set_profiler_ui(&ui_str); + } + window.update(&mut wrench); if let Some(window_title) = wrench.take_title() { @@ -802,7 +807,7 @@ fn render<'a>( // Default the profile overlay on for android. 
if cfg!(target_os = "android") { - debug_flags.toggle(DebugFlags::PROFILER_DBG | DebugFlags::COMPACT_PROFILER); + debug_flags.toggle(DebugFlags::PROFILER_DBG); wrench.api.send_debug_cmd(DebugCommand::SetFlags(debug_flags)); } @@ -864,11 +869,6 @@ fn render<'a>( wrench.api.send_debug_cmd(DebugCommand::SetFlags(debug_flags)); do_render = true; } - VirtualKeyCode::S => { - debug_flags.toggle(DebugFlags::COMPACT_PROFILER); - wrench.api.send_debug_cmd(DebugCommand::SetFlags(debug_flags)); - do_render = true; - } VirtualKeyCode::D => { debug_flags.toggle(DebugFlags::PICTURE_CACHING_DBG); wrench.api.send_debug_cmd(DebugCommand::SetFlags(debug_flags)); diff --git a/modules/libpref/init/all.js b/modules/libpref/init/all.js index 28d5d1caeb42..3b731bda4cf7 100644 --- a/modules/libpref/init/all.js +++ b/modules/libpref/init/all.js @@ -665,11 +665,7 @@ pref("gfx.webrender.debug.gpu-time-queries", false); pref("gfx.webrender.debug.gpu-sample-queries", false); pref("gfx.webrender.debug.disable-batching", false); pref("gfx.webrender.debug.epochs", false); -pref("gfx.webrender.debug.compact-profiler", false); -pref("gfx.webrender.debug.smart-profiler", false); pref("gfx.webrender.debug.echo-driver-messages", false); -pref("gfx.webrender.debug.new-frame-indicator", false); -pref("gfx.webrender.debug.new-scene-indicator", false); pref("gfx.webrender.debug.show-overdraw", false); pref("gfx.webrender.debug.slow-frame-indicator", false); pref("gfx.webrender.debug.picture-caching", false); @@ -679,6 +675,7 @@ pref("gfx.webrender.debug.small-screen", false); pref("gfx.webrender.debug.obscure-images", false); pref("gfx.webrender.debug.glyph-flashing", false); pref("gfx.webrender.debug.capture-profiler", false); +pref("gfx.webrender.debug.profiler-ui", "Default"); pref("accessibility.warn_on_browsewithcaret", true);