From 1fa351142a9e20e2b10167f7ef39ba03fde15911 Mon Sep 17 00:00:00 2001 From: Narcis Beleuzu Date: Mon, 19 Oct 2020 23:58:05 +0300 Subject: [PATCH] Backed out 1 changesets (bug 1671289) for wrench bustages on profiler.rs . CLOSED TREE Backed out changeset db80ac24d32f (bug 1671289) --- gfx/config/gfxVars.h | 1 - gfx/layers/ipc/CompositorBridgeParent.cpp | 12 - gfx/layers/ipc/CompositorBridgeParent.h | 5 - gfx/layers/wr/WebRenderBridgeParent.cpp | 7 +- gfx/layers/wr/WebRenderBridgeParent.h | 1 - gfx/thebes/gfxPlatform.cpp | 20 +- gfx/webrender_bindings/RenderThread.cpp | 18 - gfx/webrender_bindings/RenderThread.h | 3 - gfx/webrender_bindings/RendererOGL.cpp | 5 - gfx/webrender_bindings/RendererOGL.h | 2 - gfx/webrender_bindings/WebRenderAPI.cpp | 4 - gfx/webrender_bindings/WebRenderAPI.h | 1 - gfx/webrender_bindings/src/bindings.rs | 8 - gfx/wr/examples/common/boilerplate.rs | 4 + gfx/wr/examples/yuv.rs | 1 + gfx/wr/webrender/src/api_resources.rs | 3 - gfx/wr/webrender/src/clip.rs | 1 - gfx/wr/webrender/src/filterdata.rs | 1 - gfx/wr/webrender/src/frame_builder.rs | 47 +- gfx/wr/webrender/src/glyph_rasterizer/mod.rs | 23 +- gfx/wr/webrender/src/gpu_cache.rs | 16 +- gfx/wr/webrender/src/intern.rs | 12 +- gfx/wr/webrender/src/internal_types.rs | 4 +- gfx/wr/webrender/src/prepare.rs | 2 +- gfx/wr/webrender/src/prim_store/backdrop.rs | 1 - gfx/wr/webrender/src/prim_store/borders.rs | 2 - gfx/wr/webrender/src/prim_store/gradient.rs | 3 - gfx/wr/webrender/src/prim_store/image.rs | 2 - gfx/wr/webrender/src/prim_store/line_dec.rs | 1 - gfx/wr/webrender/src/prim_store/mod.rs | 1 - gfx/wr/webrender/src/prim_store/picture.rs | 1 - gfx/wr/webrender/src/prim_store/text_run.rs | 1 - gfx/wr/webrender/src/profiler.rs | 3039 ++++++++++-------- gfx/wr/webrender/src/render_api.rs | 14 - gfx/wr/webrender/src/render_backend.rs | 93 +- gfx/wr/webrender/src/renderer.rs | 813 ++--- gfx/wr/webrender/src/resource_cache.rs | 41 +- gfx/wr/webrender/src/scene_builder_thread.rs | 69 +- gfx/wr/webrender/src/texture_cache.rs | 50 +- gfx/wr/webrender_api/src/lib.rs | 47 +- gfx/wr/wrench/src/args.yaml | 4 - gfx/wr/wrench/src/main.rs | 12 +- modules/libpref/init/all.js | 5 +- 43 files changed, 2344 insertions(+), 2056 deletions(-) diff --git a/gfx/config/gfxVars.h b/gfx/config/gfxVars.h index ed93eeb2fa84..22c659f5fbd6 100644 --- a/gfx/config/gfxVars.h +++ b/gfx/config/gfxVars.h @@ -53,7 +53,6 @@ class gfxVarReceiver; _(UseWebRenderOptimizedShaders, bool, false) \ _(UseWebRenderMultithreading, bool, false) \ _(UseWebRenderScissoredCacheClears, bool, true) \ - _(WebRenderProfilerUI, nsCString, nsCString()) \ _(WebglAllowCoreProfile, bool, true) \ _(WebglAllowWindowsNativeGl, bool, false) \ _(WebRenderMaxPartialPresentRects, int32_t, 0) \ diff --git a/gfx/layers/ipc/CompositorBridgeParent.cpp b/gfx/layers/ipc/CompositorBridgeParent.cpp index 536c4ea515b8..9fc96d0335fe 100644 --- a/gfx/layers/ipc/CompositorBridgeParent.cpp +++ b/gfx/layers/ipc/CompositorBridgeParent.cpp @@ -2044,7 +2044,6 @@ void CompositorBridgeParent::InitializeStatics() { &UpdateWebRenderMultithreading); gfxVars::SetWebRenderBatchingLookbackListener( &UpdateWebRenderBatchingParameters); - gfxVars::SetWebRenderProfilerUIListener(&UpdateWebRenderProfilerUI); } /*static*/ @@ -2123,17 +2122,6 @@ void CompositorBridgeParent::UpdateWebRenderBatchingParameters() { }); } -/*static*/ -void CompositorBridgeParent::UpdateWebRenderProfilerUI() { - if (!sIndirectLayerTreesLock) { - return; - } - MonitorAutoLock lock(*sIndirectLayerTreesLock); - ForEachWebRenderBridgeParent([&](WebRenderBridgeParent* wrBridge) -> void { - wrBridge->UpdateProfilerUI(); - }); -} - RefPtr CompositorBridgeParent::GetWebRenderBridgeParent() const { return mWrBridge; diff --git a/gfx/layers/ipc/CompositorBridgeParent.h b/gfx/layers/ipc/CompositorBridgeParent.h index e35c532545ba..392392c28e35 100644 --- a/gfx/layers/ipc/CompositorBridgeParent.h +++ b/gfx/layers/ipc/CompositorBridgeParent.h @@ -735,11 +735,6 @@ class CompositorBridgeParent final : public CompositorBridgeParentBase, */ static void UpdateWebRenderBatchingParameters(); - /** - * Notify the compositor webrender profiler UI string has been updated. - */ - static void UpdateWebRenderProfilerUI(); - /** * Wrap the data structure to be sent over IPC. */ diff --git a/gfx/layers/wr/WebRenderBridgeParent.cpp b/gfx/layers/wr/WebRenderBridgeParent.cpp index ec15d49b9526..eed2b7a88f95 100644 --- a/gfx/layers/wr/WebRenderBridgeParent.cpp +++ b/gfx/layers/wr/WebRenderBridgeParent.cpp @@ -362,9 +362,9 @@ WebRenderBridgeParent::WebRenderBridgeParent( MOZ_ASSERT(!mCompositorScheduler); mCompositorScheduler = new CompositorVsyncScheduler(this, mWidget); } + UpdateDebugFlags(); UpdateQualitySettings(); - UpdateProfilerUI(); } WebRenderBridgeParent::WebRenderBridgeParent(const wr::PipelineId& aPipelineId, @@ -1500,11 +1500,6 @@ void WebRenderBridgeParent::UpdateDebugFlags() { mApi->UpdateDebugFlags(gfxVars::WebRenderDebugFlags()); } -void WebRenderBridgeParent::UpdateProfilerUI() { - nsCString uiString = gfxVars::GetWebRenderProfilerUIOrDefault(); - mApi->SetProfilerUI(uiString); -} - void WebRenderBridgeParent::UpdateMultithreading() { mApi->EnableMultithreading(gfxVars::UseWebRenderMultithreading()); } diff --git a/gfx/layers/wr/WebRenderBridgeParent.h b/gfx/layers/wr/WebRenderBridgeParent.h index bdc3562a9ad3..bac646e15beb 100644 --- a/gfx/layers/wr/WebRenderBridgeParent.h +++ b/gfx/layers/wr/WebRenderBridgeParent.h @@ -130,7 +130,6 @@ class WebRenderBridgeParent final : public PWebRenderBridgeParent, void UpdateDebugFlags(); void UpdateMultithreading(); void UpdateBatchingParameters(); - void UpdateProfilerUI(); mozilla::ipc::IPCResult RecvEnsureConnected( TextureFactoryIdentifier* aTextureFactoryIdentifier, diff --git a/gfx/thebes/gfxPlatform.cpp b/gfx/thebes/gfxPlatform.cpp index 39945bc7962d..cab3568c2691 100644 --- a/gfx/thebes/gfxPlatform.cpp +++ b/gfx/thebes/gfxPlatform.cpp @@ -570,14 +570,6 @@ void RecordingPrefChanged(const char* aPrefName, void* aClosure) { #define WR_DEBUG_PREF "gfx.webrender.debug" -static void WebRendeProfilerUIPrefChangeCallback(const char* aPrefName, void*) { - nsCString uiString; - if (NS_SUCCEEDED(Preferences::GetCString("gfx.webrender.debug.profiler-ui", - uiString))) { - gfxVars::SetWebRenderProfilerUI(uiString); - } -} - static void WebRenderDebugPrefChangeCallback(const char* aPrefName, void*) { wr::DebugFlags flags{0}; #define GFX_WEBRENDER_DEBUG(suffix, bit) \ @@ -592,11 +584,18 @@ static void WebRenderDebugPrefChangeCallback(const char* aPrefName, void*) { GFX_WEBRENDER_DEBUG(".gpu-sample-queries", wr::DebugFlags::GPU_SAMPLE_QUERIES) GFX_WEBRENDER_DEBUG(".disable-batching", wr::DebugFlags::DISABLE_BATCHING) GFX_WEBRENDER_DEBUG(".epochs", wr::DebugFlags::EPOCHS) + GFX_WEBRENDER_DEBUG(".compact-profiler", wr::DebugFlags::COMPACT_PROFILER) GFX_WEBRENDER_DEBUG(".smart-profiler", wr::DebugFlags::SMART_PROFILER) GFX_WEBRENDER_DEBUG(".echo-driver-messages", wr::DebugFlags::ECHO_DRIVER_MESSAGES) + GFX_WEBRENDER_DEBUG(".new-frame-indicator", + wr::DebugFlags::NEW_FRAME_INDICATOR) + GFX_WEBRENDER_DEBUG(".new-scene-indicator", + wr::DebugFlags::NEW_SCENE_INDICATOR) GFX_WEBRENDER_DEBUG(".show-overdraw", wr::DebugFlags::SHOW_OVERDRAW) GFX_WEBRENDER_DEBUG(".gpu-cache", wr::DebugFlags::GPU_CACHE_DBG) + GFX_WEBRENDER_DEBUG(".slow-frame-indicator", + wr::DebugFlags::SLOW_FRAME_INDICATOR) GFX_WEBRENDER_DEBUG(".texture-cache.clear-evicted", wr::DebugFlags::TEXTURE_CACHE_DBG_CLEAR_EVICTED) GFX_WEBRENDER_DEBUG(".picture-caching", wr::DebugFlags::PICTURE_CACHING_DBG) @@ -1386,8 +1385,6 @@ void gfxPlatform::ShutdownLayersIPC() { Preferences::UnregisterCallback(WebRenderDebugPrefChangeCallback, WR_DEBUG_PREF); - Preferences::UnregisterCallback(WebRendeProfilerUIPrefChangeCallback, - "gfx.webrender.debug.profiler-ui"); } } else { @@ -2799,9 +2796,6 @@ void gfxPlatform::InitWebRenderConfig() { Preferences::RegisterPrefixCallbackAndCall(WebRenderDebugPrefChangeCallback, WR_DEBUG_PREF); - Preferences::RegisterPrefixCallbackAndCall( - WebRendeProfilerUIPrefChangeCallback, - "gfx.webrender.debug.profiler-ui"); Preferences::RegisterCallback( WebRenderQualityPrefChangeCallback, nsDependentCString( diff --git a/gfx/webrender_bindings/RenderThread.cpp b/gfx/webrender_bindings/RenderThread.cpp index 4aedf8fb608f..113e29cbde4c 100644 --- a/gfx/webrender_bindings/RenderThread.cpp +++ b/gfx/webrender_bindings/RenderThread.cpp @@ -394,24 +394,6 @@ void RenderThread::SetClearColor(wr::WindowId aWindowId, wr::ColorF aColor) { } } -void RenderThread::SetProfilerUI(wr::WindowId aWindowId, nsCString aUI) { - if (mHasShutdown) { - return; - } - - if (!IsInRenderThread()) { - Loop()->PostTask(NewRunnableMethod( - "wr::RenderThread::SetProfilerUI", this, &RenderThread::SetProfilerUI, - aWindowId, aUI)); - return; - } - - auto it = mRenderers.find(aWindowId); - if (it != mRenderers.end()) { - it->second->SetProfilerUI(aUI); - } -} - void RenderThread::RunEvent(wr::WindowId aWindowId, UniquePtr aEvent) { if (!IsInRenderThread()) { diff --git a/gfx/webrender_bindings/RenderThread.h b/gfx/webrender_bindings/RenderThread.h index a04c5d1fa2ac..9eeb4fb50fa2 100644 --- a/gfx/webrender_bindings/RenderThread.h +++ b/gfx/webrender_bindings/RenderThread.h @@ -179,9 +179,6 @@ class RenderThread final { /// Automatically forwarded to the render thread. void SetClearColor(wr::WindowId aWindowId, wr::ColorF aColor); - /// Automatically forwarded to the render thread. - void SetProfilerUI(wr::WindowId aWindowId, nsCString aUI); - /// Automatically forwarded to the render thread. void PipelineSizeChanged(wr::WindowId aWindowId, uint64_t aPipelineId, float aWidth, float aHeight); diff --git a/gfx/webrender_bindings/RendererOGL.cpp b/gfx/webrender_bindings/RendererOGL.cpp index e3cde8067294..11f14f2554bd 100644 --- a/gfx/webrender_bindings/RendererOGL.cpp +++ b/gfx/webrender_bindings/RendererOGL.cpp @@ -436,10 +436,5 @@ void RendererOGL::AccumulateMemoryReport(MemoryReport* aReport) { aReport->swap_chain += swapChainSize; } -void RendererOGL::SetProfilerUI(const nsCString& aUI) { - wr_renderer_set_profiler_ui(GetRenderer(), (const uint8_t*)aUI.get(), - aUI.Length()); -} - } // namespace wr } // namespace mozilla diff --git a/gfx/webrender_bindings/RendererOGL.h b/gfx/webrender_bindings/RendererOGL.h index 48ce04846924..66a9ab629298 100644 --- a/gfx/webrender_bindings/RendererOGL.h +++ b/gfx/webrender_bindings/RendererOGL.h @@ -115,8 +115,6 @@ class RendererOGL { void AccumulateMemoryReport(MemoryReport* aReport); - void SetProfilerUI(const nsCString& aUI); - wr::Renderer* GetRenderer() { return mRenderer; } gl::GLContext* gl() const; diff --git a/gfx/webrender_bindings/WebRenderAPI.cpp b/gfx/webrender_bindings/WebRenderAPI.cpp index a40ef38ae4f1..7e3d1a300377 100644 --- a/gfx/webrender_bindings/WebRenderAPI.cpp +++ b/gfx/webrender_bindings/WebRenderAPI.cpp @@ -542,10 +542,6 @@ void WebRenderAPI::SetClearColor(const gfx::DeviceColor& aColor) { RenderThread::Get()->SetClearColor(mId, ToColorF(aColor)); } -void WebRenderAPI::SetProfilerUI(const nsCString& aUIString) { - RenderThread::Get()->SetProfilerUI(mId, aUIString); -} - void WebRenderAPI::Pause() { class PauseEvent : public RendererEvent { public: diff --git a/gfx/webrender_bindings/WebRenderAPI.h b/gfx/webrender_bindings/WebRenderAPI.h index 182e51e05f62..7315c535a840 100644 --- a/gfx/webrender_bindings/WebRenderAPI.h +++ b/gfx/webrender_bindings/WebRenderAPI.h @@ -258,7 +258,6 @@ class WebRenderAPI final { void SetBatchingLookback(uint32_t aCount); void SetClearColor(const gfx::DeviceColor& aColor); - void SetProfilerUI(const nsCString& aUIString); void Pause(); bool Resume(); diff --git a/gfx/webrender_bindings/src/bindings.rs b/gfx/webrender_bindings/src/bindings.rs index c6334643285b..780df9ee50bc 100644 --- a/gfx/webrender_bindings/src/bindings.rs +++ b/gfx/webrender_bindings/src/bindings.rs @@ -743,14 +743,6 @@ pub unsafe extern "C" fn wr_renderer_readback( renderer.read_pixels_into(FramebufferIntSize::new(width, height).into(), format, &mut slice); } -#[no_mangle] -pub unsafe extern "C" fn wr_renderer_set_profiler_ui(renderer: &mut Renderer, ui_str: *const u8, ui_str_len: usize) { - let slice = std::slice::from_raw_parts(ui_str, ui_str_len); - if let Ok(ui_str) = std::str::from_utf8(slice) { - renderer.set_profiler_ui(ui_str); - } -} - #[no_mangle] pub unsafe extern "C" fn wr_renderer_delete(renderer: *mut Renderer) { let renderer = Box::from_raw(renderer); diff --git a/gfx/wr/examples/common/boilerplate.rs b/gfx/wr/examples/common/boilerplate.rs index 1693cceb7760..d5208ad9cda7 100644 --- a/gfx/wr/examples/common/boilerplate.rs +++ b/gfx/wr/examples/common/boilerplate.rs @@ -254,10 +254,14 @@ pub fn main_wrapper( winit::VirtualKeyCode::P => debug_flags.toggle(DebugFlags::PROFILER_DBG), winit::VirtualKeyCode::O => debug_flags.toggle(DebugFlags::RENDER_TARGET_DBG), winit::VirtualKeyCode::I => debug_flags.toggle(DebugFlags::TEXTURE_CACHE_DBG), + winit::VirtualKeyCode::S => debug_flags.toggle(DebugFlags::COMPACT_PROFILER), winit::VirtualKeyCode::T => debug_flags.toggle(DebugFlags::PICTURE_CACHING_DBG), winit::VirtualKeyCode::Q => debug_flags.toggle( DebugFlags::GPU_TIME_QUERIES | DebugFlags::GPU_SAMPLE_QUERIES ), + winit::VirtualKeyCode::F => debug_flags.toggle( + DebugFlags::NEW_FRAME_INDICATOR | DebugFlags::NEW_SCENE_INDICATOR + ), winit::VirtualKeyCode::G => debug_flags.toggle(DebugFlags::GPU_CACHE_DBG), winit::VirtualKeyCode::Key1 => txn.set_document_view( device_size.into(), diff --git a/gfx/wr/examples/yuv.rs b/gfx/wr/examples/yuv.rs index e2b8d1efd61a..dabc1de467cb 100644 --- a/gfx/wr/examples/yuv.rs +++ b/gfx/wr/examples/yuv.rs @@ -218,6 +218,7 @@ fn main() { }; let opts = webrender::RendererOptions { + debug_flags: webrender::DebugFlags::NEW_FRAME_INDICATOR | webrender::DebugFlags::NEW_SCENE_INDICATOR, ..Default::default() }; diff --git a/gfx/wr/webrender/src/api_resources.rs b/gfx/wr/webrender/src/api_resources.rs index 0a48858fc422..3c61207aa927 100644 --- a/gfx/wr/webrender/src/api_resources.rs +++ b/gfx/wr/webrender/src/api_resources.rs @@ -10,7 +10,6 @@ use crate::api::SharedFontInstanceMap; use crate::api::units::*; use crate::render_api::{ResourceUpdate, TransactionMsg, AddFont}; use crate::image_tiling::*; -use crate::profiler; use std::collections::HashMap; use std::sync::Arc; @@ -165,8 +164,6 @@ impl ApiResources { } let (rasterizer, requests) = self.create_blob_scene_builder_requests(&blobs_to_rasterize); - transaction.profile.set(profiler::RASTERIZED_BLOBS, blobs_to_rasterize.len()); - transaction.profile.set(profiler::RASTERIZED_BLOB_TILES, requests.len()); transaction.use_scene_builder_thread |= !requests.is_empty(); transaction.use_scene_builder_thread |= !transaction.scene_ops.is_empty(); transaction.blob_rasterizer = rasterizer; diff --git a/gfx/wr/webrender/src/clip.rs b/gfx/wr/webrender/src/clip.rs index c10cd969a478..d71992e1efb7 100644 --- a/gfx/wr/webrender/src/clip.rs +++ b/gfx/wr/webrender/src/clip.rs @@ -1400,7 +1400,6 @@ impl intern::Internable for ClipIntern { type Key = ClipItemKey; type StoreData = ClipNode; type InternData = ClipInternData; - const PROFILE_COUNTER: usize = crate::profiler::INTERNED_CLIPS; } #[derive(Debug, MallocSizeOf)] diff --git a/gfx/wr/webrender/src/filterdata.rs b/gfx/wr/webrender/src/filterdata.rs index d399b2252e22..9907d3d73475 100644 --- a/gfx/wr/webrender/src/filterdata.rs +++ b/gfx/wr/webrender/src/filterdata.rs @@ -162,7 +162,6 @@ impl intern::Internable for FilterDataIntern { type Key = SFilterDataKey; type StoreData = SFilterDataTemplate; type InternData = (); - const PROFILE_COUNTER: usize = crate::profiler::INTERNED_FILTER_DATA; } fn push_component_transfer_data( diff --git a/gfx/wr/webrender/src/frame_builder.rs b/gfx/wr/webrender/src/frame_builder.rs index fb99f864bca5..2d26a29d4f21 100644 --- a/gfx/wr/webrender/src/frame_builder.rs +++ b/gfx/wr/webrender/src/frame_builder.rs @@ -19,7 +19,7 @@ use crate::picture::{BackdropKind, SubpixelMode, TileCacheLogger, RasterConfig, use crate::prepare::prepare_primitives; use crate::prim_store::{PictureIndex, PrimitiveDebugId}; use crate::prim_store::{DeferredResolve}; -use crate::profiler::{self, TransactionProfile}; +use crate::profiler::{FrameProfileCounters, TextureCacheProfileCounters, ResourceProfileCounters}; use crate::render_backend::{DataStores, FrameStamp, FrameId, ScratchBuffer}; use crate::render_target::{RenderTarget, PictureCacheTarget, TextureCacheRenderTarget}; use crate::render_target::{RenderTargetContext, RenderTargetKind}; @@ -176,6 +176,7 @@ pub struct FrameBuildingContext<'a> { pub struct FrameBuildingState<'a> { pub render_tasks: &'a mut RenderTaskGraph, + pub profile_counters: &'a mut FrameProfileCounters, pub clip_store: &'a mut ClipStore, pub resource_cache: &'a mut ResourceCache, pub gpu_cache: &'a mut GpuCache, @@ -184,7 +185,6 @@ pub struct FrameBuildingState<'a> { pub surfaces: &'a mut Vec, pub dirty_region_stack: Vec, pub composite_state: &'a mut CompositeState, - pub num_visible_primitives: u32, } impl<'a> FrameBuildingState<'a> { @@ -248,16 +248,17 @@ impl FrameBuilder { resource_cache: &mut ResourceCache, gpu_cache: &mut GpuCache, render_tasks: &mut RenderTaskGraph, + profile_counters: &mut FrameProfileCounters, global_device_pixel_scale: DevicePixelScale, scene_properties: &SceneProperties, transform_palette: &mut TransformPalette, data_stores: &mut DataStores, scratch: &mut ScratchBuffer, debug_flags: DebugFlags, + texture_cache_profile: &mut TextureCacheProfileCounters, composite_state: &mut CompositeState, tile_cache_logger: &mut TileCacheLogger, tile_caches: &mut FastHashMap>, - profile: &mut TransactionProfile, ) -> Option { profile_scope!("build_layer_screen_rects_and_cull_layers"); @@ -334,7 +335,6 @@ impl FrameBuilder { { profile_scope!("UpdateVisibility"); profile_marker!("UpdateVisibility"); - profile.start_time(profiler::FRAME_VISIBILITY_TIME); let visibility_context = FrameVisibilityContext { global_device_pixel_scale, @@ -371,14 +371,11 @@ impl FrameBuilder { visibility_state.scratch.frame.clip_chain_stack = visibility_state.clip_chain_stack.take(); visibility_state.scratch.frame.surface_stack = visibility_state.surface_stack.take(); - - profile.end_time(profiler::FRAME_VISIBILITY_TIME); } - profile.start_time(profiler::FRAME_PREPARE_TIME); - let mut frame_state = FrameBuildingState { render_tasks, + profile_counters, clip_store: &mut scene.clip_store, resource_cache, gpu_cache, @@ -387,7 +384,6 @@ impl FrameBuilder { surfaces: &mut surfaces, dirty_region_stack: scratch.frame.dirty_region_stack.take(), composite_state, - num_visible_primitives: 0, }; frame_state @@ -457,8 +453,6 @@ impl FrameBuilder { ); frame_state.pop_dirty_region(); - profile.end_time(profiler::FRAME_PREPARE_TIME); - profile.set(profiler::VISIBLE_PRIMITIVES, frame_state.num_visible_primitives); scratch.frame.dirty_region_stack = frame_state.dirty_region_stack.take(); scratch.frame.surfaces = surfaces.take(); @@ -466,11 +460,9 @@ impl FrameBuilder { { profile_marker!("BlockOnResources"); - resource_cache.block_until_all_resources_added( - gpu_cache, - render_tasks, - profile, - ); + resource_cache.block_until_all_resources_added(gpu_cache, + render_tasks, + texture_cache_profile); } Some(root_render_task_id) @@ -486,6 +478,7 @@ impl FrameBuilder { layer: DocumentLayer, device_origin: DeviceIntPoint, pan: WorldPoint, + resource_profile: &mut ResourceProfileCounters, scene_properties: &SceneProperties, data_stores: &mut DataStores, scratch: &mut ScratchBuffer, @@ -494,13 +487,15 @@ impl FrameBuilder { tile_cache_logger: &mut TileCacheLogger, tile_caches: &mut FastHashMap>, dirty_rects_are_valid: bool, - profile: &mut TransactionProfile, ) -> Frame { profile_scope!("build"); profile_marker!("BuildFrame"); - profile.set(profiler::PRIMITIVES, scene.prim_store.prim_count()); - profile.set(profiler::PICTURE_CACHE_SLICES, scene.tile_cache_config.picture_cache_slice_count); + let mut profile_counters = FrameProfileCounters::new(); + profile_counters + .total_primitives + .set(scene.prim_store.prim_count()); + resource_profile.picture_cache_slices.set(scene.tile_cache_config.picture_cache_slice_count); resource_cache.begin_frame(stamp); gpu_cache.begin_frame(stamp); @@ -537,20 +532,19 @@ impl FrameBuilder { resource_cache, gpu_cache, &mut render_tasks, + &mut profile_counters, global_device_pixel_scale, scene_properties, &mut transform_palette, data_stores, scratch, debug_flags, + &mut resource_profile.texture_cache, &mut composite_state, tile_cache_logger, tile_caches, - profile, ); - profile.start_time(profiler::FRAME_BATCHING_TIME); - let mut passes; let mut deferred_resolves = vec![]; let mut has_texture_cache_tasks = false; @@ -617,13 +611,11 @@ impl FrameBuilder { } } - profile.end_time(profiler::FRAME_BATCHING_TIME); - - let gpu_cache_frame_id = gpu_cache.end_frame(profile).frame_id(); + let gpu_cache_frame_id = gpu_cache.end_frame(&mut resource_profile.gpu_cache).frame_id(); render_tasks.write_task_data(); *render_task_counters = render_tasks.counters(); - resource_cache.end_frame(profile); + resource_cache.end_frame(&mut resource_profile.texture_cache); self.prim_headers_prealloc.record_vec(&mut prim_headers.headers_int); self.composite_state_prealloc.record(&composite_state); @@ -635,6 +627,7 @@ impl FrameBuilder { scene.output_rect.size, ), layer, + profile_counters, passes, transform_palette: transform_palette.finish(), render_tasks, @@ -995,6 +988,8 @@ pub struct Frame { pub device_rect: DeviceIntRect, pub layer: DocumentLayer, pub passes: Vec, + #[cfg_attr(any(feature = "capture", feature = "replay"), serde(default = "FrameProfileCounters::new", skip))] + pub profile_counters: FrameProfileCounters, pub transform_palette: Vec, pub render_tasks: RenderTaskGraph, diff --git a/gfx/wr/webrender/src/glyph_rasterizer/mod.rs b/gfx/wr/webrender/src/glyph_rasterizer/mod.rs index a9c6981c85d8..2eacd8657fe7 100644 --- a/gfx/wr/webrender/src/glyph_rasterizer/mod.rs +++ b/gfx/wr/webrender/src/glyph_rasterizer/mod.rs @@ -19,7 +19,7 @@ use crate::texture_cache::{TextureCache, TextureCacheHandle, Eviction}; use crate::gpu_cache::GpuCache; use crate::render_task_graph::RenderTaskGraph; use crate::render_task_cache::RenderTaskCache; -use crate::profiler::{self, TransactionProfile}; +use crate::profiler::TextureCacheProfileCounters; use malloc_size_of::{MallocSizeOf, MallocSizeOfOps}; use rayon::ThreadPool; use rayon::prelude::*; @@ -100,7 +100,6 @@ impl GlyphRasterizer { // Increment the total number of glyphs that are pending. This is used to determine // later whether to use worker threads for the remaining glyphs during resolve time. self.pending_glyph_count += 1; - self.glyph_request_count += 1; // Find a batch container for the font instance for this glyph. Use get_mut to avoid // cloning the font instance, since this is the common path. @@ -220,10 +219,8 @@ impl GlyphRasterizer { gpu_cache: &mut GpuCache, _: &mut RenderTaskCache, _: &mut RenderTaskGraph, - profile: &mut TransactionProfile, + _: &mut TextureCacheProfileCounters, ) { - profile.start_time(profiler::GLYPH_RESOLVE_TIME); - // Work around the borrow checker, since we call flush_glyph_requests below let mut pending_glyph_requests = mem::replace( &mut self.pending_glyph_requests, @@ -244,11 +241,6 @@ impl GlyphRasterizer { debug_assert_eq!(self.pending_glyph_count, 0); debug_assert!(self.pending_glyph_requests.is_empty()); - if self.glyph_request_count > 0 { - profile.set(profiler::RASTERIZED_GLYPHS, self.glyph_request_count); - self.glyph_request_count = 0; - } - profile_scope!("resolve_glyphs"); // Pull rasterized glyphs from the queue and update the caches. while self.pending_glyph_jobs > 0 { @@ -315,8 +307,6 @@ impl GlyphRasterizer { // Now that we are done with the critical path (rendering the glyphs), // we can schedule removing the fonts if needed. self.remove_dead_fonts(); - - profile.end_time(profiler::GLYPH_RESOLVE_TIME); } } @@ -946,9 +936,6 @@ pub struct GlyphRasterizer { /// The current number of glyph request jobs that have been kicked to worker threads. pending_glyph_jobs: usize, - /// The number of glyphs requested this frame. - glyph_request_count: usize, - /// A map of current glyph request batches. pending_glyph_requests: FastHashMap>, @@ -993,7 +980,6 @@ impl GlyphRasterizer { font_contexts: Arc::new(font_context), pending_glyph_jobs: 0, pending_glyph_count: 0, - glyph_request_count: 0, glyph_rx, glyph_tx, workers, @@ -1074,7 +1060,6 @@ impl GlyphRasterizer { //TODO: any signals need to be sent to the workers? self.pending_glyph_jobs = 0; self.pending_glyph_count = 0; - self.glyph_request_count = 0; self.fonts_to_remove.clear(); self.font_instances_to_remove.clear(); } @@ -1137,7 +1122,7 @@ mod test_glyph_rasterizer { use crate::gpu_cache::GpuCache; use crate::render_task_cache::RenderTaskCache; use crate::render_task_graph::{RenderTaskGraph, RenderTaskGraphCounters}; - use crate::profiler::TransactionProfile; + use crate::profiler::TextureCacheProfileCounters; use api::{FontKey, FontInstanceKey, FontSize, FontTemplate, FontRenderMode, IdNamespace, ColorU}; use api::units::DevicePoint; @@ -1208,7 +1193,7 @@ mod test_glyph_rasterizer { &mut gpu_cache, &mut render_task_cache, &mut render_task_tree, - &mut TransactionProfile::new(), + &mut TextureCacheProfileCounters::new(), ); } diff --git a/gfx/wr/webrender/src/gpu_cache.rs b/gfx/wr/webrender/src/gpu_cache.rs index da67f9df64c9..bac61d976c72 100644 --- a/gfx/wr/webrender/src/gpu_cache.rs +++ b/gfx/wr/webrender/src/gpu_cache.rs @@ -30,7 +30,7 @@ use api::IdNamespace; use api::units::*; use euclid::{HomogeneousVector, Rect}; use crate::internal_types::{FastHashMap, FastHashSet}; -use crate::profiler::{self, TransactionProfile}; +use crate::profiler::GpuCacheProfileCounters; use crate::render_backend::{FrameStamp, FrameId}; use crate::prim_store::VECS_PER_SEGMENT; use crate::renderer::MAX_VERTEX_TEXTURE_WIDTH; @@ -865,12 +865,18 @@ impl GpuCache { /// device specific cache texture. pub fn end_frame( &mut self, - profile: &mut TransactionProfile, + profile_counters: &mut GpuCacheProfileCounters, ) -> FrameStamp { profile_scope!("end_frame"); - profile.set(profiler::GPU_CACHE_ROWS_TOTAL, self.texture.rows.len()); - profile.set(profiler::GPU_CACHE_BLOCKS_TOTAL, self.texture.allocated_block_count); - profile.set(profiler::GPU_CACHE_BLOCKS_SAVED, self.saved_block_count); + profile_counters + .allocated_rows + .set(self.texture.rows.len()); + profile_counters + .allocated_blocks + .set(self.texture.allocated_block_count); + profile_counters + .saved_blocks + .set(self.saved_block_count); let reached_threshold = self.texture.rows.len() > (GPU_CACHE_INITIAL_HEIGHT as usize) && diff --git a/gfx/wr/webrender/src/intern.rs b/gfx/wr/webrender/src/intern.rs index 5dd9c90600b5..7633233ac496 100644 --- a/gfx/wr/webrender/src/intern.rs +++ b/gfx/wr/webrender/src/intern.rs @@ -35,12 +35,12 @@ use crate::internal_types::FastHashMap; use malloc_size_of::MallocSizeOf; +use crate::profiler::ResourceProfileCounter; use std::fmt::Debug; use std::hash::Hash; use std::marker::PhantomData; -use std::{ops, u64}; +use std::{mem, ops, u64}; use crate::util::VecHelper; -use crate::profiler::TransactionProfile; #[cfg_attr(feature = "capture", derive(Serialize))] #[cfg_attr(feature = "replay", derive(Deserialize))] @@ -166,7 +166,7 @@ impl DataStore { pub fn apply_updates( &mut self, update_list: UpdateList, - profile: &mut TransactionProfile, + profile_counter: &mut ResourceProfileCounter, ) { for insertion in update_list.insertions { self.items @@ -178,7 +178,8 @@ impl DataStore { self.items[removal.index] = None; } - profile.set(I::PROFILE_COUNTER, self.items.len()); + let per_item_size = mem::size_of::() + mem::size_of::(); + profile_counter.set(self.items.len(), per_item_size * self.items.len()); } } @@ -459,7 +460,4 @@ pub trait Internable: MallocSizeOf { type Key: Eq + Hash + Clone + Debug + MallocSizeOf + InternDebug + InternSerialize + for<'a> InternDeserialize<'a>; type StoreData: From + MallocSizeOf + InternSerialize + for<'a> InternDeserialize<'a>; type InternData: MallocSizeOf + InternSerialize + for<'a> InternDeserialize<'a>; - - // Profile counter indices, see the list in profiler.rs - const PROFILE_COUNTER: usize; } diff --git a/gfx/wr/webrender/src/internal_types.rs b/gfx/wr/webrender/src/internal_types.rs index e9da910f9d88..2bd8cccee4e5 100644 --- a/gfx/wr/webrender/src/internal_types.rs +++ b/gfx/wr/webrender/src/internal_types.rs @@ -12,9 +12,9 @@ use crate::device::TextureFilter; use crate::renderer::PipelineInfo; use crate::gpu_cache::GpuCacheUpdateList; use crate::frame_builder::Frame; -use crate::profiler::TransactionProfile; use fxhash::FxHasher; use plane_split::BspSplitter; +use crate::profiler::BackendProfileCounters; use smallvec::SmallVec; use std::{usize, i32}; use std::collections::{HashMap, HashSet}; @@ -539,7 +539,6 @@ impl ResourceUpdateList { pub struct RenderedDocument { pub frame: Frame, pub is_new_scene: bool, - pub profile: TransactionProfile, } pub enum DebugOutput { @@ -566,6 +565,7 @@ pub enum ResultMsg { DocumentId, RenderedDocument, ResourceUpdateList, + BackendProfileCounters, ), AppendNotificationRequests(Vec), ForceRedraw, diff --git a/gfx/wr/webrender/src/prepare.rs b/gfx/wr/webrender/src/prepare.rs index ab6c4e151418..0a83dab9de83 100644 --- a/gfx/wr/webrender/src/prepare.rs +++ b/gfx/wr/webrender/src/prepare.rs @@ -128,7 +128,7 @@ pub fn prepare_primitives( tile_cache_log, tile_caches, ) { - frame_state.num_visible_primitives += 1; + frame_state.profile_counters.visible_primitives.inc(); } else { prim_instance.clear_visibility(); } diff --git a/gfx/wr/webrender/src/prim_store/backdrop.rs b/gfx/wr/webrender/src/prim_store/backdrop.rs index c45bf78eef44..ea033574fb21 100644 --- a/gfx/wr/webrender/src/prim_store/backdrop.rs +++ b/gfx/wr/webrender/src/prim_store/backdrop.rs @@ -74,7 +74,6 @@ impl Internable for Backdrop { type Key = BackdropKey; type StoreData = BackdropTemplate; type InternData = (); - const PROFILE_COUNTER: usize = crate::profiler::INTERNED_BACKDROPS; } impl InternablePrimitive for Backdrop { diff --git a/gfx/wr/webrender/src/prim_store/borders.rs b/gfx/wr/webrender/src/prim_store/borders.rs index ad7bb6a239c6..084350c3357e 100644 --- a/gfx/wr/webrender/src/prim_store/borders.rs +++ b/gfx/wr/webrender/src/prim_store/borders.rs @@ -145,7 +145,6 @@ impl intern::Internable for NormalBorderPrim { type Key = NormalBorderKey; type StoreData = NormalBorderTemplate; type InternData = (); - const PROFILE_COUNTER: usize = crate::profiler::INTERNED_NORMAL_BORDERS; } impl InternablePrimitive for NormalBorderPrim { @@ -319,7 +318,6 @@ impl intern::Internable for ImageBorder { type Key = ImageBorderKey; type StoreData = ImageBorderTemplate; type InternData = (); - const PROFILE_COUNTER: usize = crate::profiler::INTERNED_IMAGE_BORDERS; } impl InternablePrimitive for ImageBorder { diff --git a/gfx/wr/webrender/src/prim_store/gradient.rs b/gfx/wr/webrender/src/prim_store/gradient.rs index da623cf21288..b7901d48b82f 100644 --- a/gfx/wr/webrender/src/prim_store/gradient.rs +++ b/gfx/wr/webrender/src/prim_store/gradient.rs @@ -321,7 +321,6 @@ impl Internable for LinearGradient { type Key = LinearGradientKey; type StoreData = LinearGradientTemplate; type InternData = (); - const PROFILE_COUNTER: usize = crate::profiler::INTERNED_LINEAR_GRADIENTS; } impl InternablePrimitive for LinearGradient { @@ -551,7 +550,6 @@ impl Internable for RadialGradient { type Key = RadialGradientKey; type StoreData = RadialGradientTemplate; type InternData = (); - const PROFILE_COUNTER: usize = crate::profiler::INTERNED_RADIAL_GRADIENTS; } impl InternablePrimitive for RadialGradient { @@ -771,7 +769,6 @@ impl Internable for ConicGradient { type Key = ConicGradientKey; type StoreData = ConicGradientTemplate; type InternData = (); - const PROFILE_COUNTER: usize = crate::profiler::INTERNED_CONIC_GRADIENTS; } impl InternablePrimitive for ConicGradient { diff --git a/gfx/wr/webrender/src/prim_store/image.rs b/gfx/wr/webrender/src/prim_store/image.rs index ce3db2e7a577..de7b2220e444 100644 --- a/gfx/wr/webrender/src/prim_store/image.rs +++ b/gfx/wr/webrender/src/prim_store/image.rs @@ -288,7 +288,6 @@ impl Internable for Image { type Key = ImageKey; type StoreData = ImageTemplate; type InternData = (); - const PROFILE_COUNTER: usize = crate::profiler::INTERNED_IMAGES; } impl InternablePrimitive for Image { @@ -461,7 +460,6 @@ impl Internable for YuvImage { type Key = YuvImageKey; type StoreData = YuvImageTemplate; type InternData = (); - const PROFILE_COUNTER: usize = crate::profiler::INTERNED_YUV_IMAGES; } impl InternablePrimitive for YuvImage { diff --git a/gfx/wr/webrender/src/prim_store/line_dec.rs b/gfx/wr/webrender/src/prim_store/line_dec.rs index 1889da1eee24..517fb2200f26 100644 --- a/gfx/wr/webrender/src/prim_store/line_dec.rs +++ b/gfx/wr/webrender/src/prim_store/line_dec.rs @@ -126,7 +126,6 @@ impl intern::Internable for LineDecoration { type Key = LineDecorationKey; type StoreData = LineDecorationTemplate; type InternData = (); - const PROFILE_COUNTER: usize = crate::profiler::INTERNED_LINE_DECORATIONS; } impl InternablePrimitive for LineDecoration { diff --git a/gfx/wr/webrender/src/prim_store/mod.rs b/gfx/wr/webrender/src/prim_store/mod.rs index d6f9472c3a2e..2a9f3da17699 100644 --- a/gfx/wr/webrender/src/prim_store/mod.rs +++ b/gfx/wr/webrender/src/prim_store/mod.rs @@ -577,7 +577,6 @@ impl intern::Internable for PrimitiveKeyKind { type Key = PrimitiveKey; type StoreData = PrimitiveTemplate; type InternData = (); - const PROFILE_COUNTER: usize = crate::profiler::INTERNED_PRIMITIVES; } impl InternablePrimitive for PrimitiveKeyKind { diff --git a/gfx/wr/webrender/src/prim_store/picture.rs b/gfx/wr/webrender/src/prim_store/picture.rs index d0815cdac877..2cc77fe05b9e 100644 --- a/gfx/wr/webrender/src/prim_store/picture.rs +++ b/gfx/wr/webrender/src/prim_store/picture.rs @@ -277,7 +277,6 @@ impl Internable for Picture { type Key = PictureKey; type StoreData = PictureTemplate; type InternData = (); - const PROFILE_COUNTER: usize = crate::profiler::INTERNED_PICTURES; } impl InternablePrimitive for Picture { diff --git a/gfx/wr/webrender/src/prim_store/text_run.rs b/gfx/wr/webrender/src/prim_store/text_run.rs index a20cd1d27671..1360b42e2af5 100644 --- a/gfx/wr/webrender/src/prim_store/text_run.rs +++ b/gfx/wr/webrender/src/prim_store/text_run.rs @@ -151,7 +151,6 @@ impl intern::Internable for TextRun { type Key = TextRunKey; type StoreData = TextRunTemplate; type InternData = (); - const PROFILE_COUNTER: usize = crate::profiler::INTERNED_TEXT_RUNS; } impl InternablePrimitive for TextRun { diff --git a/gfx/wr/webrender/src/profiler.rs b/gfx/wr/webrender/src/profiler.rs index e54e7153bc4f..e39947ceba33 100644 --- a/gfx/wr/webrender/src/profiler.rs +++ b/gfx/wr/webrender/src/profiler.rs @@ -2,1058 +2,74 @@ * License, v. 2.0. If a copy of the MPL was not distributed with this * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ -//! # Overlay profiler -//! -//! ## Profiler UI string syntax -//! -//! Comma-separated list of of tokens with trailing and leading spaces trimmed. -//! Each tokens can be: -//! - A counter name with an optional prefix. The name corresponds to the displayed name (see the -//! counters vector below. -//! - By default (no prefix) the counter is shown as average + max over half a second. -//! - With a '#' prefix the counter is shown as a graph. -//! - With a '*' prefix the counter is shown as a change indicator. -//! - Some special counters such as GPU time queries have specific visualizations ignoring prefixes. -//! - A preset name to append the preset to the UI (see PROFILER_PRESETS). -//! - An empty token to insert a bit of vertical space. -//! - A '|' token to start a new column. -//! - A '_' token to start a new row. - use api::{ColorF, ColorU}; use crate::debug_render::DebugRenderer; -use crate::device::query::GpuTimer; +use crate::device::query::{GpuSampler, GpuTimer}; use euclid::{Point2D, Rect, Size2D, vec2, default}; use crate::internal_types::FastHashMap; use crate::renderer::{MAX_VERTEX_TEXTURE_WIDTH, wr_has_been_initialized}; -use api::units::DeviceIntSize; use std::collections::vec_deque::VecDeque; -use std::fmt::{Write, Debug}; -use std::f32; +use std::{f32, mem}; use std::ffi::CStr; use std::ops::Range; use std::time::Duration; use time::precise_time_ns; -macro_rules! set_text { - ($dst:expr, $($arg:tt)*) => { - $dst.clear(); - write!($dst, $($arg)*).unwrap(); - }; +pub mod expected { + use std::ops::Range; + pub const AVG_BACKEND_CPU_TIME: Range = 0.0..3.0; + pub const MAX_BACKEND_CPU_TIME: Range = 0.0..6.0; + pub const AVG_RENDERER_CPU_TIME: Range = 0.0..5.0; + pub const MAX_RENDERER_CPU_TIME: Range = 0.0..10.0; + pub const AVG_IPC_TIME: Range = 0.0..2.0; + pub const MAX_IPC_TIME: Range = 0.0..4.0; + pub const AVG_GPU_TIME: Range = 0.0..8.0; + pub const MAX_GPU_TIME: Range = 0.0..15.0; + pub const DRAW_CALLS: Range = 1..100; + pub const VERTICES: Range = 10..25_000; + pub const TOTAL_PRIMITIVES: Range = 1..5000; + pub const VISIBLE_PRIMITIVES: Range = 1..5000; + pub const USED_TARGETS: Range = 1..4; + pub const COLOR_PASSES: Range = 1..4; + pub const ALPHA_PASSES: Range = 0..3; + pub const RENDERED_PICTURE_CACHE_TILES: Range = 0..5; + pub const TOTAL_PICTURE_CACHE_TILES: Range = 0..15; + pub const CREATED_TARGETS: Range = 0..3; + pub const CHANGED_TARGETS: Range = 0..3; + pub const TEXTURE_DATA_UPLOADED: Range = 0..10; + pub const GPU_CACHE_ROWS_TOTAL: Range = 1..50; + pub const GPU_CACHE_ROWS_UPDATED: Range = 0..25; + pub const GPU_CACHE_BLOCKS_TOTAL: Range = 1..65_000; + pub const GPU_CACHE_BLOCKS_UPDATED: Range = 0..1000; + pub const GPU_CACHE_BLOCKS_SAVED: Range = 0..50_000; + pub const DISPLAY_LIST_BUILD_TIME: Range = 0.0..3.0; + pub const MAX_SCENE_BUILD_TIME: Range = 0.0..3.0; + pub const DISPLAY_LIST_SEND_TIME: Range = 0.0..1.0; + pub const DISPLAY_LIST_TOTAL_TIME: Range = 0.0..4.0; + pub const NUM_FONT_TEMPLATES: Range = 0..50; + pub const FONT_TEMPLATES_MB: Range = 0.0..40.0; + pub const NUM_IMAGE_TEMPLATES: Range = 0..20; + pub const IMAGE_TEMPLATES_MB: Range = 0.0..10.0; + pub const DISPLAY_LIST_MB: Range = 0.0..0.2; + pub const NUM_RASTERIZED_BLOBS: Range = 0..25; // in tiles + pub const RASTERIZED_BLOBS_MB: Range = 0.0..4.0; } const GRAPH_WIDTH: f32 = 1024.0; const GRAPH_HEIGHT: f32 = 320.0; const GRAPH_PADDING: f32 = 8.0; const GRAPH_FRAME_HEIGHT: f32 = 16.0; -const PROFILE_SPACING: f32 = 15.0; -const PROFILE_PADDING: f32 = 10.0; -const BACKGROUND_COLOR: ColorU = ColorU { r: 20, g: 20, b: 20, a: 220 }; +const PROFILE_PADDING: f32 = 8.0; -const ONE_SECOND_NS: u64 = 1_000_000_000; +const ONE_SECOND_NS: u64 = 1000000000; +const AVERAGE_OVER_NS: u64 = ONE_SECOND_NS / 2; -const fn float(name: &'static str, unit: &'static str, index: usize, expected: Expected) -> CounterDescriptor { - CounterDescriptor { name, unit, show_as: ShowAs::Float, index, expected } -} - -const fn int(name: &'static str, unit: &'static str, index: usize, expected: Expected) -> CounterDescriptor { - CounterDescriptor { name, unit, show_as: ShowAs::Int, index, expected: expected.into_float() } -} - -// Not in the list below: -// - "GPU time queries" shows the details of the GPU time queries if selected as a graph. -// - "GPU cache bars" shows some info about the GPU cache. -pub static PORFILE_COUNTERS: &'static[CounterDescriptor] = &[ - float("Frame building", "ms", FRAME_BUILDING_TIME, expected(0.0..6.0).avg(0.0..3.0)), - - float("Visibility", "ms", FRAME_VISIBILITY_TIME, expected(0.0..3.0).avg(0.0..2.0)), - float("Prepare", "ms", FRAME_PREPARE_TIME, expected(0.0..3.0).avg(0.0..2.0)), - float("Batching", "ms", FRAME_BATCHING_TIME, expected(0.0..3.0).avg(0.0..2.0)), - - float("Renderer", "ms", RENDERER_TIME, expected(0.0..8.0).avg(0.0..5.0)), - float("Frame CPU total", "ms", TOTAL_FRAME_CPU_TIME, expected(0.0..15.0).avg(0.0..6.0)), - float("GPU", "ms", GPU_TIME, expected(0.0..15.0).avg(0.0..8.0)), - - float("Content send", "ms", CONTENT_SEND_TIME, expected(0.0..1.0).avg(0.0..1.0)), - float("API send", "ms", API_SEND_TIME, expected(0.0..1.0).avg(0.0..0.4)), - float("DisplayList", "ms", DISPLAY_LIST_BUILD_TIME, expected(0.0..5.0).avg(0.0..3.0)), - float("DisplayList mem", "MB", DISPLAY_LIST_MEM, expected(0.0..20.0)), - float("Scene building", "ms", SCENE_BUILD_TIME, expected(0.0..4.0).avg(0.0..3.0)), - - int("Rasterized blobs", "", RASTERIZED_BLOBS, expected(0..15)), - int("Rasterized blob tiles", "", RASTERIZED_BLOB_TILES, expected(0..15)), - int("Rasterized blob pixels", "px", RASTERIZED_BLOBS_PX, expected(0..300_000)), - float("Blob rasterization", "ms", BLOB_RASTERIZATION_TIME, expected(0.0..8.0)), - - int("Rasterized glyphs", "", RASTERIZED_GLYPHS, expected(0..15)), - float("Glyph resolve", "ms", GLYPH_RESOLVE_TIME, expected(0.0..4.0)), - - int("Draw calls", "", DRAW_CALLS, expected(1..120).avg(1..90)), - int("Vertices", "", VERTICES, expected(10..5000)), - int("Primitives", "", PRIMITIVES, expected(10..5000)), - int("Visible primitives", "", VISIBLE_PRIMITIVES, expected(1..5000)), - - int("Used targets", "", USED_TARGETS, expected(1..4)), - int("Created targets", "", CREATED_TARGETS, expected(0..3)), - int("Picture cache slices", "", PICTURE_CACHE_SLICES, expected(0..5)), - - int("Color passes", "", COLOR_PASSES, expected(1..4)), - int("Alpha passes", "", ALPHA_PASSES, expected(0..3)), - int("Picture tiles", "", PICTURE_TILES, expected(0..15)), - float("Picture tiles mem", "MB", PICTURE_TILES_MEM, expected(0.0..150.0)), - int("Rendered picture tiles", "", RENDERED_PICTURE_TILES, expected(0..5)), - int("Texture uploads", "", TEXTURE_UPLOADS, expected(0..10)), - float("Texture uploads mem", "MB", TEXTURE_UPLOADS_MEM, expected(0.0..10.0)), - - int("Font templates", "", FONT_TEMPLATES, expected(0..40)), - float("Font templates mem", "MB", FONT_TEMPLATES_MEM, expected(0.0..20.0)), - int("Image templates", "", IMAGE_TEMPLATES, expected(0..100)), - float("Image templates mem", "MB", IMAGE_TEMPLATES_MEM, expected(0.0..50.0)), - - int("GPU cache rows total", "", GPU_CACHE_ROWS_TOTAL, expected(1..50)), - int("GPU cache rows updated", "", GPU_CACHE_ROWS_UPDATED, expected(0..25)), - int("GPU blocks total", "", GPU_CACHE_BLOCKS_TOTAL, expected(1..65_000)), - int("GPU blocks updated", "", GPU_CACHE_BLOCKS_UPDATED, expected(0..1000)), - int("GPU blocks saved", "", GPU_CACHE_BLOCKS_SAVED, expected(0..50_000)), - - int("Texture cache A8 regions", "", TEXTURE_CACHE_A8_REGIONS, expected(0..100)), - float("Texture cache A8 mem", "MB", TEXTURE_CACHE_A8_MEM, expected(0.0..100.0)), - int("Texture cache A16 regions", "", TEXTURE_CACHE_A16_REGIONS, expected(0..100)), - float("Texture cache A16 mem", "MB", TEXTURE_CACHE_A16_MEM, expected(0.0..100.0)), - int("Texture cache RGBA8 linear regions", "", TEXTURE_CACHE_RGBA8_LINEAR_REGIONS, expected(0..100)), - float("Texture cache RGBA8 linear mem", "MB", TEXTURE_CACHE_RGBA8_LINEAR_MEM, expected(0.0..100.0)), - int("Texture cache RGBA8 nearest regions", "", TEXTURE_CACHE_RGBA8_NEAREST_REGIONS, expected(0..100)), - float("Texture cache RGBA8 nearest mem", "MB", TEXTURE_CACHE_RGBA8_NEAREST_MEM, expected(0.0..100.0)), - float("Texture cache shared mem", "", TEXTURE_CACHE_SHARED_MEM, expected(0.0..100.0)), - float("Texture cache standalone mem", "MB", TEXTURE_CACHE_STANDALONE_MEM, expected(0.0..100.0)), - - - float("Slow frame", "", SLOW_FRAME, expected(0.0..0.0)), - float("Slow transaction", "", SLOW_TXN, expected(0.0..0.0)), - - float("GPU cache upload", "ms", GPU_CACHE_UPLOAD_TIME, expected(0.0..2.0)), - float("Texture cache upload", "ms", TEXTURE_CACHE_UPLOAD_TIME, expected(0.0..3.0)), - - float("Frame", "ms", FRAME_TIME, Expected::none()), - - float("Alpha targets samplers", "%", ALPHA_TARGETS_SAMPLERS, Expected::none()), - float("Transparent pass samplers", "%", TRANSPARENT_PASS_SAMPLERS, Expected::none()), - float("Opaque pass samplers", "%", OPAQUE_PASS_SAMPLERS, Expected::none()), - float("Total samplers", "%", TOTAL_SAMPLERS, Expected::none()), - - int("Interned primitives", "", INTERNED_PRIMITIVES, Expected::none()), - int("Interned clips", "", INTERNED_CLIPS, Expected::none()), - int("Interned text runs", "", INTERNED_TEXT_RUNS, Expected::none()), - int("Interned normal borders", "", INTERNED_NORMAL_BORDERS, Expected::none()), - int("Interned image borders", "", INTERNED_IMAGE_BORDERS, Expected::none()), - int("Interned images", "", INTERNED_IMAGES, Expected::none()), - int("Interned YUV images", "", INTERNED_YUV_IMAGES, Expected::none()), - int("Interned line decorations", "", INTERNED_LINE_DECORATIONS, Expected::none()), - int("Interned linear gradients", "", INTERNED_LINEAR_GRADIENTS, Expected::none()), - int("Interned radial gradients", "", INTERNED_RADIAL_GRADIENTS, Expected::none()), - int("Interned conic gradients", "", INTERNED_CONIC_GRADIENTS, Expected::none()), - int("Interned pictures", "", INTERNED_PICTURES, Expected::none()), - int("Interned filter data", "", INTERNED_FILTER_DATA, Expected::none()), - int("Interned backdrops", "", INTERNED_BACKDROPS, Expected::none()), -]; - -/// Profiler UI string presets. Defined in the profiler UI string syntax, can contain other presets. -static PROFILER_PRESETS: &'static[(&'static str, &'static str)] = &[ - (&"Transaction times", &"DisplayList,Scene building,Content send,API send"), - (&"Frame times", &"Frame CPU total,Frame building,Visibility,Prepare,Batching,Glyph resolve,Renderer,GPU"), - (&"Frame stats", &"Primitives,Visible primitives,Draw calls,Vertices,Color passes,Alpha passes,Rendered picture tiles,Rasterized glyphs"), - (&"Time graphs", &"#DisplayList,#Scene building,#Blob rasterization, ,#Frame CPU total,#Frame building,#Renderer,#Texture cache upload, ,#GPU"), - (&"Memory", &"Image templates,Image templates mem,Font templates,Font templates mem,DisplayList mem,Picture tiles mem"), - (&"GPU samplers", &"Alpha targets samplers,Transparent pass samplers,Opaque pass samplers,Total samplers"), - (&"Interners", "Interned primitives,Interned clips,Interned pictures,Interned text runs,Interned normal borders,Interned image borders,Interned images,Interned YUV images,Interned line decorations,Interned linear gradients,Interned radial gradients,Interned conic gradients,Interned filter data,Interned backdrops"), - (&"Slow indicators", &"*Slow transaction,*Slow frame"), - (&"Compact", &"FPS, ,Frame times, ,Frame stats"), - (&"Default", &"FPS,|,Slow indicators,_,Time graphs,|,Frame times, ,Transaction times, ,Frame stats, ,Memory, ,Interners,_,GPU time queries"), -]; - -fn find_preset(name: &str) -> Option<&'static str> { - for preset in PROFILER_PRESETS { - if preset.0 == name { - return Some(preset.1); - } - } - - None -} - -// The indices here must match the PROFILE_COUNTERS array (checked at runtime). -pub const FRAME_BUILDING_TIME: usize = 0; -pub const FRAME_VISIBILITY_TIME: usize = 1; -pub const FRAME_PREPARE_TIME: usize = 2; -pub const FRAME_BATCHING_TIME: usize = 3; - -pub const RENDERER_TIME: usize = 4; -pub const TOTAL_FRAME_CPU_TIME: usize = 5; -pub const GPU_TIME: usize = 6; - -pub const CONTENT_SEND_TIME: usize = 7; -pub const API_SEND_TIME: usize = 8; - -pub const DISPLAY_LIST_BUILD_TIME: usize = 9; -pub const DISPLAY_LIST_MEM: usize = 10; - -pub const SCENE_BUILD_TIME: usize = 11; - -pub const RASTERIZED_BLOBS: usize = 12; -pub const RASTERIZED_BLOB_TILES: usize = 13; -pub const RASTERIZED_BLOBS_PX: usize = 14; -pub const BLOB_RASTERIZATION_TIME: usize = 15; - -pub const RASTERIZED_GLYPHS: usize = 16; -pub const GLYPH_RESOLVE_TIME: usize = 17; - -pub const DRAW_CALLS: usize = 18; -pub const VERTICES: usize = 19; -pub const PRIMITIVES: usize = 20; -pub const VISIBLE_PRIMITIVES: usize = 21; - -pub const USED_TARGETS: usize = 22; -pub const CREATED_TARGETS: usize = 23; -pub const PICTURE_CACHE_SLICES: usize = 24; - -pub const COLOR_PASSES: usize = 25; -pub const ALPHA_PASSES: usize = 26; -pub const PICTURE_TILES: usize = 27; -pub const PICTURE_TILES_MEM: usize = 28; -pub const RENDERED_PICTURE_TILES: usize = 29; -pub const TEXTURE_UPLOADS: usize = 30; -pub const TEXTURE_UPLOADS_MEM: usize = 31; - -pub const FONT_TEMPLATES: usize = 32; -pub const FONT_TEMPLATES_MEM: usize = 33; -pub const IMAGE_TEMPLATES: usize = 34; -pub const IMAGE_TEMPLATES_MEM: usize = 35; - -pub const GPU_CACHE_ROWS_TOTAL: usize = 36; -pub const GPU_CACHE_ROWS_UPDATED: usize = 37; -pub const GPU_CACHE_BLOCKS_TOTAL: usize = 38; -pub const GPU_CACHE_BLOCKS_UPDATED: usize = 39; -pub const GPU_CACHE_BLOCKS_SAVED: usize = 40; - -pub const TEXTURE_CACHE_A8_REGIONS: usize = 41; -pub const TEXTURE_CACHE_A8_MEM: usize = 42; -pub const TEXTURE_CACHE_A16_REGIONS: usize = 43; -pub const TEXTURE_CACHE_A16_MEM: usize = 44; -pub const TEXTURE_CACHE_RGBA8_LINEAR_REGIONS: usize = 45; -pub const TEXTURE_CACHE_RGBA8_LINEAR_MEM: usize = 46; -pub const TEXTURE_CACHE_RGBA8_NEAREST_REGIONS: usize = 47; -pub const TEXTURE_CACHE_RGBA8_NEAREST_MEM: usize = 48; -pub const TEXTURE_CACHE_SHARED_MEM: usize = 49; -pub const TEXTURE_CACHE_STANDALONE_MEM: usize = 50; - -pub const SLOW_FRAME: usize = 51; -pub const SLOW_TXN: usize = 52; - -pub const GPU_CACHE_UPLOAD_TIME: usize = 53; -pub const TEXTURE_CACHE_UPLOAD_TIME: usize = 54; - -pub const FRAME_TIME: usize = 55; - -pub const ALPHA_TARGETS_SAMPLERS: usize = 56; -pub const TRANSPARENT_PASS_SAMPLERS: usize = 57; -pub const OPAQUE_PASS_SAMPLERS: usize = 58; -pub const TOTAL_SAMPLERS: usize = 59; - -pub const INTERNED_PRIMITIVES: usize = 60; -pub const INTERNED_CLIPS: usize = 61; -pub const INTERNED_TEXT_RUNS: usize = 62; -pub const INTERNED_NORMAL_BORDERS: usize = 63; -pub const INTERNED_IMAGE_BORDERS: usize = 64; -pub const INTERNED_IMAGES: usize = 65; -pub const INTERNED_YUV_IMAGES: usize = 66; -pub const INTERNED_LINE_DECORATIONS: usize = 67; -pub const INTERNED_LINEAR_GRADIENTS: usize = 68; -pub const INTERNED_RADIAL_GRADIENTS: usize = 69; -pub const INTERNED_CONIC_GRADIENTS: usize = 70; -pub const INTERNED_PICTURES: usize = 71; -pub const INTERNED_FILTER_DATA: usize = 72; -pub const INTERNED_BACKDROPS: usize = 73; - -pub const NUM_PROFILER_EVENTS: usize = 74; - -pub struct Profiler { - counters: Vec, - gpu_frames: GpuFrameCollection, - - start: u64, - avg_over_period: u64, - num_graph_samples: usize, - - ui: Vec, -} - -impl Profiler { - pub fn new() -> Self { - - let mut counters = Vec::with_capacity(PORFILE_COUNTERS.len()); - - for (idx, descriptor) in PORFILE_COUNTERS.iter().enumerate() { - debug_assert_eq!(descriptor.index, idx); - counters.push(Counter::new(descriptor)); - } - - Profiler { - gpu_frames: GpuFrameCollection::new(), - - counters, - start: precise_time_ns(), - avg_over_period: ONE_SECOND_NS / 2, - - num_graph_samples: 500, // Would it be useful to control this via a pref? - ui: Vec::new(), - } - } - - /// Sum a few counters and if the total amount is larger than a threshold, update - /// a specific counter. - /// - /// This is useful to monitor slow frame and slow transactions. - fn update_slow_event(&mut self, dst_counter: usize, counters: &[usize], threshold: f64) { - let mut total = 0.0; - for &counter in counters { - if self.counters[counter].value.is_finite() { - total += self.counters[counter].value; - } - } - - if total > threshold { - self.counters[dst_counter].set(total); - } - } - - // Call at the end of every frame, after setting the counter values and before drawing the counters. - pub fn update(&mut self) { - let now = precise_time_ns(); - let update_avg = (now - self.start) > self.avg_over_period; - if update_avg { - self.start = now; - } - - self.update_slow_event( - SLOW_FRAME, - &[TOTAL_FRAME_CPU_TIME], - 15.0, - ); - self.update_slow_event( - SLOW_TXN, - &[DISPLAY_LIST_BUILD_TIME, CONTENT_SEND_TIME, SCENE_BUILD_TIME], - 80.0 - ); - - for counter in &mut self.counters { - counter.update(update_avg); - } - } - - pub fn set_gpu_time_queries(&mut self, gpu_queries: Vec) { - let mut gpu_time_ns = 0; - for sample in &gpu_queries { - gpu_time_ns += sample.time_ns; - } - - self.gpu_frames.push(gpu_time_ns, gpu_queries); - - self.counters[GPU_TIME].set_f64(ns_to_ms(gpu_time_ns)); - } - - // Find the index of a counter by its name. - pub fn index_of(&self, name: &str) -> Option { - self.counters.iter().position(|counter| counter.name == name) - } - - // Define the profiler UI, see comment about the syntax at the top of this file. - pub fn set_ui(&mut self, names: &str) { - let mut selection = Vec::new(); - - self.append_to_ui(&mut selection, names); - - if selection == self.ui { - return; - } - - for counter in &mut self.counters { - counter.disable_graph(); - } - - for item in &selection { - if let Item::Graph(idx) = item { - self.counters[*idx].enable_graph(self.num_graph_samples); - } - } - - self.ui = selection; - } - - fn append_to_ui(&mut self, selection: &mut Vec, names: &str) { - // Group successive counters together. - fn flush_counters(counters: &mut Vec, selection: &mut Vec) { - if !counters.is_empty() { - selection.push(Item::Counters(std::mem::take(counters))) - } - } - - let mut counters = Vec::new(); - - for name in names.split(",") { - let name = name.trim(); - let is_graph = name.starts_with("#"); - let is_indicator = name.starts_with("*"); - let name = if is_graph || is_indicator { - &name[1..] - } else { - name - }; - // See comment about the ui string syntax at the top of this file. - match name { - "" => { - flush_counters(&mut counters, selection); - selection.push(Item::Space); - } - "|" => { - flush_counters(&mut counters, selection); - selection.push(Item::Column); - } - "_" => { - flush_counters(&mut counters, selection); - selection.push(Item::Row); - } - "FPS" => { - flush_counters(&mut counters, selection); - selection.push(Item::Fps); - } - "GPU time queries" => { - flush_counters(&mut counters, selection); - selection.push(Item::GpuTimeQueries); - } - "GPU cache bars" => { - flush_counters(&mut counters, selection); - selection.push(Item::GpuCacheBars); - } - _ => { - if let Some(idx) = self.index_of(name) { - if is_graph { - flush_counters(&mut counters, selection); - selection.push(Item::Graph(idx)); - } else if is_indicator { - flush_counters(&mut counters, selection); - selection.push(Item::ChangeIndicator(idx)); - } else { - counters.push(idx); - } - } else if let Some(preset_str) = find_preset(name) { - flush_counters(&mut counters, selection); - self.append_to_ui(selection, preset_str); - } else { - selection.push(Item::Text(format!("Unknonw counter: {}", name))); - } - } - } - } - - flush_counters(&mut counters, selection); - } - - pub fn set_counters(&mut self, counters: &mut TransactionProfile) { - for (id, evt) in counters.events.iter_mut().enumerate() { - if let Event::Value(val) = *evt { - self.counters[id].set(val); - } - *evt = Event::None; - } - } - - pub fn get(&self, id: usize) -> Option { - self.counters[id].get() - } - - fn draw_counters( - counters: &[Counter], - selected: &[usize], - mut x: f32, mut y: f32, - text_buffer: &mut String, - debug_renderer: &mut DebugRenderer, - ) -> default::Rect { - let line_height = debug_renderer.line_height(); - - x += PROFILE_PADDING; - y += PROFILE_PADDING; - let origin = default::Point2D::new(x, y); - y += line_height * 0.5; - - let mut total_rect = Rect::zero(); - - let mut color_index = 0; - let colors = [ - // Regular values, - ColorU::new(255, 255, 255, 255), - ColorU::new(255, 255, 0, 255), - // Unexpected values, - ColorU::new(255, 80, 0, 255), - ColorU::new(255, 0, 0, 255), - ]; - - for idx in selected { - // If The index is invalid, add some vertical space. - let counter = &counters[*idx]; - - let rect = debug_renderer.add_text( - x, y, - counter.name, - colors[color_index], - None, - ); - color_index = (color_index + 1) % 2; - - total_rect = total_rect.union(&rect); - y += line_height; - } - - color_index = 0; - x = total_rect.max_x() + 60.0; - y = origin.y + line_height * 0.5; - - for idx in selected { - let counter = &counters[*idx]; - let expected_offset = if counter.has_unexpected_avg_max() { 2 } else { 0 }; - - counter.write_value(text_buffer); - - let rect = debug_renderer.add_text( - x, - y, - &text_buffer, - colors[color_index + expected_offset], - None, - ); - color_index = (color_index + 1) % 2; - - total_rect = total_rect.union(&rect); - y += line_height; - } - - total_rect = total_rect - .union(&Rect { origin, size: Size2D::new(1.0, 1.0) }) - .inflate(PROFILE_PADDING, PROFILE_PADDING); - - debug_renderer.add_quad( - total_rect.min_x(), - total_rect.min_y(), - total_rect.max_x(), - total_rect.max_y(), - BACKGROUND_COLOR, - BACKGROUND_COLOR, - ); - - total_rect - } - - fn draw_graph( - counter: &Counter, - x: f32, - y: f32, - text_buffer: &mut String, - debug_renderer: &mut DebugRenderer, - ) -> default::Rect { - let graph = counter.graph.as_ref().unwrap(); - - let max_samples = graph.values.capacity() as f32; - - let size = Size2D::new(max_samples, 100.0); - let line_height = debug_renderer.line_height(); - let graph_rect = Rect::new(Point2D::new(x + PROFILE_PADDING, y + PROFILE_PADDING), size); - let mut rect = graph_rect.inflate(PROFILE_PADDING, PROFILE_PADDING); - - let stats = graph.stats(); - - let text_color = ColorU::new(255, 255, 0, 255); - let text_origin = rect.origin + vec2(rect.size.width, 25.0); - set_text!(text_buffer, "{} ({})", counter.name, counter.unit); - debug_renderer.add_text( - text_origin.x, - text_origin.y, - if counter.unit == "" { counter.name } else { text_buffer }, - ColorU::new(0, 255, 0, 255), - None, - ); - - set_text!(text_buffer, "Samples: {}", stats.samples); - - debug_renderer.add_text( - text_origin.x, - text_origin.y + line_height, - text_buffer, - text_color, - None, - ); - - if stats.samples > 0 { - set_text!(text_buffer, "Min: {:.2} {}", stats.min, counter.unit); - debug_renderer.add_text( - text_origin.x, - text_origin.y + line_height * 2.0, - text_buffer, - text_color, - None, - ); - - set_text!(text_buffer, "Avg: {:.2} {}", stats.avg, counter.unit); - debug_renderer.add_text( - text_origin.x, - text_origin.y + line_height * 3.0, - text_buffer, - text_color, - None, - ); - - set_text!(text_buffer, "Max: {:.2} {}", stats.max, counter.unit); - debug_renderer.add_text( - text_origin.x, - text_origin.y + line_height * 4.0, - text_buffer, - text_color, - None, - ); - } - - rect.size.width += 200.0; - debug_renderer.add_quad( - rect.min_x(), - rect.min_y(), - rect.max_x(), - rect.max_y(), - BACKGROUND_COLOR, - BACKGROUND_COLOR, - ); - - let bx1 = graph_rect.max_x(); - let by1 = graph_rect.max_y(); - - let w = graph_rect.size.width / max_samples; - let h = graph_rect.size.height; - - let color_t0 = ColorU::new(0, 255, 0, 255); - let color_b0 = ColorU::new(0, 180, 0, 255); - - let color_t2 = ColorU::new(255, 0, 0, 255); - let color_b2 = ColorU::new(180, 0, 0, 255); - - for (index, sample) in graph.values.iter().enumerate() { - if !sample.is_finite() { - // NAN means no sample this frame. - continue; - } - let sample = *sample as f32; - let x1 = bx1 - index as f32 * w; - let x0 = x1 - w; - - let y0 = by1 - (sample / stats.max as f32) as f32 * h; - let y1 = by1; - - let (color_top, color_bottom) = if counter.is_unexpected_value(sample as f64) { - (color_t2, color_b2) - } else { - (color_t0, color_b0) - }; - - debug_renderer.add_quad(x0, y0, x1, y1, color_top, color_bottom); - } - - rect - } - - - fn draw_change_indicator( - counter: &Counter, - x: f32, y: f32, - debug_renderer: &mut DebugRenderer - ) -> default::Rect { - let height = 10.0; - let width = 20.0; - - // Draw the indicator red instead of blue if is is not within expected ranges. - let color = if counter.has_unexpected_value() || counter.has_unexpected_avg_max() { - ColorU::new(255, 20, 20, 255) - } else { - ColorU::new(0, 100, 250, 255) - }; - - let tx = counter.change_indicator as f32 * width; - debug_renderer.add_quad( - x, - y, - x + 15.0 * width, - y + height, - ColorU::new(0, 0, 0, 150), - ColorU::new(0, 0, 0, 150), - ); - - debug_renderer.add_quad( - x + tx, - y, - x + tx + width, - y + height, - color, - ColorU::new(25, 25, 25, 255), - ); - - Rect { - origin: Point2D::new(x, y), - size: Size2D::new(15.0 * width + 20.0, height), - } - } - - fn draw_bar( - label: &str, - label_color: ColorU, - counters: &[(ColorU, usize)], - x: f32, y: f32, - debug_renderer: &mut DebugRenderer, - ) -> default::Rect { - let x = x + 8.0; - let y = y + 24.0; - let text_rect = debug_renderer.add_text( - x, y, - label, - label_color, - None, - ); - - let x_base = text_rect.max_x() + 10.0; - let width = 300.0; - let total_value = counters.last().unwrap().1; - let scale = width / total_value as f32; - let mut x_current = x_base; - - for &(color, counter) in counters { - let x_stop = x_base + counter as f32 * scale; - debug_renderer.add_quad( - x_current, - text_rect.origin.y, - x_stop, - text_rect.max_y(), - color, - color, - ); - x_current = x_stop; - - } - - let mut total_rect = text_rect; - total_rect.size.width += width + 10.0; - - total_rect - } - - fn draw_gpu_cache_bars(&self, x: f32, mut y: f32, text_buffer: &mut String, debug_renderer: &mut DebugRenderer) -> default::Rect { - let color_updated = ColorU::new(0xFF, 0, 0, 0xFF); - let color_free = ColorU::new(0, 0, 0xFF, 0xFF); - let color_saved = ColorU::new(0, 0xFF, 0, 0xFF); - - let updated_blocks = self.get(GPU_CACHE_BLOCKS_UPDATED).unwrap_or(0.0) as usize; - let saved_blocks = self.get(GPU_CACHE_BLOCKS_SAVED).unwrap_or(0.0) as usize; - let allocated_blocks = self.get(GPU_CACHE_BLOCKS_TOTAL).unwrap_or(0.0) as usize; - let allocated_rows = self.get(GPU_CACHE_ROWS_TOTAL).unwrap_or(0.0) as usize; - let updated_rows = self.get(GPU_CACHE_ROWS_UPDATED).unwrap_or(0.0) as usize; - let requested_blocks = updated_blocks + saved_blocks; - let total_blocks = allocated_rows * MAX_VERTEX_TEXTURE_WIDTH; - - set_text!(text_buffer, "GPU cache rows ({}):", allocated_rows); - - let rect0 = Profiler::draw_bar( - text_buffer, - ColorU::new(0xFF, 0xFF, 0xFF, 0xFF), - &[ - (color_updated, updated_rows), - (color_free, allocated_rows), - ], - x, y, - debug_renderer, - ); - - y = rect0.max_y(); - - let rect1 = Profiler::draw_bar( - "GPU cache blocks", - ColorU::new(0xFF, 0xFF, 0, 0xFF), - &[ - (color_updated, updated_blocks), - (color_saved, requested_blocks), - (color_free, allocated_blocks), - (ColorU::new(0, 0, 0, 0xFF), total_blocks), - ], - x, y, - debug_renderer, - ); - - let total_rect = rect0.union(&rect1).inflate(10.0, 10.0); - debug_renderer.add_quad( - total_rect.origin.x, - total_rect.origin.y, - total_rect.origin.x + total_rect.size.width, - total_rect.origin.y + total_rect.size.height, - ColorF::new(0.1, 0.1, 0.1, 0.8).into(), - ColorF::new(0.2, 0.2, 0.2, 0.8).into(), - ); - - total_rect - } - - fn draw_gpu_time_queries( - time_queries: &GpuFrameCollection, - x: f32, y: f32, - debug_renderer: &mut DebugRenderer, - ) -> default::Rect { - let mut has_data = false; - for frame in &time_queries.frames { - if !frame.samples.is_empty() { - has_data = true; - break; - } - } - - if !has_data { - return Rect::zero(); - } - - let graph_rect = Rect::new( - Point2D::new(x + GRAPH_PADDING, y + GRAPH_PADDING), - Size2D::new(GRAPH_WIDTH, GRAPH_HEIGHT), - ); - let bounding_rect = graph_rect.inflate(GRAPH_PADDING, GRAPH_PADDING); - - debug_renderer.add_quad( - bounding_rect.origin.x, - bounding_rect.origin.y, - bounding_rect.origin.x + bounding_rect.size.width, - bounding_rect.origin.y + bounding_rect.size.height, - BACKGROUND_COLOR, - BACKGROUND_COLOR, - ); - - let w = graph_rect.size.width; - let mut y0 = graph_rect.origin.y; - - let mut max_time = time_queries.frames - .iter() - .max_by_key(|f| f.total_time) - .unwrap() - .total_time as f32; - - // If the max time is lower than 16ms, fix the scale - // at 16ms so that the graph is easier to interpret. - let baseline_ns = 16_000_000.0; // 16ms - max_time = max_time.max(baseline_ns); - - let mut tags_present = FastHashMap::default(); - - for frame in &time_queries.frames { - let y1 = y0 + GRAPH_FRAME_HEIGHT; - - let mut current_ns = 0; - for sample in &frame.samples { - let x0 = graph_rect.origin.x + w * current_ns as f32 / max_time; - current_ns += sample.time_ns; - let x1 = graph_rect.origin.x + w * current_ns as f32 / max_time; - let mut bottom_color = sample.tag.color; - bottom_color.a *= 0.5; - - debug_renderer.add_quad( - x0, - y0, - x1, - y1, - sample.tag.color.into(), - bottom_color.into(), - ); - - tags_present.insert(sample.tag.label, sample.tag.color); - } - - y0 = y1; - } - - // If the max time is higher than 16ms, show a vertical line at the - // 16ms mark. - if max_time > baseline_ns { - let x = graph_rect.origin.x + w * baseline_ns as f32 / max_time; - let height = time_queries.frames.len() as f32 * GRAPH_FRAME_HEIGHT; - - debug_renderer.add_quad( - x, - graph_rect.origin.y, - x + 4.0, - graph_rect.origin.y + height, - ColorU::new(120, 00, 00, 150), - ColorU::new(120, 00, 00, 100), - ); - } - - - // Add a legend to see which color correspond to what primitive. - const LEGEND_SIZE: f32 = 20.0; - const PADDED_LEGEND_SIZE: f32 = 25.0; - if !tags_present.is_empty() { - debug_renderer.add_quad( - bounding_rect.max_x() + GRAPH_PADDING, - bounding_rect.origin.y, - bounding_rect.max_x() + GRAPH_PADDING + 200.0, - bounding_rect.origin.y + tags_present.len() as f32 * PADDED_LEGEND_SIZE + GRAPH_PADDING, - BACKGROUND_COLOR, - BACKGROUND_COLOR, - ); - } - - for (i, (label, &color)) in tags_present.iter().enumerate() { - let x0 = bounding_rect.origin.x + bounding_rect.size.width + GRAPH_PADDING * 2.0; - let y0 = bounding_rect.origin.y + GRAPH_PADDING + i as f32 * PADDED_LEGEND_SIZE; - - debug_renderer.add_quad( - x0, y0, x0 + LEGEND_SIZE, y0 + LEGEND_SIZE, - color.into(), - color.into(), - ); - - debug_renderer.add_text( - x0 + PADDED_LEGEND_SIZE, - y0 + LEGEND_SIZE * 0.75, - label, - ColorU::new(255, 255, 0, 255), - None, - ); - } - - bounding_rect - } - - pub fn draw_profile( - &mut self, - _frame_index: u64, - debug_renderer: &mut DebugRenderer, - device_size: DeviceIntSize, - ) { - let x_start = 20.0; - let mut y_start = 150.0; - let default_column_width = 400.0; - - // set_text!(..) into this string instead of using format!(..) to avoid - // unnecessary allocations. - let mut text_buffer = String::with_capacity(32); - - let mut column_width = default_column_width; - let mut max_y = y_start; - - let mut x = x_start; - let mut y = y_start; - - for elt in &self.ui { - let rect = match elt { - Item::Counters(indices) => { - Profiler::draw_counters(&self.counters, &indices, x, y, &mut text_buffer, debug_renderer) - } - Item::Graph(idx) => { - Profiler::draw_graph(&self.counters[*idx], x, y, &mut text_buffer, debug_renderer) - } - Item::ChangeIndicator(idx) => { - Profiler::draw_change_indicator(&self.counters[*idx], x, y, debug_renderer) - } - Item::GpuTimeQueries => { - Profiler::draw_gpu_time_queries(&self.gpu_frames, x, y, debug_renderer) - } - Item::GpuCacheBars => { - self.draw_gpu_cache_bars(x, y, &mut text_buffer, debug_renderer) - } - Item::Text(text) => { - let p = 10.0; - let mut rect = debug_renderer.add_text( - x + p, - y + p, - &text, - ColorU::new(255, 255, 255, 255), - None, - ); - rect = rect.inflate(p, p); - - debug_renderer.add_quad( - rect.origin.x, - rect.origin.y, - rect.max_x(), - rect.max_y(), - BACKGROUND_COLOR, - BACKGROUND_COLOR, - ); - - rect - } - Item::Fps => { - set_text!(&mut text_buffer, "{:.2} fps", 1000.0 / self.counters[FRAME_TIME].max); - let mut rect = debug_renderer.add_text( - x + PROFILE_PADDING, - y + PROFILE_PADDING + 5.0, - &text_buffer, - ColorU::new(255, 255, 255, 255), - None, - ); - rect = rect.inflate(PROFILE_PADDING, PROFILE_PADDING); - - debug_renderer.add_quad( - rect.min_x(), - rect.min_y(), - rect.max_x(), - rect.max_y(), - BACKGROUND_COLOR, - BACKGROUND_COLOR, - ); - - rect - } - Item::Space => { - Rect { origin: Point2D::new(x, y), size: Size2D::new(0.0, PROFILE_SPACING) } - } - Item::Column => { - max_y = max_y.max(y); - x += column_width + PROFILE_SPACING; - y = y_start; - column_width = default_column_width; - - continue; - } - Item::Row => { - max_y = max_y.max(y); - y_start = max_y + PROFILE_SPACING; - y = y_start; - x = x_start; - column_width = default_column_width; - - continue; - } - }; - - column_width = column_width.max(rect.size.width); - y = rect.max_y(); - - if y > device_size.height as f32 - 100.0 { - max_y = max_y.max(y); - x += column_width + PROFILE_SPACING; - y = y_start; - column_width = default_column_width; - } - } - } - - #[cfg(feature = "capture")] - pub fn dump_stats(&self, sink: &mut dyn std::io::Write) -> std::io::Result<()> { - for counter in &self.counters { - if counter.value.is_finite() { - writeln!(sink, "{} {:?}{}", counter.name, counter.value, counter.unit)?; - } - } - - Ok(()) - } +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum ProfileStyle { + Full, + Compact, + Smart, + NoDraw, } /// Defines the interface for hooking up an external profiler to WR. @@ -1165,398 +181,939 @@ pub struct GpuProfileTag { pub color: ColorF, } -/// Ranges of expected value for a profile counter. -#[derive(Clone, Debug)] -pub struct Expected { - pub range: Option>, - pub avg: Option>, +trait ProfileCounter { + fn description(&self) -> &'static str; + fn value(&self) -> String; + fn is_expected(&self) -> bool; } -impl Expected { - const fn none() -> Self { - Expected { - range: None, - avg: None, +#[derive(Clone)] +pub struct IntProfileCounter { + description: &'static str, + value: usize, + expect: Option>, +} + +impl IntProfileCounter { + fn new(description: &'static str, expect: Option>) -> Self { + IntProfileCounter { + description, + value: 0, + expect, } } -} -const fn expected(range: Range) -> Expected { - Expected { - range: Some(range), - avg: None, + #[inline(always)] + pub fn inc(&mut self) { + self.value += 1; + } + + pub fn set(&mut self, value: usize) { + self.value = value; } } -impl Expected { - const fn avg(mut self, avg: Range) -> Self { - self.avg = Some(avg); - self +impl ProfileCounter for IntProfileCounter { + fn description(&self) -> &'static str { + self.description + } + + fn value(&self) -> String { + format!("{}", self.value) + } + + fn is_expected(&self) -> bool { + self.expect.as_ref().map(|range| range.contains(&(self.value as u64))).unwrap_or(true) } } -impl Expected { - const fn avg(mut self, avg: Range) -> Self { - self.avg = Some(avg); - self - } - - const fn into_float(self) -> Expected { - Expected { - range: match self.range { - Some(r) => Some(r.start as f64 .. r.end as f64), - None => None, - }, - avg: match self.avg { - Some(r) => Some(r.start as f64 .. r.end as f64), - None => None, - }, - } - } -} - -pub struct CounterDescriptor { - pub name: &'static str, - pub unit: &'static str, - pub index: usize, - pub show_as: ShowAs, - pub expected: Expected, -} - -#[derive(Debug)] -pub struct Counter { - pub name: &'static str, - pub unit: &'static str, - pub show_as: ShowAs, - pub expected: Expected, - - /// - value: f64, +/// A profile counter recording average and maximum integer values over time slices +/// of half a second. +#[derive(Clone)] +pub struct AverageIntProfileCounter { + description: &'static str, + /// Start of the current time slice. + start_ns: u64, + /// Sum of the values recorded during the current time slice. + sum: u64, /// Number of samples in the current time slice. num_samples: u64, - /// Sum of the values recorded during the current time slice. - sum: f64, /// The max value in in-progress time slice. - next_max: f64, + next_max: u64, /// The max value of the previous time slice (displayed). - max: f64, - /// The average value of the previous time slice (displayed). - avg: f64, - /// Incremented when the counter changes. - change_indicator: u8, - - /// Only used to check that the constants match the real index. - index: usize, - - graph: Option, + max: u64, + /// The average value of the previous time slice (displayed). + avg: u64, + /// Intermediate accumulator for `add` and `inc`. + accum: u64, + /// Expected average range of values, if any. + expect_avg: Option>, + /// Expected maximum range of values, if any. + expect_max: Option>, } -impl Counter { - pub fn new(descriptor: &CounterDescriptor) -> Self { - Counter { - name: descriptor.name, - unit: descriptor.unit, - show_as: descriptor.show_as, - expected: descriptor.expected.clone(), - index: descriptor.index, - value: std::f64::NAN, +impl AverageIntProfileCounter { + pub fn new( + description: &'static str, + expect_avg: Option>, + expect_max: Option>, + ) -> Self { + AverageIntProfileCounter { + description, + start_ns: precise_time_ns(), + sum: 0, num_samples: 0, - sum: 0.0, - next_max: 0.0, - max: 0.0, - avg: 0.0, - change_indicator: 0, - graph: None, - } - } - pub fn set_f64(&mut self, val: f64) { - self.value = val; - } - - pub fn set(&mut self, val: T) where T: Into { - self.set_f64(val.into()); - } - - pub fn get(&self) -> Option { - if self.value.is_finite() { - Some(self.value) - } else { - None + next_max: 0, + max: 0, + avg: 0, + accum: 0, + expect_avg, + expect_max, } } - pub fn write_value(&self, output: &mut String) { - match self.show_as { - ShowAs::Float => { - set_text!(output, "{:.2} {} (max: {:.2})", self.avg, self.unit, self.max); - } - ShowAs::Int => { - set_text!(output, "{:.0} {} (max: {:.0})", self.avg.round(), self.unit, self.max.round()); - } + pub fn reset(&mut self) { + if self.accum > 0 { + self.set_u64(self.accum); + self.accum = 0; } } - pub fn enable_graph(&mut self, max_samples: usize) { - if self.graph.is_some() { - return; - } - - self.graph = Some(Graph::new(max_samples)); + pub fn set(&mut self, val: usize) { + self.set_u64(val as u64); } - pub fn disable_graph(&mut self) { - self.graph = None; - } - - pub fn is_unexpected_value(&self, value: f64) -> bool { - if let Some(range) = &self.expected.range { - return value.is_finite() && value >= range.end; - } - - false - } - - pub fn has_unexpected_value(&self) -> bool { - self.is_unexpected_value(self.value) - } - - pub fn has_unexpected_avg_max(&self) -> bool { - if let Some(range) = &self.expected.range { - if self.max.is_finite() && self.max >= range.end { - return true; - } - } - - if let Some(range) = &self.expected.avg { - if self.avg < range.start || self.avg >= range.end { - return true; - } - } - - false - } - - fn update(&mut self, update_avg: bool) { - let updated = self.value.is_finite(); - if updated { - self.next_max = self.next_max.max(self.value); - self.sum += self.value; - self.num_samples += 1; - self.change_indicator = (self.change_indicator + 1) % 15; - } - - if let Some(graph) = &mut self.graph { - graph.set(self.value); - } - - self.value = std::f64::NAN; - - if update_avg && self.num_samples > 0 { - self.avg = self.sum / self.num_samples as f64; + pub fn set_u64(&mut self, val: u64) { + let now = precise_time_ns(); + if (now - self.start_ns) > AVERAGE_OVER_NS && self.num_samples > 0 { + self.avg = self.sum / self.num_samples; self.max = self.next_max; - self.sum = 0.0; + self.start_ns = now; + self.sum = 0; self.num_samples = 0; - self.next_max = std::f64::MIN; - } - } -} - -#[derive(Copy, Clone, Debug)] -pub enum Event { - Start(f64), - Value(f64), - None, -} - -// std::convert::From/TryFrom can't deal with integer to f64 so we roll our own... -pub trait EventValue { - fn into_f64(self) -> f64; -} - -impl EventValue for f64 { fn into_f64(self) -> f64 { self } } -impl EventValue for f32 { fn into_f64(self) -> f64 { self as f64 } } -impl EventValue for u32 { fn into_f64(self) -> f64 { self as f64 } } -impl EventValue for i32 { fn into_f64(self) -> f64 { self as f64 } } -impl EventValue for u64 { fn into_f64(self) -> f64 { self as f64 } } -impl EventValue for usize { fn into_f64(self) -> f64 { self as f64 } } - -/// A container for profiling information that moves along the rendering pipeline -/// and is handed off to the profiler at the end. -pub struct TransactionProfile { - pub events: Vec, -} - -impl TransactionProfile { - pub fn new() -> Self { - TransactionProfile { - events: vec![Event::None; NUM_PROFILER_EVENTS], + self.next_max = 0; } + self.next_max = self.next_max.max(val); + self.sum += val; + self.num_samples += 1; + self.accum = 0; } - pub fn start_time(&mut self, id: usize) { - let ms = ns_to_ms(precise_time_ns()); - self.events[id] = Event::Start(ms); + pub fn add(&mut self, val: usize) { + self.accum += val as u64; } - pub fn end_time(&mut self, id: usize) -> f64 { - self.end_time_if_started(id).unwrap() + pub fn inc(&mut self) { + self.accum += 1; } - /// Similar to end_time, but doesn't panic if not matched with start_time. - pub fn end_time_if_started(&mut self, id: usize) -> Option { - if let Event::Start(start) = self.events[id] { - let time = ns_to_ms(precise_time_ns()) - start; - self.events[id] = Event::Value(time); + pub fn get_accum(&mut self) -> u64{ + self.accum + } - Some(time) + /// Returns either the most up to date value if the counter is updated + /// with add add inc, or the average over the previous time slice. + pub fn get(&self) -> usize { + let result = if self.accum != 0 { + self.accum } else { - None - } - } - - pub fn set(&mut self, id: usize, value: T) where T: EventValue { - self.set_f64(id, value.into_f64()); - } - - - pub fn set_f64(&mut self, id: usize, value: f64) { - self.events[id] = Event::Value(value); - } - - pub fn get(&self, id: usize) -> Option { - if let Event::Value(val) = self.events[id] { - Some(val) - } else { - None - } - } - - pub fn get_or(&self, id: usize, or: f64) -> f64 { - self.get(id).unwrap_or(or) - } - - pub fn add(&mut self, id: usize, n: T) where T: EventValue { - let n = n.into_f64(); - - let evt = &mut self.events[id]; - - let val = match *evt { - Event::Value(v) => v + n, - Event::None => n, - Event::Start(..) => { panic!(); } + self.avg }; - *evt = Event::Value(val); + result as usize + } +} + +impl ProfileCounter for AverageIntProfileCounter { + fn description(&self) -> &'static str { + self.description } - pub fn inc(&mut self, id: usize) { - self.add(id, 1.0); + fn value(&self) -> String { + format!("{:.2} (max {:.2})", self.avg, self.max) } - pub fn take(&mut self) -> Self { - TransactionProfile { - events: std::mem::take(&mut self.events), + fn is_expected(&self) -> bool { + self.expect_avg.as_ref().map(|range| range.contains(&self.avg)).unwrap_or(true) + && self.expect_max.as_ref().map(|range| range.contains(&self.max)).unwrap_or(true) + } +} + +pub struct PercentageProfileCounter { + description: &'static str, + value: f32, +} + +impl ProfileCounter for PercentageProfileCounter { + fn description(&self) -> &'static str { + self.description + } + + fn value(&self) -> String { + format!("{:.2}%", self.value * 100.0) + } + + fn is_expected(&self) -> bool { true } +} + +#[derive(Clone)] +pub struct ResourceProfileCounter { + description: &'static str, + value: usize, + // in bytes. + size: usize, + expected_count: Option>, + // in MB + expected_size: Option>, +} + +impl ResourceProfileCounter { + fn new( + description: &'static str, + expected_count: Option>, + expected_size: Option> + ) -> Self { + ResourceProfileCounter { + description, + value: 0, + size: 0, + expected_count, + expected_size, } } - pub fn take_and_reset(&mut self) -> Self { - let events = std::mem::take(&mut self.events); - - *self = TransactionProfile::new(); - - TransactionProfile { events } + #[allow(dead_code)] + fn reset(&mut self) { + self.value = 0; + self.size = 0; } - pub fn merge(&mut self, other: &mut Self) { - for i in 0..self.events.len() { - match (self.events[i], other.events[i]) { - (Event::Value(v1), Event::Value(v2)) => { - self.events[i] = Event::Value(v1.max(v2)); - } - (Event::Value(_), _) => {} - (_, Event::Value(v2)) => { - self.events[i] = Event::Value(v2); - } - (Event::None, evt) => { - self.events[i] = evt; - } - (Event::Start(..), Event::Start(s)) => { - self.events[i] = Event::Start(s); - } - _=> {} + #[inline(always)] + pub fn inc(&mut self, size: usize) { + self.value += 1; + self.size += size; + } + + pub fn set(&mut self, count: usize, size: usize) { + self.value = count; + self.size = size; + } + + pub fn size_mb(&self) -> f32 { + self.size as f32 / (1024.0 * 1024.0) + } +} + +impl ProfileCounter for ResourceProfileCounter { + fn description(&self) -> &'static str { + self.description + } + + fn value(&self) -> String { + format!("{} ({:.2} MB)", self.value, self.size_mb()) + } + + fn is_expected(&self) -> bool { + self.expected_count.as_ref().map(|range| range.contains(&self.value)).unwrap_or(true) + && self.expected_size.as_ref().map(|range| range.contains(&self.size_mb())).unwrap_or(true) + } +} + +#[derive(Clone)] +pub struct TimeProfileCounter { + description: &'static str, + nanoseconds: u64, + invert: bool, + expect_ms: Option>, +} + +pub struct Timer<'a> { + start: u64, + result: &'a mut u64, +} + +impl<'a> Drop for Timer<'a> { + fn drop(&mut self) { + let end = precise_time_ns(); + *self.result += end - self.start; + } +} + +impl TimeProfileCounter { + pub fn new(description: &'static str, invert: bool, expect_ms: Option>) -> Self { + TimeProfileCounter { + description, + nanoseconds: 0, + invert, + expect_ms, + } + } + + fn reset(&mut self) { + self.nanoseconds = 0; + } + + #[allow(dead_code)] + pub fn set(&mut self, ns: u64) { + self.nanoseconds = ns; + } + + pub fn profile(&mut self, callback: F) -> T + where + F: FnOnce() -> T, + { + let t0 = precise_time_ns(); + let val = callback(); + let t1 = precise_time_ns(); + let ns = t1 - t0; + self.nanoseconds += ns; + val + } + + pub fn timer(&mut self) -> Timer { + Timer { + start: precise_time_ns(), + result: &mut self.nanoseconds, + } + } + + pub fn inc(&mut self, ns: u64) { + self.nanoseconds += ns; + } + + pub fn get(&self) -> u64 { + self.nanoseconds + } + + pub fn get_ms(&self) -> f64 { + self.nanoseconds as f64 / 1000000.0 + } +} + +impl ProfileCounter for TimeProfileCounter { + fn description(&self) -> &'static str { + self.description + } + + fn value(&self) -> String { + if self.invert { + format!("{:.2} fps", 1000000000.0 / self.nanoseconds as f64) + } else { + format!("{:.2} ms", self.get_ms()) + } + } + + fn is_expected(&self) -> bool { + self.expect_ms.as_ref() + .map(|range| range.contains(&(self.nanoseconds as f64 / 1000000.0))) + .unwrap_or(true) + } +} + +#[derive(Clone)] +pub struct AverageTimeProfileCounter { + counter: AverageIntProfileCounter, + invert: bool, +} + +impl AverageTimeProfileCounter { + pub fn new( + description: &'static str, + invert: bool, + expect_avg: Option>, + expect_max: Option>, + ) -> Self { + let expect_avg_ns = expect_avg.map( + |range| (range.start * 1000000.0) as u64 .. (range.end * 1000000.0) as u64 + ); + let expect_max_ns = expect_max.map( + |range| (range.start * 1000000.0) as u64 .. (range.end * 1000000.0) as u64 + ); + + AverageTimeProfileCounter { + counter: AverageIntProfileCounter::new( + description, + expect_avg_ns, + expect_max_ns, + ), + invert, + } + } + + pub fn set(&mut self, ns: u64) { + self.counter.set_u64(ns); + } + + #[allow(dead_code)] + pub fn profile(&mut self, callback: F) -> T + where + F: FnOnce() -> T, + { + let t0 = precise_time_ns(); + let val = callback(); + let t1 = precise_time_ns(); + self.counter.set_u64(t1 - t0); + val + } + + pub fn avg_ms(&self) -> f64 { self.counter.avg as f64 / 1000000.0 } + + pub fn max_ms(&self) -> f64 { self.counter.max as f64 / 1000000.0 } +} + +impl ProfileCounter for AverageTimeProfileCounter { + fn description(&self) -> &'static str { + self.counter.description + } + + fn value(&self) -> String { + if self.invert { + format!("{:.2} fps", 1000000000.0 / self.counter.avg as f64) + } else { + format!("{:.2} ms (max {:.2} ms)", self.avg_ms(), self.max_ms()) + } + } + + fn is_expected(&self) -> bool { + self.counter.is_expected() + } +} + + +#[derive(Clone)] +pub struct FrameProfileCounters { + pub total_primitives: AverageIntProfileCounter, + pub visible_primitives: AverageIntProfileCounter, + pub targets_used: AverageIntProfileCounter, + pub targets_changed: AverageIntProfileCounter, + pub targets_created: AverageIntProfileCounter, +} + +impl FrameProfileCounters { + pub fn new() -> Self { + FrameProfileCounters { + total_primitives: AverageIntProfileCounter::new( + "Total Primitives", + None, Some(expected::TOTAL_PRIMITIVES), + ), + visible_primitives: AverageIntProfileCounter::new( + "Visible Primitives", + None, Some(expected::VISIBLE_PRIMITIVES), + ), + targets_used: AverageIntProfileCounter::new( + "Used targets", + None, Some(expected::USED_TARGETS), + ), + targets_changed: AverageIntProfileCounter::new( + "Changed targets", + None, Some(expected::CHANGED_TARGETS), + ), + targets_created: AverageIntProfileCounter::new( + "Created targets", + None, Some(expected::CREATED_TARGETS), + ), + } + } + + pub fn reset_targets(&mut self) { + self.targets_used.reset(); + self.targets_changed.reset(); + self.targets_created.reset(); + } +} + +#[derive(Clone)] +pub struct TextureCacheProfileCounters { + pub pages_alpha8_linear: ResourceProfileCounter, + pub pages_alpha16_linear: ResourceProfileCounter, + pub pages_color8_linear: ResourceProfileCounter, + pub pages_color8_nearest: ResourceProfileCounter, + pub pages_picture: ResourceProfileCounter, + pub rasterized_blob_pixels: ResourceProfileCounter, + pub standalone_bytes: IntProfileCounter, + pub shared_bytes: IntProfileCounter, +} + +impl TextureCacheProfileCounters { + pub fn new() -> Self { + TextureCacheProfileCounters { + pages_alpha8_linear: ResourceProfileCounter::new("Texture A8 cached pages", None, None), + pages_alpha16_linear: ResourceProfileCounter::new("Texture A16 cached pages", None, None), + pages_color8_linear: ResourceProfileCounter::new("Texture RGBA8 cached pages (L)", None, None), + pages_color8_nearest: ResourceProfileCounter::new("Texture RGBA8 cached pages (N)", None, None), + pages_picture: ResourceProfileCounter::new("Picture cached pages", None, None), + rasterized_blob_pixels: ResourceProfileCounter::new( + "Rasterized Blob Pixels", + Some(expected::NUM_RASTERIZED_BLOBS), + Some(expected::RASTERIZED_BLOBS_MB), + ), + standalone_bytes: IntProfileCounter::new("Standalone", None), + shared_bytes: IntProfileCounter::new("Shared", None), + } + } +} + +#[derive(Clone)] +pub struct GpuCacheProfileCounters { + pub allocated_rows: AverageIntProfileCounter, + pub allocated_blocks: AverageIntProfileCounter, + pub updated_rows: AverageIntProfileCounter, + pub updated_blocks: AverageIntProfileCounter, + pub saved_blocks: AverageIntProfileCounter, +} + +impl GpuCacheProfileCounters { + pub fn new() -> Self { + GpuCacheProfileCounters { + allocated_rows: AverageIntProfileCounter::new( + "GPU cache rows: total", + None, Some(expected::GPU_CACHE_ROWS_TOTAL), + ), + updated_rows: AverageIntProfileCounter::new( + "GPU cache rows: updated", + None, Some(expected::GPU_CACHE_ROWS_UPDATED), + ), + allocated_blocks: AverageIntProfileCounter::new( + "GPU cache blocks: total", + None, Some(expected::GPU_CACHE_BLOCKS_TOTAL), + ), + updated_blocks: AverageIntProfileCounter::new( + "GPU cache blocks: updated", + None, Some(expected::GPU_CACHE_BLOCKS_UPDATED), + ), + saved_blocks: AverageIntProfileCounter::new( + "GPU cache blocks: saved", + None, Some(expected::GPU_CACHE_BLOCKS_SAVED), + ), + } + } +} + +#[derive(Clone)] +pub struct BackendProfileCounters { + pub total_time: TimeProfileCounter, + pub resources: ResourceProfileCounters, + pub txn: TransactionProfileCounters, + pub intern: InternProfileCounters, + pub scene_changed: bool, +} + +#[derive(Clone)] +pub struct ResourceProfileCounters { + pub font_templates: ResourceProfileCounter, + pub image_templates: ResourceProfileCounter, + pub texture_cache: TextureCacheProfileCounters, + pub gpu_cache: GpuCacheProfileCounters, + pub picture_cache_slices: IntProfileCounter, +} + +#[derive(Clone)] +pub struct TransactionProfileCounters { + pub display_list_build_time: TimeProfileCounter, + pub scene_build_time: TimeProfileCounter, + /// Time between when the display list is built and when it is sent by the API. + pub content_send_time: TimeProfileCounter, + /// Time between sending the SetDisplayList from the API and picking it up on + /// the render scene builder thread. + pub api_send_time: TimeProfileCounter, + /// Sum of content_send_time and api_send_time. + pub total_send_time: TimeProfileCounter, + pub display_lists: ResourceProfileCounter, +} + +macro_rules! declare_intern_profile_counters { + ( $( $name:ident : $ty:ty, )+ ) => { + #[derive(Clone)] + pub struct InternProfileCounters { + $( + pub $name: ResourceProfileCounter, + )+ + } + + impl InternProfileCounters { + fn draw( + &self, + debug_renderer: &mut DebugRenderer, + draw_state: &mut DrawState, + ) { + Profiler::draw_counters( + &[ + $( + &self.$name, + )+ + ], + None, + debug_renderer, + false, + draw_state, + ); } - other.events[i] = Event::None; + } + } +} + +crate::enumerate_interners!(declare_intern_profile_counters); + +impl TransactionProfileCounters { + pub fn set( + &mut self, + dl_build_start: u64, + dl_build_end: u64, + send_start: u64, + scene_build_start: u64, + scene_build_end: u64, + display_len: usize, + ) { + self.display_list_build_time.reset(); + self.content_send_time.reset(); + self.api_send_time.reset(); + self.total_send_time.reset(); + self.scene_build_time.reset(); + self.display_lists.reset(); + + let dl_build_time = dl_build_end - dl_build_start; + let scene_build_time = scene_build_end - scene_build_start; + let content_send_time = send_start - dl_build_end; + let api_send_time = scene_build_start - send_start; + self.display_list_build_time.inc(dl_build_time); + self.scene_build_time.inc(scene_build_time); + self.content_send_time.inc(content_send_time); + self.api_send_time.inc(api_send_time); + self.total_send_time.inc(content_send_time + api_send_time); + self.display_lists.inc(display_len); + } +} + +impl BackendProfileCounters { + pub fn new() -> Self { + BackendProfileCounters { + total_time: TimeProfileCounter::new( + "Backend CPU Time", false, + Some(expected::MAX_BACKEND_CPU_TIME), + ), + resources: ResourceProfileCounters { + font_templates: ResourceProfileCounter::new( + "Font Templates", + Some(expected::NUM_FONT_TEMPLATES), + Some(expected::FONT_TEMPLATES_MB), + ), + image_templates: ResourceProfileCounter::new( + "Image Templates", + Some(expected::NUM_IMAGE_TEMPLATES), + Some(expected::IMAGE_TEMPLATES_MB), + ), + picture_cache_slices: IntProfileCounter::new( + "Picture Cache Slices", + None, + ), + texture_cache: TextureCacheProfileCounters::new(), + gpu_cache: GpuCacheProfileCounters::new(), + }, + txn: TransactionProfileCounters { + display_list_build_time: TimeProfileCounter::new( + "DisplayList Build Time", false, + Some(expected::DISPLAY_LIST_BUILD_TIME) + ), + scene_build_time: TimeProfileCounter::new( + "Scene build time", false, + Some(expected::MAX_SCENE_BUILD_TIME), + ), + content_send_time: TimeProfileCounter::new( + "Content Send Time", false, + Some(expected::DISPLAY_LIST_SEND_TIME), + ), + api_send_time: TimeProfileCounter::new( + "API Send Time", false, + Some(expected::DISPLAY_LIST_SEND_TIME), + ), + total_send_time: TimeProfileCounter::new( + "Total IPC Time", false, + Some(expected::DISPLAY_LIST_TOTAL_TIME), + ), + display_lists: ResourceProfileCounter::new( + "DisplayLists Sent", + None, Some(expected::DISPLAY_LIST_MB), + ), + }, + //TODO: generate this by a macro + intern: InternProfileCounters { + prim: ResourceProfileCounter::new("Interned primitives", None, None), + conic_grad: ResourceProfileCounter::new("Interned conic gradients", None, None), + image: ResourceProfileCounter::new("Interned images", None, None), + image_border: ResourceProfileCounter::new("Interned image borders", None, None), + line_decoration: ResourceProfileCounter::new("Interned line decorations", None, None), + linear_grad: ResourceProfileCounter::new("Interned linear gradients", None, None), + normal_border: ResourceProfileCounter::new("Interned normal borders", None, None), + picture: ResourceProfileCounter::new("Interned pictures", None, None), + radial_grad: ResourceProfileCounter::new("Interned radial gradients", None, None), + text_run: ResourceProfileCounter::new("Interned text runs", None, None), + yuv_image: ResourceProfileCounter::new("Interned YUV images", None, None), + clip: ResourceProfileCounter::new("Interned clips", None, None), + filter_data: ResourceProfileCounter::new("Interned filter data", None, None), + backdrop: ResourceProfileCounter::new("Interned backdrops", None, None), + }, + scene_changed: false, } } - pub fn clear(&mut self) { - for evt in &mut self.events { - *evt = Event::None; + pub fn reset(&mut self) { + self.total_time.reset(); + self.resources.texture_cache.rasterized_blob_pixels.reset(); + self.scene_changed = false; + } +} + +pub struct RendererProfileCounters { + pub frame_counter: IntProfileCounter, + pub frame_time: AverageTimeProfileCounter, + pub draw_calls: AverageIntProfileCounter, + pub vertices: AverageIntProfileCounter, + pub vao_count_and_size: ResourceProfileCounter, + pub color_passes: AverageIntProfileCounter, + pub alpha_passes: AverageIntProfileCounter, + pub texture_data_uploaded: AverageIntProfileCounter, + pub rendered_picture_cache_tiles: AverageIntProfileCounter, + pub total_picture_cache_tiles: AverageIntProfileCounter, +} + +pub struct RendererProfileTimers { + pub cpu_time: TimeProfileCounter, + pub gpu_graph: TimeProfileCounter, + pub gpu_samples: Vec, +} + +impl RendererProfileCounters { + pub fn new() -> Self { + RendererProfileCounters { + frame_counter: IntProfileCounter::new("Frame", None), + frame_time: AverageTimeProfileCounter::new( + "FPS", true, None, None, + ), + draw_calls: AverageIntProfileCounter::new( + "Draw Calls", + None, Some(expected::DRAW_CALLS), + ), + vertices: AverageIntProfileCounter::new( + "Vertices", + None, Some(expected::VERTICES), + ), + vao_count_and_size: ResourceProfileCounter::new("VAO", None, None), + color_passes: AverageIntProfileCounter::new( + "Color passes", + None, Some(expected::COLOR_PASSES), + ), + alpha_passes: AverageIntProfileCounter::new( + "Alpha passes", + None, Some(expected::ALPHA_PASSES), + ), + texture_data_uploaded: AverageIntProfileCounter::new( + "Texture data, kb", + None, Some(expected::TEXTURE_DATA_UPLOADED), + ), + rendered_picture_cache_tiles: AverageIntProfileCounter::new( + "Rendered tiles", + None, Some(expected::RENDERED_PICTURE_CACHE_TILES), + ), + total_picture_cache_tiles: AverageIntProfileCounter::new( + "Total tiles", + None, Some(expected::TOTAL_PICTURE_CACHE_TILES), + ), + } + } + + pub fn get_draw_calls(&mut self) -> u64 { + self.draw_calls.accum + } + + pub fn reset(&mut self) { + self.draw_calls.reset(); + self.vertices.reset(); + self.color_passes.reset(); + self.alpha_passes.reset(); + self.texture_data_uploaded.reset(); + self.rendered_picture_cache_tiles.reset(); + self.total_picture_cache_tiles.reset(); + } +} + +impl RendererProfileTimers { + pub fn new() -> Self { + RendererProfileTimers { + cpu_time: TimeProfileCounter::new("Renderer CPU Time", false, None), + gpu_samples: Vec::new(), + gpu_graph: TimeProfileCounter::new("GPU Time", false, None), } } } #[derive(Debug)] -pub struct GraphStats { - pub min: f64, - pub avg: f64, - pub max: f64, - pub sum: f64, - pub samples: usize, +struct GraphStats { + min_value: f32, + mean_value: f32, + max_value: f32, + sum: f32, } -#[derive(Debug)] -pub struct Graph { - values: VecDeque, +struct ProfileGraph { + max_samples: usize, + scale: f32, + values: VecDeque, + short_description: &'static str, + unit_description: &'static str, } -impl Graph { - fn new(max_samples: usize) -> Self { - let mut values = VecDeque::new(); - values.reserve(max_samples); - - Graph { values } +impl ProfileGraph { + fn new( + max_samples: usize, + scale: f32, + short_description: &'static str, + unit_description: &'static str, + ) -> Self { + ProfileGraph { + max_samples, + scale, + values: VecDeque::new(), + short_description, + unit_description, + } } - fn set(&mut self, val: f64) { - if self.values.len() == self.values.capacity() { + fn push(&mut self, ns: u64) { + let val = ns as f64 * self.scale as f64; + if self.values.len() == self.max_samples { self.values.pop_back(); } - self.values.push_front(val); + self.values.push_front(val as f32); } - pub fn stats(&self) -> GraphStats { + fn stats(&self) -> GraphStats { let mut stats = GraphStats { - min: f64::MAX, - avg: 0.0, - max: -f64::MAX, + min_value: f32::MAX, + mean_value: 0.0, + max_value: -f32::MAX, sum: 0.0, - samples: 0, }; - let mut samples = 0; for value in &self.values { - if value.is_finite() { - stats.min = stats.min.min(*value); - stats.max = stats.max.max(*value); - stats.sum += *value; - samples += 1; - } + stats.min_value = stats.min_value.min(*value); + stats.max_value = stats.max_value.max(*value); + stats.sum += *value; } - if samples > 0 { - stats.avg = stats.sum / samples as f64; - stats.samples = samples; + if !self.values.is_empty() { + stats.mean_value = stats.sum / self.values.len() as f32; } stats } + + fn draw_graph( + &self, + x: f32, + y: f32, + description: &'static str, + debug_renderer: &mut DebugRenderer, + ) -> default::Rect { + let size = Size2D::new(600.0, 100.0); + let line_height = debug_renderer.line_height(); + let graph_rect = Rect::new(Point2D::new(x, y), size); + let mut rect = graph_rect.inflate(10.0, 10.0); + + let stats = self.stats(); + + let text_color = ColorU::new(255, 255, 0, 255); + let text_origin = rect.origin + vec2(rect.size.width, 20.0); + debug_renderer.add_text( + text_origin.x, + text_origin.y, + description, + ColorU::new(0, 255, 0, 255), + None, + ); + debug_renderer.add_text( + text_origin.x, + text_origin.y + line_height, + &format!("Min: {:.2} {}", stats.min_value, self.unit_description), + text_color, + None, + ); + debug_renderer.add_text( + text_origin.x, + text_origin.y + line_height * 2.0, + &format!("Mean: {:.2} {}", stats.mean_value, self.unit_description), + text_color, + None, + ); + debug_renderer.add_text( + text_origin.x, + text_origin.y + line_height * 3.0, + &format!("Max: {:.2} {}", stats.max_value, self.unit_description), + text_color, + None, + ); + + rect.size.width += 140.0; + debug_renderer.add_quad( + rect.origin.x, + rect.origin.y, + rect.origin.x + rect.size.width + 10.0, + rect.origin.y + rect.size.height, + ColorU::new(25, 25, 25, 200), + ColorU::new(51, 51, 51, 200), + ); + + let bx1 = graph_rect.max_x(); + let by1 = graph_rect.max_y(); + + let w = graph_rect.size.width / self.max_samples as f32; + let h = graph_rect.size.height; + + let color_t0 = ColorU::new(0, 255, 0, 255); + let color_b0 = ColorU::new(0, 180, 0, 255); + + let color_t1 = ColorU::new(0, 255, 0, 255); + let color_b1 = ColorU::new(0, 180, 0, 255); + + let color_t2 = ColorU::new(255, 0, 0, 255); + let color_b2 = ColorU::new(180, 0, 0, 255); + + for (index, sample) in self.values.iter().enumerate() { + let sample = *sample; + let x1 = bx1 - index as f32 * w; + let x0 = x1 - w; + + let y0 = if stats.max_value != 0.0 { + by1 - (sample / stats.max_value) as f32 * h + } else { + by1 + }; + let y1 = by1; + + let (color_top, color_bottom) = if sample < 1000.0 / 60.0 { + (color_t0, color_b0) + } else if sample < 1000.0 / 30.0 { + (color_t1, color_b1) + } else { + (color_t2, color_b2) + }; + + debug_renderer.add_quad(x0, y0, x1, y1, color_top, color_bottom); + } + + rect + } } -#[derive(Copy, Clone, Debug, PartialEq, Eq)] -pub enum ShowAs { - Float, - Int, +impl ProfileCounter for ProfileGraph { + fn description(&self) -> &'static str { + self.short_description + } + + fn value(&self) -> String { + format!("{:.2}ms", self.stats().mean_value) + } + + fn is_expected(&self) -> bool { true } } struct GpuFrame { @@ -1586,25 +1143,785 @@ impl GpuFrameCollection { } } -pub fn ns_to_ms(ns: u64) -> f64 { - ns as f64 / 1_000_000.0 +impl GpuFrameCollection { + fn draw(&self, x: f32, y: f32, debug_renderer: &mut DebugRenderer) -> default::Rect { + let graph_rect = Rect::new( + Point2D::new(x, y), + Size2D::new(GRAPH_WIDTH, GRAPH_HEIGHT), + ); + let bounding_rect = graph_rect.inflate(GRAPH_PADDING, GRAPH_PADDING); + + debug_renderer.add_quad( + bounding_rect.origin.x, + bounding_rect.origin.y, + bounding_rect.origin.x + bounding_rect.size.width, + bounding_rect.origin.y + bounding_rect.size.height, + ColorU::new(25, 25, 25, 200), + ColorU::new(51, 51, 51, 200), + ); + + let w = graph_rect.size.width; + let mut y0 = graph_rect.origin.y; + + let max_time = self.frames + .iter() + .max_by_key(|f| f.total_time) + .unwrap() + .total_time as f32; + + let mut tags_present = FastHashMap::default(); + + for frame in &self.frames { + let y1 = y0 + GRAPH_FRAME_HEIGHT; + + let mut current_ns = 0; + for sample in &frame.samples { + let x0 = graph_rect.origin.x + w * current_ns as f32 / max_time; + current_ns += sample.time_ns; + let x1 = graph_rect.origin.x + w * current_ns as f32 / max_time; + let mut bottom_color = sample.tag.color; + bottom_color.a *= 0.5; + + debug_renderer.add_quad( + x0, + y0, + x1, + y1, + sample.tag.color.into(), + bottom_color.into(), + ); + + tags_present.insert(sample.tag.label, sample.tag.color); + } + + y0 = y1; + } + + // Add a legend to see which color correspond to what primitive. + const LEGEND_SIZE: f32 = 20.0; + const PADDED_LEGEND_SIZE: f32 = 25.0; + if !tags_present.is_empty() { + debug_renderer.add_quad( + bounding_rect.max_x() + GRAPH_PADDING, + bounding_rect.origin.y, + bounding_rect.max_x() + GRAPH_PADDING + 200.0, + bounding_rect.origin.y + tags_present.len() as f32 * PADDED_LEGEND_SIZE + GRAPH_PADDING, + ColorU::new(25, 25, 25, 200), + ColorU::new(51, 51, 51, 200), + ); + } + + for (i, (label, &color)) in tags_present.iter().enumerate() { + let x0 = bounding_rect.origin.x + bounding_rect.size.width + GRAPH_PADDING * 2.0; + let y0 = bounding_rect.origin.y + GRAPH_PADDING + i as f32 * PADDED_LEGEND_SIZE; + + debug_renderer.add_quad( + x0, y0, x0 + LEGEND_SIZE, y0 + LEGEND_SIZE, + color.into(), + color.into(), + ); + + debug_renderer.add_text( + x0 + PADDED_LEGEND_SIZE, + y0 + LEGEND_SIZE * 0.75, + label, + ColorU::new(255, 255, 0, 255), + None, + ); + } + + bounding_rect + } } -pub fn bytes_to_mb(bytes: usize) -> f64 { - bytes as f64 / 1_000_000.0 +struct DrawState { + x_left: f32, + y_left: f32, + x_right: f32, + y_right: f32, } -#[derive(Debug, PartialEq)] -enum Item { - Counters(Vec), - Graph(usize), - ChangeIndicator(usize), - Fps, - GpuTimeQueries, - GpuCacheBars, - Text(String), - Space, - Column, - Row, +pub struct Profiler { + draw_state: DrawState, + backend_graph: ProfileGraph, + renderer_graph: ProfileGraph, + gpu_graph: ProfileGraph, + ipc_graph: ProfileGraph, + display_list_build_graph: ProfileGraph, + scene_build_graph: ProfileGraph, + blob_raster_graph: ProfileGraph, + backend_time: AverageTimeProfileCounter, + renderer_time: AverageTimeProfileCounter, + gpu_time: AverageTimeProfileCounter, + ipc_time: AverageTimeProfileCounter, + gpu_frames: GpuFrameCollection, + cooldowns: Vec, } +impl Profiler { + pub fn new() -> Self { + let to_ms_scale = 1.0 / 1000000.0; + Profiler { + draw_state: DrawState { + x_left: 0.0, + y_left: 0.0, + x_right: 0.0, + y_right: 0.0, + }, + backend_graph: ProfileGraph::new(600, to_ms_scale, "Backend:", "ms"), + renderer_graph: ProfileGraph::new(600, to_ms_scale, "Renderer:", "ms"), + gpu_graph: ProfileGraph::new(600, to_ms_scale, "GPU:", "ms"), + ipc_graph: ProfileGraph::new(600, to_ms_scale, "IPC:", "ms"), + display_list_build_graph: ProfileGraph::new(600, to_ms_scale, "DisplayList build", "ms"), + scene_build_graph: ProfileGraph::new(600, to_ms_scale, "Scene build:", "ms"), + blob_raster_graph: ProfileGraph::new(600, 1.0, "Rasterized blob pixels:", "px"), + gpu_frames: GpuFrameCollection::new(), + backend_time: AverageTimeProfileCounter::new( + "Backend:", false, + Some(expected::AVG_BACKEND_CPU_TIME), + Some(expected::MAX_BACKEND_CPU_TIME), + ), + renderer_time: AverageTimeProfileCounter::new( + "Renderer:", false, + Some(expected::AVG_RENDERER_CPU_TIME), + Some(expected::MAX_RENDERER_CPU_TIME), + ), + ipc_time: AverageTimeProfileCounter::new( + "IPC:", false, + Some(expected::AVG_IPC_TIME), + Some(expected::MAX_IPC_TIME), + ), + gpu_time: AverageTimeProfileCounter::new( + "GPU:", false, + Some(expected::AVG_GPU_TIME), + Some(expected::MAX_GPU_TIME), + ), + cooldowns: Vec::new(), + } + } + + // If we have an array of "cooldown" counters, then only display profiles that + // are out of the ordinary and keep displaying them until the cooldown is over. + fn draw_counters( + counters: &[&T], + mut cooldowns: Option<&mut [i32]>, + debug_renderer: &mut DebugRenderer, + left: bool, + draw_state: &mut DrawState, + ) { + let mut label_rect = Rect::zero(); + let mut value_rect = Rect::zero(); + let (mut current_x, mut current_y) = if left { + (draw_state.x_left, draw_state.y_left) + } else { + (draw_state.x_right, draw_state.y_right) + }; + let mut color_index = 0; + let line_height = debug_renderer.line_height(); + + let colors = [ + // Regular values, + ColorU::new(255, 255, 255, 255), + ColorU::new(255, 255, 0, 255), + // Unexpected values, + ColorU::new(255, 80, 0, 255), + ColorU::new(255, 0, 0, 255), + ]; + + for (idx, counter) in counters.iter().enumerate() { + if let Some(cooldowns) = cooldowns.as_mut() { + if !counter.is_expected() { + cooldowns[idx] = 40; + } + if cooldowns[idx] == 0 { + continue; + } + } + let rect = debug_renderer.add_text( + current_x, + current_y, + counter.description(), + colors[color_index], + None, + ); + color_index = (color_index + 1) % 2; + + label_rect = label_rect.union(&rect); + current_y += line_height; + } + + color_index = 0; + current_x = label_rect.origin.x + label_rect.size.width + 60.0; + current_y = if left { draw_state.y_left } else { draw_state.y_right }; + + for (idx, counter) in counters.iter().enumerate() { + let expected_offset = if counter.is_expected() || cooldowns.is_some() { 0 } else { 2 }; + if let Some(cooldowns) = cooldowns.as_mut() { + if cooldowns[idx] > 0 { + cooldowns[idx] -= 1; + } else { + continue; + } + } + let rect = debug_renderer.add_text( + current_x, + current_y, + &counter.value(), + colors[color_index + expected_offset], + None, + ); + color_index = (color_index + 1) % 2; + + value_rect = value_rect.union(&rect); + current_y += line_height; + } + + let total_rect = label_rect.union(&value_rect).inflate(10.0, 10.0); + debug_renderer.add_quad( + total_rect.origin.x, + total_rect.origin.y, + total_rect.origin.x + total_rect.size.width, + total_rect.origin.y + total_rect.size.height, + ColorF::new(0.1, 0.1, 0.1, 0.8).into(), + ColorF::new(0.2, 0.2, 0.2, 0.8).into(), + ); + let new_y = total_rect.origin.y + total_rect.size.height + 30.0; + if left { + draw_state.y_left = new_y; + } else { + draw_state.y_right = new_y; + } + } + + fn draw_bar( + &mut self, + label: &str, + label_color: ColorU, + counters: &[(ColorU, &AverageIntProfileCounter)], + debug_renderer: &mut DebugRenderer, + ) -> default::Rect { + let mut rect = debug_renderer.add_text( + self.draw_state.x_left, + self.draw_state.y_left, + label, + label_color, + None, + ); + + let x_base = rect.origin.x + rect.size.width + 10.0; + let height = debug_renderer.line_height(); + let width = (self.draw_state.x_right - 30.0 - x_base).max(0.0); + let total_value = counters.last().unwrap().1.get(); + let scale = if total_value != 0 { + width / total_value as f32 + } else { + 0.0 + }; + + let mut x_current = x_base; + + for &(color, counter) in counters { + let x_stop = x_base + counter.get() as f32 * scale; + debug_renderer.add_quad( + x_current, + rect.origin.y, + x_stop, + rect.origin.y + height, + color, + color, + ); + x_current = x_stop; + } + + self.draw_state.y_left += height; + + rect.size.width += width + 10.0; + rect + } + + fn draw_gpu_cache_bars( + &mut self, + counters: &GpuCacheProfileCounters, + debug_renderer: &mut DebugRenderer, + ) { + let color_updated = ColorU::new(0xFF, 0, 0, 0xFF); + let color_free = ColorU::new(0, 0, 0xFF, 0xFF); + let color_saved = ColorU::new(0, 0xFF, 0, 0xFF); + + let mut requested_blocks = AverageIntProfileCounter::new("", None, None); + requested_blocks.set(counters.updated_blocks.get() + counters.saved_blocks.get()); + + let mut total_blocks = AverageIntProfileCounter::new("", None, None); + total_blocks.set(counters.allocated_rows.get() * MAX_VERTEX_TEXTURE_WIDTH); + + let rect0 = self.draw_bar( + &format!("GPU cache rows ({}):", counters.allocated_rows.get()), + ColorU::new(0xFF, 0xFF, 0xFF, 0xFF), + &[ + (color_updated, &counters.updated_rows), + (color_free, &counters.allocated_rows), + ], + debug_renderer, + ); + + let rect1 = self.draw_bar( + "GPU cache blocks", + ColorU::new(0xFF, 0xFF, 0, 0xFF), + &[ + (color_updated, &counters.updated_blocks), + (color_saved, &requested_blocks), + (color_free, &counters.allocated_blocks), + (ColorU::new(0, 0, 0, 0xFF), &total_blocks), + ], + debug_renderer, + ); + + let total_rect = rect0.union(&rect1).inflate(10.0, 10.0); + debug_renderer.add_quad( + total_rect.origin.x, + total_rect.origin.y, + total_rect.origin.x + total_rect.size.width, + total_rect.origin.y + total_rect.size.height, + ColorF::new(0.1, 0.1, 0.1, 0.8).into(), + ColorF::new(0.2, 0.2, 0.2, 0.8).into(), + ); + + self.draw_state.y_left = total_rect.origin.y + total_rect.size.height + 30.0; + } + + fn draw_frame_bars( + &mut self, + counters: &FrameProfileCounters, + debug_renderer: &mut DebugRenderer, + ) { + let rect0 = self.draw_bar( + &format!("primitives ({}):", counters.total_primitives.get()), + ColorU::new(0xFF, 0xFF, 0xFF, 0xFF), + &[ + (ColorU::new(0, 0, 0xFF, 0xFF), &counters.visible_primitives), + (ColorU::new(0, 0, 0, 0xFF), &counters.total_primitives), + ], + debug_renderer, + ); + + let rect1 = self.draw_bar( + &format!("GPU targets ({}):", &counters.targets_used.get()), + ColorU::new(0xFF, 0xFF, 0, 0xFF), + &[ + (ColorU::new(0, 0, 0xFF, 0xFF), &counters.targets_created), + (ColorU::new(0xFF, 0, 0, 0xFF), &counters.targets_changed), + (ColorU::new(0, 0xFF, 0, 0xFF), &counters.targets_used), + ], + debug_renderer, + ); + + let total_rect = rect0.union(&rect1).inflate(10.0, 10.0); + debug_renderer.add_quad( + total_rect.origin.x, + total_rect.origin.y, + total_rect.origin.x + total_rect.size.width, + total_rect.origin.y + total_rect.size.height, + ColorF::new(0.1, 0.1, 0.1, 0.8).into(), + ColorF::new(0.2, 0.2, 0.2, 0.8).into(), + ); + + self.draw_state.y_left = total_rect.origin.y + total_rect.size.height + 30.0; + } + + fn draw_compact_profile( + &mut self, + backend_profile: &BackendProfileCounters, + renderer_profile: &RendererProfileCounters, + debug_renderer: &mut DebugRenderer, + ) { + Profiler::draw_counters( + &[ + &renderer_profile.frame_time as &dyn ProfileCounter, + &renderer_profile.color_passes, + &renderer_profile.alpha_passes, + &renderer_profile.draw_calls, + &renderer_profile.vertices, + &renderer_profile.rendered_picture_cache_tiles, + &renderer_profile.texture_data_uploaded, + &backend_profile.resources.picture_cache_slices, + &self.ipc_time, + &self.backend_time, + &self.renderer_time, + &self.gpu_time, + ], + None, + debug_renderer, + true, + &mut self.draw_state, + ); + } + + fn draw_full_profile( + &mut self, + frame_profiles: &[FrameProfileCounters], + backend_profile: &BackendProfileCounters, + renderer_profile: &RendererProfileCounters, + renderer_timers: &mut RendererProfileTimers, + gpu_samplers: &[GpuSampler], + screen_fraction: f32, + debug_renderer: &mut DebugRenderer, + ) { + Profiler::draw_counters( + &[ + &renderer_profile.frame_time as &dyn ProfileCounter, + &renderer_profile.frame_counter, + &renderer_profile.color_passes, + &renderer_profile.alpha_passes, + &renderer_profile.rendered_picture_cache_tiles, + &renderer_profile.total_picture_cache_tiles, + &renderer_profile.texture_data_uploaded, + &backend_profile.resources.picture_cache_slices, + &backend_profile.resources.texture_cache.shared_bytes, + &backend_profile.resources.texture_cache.standalone_bytes, + ], + None, + debug_renderer, + true, + &mut self.draw_state + ); + + self.draw_gpu_cache_bars( + &backend_profile.resources.gpu_cache, + debug_renderer, + ); + + Profiler::draw_counters( + &[ + &backend_profile.resources.font_templates, + &backend_profile.resources.image_templates, + ], + None, + debug_renderer, + true, + &mut self.draw_state + ); + + backend_profile.intern.draw(debug_renderer, &mut self.draw_state); + + Profiler::draw_counters( + &[ + &backend_profile.resources.texture_cache.pages_alpha8_linear, + &backend_profile.resources.texture_cache.pages_color8_linear, + &backend_profile.resources.texture_cache.pages_color8_nearest, + &backend_profile.txn.display_lists, + ], + None, + debug_renderer, + true, + &mut self.draw_state + ); + + Profiler::draw_counters( + &[ + &backend_profile.txn.display_list_build_time, + &backend_profile.txn.scene_build_time, + &backend_profile.txn.content_send_time, + &backend_profile.txn.api_send_time, + &backend_profile.txn.total_send_time, + ], + None, + debug_renderer, + true, + &mut self.draw_state + ); + + for frame_profile in frame_profiles { + self.draw_frame_bars(frame_profile, debug_renderer); + } + + Profiler::draw_counters( + &[&renderer_profile.draw_calls, &renderer_profile.vertices], + None, + debug_renderer, + true, + &mut self.draw_state + ); + + Profiler::draw_counters( + &[ + &backend_profile.total_time, + &renderer_timers.cpu_time, + &renderer_timers.gpu_graph, + ], + None, + debug_renderer, + false, + &mut self.draw_state + ); + + if !gpu_samplers.is_empty() { + let mut samplers = Vec::::new(); + // Gathering unique GPU samplers. This has O(N^2) complexity, + // but we only have a few samplers per target. + let mut total = 0.0; + for sampler in gpu_samplers { + let value = sampler.count as f32 * screen_fraction; + total += value; + match samplers.iter().position(|s| { + s.description as *const _ == sampler.tag.label as *const _ + }) { + Some(pos) => samplers[pos].value += value, + None => samplers.push(PercentageProfileCounter { + description: sampler.tag.label, + value, + }), + } + } + samplers.push(PercentageProfileCounter { + description: "Total", + value: total, + }); + let samplers: Vec<&dyn ProfileCounter> = samplers.iter().map(|sampler| { + sampler as &dyn ProfileCounter + }).collect(); + Profiler::draw_counters( + &samplers, + None, + debug_renderer, + false, + &mut self.draw_state, + ); + } + + let rect = + self.backend_graph + .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "CPU (backend)", debug_renderer); + self.draw_state.y_right += rect.size.height + PROFILE_PADDING; + let rect = self.renderer_graph.draw_graph( + self.draw_state.x_right, + self.draw_state.y_right, + "CPU (renderer)", + debug_renderer, + ); + self.draw_state.y_right += rect.size.height + PROFILE_PADDING; + let rect = + self.ipc_graph + .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "DisplayList IPC", debug_renderer); + self.draw_state.y_right += rect.size.height + PROFILE_PADDING; + + let rect = self.display_list_build_graph + .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "DisplayList build", debug_renderer); + self.draw_state.y_right += rect.size.height + PROFILE_PADDING; + + let rect = self.scene_build_graph + .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "Scene build", debug_renderer); + self.draw_state.y_right += rect.size.height + PROFILE_PADDING; + + let rect = self.gpu_graph + .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "GPU", debug_renderer); + self.draw_state.y_right += rect.size.height + PROFILE_PADDING; + + let rect = self.blob_raster_graph + .draw_graph(self.draw_state.x_right, self.draw_state.y_right, "Blob pixels", debug_renderer); + self.draw_state.y_right += rect.size.height + PROFILE_PADDING; + + let rect = self.gpu_frames + .draw(self.draw_state.x_left, f32::max(self.draw_state.y_left, self.draw_state.y_right), debug_renderer); + self.draw_state.y_right += rect.size.height + PROFILE_PADDING; + } + + fn draw_smart_profile( + &mut self, + backend_profile: &BackendProfileCounters, + renderer_profile: &RendererProfileCounters, + debug_renderer: &mut DebugRenderer, + ) { + while self.cooldowns.len() < 18 { + self.cooldowns.push(0); + } + + // Always show the fps counter. + Profiler::draw_counters( + &[ + &renderer_profile.frame_time, + ], + None, + debug_renderer, + true, + &mut self.draw_state, + ); + + let mut start = 0; + let counters: &[&[&dyn ProfileCounter]] = &[ + &[ + &self.backend_time, + &self.renderer_time, + &self.gpu_time, + ], + &[ + &renderer_profile.color_passes, + &renderer_profile.alpha_passes, + &renderer_profile.draw_calls, + &renderer_profile.vertices, + &renderer_profile.rendered_picture_cache_tiles, + &renderer_profile.total_picture_cache_tiles, + ], + &[ + &backend_profile.resources.gpu_cache.allocated_rows, + &backend_profile.resources.gpu_cache.updated_rows, + &backend_profile.resources.gpu_cache.allocated_blocks, + &backend_profile.resources.gpu_cache.updated_blocks, + &backend_profile.resources.gpu_cache.saved_blocks, + ], + &[ + &backend_profile.resources.image_templates, + &backend_profile.resources.font_templates, + &backend_profile.resources.texture_cache.rasterized_blob_pixels, + &backend_profile.txn.display_lists, + ], + ]; + + for group in counters { + let end = start + group.len(); + Profiler::draw_counters( + &group[..], + Some(&mut self.cooldowns[start..end]), + debug_renderer, + true, + &mut self.draw_state, + ); + start = end; + } + } + + pub fn draw_profile( + &mut self, + frame_profiles: &[FrameProfileCounters], + backend_profile: &BackendProfileCounters, + renderer_profile: &RendererProfileCounters, + renderer_timers: &mut RendererProfileTimers, + gpu_samplers: &[GpuSampler], + screen_fraction: f32, + debug_renderer: &mut DebugRenderer, + style: ProfileStyle, + ) { + self.draw_state.x_left = 20.0; + self.draw_state.y_left = 50.0; + self.draw_state.x_right = 450.0; + self.draw_state.y_right = 40.0; + + let mut gpu_graph = 0; + let gpu_graphrs = mem::replace(&mut renderer_timers.gpu_samples, Vec::new()); + for sample in &gpu_graphrs { + gpu_graph += sample.time_ns; + } + renderer_timers.gpu_graph.set(gpu_graph); + + self.backend_graph + .push(backend_profile.total_time.nanoseconds); + self.backend_time.set(backend_profile.total_time.nanoseconds); + self.renderer_graph + .push(renderer_timers.cpu_time.nanoseconds); + self.renderer_time.set(renderer_timers.cpu_time.nanoseconds); + self.ipc_graph + .push(backend_profile.txn.total_send_time.nanoseconds); + self.display_list_build_graph + .push(backend_profile.txn.display_list_build_time.nanoseconds); + self.scene_build_graph + .push(backend_profile.txn.scene_build_time.nanoseconds); + self.blob_raster_graph + .push(backend_profile.resources.texture_cache.rasterized_blob_pixels.size as u64); + self.ipc_time.set(backend_profile.txn.total_send_time.nanoseconds); + self.gpu_graph.push(gpu_graph); + self.gpu_time.set(gpu_graph); + self.gpu_frames.push(gpu_graph, gpu_graphrs); + + match style { + ProfileStyle::Full => { + self.draw_full_profile( + frame_profiles, + backend_profile, + renderer_profile, + renderer_timers, + gpu_samplers, + screen_fraction, + debug_renderer, + ); + } + ProfileStyle::Compact => { + self.draw_compact_profile( + backend_profile, + renderer_profile, + debug_renderer, + ); + } + ProfileStyle::Smart => { + self.draw_smart_profile( + backend_profile, + renderer_profile, + debug_renderer, + ); + } + ProfileStyle::NoDraw => { + // Don't draw anything. We just care about collecting samples. + } + } + } + + #[cfg(feature = "capture")] + pub fn dump_stats(&self, sink: &mut dyn std::io::Write) -> std::io::Result<()> { + writeln!(sink, "Backend (ms) {:?}", self.backend_graph.stats())?; + writeln!(sink, "Renderer (ms) {:?}", self.renderer_graph.stats())?; + writeln!(sink, "GPU (ms) {:?}", self.gpu_graph.stats())?; + writeln!(sink, "IPC (ms) {:?}", self.ipc_graph.stats())?; + writeln!(sink, "DisplayList builder (ms) {:?}", self.display_list_build_graph.stats())?; + writeln!(sink, "Scene build (ms) {:?}", self.scene_build_graph.stats())?; + writeln!(sink, "Rasterized blob (px) {:?}", self.blob_raster_graph.stats())?; + Ok(()) + } +} + +pub struct ChangeIndicator { + counter: u32, +} + +impl ChangeIndicator { + pub fn new() -> Self { + ChangeIndicator { + counter: 0 + } + } + + pub fn changed(&mut self) { + self.counter = (self.counter + 1) % 15; + } + + const WIDTH : f32 = 20.0; + const HEIGHT: f32 = 10.0; + + pub fn width() -> f32 { + ChangeIndicator::WIDTH * 16.0 + } + + pub fn draw( + &self, + x: f32, y: f32, + color: ColorU, + debug_renderer: &mut DebugRenderer + ) { + let margin = 0.0; + let tx = self.counter as f32 * ChangeIndicator::WIDTH; + debug_renderer.add_quad( + x - margin, + y - margin, + x + 15.0 * ChangeIndicator::WIDTH + margin, + y + ChangeIndicator::HEIGHT + margin, + ColorU::new(0, 0, 0, 150), + ColorU::new(0, 0, 0, 150), + ); + + debug_renderer.add_quad( + x + tx, + y, + x + tx + ChangeIndicator::WIDTH, + y + ChangeIndicator::HEIGHT, + color, + ColorU::new(25, 25, 25, 255), + ); + } +} diff --git a/gfx/wr/webrender/src/render_api.rs b/gfx/wr/webrender/src/render_api.rs index 75c0825bf4ab..81b6216e56ec 100644 --- a/gfx/wr/webrender/src/render_api.rs +++ b/gfx/wr/webrender/src/render_api.rs @@ -27,7 +27,6 @@ use crate::api::units::*; use crate::api_resources::ApiResources; use crate::scene_builder_thread::{SceneBuilderRequest, SceneBuilderResult}; use crate::intern::InterningMemoryReport; -use crate::profiler::{self, TransactionProfile}; #[repr(C)] #[derive(Clone, Copy, Debug)] @@ -389,7 +388,6 @@ impl Transaction { blob_rasterizer: None, blob_requests: Vec::new(), rasterized_blobs: Vec::new(), - profile: TransactionProfile::new(), }) } @@ -573,8 +571,6 @@ pub struct TransactionMsg { pub blob_requests: Vec, /// pub rasterized_blobs: Vec<(BlobImageRequest, BlobImageResult)>, - /// Collect various data along the rendering pipeline to display it in the embedded profiler. - pub profile: TransactionProfile, } impl fmt::Debug for TransactionMsg { @@ -1236,7 +1232,6 @@ impl RenderApi { blob_rasterizer: None, blob_requests: Vec::new(), rasterized_blobs: Vec::new(), - profile: TransactionProfile::new(), }) } @@ -1255,7 +1250,6 @@ impl RenderApi { blob_rasterizer: None, blob_requests: Vec::new(), rasterized_blobs: Vec::new(), - profile: TransactionProfile::new(), }) } @@ -1286,10 +1280,6 @@ impl RenderApi { self.resources.update(&mut transaction); transaction.use_scene_builder_thread |= !transaction.scene_ops.is_empty(); - if transaction.generate_frame { - transaction.profile.start_time(profiler::API_SEND_TIME); - transaction.profile.start_time(profiler::TOTAL_FRAME_CPU_TIME); - } if transaction.use_scene_builder_thread { let sender = if transaction.low_priority { @@ -1311,10 +1301,6 @@ impl RenderApi { .map(|(txn, id)| { let mut txn = txn.finalize(id); self.resources.update(&mut txn); - if txn.generate_frame { - txn.profile.start_time(profiler::API_SEND_TIME); - txn.profile.start_time(profiler::TOTAL_FRAME_CPU_TIME); - } txn }) diff --git a/gfx/wr/webrender/src/render_backend.rs b/gfx/wr/webrender/src/render_backend.rs index 53533eaa399e..561fe8d906c3 100644 --- a/gfx/wr/webrender/src/render_backend.rs +++ b/gfx/wr/webrender/src/render_backend.rs @@ -38,7 +38,7 @@ use crate::picture::{TileCacheLogger, PictureScratchBuffer, SliceId, TileCacheIn use crate::prim_store::{PrimitiveScratchBuffer, PrimitiveInstance}; use crate::prim_store::{PrimitiveInstanceKind, PrimTemplateCommonData, PrimitiveStore}; use crate::prim_store::interned::*; -use crate::profiler::{self, TransactionProfile}; +use crate::profiler::{BackendProfileCounters, ResourceProfileCounters}; use crate::render_task_graph::RenderTaskGraphCounters; use crate::renderer::{AsyncPropertySampler, PipelineInfo}; use crate::resource_cache::ResourceCache; @@ -274,12 +274,12 @@ macro_rules! declare_data_stores { fn apply_updates( &mut self, updates: InternerUpdates, - profile: &mut TransactionProfile, + profile_counters: &mut BackendProfileCounters, ) { $( self.$name.apply_updates( updates.$name, - profile, + &mut profile_counters.intern.$name, ); )+ } @@ -470,8 +470,6 @@ struct Document { /// Tracks if we need to invalidate dirty rects for this document, due to the picture /// cache slice configuration having changed when a new scene is swapped in. dirty_rects_are_valid: bool, - - profile: TransactionProfile, } impl Document { @@ -514,7 +512,6 @@ impl Document { loaded_scene: Scene::new(), prev_composite_descriptor: CompositeDescriptor::empty(), dirty_rects_are_valid: true, - profile: TransactionProfile::new(), } } @@ -606,12 +603,11 @@ impl Document { &mut self, resource_cache: &mut ResourceCache, gpu_cache: &mut GpuCache, + resource_profile: &mut ResourceProfileCounters, debug_flags: DebugFlags, tile_cache_logger: &mut TileCacheLogger, tile_caches: &mut FastHashMap>, ) -> RenderedDocument { - self.profile.start_time(profiler::FRAME_BUILDING_TIME); - let accumulated_scale_factor = self.view.accumulated_scale_factor(); let pan = self.view.frame.pan.to_f32() / accumulated_scale_factor; @@ -631,6 +627,7 @@ impl Document { self.view.scene.layer, self.view.scene.device_rect.origin, pan, + resource_profile, &self.dynamic_properties, &mut self.data_stores, &mut self.scratch, @@ -639,7 +636,6 @@ impl Document { tile_cache_logger, tile_caches, self.dirty_rects_are_valid, - &mut self.profile, ); frame @@ -651,12 +647,9 @@ impl Document { let is_new_scene = self.has_built_scene; self.has_built_scene = false; - self.profile.end_time(profiler::FRAME_BUILDING_TIME); - RenderedDocument { frame, is_new_scene, - profile: self.profile.take_and_reset(), } } @@ -882,7 +875,7 @@ impl RenderBackend { IdNamespace(NEXT_NAMESPACE_ID.fetch_add(1, Ordering::Relaxed) as u32) } - pub fn run(&mut self) { + pub fn run(&mut self, mut profile_counters: BackendProfileCounters) { let mut frame_counter: u32 = 0; let mut status = RenderBackendStatus::Continue; @@ -893,7 +886,7 @@ impl RenderBackend { while let RenderBackendStatus::Continue = status { status = match self.api_rx.recv() { Ok(msg) => { - self.process_api_msg(msg, &mut frame_counter) + self.process_api_msg(msg, &mut profile_counters, &mut frame_counter) } Err(..) => { RenderBackendStatus::ShutDown(None) } }; @@ -933,21 +926,36 @@ impl RenderBackend { mut txns: Vec>, result_tx: Option>, frame_counter: &mut u32, + profile_counters: &mut BackendProfileCounters, ) -> bool { self.prepare_for_frames(); self.maybe_force_nop_documents( frame_counter, + profile_counters, |document_id| txns.iter().any(|txn| txn.document_id == document_id)); let mut built_frame = false; for mut txn in txns.drain(..) { let has_built_scene = txn.built_scene.is_some(); - if let Some(doc) = self.documents.get_mut(&txn.document_id) { + if let Some(timings) = txn.timings { + if has_built_scene { + profile_counters.scene_changed = true; + } + profile_counters.txn.set( + timings.builder_start_time_ns, + timings.builder_end_time_ns, + timings.send_time_ns, + timings.scene_build_start_time_ns, + timings.scene_build_end_time_ns, + timings.display_list_len, + ); + } + + if let Some(doc) = self.documents.get_mut(&txn.document_id) { doc.removed_pipelines.append(&mut txn.removed_pipelines); doc.view.scene = txn.view; - doc.profile.merge(&mut txn.profile); if let Some(built_scene) = txn.built_scene.take() { doc.new_async_scene_ready( @@ -968,7 +976,7 @@ impl RenderBackend { self.tile_cache_logger.serialize_updates(&updates); } } - doc.data_stores.apply_updates(updates, &mut doc.profile); + doc.data_stores.apply_updates(updates, profile_counters); } // Build the hit tester while the APZ lock is held so that its content @@ -992,12 +1000,6 @@ impl RenderBackend { .spatial_tree .discard_frame_state_for_pipeline(*pipeline_id); } - - self.resource_cache.add_rasterized_blob_images( - txn.rasterized_blobs.take(), - &mut doc.profile, - ); - } else { // The document was removed while we were building it, skip it. // TODO: we might want to just ensure that removed documents are @@ -1008,6 +1010,11 @@ impl RenderBackend { continue; } + self.resource_cache.add_rasterized_blob_images( + txn.rasterized_blobs.take(), + &mut profile_counters.resources.texture_cache, + ); + built_frame |= self.update_document( txn.document_id, txn.resource_updates.take(), @@ -1016,6 +1023,7 @@ impl RenderBackend { txn.render_frame, txn.invalidate_rendered_frame, frame_counter, + profile_counters, has_built_scene, ); } @@ -1026,6 +1034,7 @@ impl RenderBackend { fn process_api_msg( &mut self, msg: ApiMsg, + profile_counters: &mut BackendProfileCounters, frame_counter: &mut u32, ) -> RenderBackendStatus { match msg { @@ -1106,7 +1115,7 @@ impl RenderBackend { } #[cfg(feature = "capture")] DebugCommand::SaveCapture(root, bits) => { - let output = self.save_capture(root, bits); + let output = self.save_capture(root, bits, profile_counters); ResultMsg::DebugOutput(output) }, #[cfg(feature = "capture")] @@ -1130,7 +1139,7 @@ impl RenderBackend { config.frame_id = frame_id; } - self.load_capture(config); + self.load_capture(config, profile_counters); for (id, doc) in &self.documents { let captured = CapturedDocument { @@ -1221,10 +1230,11 @@ impl RenderBackend { self.prepare_transactions( transaction_msgs, frame_counter, + profile_counters, ); } ApiMsg::SceneBuilderResult(msg) => { - return self.process_scene_builder_result(msg, frame_counter); + return self.process_scene_builder_result(msg, profile_counters, frame_counter); } } @@ -1234,6 +1244,7 @@ impl RenderBackend { fn process_scene_builder_result( &mut self, msg: SceneBuilderResult, + profile_counters: &mut BackendProfileCounters, frame_counter: &mut u32, ) -> RenderBackendStatus { profile_scope!("sb_msg"); @@ -1244,6 +1255,7 @@ impl RenderBackend { txns, result_tx, frame_counter, + profile_counters, ); self.bookkeep_after_frames(); }, @@ -1263,6 +1275,7 @@ impl RenderBackend { txns, result_tx, frame_counter, + profile_counters, ); if built_frame { @@ -1344,20 +1357,16 @@ impl RenderBackend { &mut self, txns: Vec>, frame_counter: &mut u32, + profile_counters: &mut BackendProfileCounters, ) { self.prepare_for_frames(); self.maybe_force_nop_documents( frame_counter, + profile_counters, |document_id| txns.iter().any(|txn| txn.document_id == document_id)); let mut built_frame = false; for mut txn in txns { - if txn.generate_frame { - txn.profile.end_time(profiler::API_SEND_TIME); - } - - self.documents.get_mut(&txn.document_id).unwrap().profile.merge(&mut txn.profile); - built_frame |= self.update_document( txn.document_id, txn.resource_updates.take(), @@ -1366,6 +1375,7 @@ impl RenderBackend { txn.generate_frame, txn.invalidate_rendered_frame, frame_counter, + profile_counters, false ); } @@ -1384,6 +1394,7 @@ impl RenderBackend { /// to force a frame build. fn maybe_force_nop_documents(&mut self, frame_counter: &mut u32, + profile_counters: &mut BackendProfileCounters, document_already_present: F) where F: Fn(DocumentId) -> bool { if self.requires_frame_build() { @@ -1402,6 +1413,7 @@ impl RenderBackend { false, false, frame_counter, + profile_counters, false); } #[cfg(feature = "capture")] @@ -1421,13 +1433,13 @@ impl RenderBackend { mut render_frame: bool, invalidate_rendered_frame: bool, frame_counter: &mut u32, + profile_counters: &mut BackendProfileCounters, has_built_scene: bool, ) -> bool { let requested_frame = render_frame; let requires_frame_build = self.requires_frame_build(); let doc = self.documents.get_mut(&document_id).unwrap(); - // If we have a sampler, get more frame ops from it and add them // to the transaction. This is a hook to allow the WR user code to // fiddle with things after a potentially long scene build, but just @@ -1446,6 +1458,7 @@ impl RenderBackend { // for something wrench specific and we should remove it. let mut scroll = false; for frame_msg in frame_ops { + let _timer = profile_counters.total_time.timer(); let op = doc.process_frame_msg(frame_msg); scroll |= op.scroll; } @@ -1458,7 +1471,7 @@ impl RenderBackend { self.resource_cache.post_scene_building_update( resource_updates, - &mut doc.profile, + &mut profile_counters.resources, ); if doc.dynamic_properties.flush_pending_updates() { @@ -1504,11 +1517,13 @@ impl RenderBackend { // borrow ck hack for profile_counters let (pending_update, rendered_document) = { + let _timer = profile_counters.total_time.timer(); let frame_build_start_time = precise_time_ns(); let rendered_document = doc.build_frame( &mut self.resource_cache, &mut self.gpu_cache, + &mut profile_counters.resources, self.debug_flags, &mut self.tile_cache_logger, &mut self.tile_caches, @@ -1571,8 +1586,10 @@ impl RenderBackend { document_id, rendered_document, pending_update, + profile_counters.clone() ); self.result_tx.send(msg).unwrap(); + profile_counters.reset(); } else if requested_frame { // WR-internal optimization to avoid doing a bunch of render work if // there's no pixels. We still want to pretend to render and request @@ -1695,6 +1712,7 @@ impl RenderBackend { &mut self, root: PathBuf, bits: CaptureBits, + profile_counters: &mut BackendProfileCounters, ) -> DebugOutput { use std::fs; use crate::render_task_graph::dump_render_tasks_as_svg; @@ -1717,6 +1735,7 @@ impl RenderBackend { let rendered_document = doc.build_frame( &mut self.resource_cache, &mut self.gpu_cache, + &mut profile_counters.resources, self.debug_flags, &mut self.tile_cache_logger, &mut self.tile_caches, @@ -1832,6 +1851,7 @@ impl RenderBackend { fn load_capture( &mut self, mut config: CaptureConfig, + profile_counters: &mut BackendProfileCounters, ) { debug!("capture: loading {:?}", config.frame_root()); let backend = config.deserialize_for_frame::("backend") @@ -1942,7 +1962,6 @@ impl RenderBackend { loaded_scene: scene.clone(), prev_composite_descriptor: CompositeDescriptor::empty(), dirty_rects_are_valid: false, - profile: TransactionProfile::new(), }; entry.insert(doc); } @@ -1959,10 +1978,12 @@ impl RenderBackend { let msg_publish = ResultMsg::PublishDocument( id, - RenderedDocument { frame, is_new_scene: true, profile: TransactionProfile::new() }, + RenderedDocument { frame, is_new_scene: true }, self.resource_cache.pending_updates(), + profile_counters.clone(), ); self.result_tx.send(msg_publish).unwrap(); + profile_counters.reset(); self.notifier.new_frame_ready(id, false, true, None); diff --git a/gfx/wr/webrender/src/renderer.rs b/gfx/wr/webrender/src/renderer.rs index b1a4ec01efb6..e8b2954d39c8 100644 --- a/gfx/wr/webrender/src/renderer.rs +++ b/gfx/wr/webrender/src/renderer.rs @@ -61,7 +61,7 @@ use crate::device::{DrawTarget, ExternalTexture, ReadTarget, TextureSlot}; use crate::device::{ShaderError, TextureFilter, TextureFlags, VertexUsageHint, VAO, VBO, CustomVAO}; use crate::device::ProgramCache; -use crate::device::query::{GpuSampler, GpuTimer}; +use crate::device::query::GpuTimer; #[cfg(feature = "capture")] use crate::device::FBOId; use euclid::{rect, Transform3D, Scale, default}; @@ -80,8 +80,10 @@ use crate::internal_types::{RenderTargetInfo, SavedTargetIndex, Swizzle}; use malloc_size_of::MallocSizeOfOps; use crate::picture::{self, RecordedDirtyRegion, ResolvedSurfaceTexture}; use crate::prim_store::DeferredResolve; -use crate::profiler::{self, GpuProfileTag, TransactionProfile}; -use crate::profiler::{Profiler, add_event_marker, add_text_marker, thread_is_being_profiled}; +use crate::profiler::{BackendProfileCounters, FrameProfileCounters, TimeProfileCounter, + GpuProfileTag, RendererProfileCounters, RendererProfileTimers}; +use crate::profiler::{Profiler, ChangeIndicator, ProfileStyle, add_event_marker, + add_text_marker, thread_is_being_profiled}; use crate::device::query::{GpuProfiler, GpuDebugMethod}; use rayon::{ThreadPool, ThreadPoolBuilder}; use crate::render_backend::{FrameId, RenderBackend}; @@ -240,15 +242,15 @@ const GPU_TAG_SCALE: GpuProfileTag = GpuProfileTag { color: debug_colors::GHOSTWHITE, }; const GPU_SAMPLER_TAG_ALPHA: GpuProfileTag = GpuProfileTag { - label: "Alpha targets", + label: "Alpha Targets", color: debug_colors::BLACK, }; const GPU_SAMPLER_TAG_OPAQUE: GpuProfileTag = GpuProfileTag { - label: "Opaque pass", + label: "Opaque Pass", color: debug_colors::BLACK, }; const GPU_SAMPLER_TAG_TRANSPARENT: GpuProfileTag = GpuProfileTag { - label: "Transparent pass", + label: "Transparent Pass", color: debug_colors::BLACK, }; const GPU_TAG_SVG_FILTER: GpuProfileTag = GpuProfileTag { @@ -2254,11 +2256,15 @@ pub struct Renderer { debug: LazyInitializedDebugRenderer, debug_flags: DebugFlags, - profile: TransactionProfile, - frame_counter: u64, - resource_upload_time: f64, - gpu_cache_upload_time: f64, + backend_profile_counters: BackendProfileCounters, + profile_counters: RendererProfileCounters, + resource_upload_time: u64, + gpu_cache_upload_time: u64, profiler: Profiler, + new_frame_indicator: ChangeIndicator, + new_scene_indicator: ChangeIndicator, + slow_frame_indicator: ChangeIndicator, + slow_txn_indicator: ChangeIndicator, last_time: u64, @@ -2480,6 +2486,8 @@ impl Renderer { None => Rc::new(RefCell::new(Shaders::new(&mut device, gl_type, &options)?)), }; + let backend_profile_counters = BackendProfileCounters::new(); + let dither_matrix_texture = if options.enable_dithering { let dither_matrix: [u8; 64] = [ 0, @@ -2820,7 +2828,7 @@ impl Renderer { debug_flags, namespace_alloc_by_client, ); - backend.run(); + backend.run(backend_profile_counters); if let Some(ref thread_listener) = *thread_listener_for_render_backend { thread_listener.thread_stopped(&rb_thread_name); } @@ -2858,11 +2866,15 @@ impl Renderer { shaders, debug: LazyInitializedDebugRenderer::new(), debug_flags: DebugFlags::empty(), - profile: TransactionProfile::new(), - frame_counter: 0, - resource_upload_time: 0.0, - gpu_cache_upload_time: 0.0, + backend_profile_counters: BackendProfileCounters::new(), + profile_counters: RendererProfileCounters::new(), + resource_upload_time: 0, + gpu_cache_upload_time: 0, profiler: Profiler::new(), + new_frame_indicator: ChangeIndicator::new(), + new_scene_indicator: ChangeIndicator::new(), + slow_frame_indicator: ChangeIndicator::new(), + slow_txn_indicator: ChangeIndicator::new(), max_recorded_profiles: options.max_recorded_profiles, clear_color: options.clear_color, enable_clear_scissor: options.enable_clear_scissor, @@ -2997,9 +3009,13 @@ impl Renderer { } ResultMsg::PublishDocument( document_id, - mut doc, + doc, resource_update_list, + profile_counters, ) => { + if doc.is_new_scene { + self.new_scene_indicator.changed(); + } // Add a new document to the active set, expressed as a `Vec` in order // to re-order based on `DocumentLayer` during rendering. @@ -3015,7 +3031,6 @@ impl Renderer { self.render_impl(None).ok(); } - doc.profile.merge(&mut self.active_documents[pos].1.profile); self.active_documents[pos].1 = doc; } None => self.active_documents.push((document_id, doc)), @@ -3035,6 +3050,7 @@ impl Renderer { self.pending_texture_cache_updates |= !resource_update_list.texture_updates.updates.is_empty(); self.pending_texture_updates.push(resource_update_list.texture_updates); self.pending_native_surface_updates.extend(resource_update_list.native_surface_updates); + self.backend_profile_counters = profile_counters; self.documents_seen.insert(document_id); } ResultMsg::UpdateGpuCache(mut list) => { @@ -3470,7 +3486,10 @@ impl Renderer { DebugFlags::RENDER_TARGET_DBG | DebugFlags::TEXTURE_CACHE_DBG | DebugFlags::EPOCHS | + DebugFlags::NEW_FRAME_INDICATOR | + DebugFlags::NEW_SCENE_INDICATOR | DebugFlags::GPU_CACHE_DBG | + DebugFlags::SLOW_FRAME_INDICATOR | DebugFlags::PICTURE_CACHING_DBG | DebugFlags::PRIMITIVE_DBG | DebugFlags::ZOOM_DBG @@ -3581,8 +3600,6 @@ impl Renderer { return Ok(results); } - self.profile.start_time(profiler::RENDERER_TIME); - let compositor_kind = self.active_documents[0].1.frame.composite_state.compositor_kind; // CompositorKind is updated if self.current_compositor_kind != compositor_kind { @@ -3612,6 +3629,9 @@ impl Renderer { self.current_compositor_kind = compositor_kind; } + let mut frame_profiles = Vec::new(); + let mut profile_timers = RendererProfileTimers::new(); + // The texture resolver scope should be outside of any rendering, including // debug rendering. This ensures that when we return render targets to the // pool via glInvalidateFramebuffer, we don't do any debug rendering after @@ -3620,11 +3640,25 @@ impl Renderer { // resolve step when the debug overlay is enabled. self.texture_resolver.begin_frame(); - if let Some(device_size) = device_size { - self.update_gpu_profile(device_size); - } + let profile_samplers = { + let _gm = self.gpu_profiler.start_marker("build samples"); + // Block CPU waiting for last frame's GPU profiles to arrive. + // In general this shouldn't block unless heavily GPU limited. + let (gpu_frame_id, timers, samplers) = self.gpu_profiler.build_samples(); - let cpu_frame_id = { + if self.max_recorded_profiles > 0 { + while self.gpu_profiles.len() >= self.max_recorded_profiles { + self.gpu_profiles.pop_front(); + } + self.gpu_profiles + .push_back(GpuProfile::new(gpu_frame_id, &timers)); + } + profile_timers.gpu_samples = timers; + samplers + }; + + + let cpu_frame_id = profile_timers.cpu_time.profile(|| { let _gm = self.gpu_profiler.start_marker("begin frame"); let frame_id = self.device.begin_frame(); self.gpu_profiler.begin_frame(frame_id); @@ -3638,7 +3672,7 @@ impl Renderer { self.update_native_surfaces(); frame_id - }; + }); // Inform the client that we are starting a composition transaction if native // compositing is enabled. This needs to be done early in the frame, so that @@ -3654,78 +3688,80 @@ impl Renderer { self.update_debug_overlay(device_size); } - //Note: another borrowck dance - let mut active_documents = mem::replace(&mut self.active_documents, Vec::default()); - // sort by the document layer id - active_documents.sort_by_key(|&(_, ref render_doc)| render_doc.frame.layer); + profile_timers.cpu_time.profile(|| { + //Note: another borrowck dance + let mut active_documents = mem::replace(&mut self.active_documents, Vec::default()); + // sort by the document layer id + active_documents.sort_by_key(|&(_, ref render_doc)| render_doc.frame.layer); - #[cfg(feature = "replay")] - self.texture_resolver.external_images.extend( - self.owned_external_images.iter().map(|(key, value)| (*key, value.clone())) - ); - - let last_document_index = active_documents.len() - 1; - for (doc_index, (document_id, RenderedDocument { ref mut frame, ref mut profile, .. })) in active_documents.iter_mut().enumerate() { - assert!(self.current_compositor_kind == frame.composite_state.compositor_kind); - - if self.shared_texture_cache_cleared { - assert!(self.documents_seen.contains(&document_id), - "Cleared texture cache without sending new document frame."); - } - - if let Err(e) = self.prepare_gpu_cache(frame) { - self.renderer_errors.push(e); - continue; - } - assert!(frame.gpu_cache_frame_id <= self.gpu_cache_frame_id, - "Received frame depends on a later GPU cache epoch ({:?}) than one we received last via `UpdateGpuCache` ({:?})", - frame.gpu_cache_frame_id, self.gpu_cache_frame_id); - - { - profile_scope!("gl.flush"); - self.device.gl().flush(); // early start on gpu cache updates - } - - self.draw_frame( - frame, - device_size, - &mut results, - doc_index == 0, + #[cfg(feature = "replay")] + self.texture_resolver.external_images.extend( + self.owned_external_images.iter().map(|(key, value)| (*key, value.clone())) ); - // TODO(nical): do this automatically by selecting counters in the wr profiler - // Profile marker for the number of invalidated picture cache - if thread_is_being_profiled() { - let duration = Duration::new(0,0); - if let Some(n) = self.profiler.get(profiler::RENDERED_PICTURE_TILES) { - let message = (n as usize).to_string(); + let last_document_index = active_documents.len() - 1; + for (doc_index, (document_id, RenderedDocument { ref mut frame, .. })) in active_documents.iter_mut().enumerate() { + assert!(self.current_compositor_kind == frame.composite_state.compositor_kind); + + if self.shared_texture_cache_cleared { + assert!(self.documents_seen.contains(&document_id), + "Cleared texture cache without sending new document frame."); + } + + frame.profile_counters.reset_targets(); + if let Err(e) = self.prepare_gpu_cache(frame) { + self.renderer_errors.push(e); + continue; + } + assert!(frame.gpu_cache_frame_id <= self.gpu_cache_frame_id, + "Received frame depends on a later GPU cache epoch ({:?}) than one we received last via `UpdateGpuCache` ({:?})", + frame.gpu_cache_frame_id, self.gpu_cache_frame_id); + + { + profile_scope!("gl.flush"); + self.device.gl().flush(); // early start on gpu cache updates + } + + self.draw_frame( + frame, + device_size, + &mut results, + doc_index == 0, + ); + + // Profile marker for the number of invalidated picture cache + if thread_is_being_profiled() { + let duration = Duration::new(0,0); + let message = self.profile_counters.rendered_picture_cache_tiles.get_accum().to_string(); add_text_marker(cstr!("NumPictureCacheInvalidated"), &message, duration); } + + if device_size.is_some() { + self.draw_frame_debug_items(&frame.debug_items); + } + if self.debug_flags.contains(DebugFlags::PROFILER_DBG) { + frame_profiles.push(frame.profile_counters.clone()); + } + + let dirty_regions = + mem::replace(&mut frame.recorded_dirty_regions, Vec::new()); + results.recorded_dirty_regions.extend(dirty_regions); + + // If we're the last document, don't call end_pass here, because we'll + // be moving on to drawing the debug overlays. See the comment above + // the end_pass call in draw_frame about debug draw overlays + // for a bit more context. + if doc_index != last_document_index { + self.texture_resolver.end_pass(&mut self.device, None, None); + } } - if device_size.is_some() { - self.draw_frame_debug_items(&frame.debug_items); - } - let dirty_regions = - mem::replace(&mut frame.recorded_dirty_regions, Vec::new()); - results.recorded_dirty_regions.extend(dirty_regions); + self.unlock_external_images(); + self.active_documents = active_documents; - // If we're the last document, don't call end_pass here, because we'll - // be moving on to drawing the debug overlays. See the comment above - // the end_pass call in draw_frame about debug draw overlays - // for a bit more context. - if doc_index != last_document_index { - self.texture_resolver.end_pass(&mut self.device, None, None); - } - - self.profile.merge(profile); - } - - self.unlock_external_images(); - self.active_documents = active_documents; - - let _gm = self.gpu_profiler.start_marker("end frame"); - self.gpu_profiler.end_frame(); + let _gm = self.gpu_profiler.start_marker("end frame"); + self.gpu_profiler.end_frame(); + }); if let Some(device_size) = device_size { // Bind a surface to draw the debug / profiler information to. @@ -3738,13 +3774,27 @@ impl Renderer { self.draw_epoch_debug(); } - self.profile.end_time(profiler::RENDERER_TIME); - self.profile.end_time_if_started(profiler::TOTAL_FRAME_CPU_TIME); - let current_time = precise_time_ns(); if device_size.is_some() { - let time = profiler::ns_to_ms(current_time - self.last_time); - self.profile.set(profiler::FRAME_TIME, time); + let ns = current_time - self.last_time; + self.profile_counters.frame_time.set(ns); + } + + let frame_cpu_time_ns = self.backend_profile_counters.total_time.get() + + profile_timers.cpu_time.get(); + let frame_cpu_time_ms = frame_cpu_time_ns as f64 / 1000000.0; + if frame_cpu_time_ms > 16.0 { + self.slow_frame_indicator.changed(); + } + + if self.backend_profile_counters.scene_changed { + let txn_time_ns = self.backend_profile_counters.txn.total_send_time.get() + + self.backend_profile_counters.txn.display_list_build_time.get() + + self.backend_profile_counters.txn.scene_build_time.get(); + let txn_time_ms = txn_time_ns as f64 / 1000000.0; + if txn_time_ms > 100.0 { + self.slow_txn_indicator.changed(); + } } if self.max_recorded_profiles > 0 { @@ -3753,71 +3803,127 @@ impl Renderer { } let cpu_profile = CpuProfile::new( cpu_frame_id, - (self.profile.get_or(profiler::FRAME_BUILDING_TIME, 0.0) * 1000000.0) as u64, - (self.profile.get_or(profiler::RENDERER_TIME, 0.0) * 1000000.0) as u64, - self.profile.get_or(profiler::DRAW_CALLS, 0.0) as usize, + self.backend_profile_counters.total_time.get(), + profile_timers.cpu_time.get(), + self.profile_counters.draw_calls.get(), ); self.cpu_profiles.push_back(cpu_profile); } - self.profiler.set_counters(&mut self.profile); - - // Note: profile counters must be set before this or they will count for next frame. - self.profiler.update(); - if self.debug_flags.intersects(DebugFlags::PROFILER_DBG | DebugFlags::PROFILER_CAPTURE) { if let Some(device_size) = device_size { //TODO: take device/pixel ratio into equation? if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) { + let style = if !self.debug_flags.contains(DebugFlags::PROFILER_DBG) { + // Don't draw the profiler, but collect samples for captures + assert!(self.debug_flags.contains(DebugFlags::PROFILER_CAPTURE)); + ProfileStyle::NoDraw + } else if self.debug_flags.contains(DebugFlags::SMART_PROFILER) { + ProfileStyle::Smart + } else if self.debug_flags.contains(DebugFlags::COMPACT_PROFILER) { + ProfileStyle::Compact + } else { + ProfileStyle::Full + }; + + let screen_fraction = 1.0 / device_size.to_f32().area(); self.profiler.draw_profile( - self.frame_counter, + &frame_profiles, + &self.backend_profile_counters, + &self.profile_counters, + &mut profile_timers, + &profile_samplers, + screen_fraction, debug_renderer, - device_size, + style, ); } } } + let mut x = 0.0; + if self.debug_flags.contains(DebugFlags::NEW_FRAME_INDICATOR) { + if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) { + self.new_frame_indicator.changed(); + self.new_frame_indicator.draw( + x, 0.0, + ColorU::new(0, 110, 220, 255), + debug_renderer, + ); + x += ChangeIndicator::width(); + } + } + + if self.debug_flags.contains(DebugFlags::NEW_SCENE_INDICATOR) { + if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) { + self.new_scene_indicator.draw( + x, 0.0, + ColorU::new(0, 220, 110, 255), + debug_renderer, + ); + x += ChangeIndicator::width(); + } + } + + if self.debug_flags.contains(DebugFlags::SLOW_FRAME_INDICATOR) { + if let Some(debug_renderer) = self.debug.get_mut(&mut self.device) { + self.slow_txn_indicator.draw( + x, 0.0, + ColorU::new(250, 80, 80, 255), + debug_renderer, + ); + self.slow_frame_indicator.draw( + x, 10.0, + ColorU::new(220, 30, 10, 255), + debug_renderer, + ); + } + } + if self.debug_flags.contains(DebugFlags::ECHO_DRIVER_MESSAGES) { self.device.echo_driver_messages(); } if thread_is_being_profiled() { let duration = Duration::new(0,0); - let message = (self.profiler.get(profiler::DRAW_CALLS).unwrap_or(0.0) as usize).to_string(); + let message = self.profile_counters.get_draw_calls().to_string(); add_text_marker(cstr!("NumDrawCalls"), &message, duration); } - results.stats.texture_upload_kb = self.profile.get(profiler::TEXTURE_UPLOADS_MEM).unwrap_or(0.0) as usize; - self.frame_counter += 1; + results.stats.texture_upload_kb = self.profile_counters.texture_data_uploaded.get(); + self.backend_profile_counters.reset(); + self.profile_counters.reset(); + self.profile_counters.frame_counter.inc(); results.stats.resource_upload_time = self.resource_upload_time; - self.resource_upload_time = 0.0; + self.resource_upload_time = 0; results.stats.gpu_cache_upload_time = self.gpu_cache_upload_time; - self.gpu_cache_upload_time = 0.0; + self.gpu_cache_upload_time = 0; - if let Some(debug_renderer) = self.debug.try_get_mut() { - let small_screen = self.debug_flags.contains(DebugFlags::SMALL_SCREEN); - let scale = if small_screen { 1.6 } else { 1.0 }; - // TODO(gw): Tidy this up so that compositor config integrates better - // with the (non-compositor) surface y-flip options. - let surface_origin_is_top_left = match self.current_compositor_kind { - CompositorKind::Native { .. } => true, - CompositorKind::Draw { .. } => self.device.surface_origin_is_top_left(), - }; - debug_renderer.render( - &mut self.device, - device_size, - scale, - surface_origin_is_top_left, - ); - } - // See comment for texture_resolver.begin_frame() for explanation - // of why this must be done after all rendering, including debug - // overlays. The end_frame() call implicitly calls end_pass(), which - // should ensure any left over render targets get invalidated and - // returned to the pool correctly. - self.texture_resolver.end_frame(&mut self.device, cpu_frame_id); - self.device.end_frame(); + profile_timers.cpu_time.profile(|| { + if let Some(debug_renderer) = self.debug.try_get_mut() { + let small_screen = self.debug_flags.contains(DebugFlags::SMALL_SCREEN); + let scale = if small_screen { 1.6 } else { 1.0 }; + // TODO(gw): Tidy this up so that compositor config integrates better + // with the (non-compositor) surface y-flip options. + let surface_origin_is_top_left = match self.current_compositor_kind { + CompositorKind::Native { .. } => true, + CompositorKind::Draw { .. } => self.device.surface_origin_is_top_left(), + }; + debug_renderer.render( + &mut self.device, + device_size, + scale, + surface_origin_is_top_left, + ); + } + // See comment for texture_resolver.begin_frame() for explanation + // of why this must be done after all rendering, including debug + // overlays. The end_frame() call implicitly calls end_pass(), which + // should ensure any left over render targets get invalidated and + // returned to the pool correctly. + self.texture_resolver.end_frame(&mut self.device, cpu_frame_id); + self.device.end_frame(); + }); if device_size.is_some() { self.last_time = current_time; @@ -3846,48 +3952,6 @@ impl Renderer { } } - fn update_gpu_profile(&mut self, device_size: DeviceIntSize) { - let _gm = self.gpu_profiler.start_marker("build samples"); - // Block CPU waiting for last frame's GPU profiles to arrive. - // In general this shouldn't block unless heavily GPU limited. - let (gpu_frame_id, timers, samplers) = self.gpu_profiler.build_samples(); - - if self.max_recorded_profiles > 0 { - while self.gpu_profiles.len() >= self.max_recorded_profiles { - self.gpu_profiles.pop_front(); - } - - self.gpu_profiles.push_back(GpuProfile::new(gpu_frame_id, &timers)); - } - - self.profiler.set_gpu_time_queries(timers); - - if !samplers.is_empty() { - let screen_fraction = 1.0 / device_size.to_f32().area(); - - fn accumulate_sampler_value(description: &str, samplers: &[GpuSampler]) -> f32 { - let mut accum = 0.0; - for sampler in samplers { - if sampler.tag.label != description { - continue; - } - - accum += sampler.count as f32; - } - - accum - } - - let alpha_targets = accumulate_sampler_value(&"Alpha targets", &samplers) * screen_fraction; - let transparent_pass = accumulate_sampler_value(&"Transparent pass", &samplers) * screen_fraction; - let opaque_pass = accumulate_sampler_value(&"Opaque pass", &samplers) * screen_fraction; - self.profile.set(profiler::ALPHA_TARGETS_SAMPLERS, alpha_targets); - self.profile.set(profiler::TRANSPARENT_PASS_SAMPLERS, transparent_pass); - self.profile.set(profiler::OPAQUE_PASS_SAMPLERS, opaque_pass); - self.profile.set(profiler::TOTAL_SAMPLERS, alpha_targets + transparent_pass + opaque_pass); - } - } - fn update_gpu_cache(&mut self) { let _gm = self.gpu_profiler.start_marker("gpu cache update"); @@ -3935,12 +3999,15 @@ impl Renderer { .update(&mut self.device, &update_list); } - self.profile.start_time(profiler::GPU_CACHE_UPLOAD_TIME); - let updated_rows = self.gpu_cache_texture.flush(&mut self.device); - self.gpu_cache_upload_time += self.profile.end_time(profiler::GPU_CACHE_UPLOAD_TIME); + let mut upload_time = TimeProfileCounter::new("GPU cache upload time", false, Some(0.0..2.0)); + let updated_rows = upload_time.profile(|| { + self.gpu_cache_texture.flush(&mut self.device) + }); + self.gpu_cache_upload_time += upload_time.get(); - self.profile.set(profiler::GPU_CACHE_ROWS_UPDATED, updated_rows); - self.profile.set(profiler::GPU_CACHE_BLOCKS_UPDATED, updated_blocks); + let counters = &mut self.backend_profile_counters.resources.gpu_cache; + counters.updated_rows.set(updated_rows); + counters.updated_blocks.set(updated_blocks); } fn prepare_gpu_cache(&mut self, frame: &Frame) -> Result<(), RendererError> { @@ -3976,192 +4043,191 @@ impl Renderer { let mut pending_texture_updates = mem::replace(&mut self.pending_texture_updates, vec![]); self.pending_texture_cache_updates = false; - self.profile.start_time(profiler::TEXTURE_CACHE_UPLOAD_TIME); - - for update_list in pending_texture_updates.drain(..) { - for allocation in update_list.allocations { - match allocation.kind { - TextureCacheAllocationKind::Alloc(_) => add_event_marker(c_str!("TextureCacheAlloc")), - TextureCacheAllocationKind::Realloc(_) => add_event_marker(c_str!("TextureCacheRealloc")), - TextureCacheAllocationKind::Reset(_) => add_event_marker(c_str!("TextureCacheReset")), - TextureCacheAllocationKind::Free => add_event_marker(c_str!("TextureCacheFree")), - }; - let old = match allocation.kind { - TextureCacheAllocationKind::Alloc(ref info) | - TextureCacheAllocationKind::Realloc(ref info) | - TextureCacheAllocationKind::Reset(ref info) => { - // Create a new native texture, as requested by the texture cache. - // - // Ensure no PBO is bound when creating the texture storage, - // or GL will attempt to read data from there. - let mut texture = self.device.create_texture( - TextureTarget::Array, - info.format, - info.width, - info.height, - info.filter, - // This needs to be a render target because some render - // tasks get rendered into the texture cache. - Some(RenderTargetInfo { has_depth: info.has_depth }), - info.layer_count, - ); - - if info.is_shared_cache { - texture.flags_mut() - .insert(TextureFlags::IS_SHARED_TEXTURE_CACHE); - - // Textures in the cache generally don't need to be cleared, - // but we do so if the debug display is active to make it - // easier to identify unallocated regions. - if self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG) { - self.clear_texture(&texture, TEXTURE_CACHE_DBG_CLEAR_COLOR); - } - } - - self.texture_resolver.texture_cache_map.insert(allocation.id, texture) - } - TextureCacheAllocationKind::Free => { - self.texture_resolver.texture_cache_map.remove(&allocation.id) - } - }; - - match allocation.kind { - TextureCacheAllocationKind::Alloc(_) => { - assert!(old.is_none(), "Renderer and backend disagree!"); - } - TextureCacheAllocationKind::Realloc(_) => { - self.device.blit_renderable_texture( - self.texture_resolver.texture_cache_map.get_mut(&allocation.id).unwrap(), - old.as_ref().unwrap(), - ); - } - TextureCacheAllocationKind::Reset(_) | - TextureCacheAllocationKind::Free => { - assert!(old.is_some(), "Renderer and backend disagree!"); - } - } - - if let Some(old) = old { - self.device.delete_texture(old); - } - } - - for (texture_id, updates) in update_list.updates { - let texture = &self.texture_resolver.texture_cache_map[&texture_id]; - let device = &mut self.device; - - // Calculate the total size of buffer required to upload all updates. - let required_size = updates.iter().map(|update| { - // Perform any debug clears now. As this requires a mutable borrow of device, - // it must be done before all the updates which require a TextureUploader. - if let TextureUpdateSource::DebugClear = update.source { - let draw_target = DrawTarget::from_texture( - texture, - update.layer_index as usize, - false, - ); - device.bind_draw_target(draw_target); - device.clear_target( - Some(TEXTURE_CACHE_DBG_CLEAR_COLOR), - None, - Some(draw_target.to_framebuffer_rect(update.rect.to_i32())) - ); - - 0 - } else { - let (upload_size, _) = device.required_upload_size_and_stride( - update.rect.size, - texture.get_format(), - ); - upload_size - } - }).sum(); - - if required_size == 0 { - continue; - } - - // For best performance we use a single TextureUploader for all uploads. - // Using individual TextureUploaders was causing performance issues on some drivers - // due to allocating too many PBOs. - let mut uploader = device.upload_texture( - texture, - &self.texture_cache_upload_pbo, - required_size - ); - - for update in updates { - let TextureCacheUpdate { rect, stride, offset, layer_index, format_override, source } = update; - - let bytes_uploaded = match source { - TextureUpdateSource::Bytes { data } => { - let data = &data[offset as usize ..]; - uploader.upload( - rect, - layer_index, - stride, - format_override, - data.as_ptr(), - data.len(), - ) - } - TextureUpdateSource::External { id, channel_index } => { - let handler = self.external_image_handler - .as_mut() - .expect("Found external image, but no handler set!"); - // The filter is only relevant for NativeTexture external images. - let dummy_data; - let data = match handler.lock(id, channel_index, ImageRendering::Auto).source { - ExternalImageSource::RawData(data) => { - &data[offset as usize ..] - } - ExternalImageSource::Invalid => { - // Create a local buffer to fill the pbo. - let bpp = texture.get_format().bytes_per_pixel(); - let width = stride.unwrap_or(rect.size.width * bpp); - let total_size = width * rect.size.height; - // WR haven't support RGBAF32 format in texture_cache, so - // we use u8 type here. - dummy_data = vec![0xFFu8; total_size as usize]; - &dummy_data - } - ExternalImageSource::NativeTexture(eid) => { - panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id); - } - }; - let size = uploader.upload( - rect, - layer_index, - stride, - format_override, - data.as_ptr(), - data.len() + let mut upload_time = TimeProfileCounter::new("Resource upload time", false, Some(0.0..2.0)); + upload_time.profile(|| { + for update_list in pending_texture_updates.drain(..) { + for allocation in update_list.allocations { + match allocation.kind { + TextureCacheAllocationKind::Alloc(_) => add_event_marker(c_str!("TextureCacheAlloc")), + TextureCacheAllocationKind::Realloc(_) => add_event_marker(c_str!("TextureCacheRealloc")), + TextureCacheAllocationKind::Reset(_) => add_event_marker(c_str!("TextureCacheReset")), + TextureCacheAllocationKind::Free => add_event_marker(c_str!("TextureCacheFree")), + }; + let old = match allocation.kind { + TextureCacheAllocationKind::Alloc(ref info) | + TextureCacheAllocationKind::Realloc(ref info) | + TextureCacheAllocationKind::Reset(ref info) => { + // Create a new native texture, as requested by the texture cache. + // + // Ensure no PBO is bound when creating the texture storage, + // or GL will attempt to read data from there. + let mut texture = self.device.create_texture( + TextureTarget::Array, + info.format, + info.width, + info.height, + info.filter, + // This needs to be a render target because some render + // tasks get rendered into the texture cache. + Some(RenderTargetInfo { has_depth: info.has_depth }), + info.layer_count, ); - handler.unlock(id, channel_index); - size + + if info.is_shared_cache { + texture.flags_mut() + .insert(TextureFlags::IS_SHARED_TEXTURE_CACHE); + + // Textures in the cache generally don't need to be cleared, + // but we do so if the debug display is active to make it + // easier to identify unallocated regions. + if self.debug_flags.contains(DebugFlags::TEXTURE_CACHE_DBG) { + self.clear_texture(&texture, TEXTURE_CACHE_DBG_CLEAR_COLOR); + } + } + + self.texture_resolver.texture_cache_map.insert(allocation.id, texture) } - TextureUpdateSource::DebugClear => { - // DebugClear updates are handled separately. - 0 + TextureCacheAllocationKind::Free => { + self.texture_resolver.texture_cache_map.remove(&allocation.id) } }; - self.profile.add(profiler::TEXTURE_UPLOADS, bytes_uploaded as f64 * 1.0e-6); + + match allocation.kind { + TextureCacheAllocationKind::Alloc(_) => { + assert!(old.is_none(), "Renderer and backend disagree!"); + } + TextureCacheAllocationKind::Realloc(_) => { + self.device.blit_renderable_texture( + self.texture_resolver.texture_cache_map.get_mut(&allocation.id).unwrap(), + old.as_ref().unwrap(), + ); + } + TextureCacheAllocationKind::Reset(_) | + TextureCacheAllocationKind::Free => { + assert!(old.is_some(), "Renderer and backend disagree!"); + } + } + + if let Some(old) = old { + self.device.delete_texture(old); + } + } + + for (texture_id, updates) in update_list.updates { + let texture = &self.texture_resolver.texture_cache_map[&texture_id]; + let device = &mut self.device; + + // Calculate the total size of buffer required to upload all updates. + let required_size = updates.iter().map(|update| { + // Perform any debug clears now. As this requires a mutable borrow of device, + // it must be done before all the updates which require a TextureUploader. + if let TextureUpdateSource::DebugClear = update.source { + let draw_target = DrawTarget::from_texture( + texture, + update.layer_index as usize, + false, + ); + device.bind_draw_target(draw_target); + device.clear_target( + Some(TEXTURE_CACHE_DBG_CLEAR_COLOR), + None, + Some(draw_target.to_framebuffer_rect(update.rect.to_i32())) + ); + + 0 + } else { + let (upload_size, _) = device.required_upload_size_and_stride( + update.rect.size, + texture.get_format(), + ); + upload_size + } + }).sum(); + + if required_size == 0 { + continue; + } + + // For best performance we use a single TextureUploader for all uploads. + // Using individual TextureUploaders was causing performance issues on some drivers + // due to allocating too many PBOs. + let mut uploader = device.upload_texture( + texture, + &self.texture_cache_upload_pbo, + required_size + ); + + for update in updates { + let TextureCacheUpdate { rect, stride, offset, layer_index, format_override, source } = update; + + let bytes_uploaded = match source { + TextureUpdateSource::Bytes { data } => { + let data = &data[offset as usize ..]; + uploader.upload( + rect, + layer_index, + stride, + format_override, + data.as_ptr(), + data.len(), + ) + } + TextureUpdateSource::External { id, channel_index } => { + let handler = self.external_image_handler + .as_mut() + .expect("Found external image, but no handler set!"); + // The filter is only relevant for NativeTexture external images. + let dummy_data; + let data = match handler.lock(id, channel_index, ImageRendering::Auto).source { + ExternalImageSource::RawData(data) => { + &data[offset as usize ..] + } + ExternalImageSource::Invalid => { + // Create a local buffer to fill the pbo. + let bpp = texture.get_format().bytes_per_pixel(); + let width = stride.unwrap_or(rect.size.width * bpp); + let total_size = width * rect.size.height; + // WR haven't support RGBAF32 format in texture_cache, so + // we use u8 type here. + dummy_data = vec![0xFFu8; total_size as usize]; + &dummy_data + } + ExternalImageSource::NativeTexture(eid) => { + panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id); + } + }; + let size = uploader.upload( + rect, + layer_index, + stride, + format_override, + data.as_ptr(), + data.len() + ); + handler.unlock(id, channel_index); + size + } + TextureUpdateSource::DebugClear => { + // DebugClear updates are handled separately. + 0 + } + }; + self.profile_counters.texture_data_uploaded.add(bytes_uploaded >> 10); + } + } + + if update_list.clears_shared_cache { + self.shared_texture_cache_cleared = true; } } - if update_list.clears_shared_cache { - self.shared_texture_cache_cleared = true; - } - } - - drain_filter( - &mut self.notifications, - |n| { n.when() == Checkpoint::FrameTexturesUpdated }, - |n| { n.notify(); }, - ); - - let t = self.profile.end_time(profiler::TEXTURE_CACHE_UPLOAD_TIME); - self.resource_upload_time += t; + drain_filter( + &mut self.notifications, + |n| { n.when() == Checkpoint::FrameTexturesUpdated }, + |n| { n.notify(); }, + ); + }); + self.resource_upload_time += upload_time.get(); } fn bind_textures(&mut self, textures: &BatchTextures) { @@ -4219,11 +4285,11 @@ impl Renderer { .update_vao_instances(vao, chunk, ONE_TIME_USAGE_HINT); self.device .draw_indexed_triangles_instanced_u16(6, chunk.len() as i32); - self.profile.inc(profiler::DRAW_CALLS); + self.profile_counters.draw_calls.inc(); stats.total_draw_calls += 1; } - self.profile.add(profiler::VERTICES, 6 * data.len()); + self.profile_counters.vertices.add(6 * data.len()); } fn handle_readback_composite( @@ -4443,7 +4509,7 @@ impl Renderer { ) { profile_scope!("draw_picture_cache_target"); - self.profile.inc(profiler::RENDERED_PICTURE_TILES); + self.profile_counters.rendered_picture_cache_tiles.inc(); let _gm = self.gpu_profiler.start_marker("picture cache target"); let framebuffer_kind = FramebufferKind::Other; @@ -5245,7 +5311,7 @@ impl Renderer { // count clear tiles here. let num_tiles = composite_state.opaque_tiles.len() + composite_state.alpha_tiles.len(); - self.profile.set(profiler::PICTURE_TILES, num_tiles); + self.profile_counters.total_picture_cache_tiles.set(num_tiles); // Draw opaque tiles first, front-to-back to get maxmum // z-reject efficiency. @@ -5308,7 +5374,7 @@ impl Renderer { ) { profile_scope!("draw_color_target"); - self.profile.inc(profiler::COLOR_PASSES); + self.profile_counters.color_passes.inc(); let _gm = self.gpu_profiler.start_marker("color target"); // sanity check for the depth buffer @@ -5532,7 +5598,7 @@ impl Renderer { ) { profile_scope!("draw_alpha_target"); - self.profile.inc(profiler::ALPHA_PASSES); + self.profile_counters.alpha_passes.inc(); let _gm = self.gpu_profiler.start_marker("alpha target"); let alpha_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_ALPHA); @@ -5932,7 +5998,7 @@ impl Renderer { /// Allocates a texture to be used as the output for a rendering pass. /// - /// We make an effort to reuse render target textures across passes and + /// We make an effort to reuse render targe textures across passes and /// across frames when the format and dimensions match. Because we use /// immutable storage, we can't resize textures. /// @@ -5946,6 +6012,7 @@ impl Renderer { fn allocate_target_texture( &mut self, list: &mut RenderTargetList, + counters: &mut FrameProfileCounters, ) -> Option { if list.targets.is_empty() { return None @@ -5965,7 +6032,7 @@ impl Renderer { (bounding_rect.size.height + 255) & !255, ); - self.profile.inc(profiler::USED_TARGETS); + counters.targets_used.inc(); // Try finding a match in the existing pool. If there's no match, we'll // create a new texture. @@ -5990,7 +6057,7 @@ impl Renderer { self.device.reuse_render_target::(&mut t, rt_info); t } else { - self.profile.inc(profiler::CREATED_TARGETS); + counters.targets_created.inc(); self.device.create_texture( TextureTarget::Array, list.format, @@ -6227,8 +6294,8 @@ impl Renderer { } => { profile_scope!("offscreen target"); - let alpha_tex = self.allocate_target_texture(alpha); - let color_tex = self.allocate_target_texture(color); + let alpha_tex = self.allocate_target_texture(alpha, &mut frame.profile_counters); + let color_tex = self.allocate_target_texture(color, &mut frame.profile_counters); // If this frame has already been drawn, then any texture // cache targets have already been updated and can be @@ -6245,7 +6312,7 @@ impl Renderer { } if !picture_cache.is_empty() { - self.profile.inc(profiler::COLOR_PASSES); + self.profile_counters.color_passes.inc(); } // Draw picture caching tiles for this pass. @@ -6502,10 +6569,6 @@ impl Renderer { self.debug_flags = flags; } - pub fn set_profiler_ui(&mut self, ui_str: &str) { - self.profiler.set_ui(ui_str); - } - fn draw_frame_debug_items(&mut self, items: &[DebugItem]) { if items.is_empty() { return; @@ -7303,8 +7366,8 @@ pub struct RendererStats { pub alpha_target_count: usize, pub color_target_count: usize, pub texture_upload_kb: usize, - pub resource_upload_time: f64, - pub gpu_cache_upload_time: f64, + pub resource_upload_time: u64, + pub gpu_cache_upload_time: u64, } /// Return type from render(), which contains some repr(C) statistics as well as diff --git a/gfx/wr/webrender/src/resource_cache.rs b/gfx/wr/webrender/src/resource_cache.rs index d21ae87fd7bb..1f0eb258084f 100644 --- a/gfx/wr/webrender/src/resource_cache.rs +++ b/gfx/wr/webrender/src/resource_cache.rs @@ -26,7 +26,7 @@ use crate::glyph_rasterizer::{GLYPH_FLASHING, FontInstance, GlyphFormat, GlyphKe use crate::gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle}; use crate::gpu_types::UvRectKind; use crate::internal_types::{FastHashMap, FastHashSet, TextureSource, ResourceUpdateList}; -use crate::profiler::{self, TransactionProfile, bytes_to_mb}; +use crate::profiler::{ResourceProfileCounters, TextureCacheProfileCounters}; use crate::render_backend::{FrameId, FrameStamp}; use crate::render_task_graph::{RenderTaskGraph, RenderTaskId}; use crate::render_task_cache::{RenderTaskCache, RenderTaskCacheKey}; @@ -461,9 +461,6 @@ pub struct ResourceCache { /// A list of queued compositor surface updates to apply next frame. pending_native_surface_updates: Vec, - - image_templates_memory: usize, - font_templates_memory: usize, } impl ResourceCache { @@ -495,8 +492,6 @@ impl ResourceCache { pending_native_surface_updates: Vec::new(), #[cfg(feature = "capture")] capture_dirty: true, - image_templates_memory: 0, - font_templates_memory: 0, } } @@ -551,7 +546,7 @@ impl ResourceCache { pub fn post_scene_building_update( &mut self, updates: Vec, - profile: &mut TransactionProfile, + profile_counters: &mut ResourceProfileCounters, ) { // TODO, there is potential for optimization here, by processing updates in // bulk rather than one by one (for example by sorting allocations by size or @@ -566,8 +561,7 @@ impl ResourceCache { match update { ResourceUpdate::AddImage(img) => { if let ImageData::Raw(ref bytes) = img.data { - self.image_templates_memory += bytes.len(); - profile.set(profiler::IMAGE_TEMPLATES_MEM, bytes_to_mb(self.image_templates_memory)); + profile_counters.image_templates.inc(bytes.len()); } self.add_image_template( img.key, @@ -576,7 +570,6 @@ impl ResourceCache { &img.descriptor.size.into(), img.tiling, ); - profile.set(profiler::IMAGE_TEMPLATES, self.resources.image_templates.images.len()); } ResourceUpdate::UpdateImage(img) => { self.update_image_template(img.key, img.descriptor, img.data.into(), &img.dirty_rect); @@ -604,16 +597,12 @@ impl ResourceCache { } ResourceUpdate::DeleteImage(img) => { self.delete_image_template(img); - profile.set(profiler::IMAGE_TEMPLATES, self.resources.image_templates.images.len()); - profile.set(profiler::IMAGE_TEMPLATES_MEM, bytes_to_mb(self.image_templates_memory)); } ResourceUpdate::DeleteBlobImage(img) => { self.delete_image_template(img.as_image()); } ResourceUpdate::DeleteFont(font) => { self.delete_font_template(font); - profile.set(profiler::FONT_TEMPLATES, self.resources.font_templates.len()); - profile.set(profiler::FONT_TEMPLATES_MEM, bytes_to_mb(self.font_templates_memory)); } ResourceUpdate::DeleteFontInstance(font) => { self.delete_font_instance(font); @@ -625,15 +614,13 @@ impl ResourceCache { ResourceUpdate::AddFont(font) => { match font { AddFont::Raw(id, bytes, index) => { - self.font_templates_memory += bytes.len(); - profile.set(profiler::FONT_TEMPLATES_MEM, bytes_to_mb(self.font_templates_memory)); + profile_counters.font_templates.inc(bytes.len()); self.add_font_template(id, FontTemplate::Raw(bytes, index)); } AddFont::Native(id, native_font_handle) => { self.add_font_template(id, FontTemplate::Native(native_font_handle)); } } - profile.set(profiler::FONT_TEMPLATES, self.resources.font_templates.len()); } ResourceUpdate::AddFontInstance(..) => { // Already added in ApiResources. @@ -645,7 +632,7 @@ impl ResourceCache { pub fn add_rasterized_blob_images( &mut self, images: Vec<(BlobImageRequest, BlobImageResult)>, - profile: &mut TransactionProfile, + texture_cache_profile: &mut TextureCacheProfileCounters, ) { for (request, result) in images { let data = match result { @@ -656,7 +643,7 @@ impl ResourceCache { } }; - profile.add(profiler::RASTERIZED_BLOBS_PX, data.rasterized_rect.area()); + texture_cache_profile.rasterized_blob_pixels.inc(data.rasterized_rect.area() as usize); // First make sure we have an entry for this key (using a placeholder // if need be). @@ -693,9 +680,7 @@ impl ResourceCache { pub fn delete_font_template(&mut self, font_key: FontKey) { self.glyph_rasterizer.delete_font(font_key); - if let Some(FontTemplate::Raw(data, _)) = self.resources.font_templates.remove(&font_key) { - self.font_templates_memory -= data.len(); - } + self.resources.font_templates.remove(&font_key); self.cached_glyphs .clear_fonts(|font| font.font_key == font_key); } @@ -815,10 +800,6 @@ impl ResourceCache { match value { Some(image) => if image.data.is_blob() { - if let CachedImageData::Raw(data) = image.data { - self.image_templates_memory -= data.len(); - } - let blob_key = BlobImageKey(image_key); self.deleted_blob_keys.back_mut().unwrap().push(blob_key); self.rasterized_blob_images.remove(&blob_key); @@ -1169,7 +1150,7 @@ impl ResourceCache { &mut self, gpu_cache: &mut GpuCache, render_tasks: &mut RenderTaskGraph, - profile: &mut TransactionProfile, + texture_cache_profile: &mut TextureCacheProfileCounters, ) { profile_scope!("block_until_all_resources_added"); @@ -1182,7 +1163,7 @@ impl ResourceCache { gpu_cache, &mut self.cached_render_tasks, render_tasks, - profile, + texture_cache_profile, ); // Apply any updates of new / updated images (incl. blobs) to the texture cache. @@ -1403,11 +1384,11 @@ impl ResourceCache { } - pub fn end_frame(&mut self, profile: &mut TransactionProfile) { + pub fn end_frame(&mut self, texture_cache_profile: &mut TextureCacheProfileCounters) { debug_assert_eq!(self.state, State::QueryResources); profile_scope!("end_frame"); self.state = State::Idle; - self.texture_cache.end_frame(profile); + self.texture_cache.end_frame(texture_cache_profile); } pub fn set_debug_flags(&mut self, flags: DebugFlags) { diff --git a/gfx/wr/webrender/src/scene_builder_thread.rs b/gfx/wr/webrender/src/scene_builder_thread.rs index 37f174b840b9..639894000aa1 100644 --- a/gfx/wr/webrender/src/scene_builder_thread.rs +++ b/gfx/wr/webrender/src/scene_builder_thread.rs @@ -26,7 +26,6 @@ use crate::prim_store::image::{Image, YuvImage}; use crate::prim_store::line_dec::LineDecoration; use crate::prim_store::picture::Picture; use crate::prim_store::text_run::TextRun; -use crate::profiler::{self, TransactionProfile}; use crate::render_backend::SceneView; use crate::renderer::{PipelineInfo, SceneBuilderHooks}; use crate::scene::{Scene, BuiltScene, SceneStats}; @@ -42,6 +41,19 @@ use crate::debug_server; #[cfg(feature = "debugger")] use api::{BuiltDisplayListIter, DisplayItem}; +/// Various timing information that will be turned into +/// TransactionProfileCounters later down the pipeline. +#[derive(Clone, Debug)] +pub struct TransactionTimings { + pub builder_start_time_ns: u64, + pub builder_end_time_ns: u64, + pub send_time_ns: u64, + pub scene_build_start_time_ns: u64, + pub scene_build_end_time_ns: u64, + pub blob_rasterization_end_time_ns: u64, + pub display_list_len: usize, +} + fn rasterize_blobs(txn: &mut TransactionMsg, is_low_priority: bool) { profile_scope!("rasterize_blobs"); @@ -69,10 +81,12 @@ pub struct BuiltTransaction { pub removed_pipelines: Vec<(PipelineId, DocumentId)>, pub notifications: Vec, pub interner_updates: Option, + pub scene_build_start_time: u64, + pub scene_build_end_time: u64, pub render_frame: bool, pub invalidate_rendered_frame: bool, pub discard_frame_state_for_pipelines: Vec, - pub profile: TransactionProfile, + pub timings: Option, } #[cfg(feature = "replay")] @@ -413,6 +427,8 @@ impl SceneBuilderThread { for mut item in scenes { self.config = item.config; + let scene_build_start_time = precise_time_ns(); + let mut built_scene = None; let mut interner_updates = None; @@ -454,8 +470,10 @@ impl SceneBuilderThread { removed_pipelines: Vec::new(), discard_frame_state_for_pipelines: Vec::new(), notifications: Vec::new(), + scene_build_start_time, + scene_build_end_time: precise_time_ns(), interner_updates, - profile: TransactionProfile::new(), + timings: None, })]; self.forward_built_transactions(txns); @@ -562,12 +580,12 @@ impl SceneBuilderThread { hooks.pre_scene_build(); } + let scene_build_start_time = precise_time_ns(); + let doc = self.documents.get_mut(&txn.document_id).unwrap(); let scene = &mut doc.scene; - let mut profile = txn.profile.take(); - - profile.start_time(profiler::SCENE_BUILD_TIME); + let mut timings = None; let mut discard_frame_state_for_pipelines = Vec::new(); let mut removed_pipelines = Vec::new(); @@ -595,15 +613,11 @@ impl SceneBuilderThread { display_list, preserve_frame_state, } => { + let display_list_len = display_list.data().len(); + let (builder_start_time_ns, builder_end_time_ns, send_time_ns) = display_list.times(); - let content_send_time = profiler::ns_to_ms(precise_time_ns() - send_time_ns); - let dl_build_time = profiler::ns_to_ms(builder_end_time_ns - builder_start_time_ns); - profile.set(profiler::CONTENT_SEND_TIME, content_send_time); - profile.set(profiler::DISPLAY_LIST_BUILD_TIME, dl_build_time); - profile.set(profiler::DISPLAY_LIST_MEM, profiler::bytes_to_mb(display_list.data().len())); - if self.removed_pipelines.contains(&pipeline_id) { continue; } @@ -621,6 +635,16 @@ impl SceneBuilderThread { viewport_size, ); + timings = Some(TransactionTimings { + builder_start_time_ns, + builder_end_time_ns, + send_time_ns, + scene_build_start_time_ns: 0, + scene_build_end_time_ns: 0, + blob_rasterization_end_time_ns: 0, + display_list_len, + }); + if !preserve_frame_state { discard_frame_state_for_pipelines.push(pipeline_id); } @@ -665,16 +689,15 @@ impl SceneBuilderThread { built_scene = Some(built); } - profile.end_time(profiler::SCENE_BUILD_TIME); + let scene_build_end_time = precise_time_ns(); + let is_low_priority = false; + rasterize_blobs(txn, is_low_priority); - if !txn.blob_requests.is_empty() { - profile.start_time(profiler::BLOB_RASTERIZATION_TIME); - - let is_low_priority = false; - rasterize_blobs(txn, is_low_priority); - - profile.end_time(profiler::BLOB_RASTERIZATION_TIME); + if let Some(timings) = timings.as_mut() { + timings.blob_rasterization_end_time_ns = precise_time_ns(); + timings.scene_build_start_time_ns = scene_build_start_time; + timings.scene_build_end_time_ns = scene_build_end_time; } drain_filter( @@ -701,7 +724,9 @@ impl SceneBuilderThread { discard_frame_state_for_pipelines, notifications: replace(&mut txn.notifications, Vec::new()), interner_updates, - profile, + scene_build_start_time, + scene_build_end_time, + timings, }) } @@ -726,7 +751,7 @@ impl SceneBuilderThread { let (tx, rx) = single_msg_channel(); let txn = txns.iter().find(|txn| txn.built_scene.is_some()).unwrap(); - hooks.pre_scene_swap((txn.profile.get(profiler::SCENE_BUILD_TIME).unwrap() * 1000000.0) as u64); + hooks.pre_scene_swap(txn.scene_build_end_time - txn.scene_build_start_time); (Some(info), Some(tx), Some(rx)) } else { diff --git a/gfx/wr/webrender/src/texture_cache.rs b/gfx/wr/webrender/src/texture_cache.rs index e1ee983aafb8..f77eb954c7a0 100644 --- a/gfx/wr/webrender/src/texture_cache.rs +++ b/gfx/wr/webrender/src/texture_cache.rs @@ -17,7 +17,7 @@ use crate::internal_types::{ TextureCacheAllocInfo, TextureCacheUpdate, }; use crate::lru_cache::LRUCache; -use crate::profiler::{self, TransactionProfile}; +use crate::profiler::{ResourceProfileCounter, TextureCacheProfileCounters}; use crate::render_backend::FrameStamp; use crate::resource_cache::{CacheItem, CachedImageData}; use smallvec::SmallVec; @@ -376,7 +376,7 @@ impl PictureTextures { } } - fn update_profile(&self, profile: &mut TransactionProfile) { + fn update_profile(&self, profile: &mut ResourceProfileCounter) { // For now, this profile counter just accumulates the slices and bytes // from all picture cache texture arrays. let mut picture_slices = 0; @@ -385,8 +385,7 @@ impl PictureTextures { picture_slices += texture.slices.len(); picture_bytes += texture.size_in_bytes(); } - profile.set(profiler::PICTURE_TILES, picture_slices); - profile.set(profiler::PICTURE_TILES_MEM, profiler::bytes_to_mb(picture_bytes)); + profile.set(picture_slices, picture_bytes); } } @@ -616,7 +615,7 @@ impl TextureCache { self.evict_items_from_cache_if_required(); } - pub fn end_frame(&mut self, profile: &mut TransactionProfile) { + pub fn end_frame(&mut self, texture_cache_profile: &mut TextureCacheProfileCounters) { debug_assert!(self.now.is_valid()); self.expire_old_picture_cache_tiles(); @@ -628,30 +627,18 @@ impl TextureCache { self.shared_textures.array_color8_linear.release_empty_textures(&mut self.pending_updates); self.shared_textures.array_color8_nearest.release_empty_textures(&mut self.pending_updates); - self.shared_textures.array_alpha8_linear.update_profile( - profiler::TEXTURE_CACHE_A8_REGIONS, - profiler::TEXTURE_CACHE_A8_MEM, - profile, - ); - self.shared_textures.array_alpha16_linear.update_profile( - profiler::TEXTURE_CACHE_A16_REGIONS, - profiler::TEXTURE_CACHE_A16_MEM, - profile, - ); - self.shared_textures.array_color8_linear.update_profile( - profiler::TEXTURE_CACHE_RGBA8_LINEAR_REGIONS, - profiler::TEXTURE_CACHE_RGBA8_LINEAR_MEM, - profile, - ); - self.shared_textures.array_color8_nearest.update_profile( - profiler::TEXTURE_CACHE_RGBA8_NEAREST_REGIONS, - profiler::TEXTURE_CACHE_RGBA8_NEAREST_MEM, - profile, - ); - self.picture_textures.update_profile(profile); - - profile.set(profiler::TEXTURE_CACHE_SHARED_MEM, self.shared_bytes_allocated); - profile.set(profiler::TEXTURE_CACHE_STANDALONE_MEM, self.standalone_bytes_allocated); + self.shared_textures.array_alpha8_linear + .update_profile(&mut texture_cache_profile.pages_alpha8_linear); + self.shared_textures.array_alpha16_linear + .update_profile(&mut texture_cache_profile.pages_alpha16_linear); + self.shared_textures.array_color8_linear + .update_profile(&mut texture_cache_profile.pages_color8_linear); + self.shared_textures.array_color8_nearest + .update_profile(&mut texture_cache_profile.pages_color8_nearest); + self.picture_textures + .update_profile(&mut texture_cache_profile.pages_picture); + texture_cache_profile.shared_bytes.set(self.shared_bytes_allocated); + texture_cache_profile.standalone_bytes.set(self.standalone_bytes_allocated); self.now = FrameStamp::INVALID; } @@ -1474,10 +1461,9 @@ impl TextureArray { }); } - fn update_profile(&self, count_idx: usize, mem_idx: usize, profile: &mut TransactionProfile) { + fn update_profile(&self, counter: &mut ResourceProfileCounter) { let num_regions: usize = self.units.iter().map(|u| u.regions.len()).sum(); - profile.set(count_idx, num_regions); - profile.set(mem_idx, profiler::bytes_to_mb(self.size_in_bytes())); + counter.set(num_regions, self.size_in_bytes()); } /// Allocate space in this texture array. diff --git a/gfx/wr/webrender_api/src/lib.rs b/gfx/wr/webrender_api/src/lib.rs index f1824212bd95..a7d59608a7fd 100644 --- a/gfx/wr/webrender_api/src/lib.rs +++ b/gfx/wr/webrender_api/src/lib.rs @@ -513,48 +513,57 @@ bitflags! { const DISABLE_BATCHING = 1 << 5; /// Display the pipeline epochs. const EPOCHS = 1 << 6; + /// Reduce the amount of information displayed by the profiler so that + /// it occupies less screen real-estate. + const COMPACT_PROFILER = 1 << 7; /// Print driver messages to stdout. - const ECHO_DRIVER_MESSAGES = 1 << 7; + const ECHO_DRIVER_MESSAGES = 1 << 8; + /// Show an indicator that moves every time a frame is rendered. + const NEW_FRAME_INDICATOR = 1 << 9; + /// Show an indicator that moves every time a scene is built. + const NEW_SCENE_INDICATOR = 1 << 10; /// Show an overlay displaying overdraw amount. - const SHOW_OVERDRAW = 1 << 8; + const SHOW_OVERDRAW = 1 << 11; /// Display the contents of GPU cache. - const GPU_CACHE_DBG = 1 << 9; + const GPU_CACHE_DBG = 1 << 12; + /// Show a red bar that moves each time a slow frame is detected. + const SLOW_FRAME_INDICATOR = 1 << 13; /// Clear evicted parts of the texture cache for debugging purposes. - const TEXTURE_CACHE_DBG_CLEAR_EVICTED = 1 << 10; + const TEXTURE_CACHE_DBG_CLEAR_EVICTED = 1 << 14; /// Show picture caching debug overlay - const PICTURE_CACHING_DBG = 1 << 11; + const PICTURE_CACHING_DBG = 1 << 15; /// Highlight all primitives with colors based on kind. - const PRIMITIVE_DBG = 1 << 12; + const PRIMITIVE_DBG = 1 << 16; /// Draw a zoom widget showing part of the framebuffer zoomed in. - const ZOOM_DBG = 1 << 13; + const ZOOM_DBG = 1 << 17; /// Scale the debug renderer down for a smaller screen. This will disrupt /// any mapping between debug display items and page content, so shouldn't /// be used with overlays like the picture caching or primitive display. - const SMALL_SCREEN = 1 << 14; + const SMALL_SCREEN = 1 << 18; /// Disable various bits of the WebRender pipeline, to help narrow /// down where slowness might be coming from. - const DISABLE_OPAQUE_PASS = 1 << 15; + const DISABLE_OPAQUE_PASS = 1 << 19; /// - const DISABLE_ALPHA_PASS = 1 << 16; + const DISABLE_ALPHA_PASS = 1 << 20; /// - const DISABLE_CLIP_MASKS = 1 << 17; + const DISABLE_CLIP_MASKS = 1 << 21; /// - const DISABLE_TEXT_PRIMS = 1 << 18; + const DISABLE_TEXT_PRIMS = 1 << 22; /// - const DISABLE_GRADIENT_PRIMS = 1 << 19; + const DISABLE_GRADIENT_PRIMS = 1 << 23; /// - const OBSCURE_IMAGES = 1 << 20; + const OBSCURE_IMAGES = 1 << 24; /// Taint the transparent area of the glyphs with a random opacity to easily /// see when glyphs are re-rasterized. - const GLYPH_FLASHING = 1 << 21; + const GLYPH_FLASHING = 1 << 25; /// The profiler only displays information that is out of the ordinary. - const SMART_PROFILER = 1 << 22; + const SMART_PROFILER = 1 << 26; /// If set, dump picture cache invalidation debug to console. - const INVALIDATION_DBG = 1 << 23; + const INVALIDATION_DBG = 1 << 27; /// Log tile cache to memory for later saving as part of wr-capture - const TILE_CACHE_LOGGING_DBG = 1 << 24; + const TILE_CACHE_LOGGING_DBG = 1 << 28; /// Collect and dump profiler statistics to captures. - const PROFILER_CAPTURE = (1 as u32) << 25; // need "as u32" until we have cbindgen#556 + const PROFILER_CAPTURE = (1 as u32) << 31; // need "as u32" until we have cbindgen#556 } } diff --git a/gfx/wr/wrench/src/args.yaml b/gfx/wr/wrench/src/args.yaml index ac9fed9f600e..e19c5057c369 100644 --- a/gfx/wr/wrench/src/args.yaml +++ b/gfx/wr/wrench/src/args.yaml @@ -80,10 +80,6 @@ args: - no_block: long: no-block help: Don't block on UI events - run event loop as fast as possible. - - profiler_ui: - long: profiler-ui - takes_value: true - help: A string describing what to show on in the profiler HUD (See https://github.com/servo/webrender/wiki/Debugging-WebRender#anchor_6). subcommands: - png: diff --git a/gfx/wr/wrench/src/main.rs b/gfx/wr/wrench/src/main.rs index 267bdacab93f..b77c98b9cd41 100644 --- a/gfx/wr/wrench/src/main.rs +++ b/gfx/wr/wrench/src/main.rs @@ -666,11 +666,6 @@ fn main() { dump_shader_source, notifier, ); - - if let Some(ui_str) = args.value_of("profiler_ui") { - wrench.renderer.set_profiler_ui(&ui_str); - } - window.update(&mut wrench); if let Some(window_title) = wrench.take_title() { @@ -807,7 +802,7 @@ fn render<'a>( // Default the profile overlay on for android. if cfg!(target_os = "android") { - debug_flags.toggle(DebugFlags::PROFILER_DBG); + debug_flags.toggle(DebugFlags::PROFILER_DBG | DebugFlags::COMPACT_PROFILER); wrench.api.send_debug_cmd(DebugCommand::SetFlags(debug_flags)); } @@ -869,6 +864,11 @@ fn render<'a>( wrench.api.send_debug_cmd(DebugCommand::SetFlags(debug_flags)); do_render = true; } + VirtualKeyCode::S => { + debug_flags.toggle(DebugFlags::COMPACT_PROFILER); + wrench.api.send_debug_cmd(DebugCommand::SetFlags(debug_flags)); + do_render = true; + } VirtualKeyCode::D => { debug_flags.toggle(DebugFlags::PICTURE_CACHING_DBG); wrench.api.send_debug_cmd(DebugCommand::SetFlags(debug_flags)); diff --git a/modules/libpref/init/all.js b/modules/libpref/init/all.js index 3b731bda4cf7..28d5d1caeb42 100644 --- a/modules/libpref/init/all.js +++ b/modules/libpref/init/all.js @@ -665,7 +665,11 @@ pref("gfx.webrender.debug.gpu-time-queries", false); pref("gfx.webrender.debug.gpu-sample-queries", false); pref("gfx.webrender.debug.disable-batching", false); pref("gfx.webrender.debug.epochs", false); +pref("gfx.webrender.debug.compact-profiler", false); +pref("gfx.webrender.debug.smart-profiler", false); pref("gfx.webrender.debug.echo-driver-messages", false); +pref("gfx.webrender.debug.new-frame-indicator", false); +pref("gfx.webrender.debug.new-scene-indicator", false); pref("gfx.webrender.debug.show-overdraw", false); pref("gfx.webrender.debug.slow-frame-indicator", false); pref("gfx.webrender.debug.picture-caching", false); @@ -675,7 +679,6 @@ pref("gfx.webrender.debug.small-screen", false); pref("gfx.webrender.debug.obscure-images", false); pref("gfx.webrender.debug.glyph-flashing", false); pref("gfx.webrender.debug.capture-profiler", false); -pref("gfx.webrender.debug.profiler-ui", "Default"); pref("accessibility.warn_on_browsewithcaret", true);