From 67286e28c2de2bd1dca5444fa0756c67ed5c3ccb Mon Sep 17 00:00:00 2001 From: Nicolas Silva Date: Tue, 4 May 2021 12:54:52 +0000 Subject: [PATCH] Bug 1696905 - CPU-side occlusion culling for picture cache tiles. r=gfx-reviewers,lsalzman,gw This patch introduces a simple culling algorithm that splits compositor tiles into only their visible parts, removing the need for a depth buffer. This reduces the draw-call count as well as the memory usage and bandwidth associated with the depth buffer. Differential Revision: https://phabricator.services.mozilla.com/D113532 --- gfx/wr/webrender/src/device/gl.rs | 2 +- gfx/wr/webrender/src/lib.rs | 1 + gfx/wr/webrender/src/rectangle_occlusion.rs | 215 ++++++++++++++++++++ gfx/wr/webrender/src/renderer/mod.rs | 169 ++++++++------- gfx/wr/webrender_api/src/units.rs | 1 + 5 files changed, 315 insertions(+), 73 deletions(-) create mode 100644 gfx/wr/webrender/src/rectangle_occlusion.rs diff --git a/gfx/wr/webrender/src/device/gl.rs b/gfx/wr/webrender/src/device/gl.rs index 90a0ad4386d6..8fdf8e43d77b 100644 --- a/gfx/wr/webrender/src/device/gl.rs +++ b/gfx/wr/webrender/src/device/gl.rs @@ -2133,7 +2133,7 @@ impl Device { ) { let (fbo_id, rect, depth_available) = match target { DrawTarget::Default { rect, .. } => { - (self.default_draw_fbo, rect, true) + (self.default_draw_fbo, rect, false) } DrawTarget::Texture { dimensions, fbo_id, with_depth, .. 
} => { let rect = FramebufferIntRect::new( diff --git a/gfx/wr/webrender/src/lib.rs b/gfx/wr/webrender/src/lib.rs index 1b3f00e6024b..3b396bf99114 100644 --- a/gfx/wr/webrender/src/lib.rs +++ b/gfx/wr/webrender/src/lib.rs @@ -133,6 +133,7 @@ mod visibility; mod api_resources; mod image_tiling; mod image_source; +mod rectangle_occlusion; pub mod host_utils; /// diff --git a/gfx/wr/webrender/src/rectangle_occlusion.rs b/gfx/wr/webrender/src/rectangle_occlusion.rs new file mode 100644 index 000000000000..b01f335bef88 --- /dev/null +++ b/gfx/wr/webrender/src/rectangle_occlusion.rs @@ -0,0 +1,215 @@ +/* This Source Code Form is subject to the terms of the Mozilla Public + * License, v. 2.0. If a copy of the MPL was not distributed with this + * file, You can obtain one at http://mozilla.org/MPL/2.0/. */ + +//! A simple occlusion culling algorithm for axis-aligned rectangles. +//! +//! ## Output +//! +//! Occlusion culling results in two lists of rectangles: +//! +//! - The opaque list should be rendered first. None of its rectangles overlap so order doesn't matter +//! within the opaque pass. +//! - The non-opaque list (or alpha list) should be rendered in back-to-front order after the opaque pass. +//! +//! The output has minimal overdraw (no overdraw at all for opaque items and as little as possible for alpha ones). +//! +//! ## Algorithm overview +//! +//! The occlusion culling algorithm works in front-to-back order, accumulating rectangles in opaque and non-opaque lists. +//! Each time a rectangle is added, it is first tested against existing opaque rectangles and potentially split into visible +//! sub-rectangles, or even discarded completely. The front-to-back order ensures that once a rectangle is added it does not +//! have to be modified again, making the underlying data structure trivial (append-only). +//! +//! ## Splitting +//! +//! Partially visible rectangles are split into up to 4 visible sub-rectangles by each intersecting occluder. +//! +//! 
```ascii +//! +----------------------+ +----------------------+ +//! | rectangle | | | +//! | | | | +//! | +-----------+ | +--+-----------+-------+ +//! | |occluder | | --> | |\\\\\\\\\\\| | +//! | +-----------+ | +--+-----------+-------+ +//! | | | | +//! +----------------------+ +----------------------+ +//! ``` +//! +//! In the example above the rectangle is split into 4 visible parts with the central occluded part left out. +//! +//! This implementation favors longer horizontal bands instead of creating nine-patches to deal with the corners. +//! The advantage is that it produces fewer rectangles which is good for the performance of the algorithm and +//! for SWGL which likes long horizontal spans, however it would cause artifacts if the resulting rectangles +//! were to be drawn with a non-axis-aligned transformation. +//! +//! ## Performance +//! +//! The cost of the algorithm grows with the number of opaque rectangles as each new rectangle is tested against +//! all previously added opaque rectangles. +//! +//! Note that opaque rectangles can either be added as opaque or non-opaque. This means a trade-off between +//! overdraw and number of rectangles can be explored to adjust performance: Small opaque rectangles, especially +//! towards the front of the scene, could be added as non-opaque to avoid causing many splits while adding only +//! a small amount of overdraw. +//! +//! This implementation is intended to be used with a small number of (opaque) items. A similar implementation +//! could use a spatial acceleration structure for opaque rectangles to perform better with a large number of +//! occluders. +//! + +use euclid::point2; +use smallvec::SmallVec; +use api::units::*; + +#[derive(Copy, Clone, Debug, PartialEq)] +pub enum ItemSource { + Opaque(usize), + Alpha(usize), + Clear(usize), +} + +/// A visible part of a rectangle after occlusion culling. 
+#[derive(Debug, PartialEq)] +pub struct Item { + pub rectangle: DeviceBox2D, + pub src: ItemSource, +} + +/// A builder that applies occlusion culling with rectangles provided in front-to-back order. +pub struct FrontToBackBuilder { + opaque_items: Vec, + alpha_items: Vec, +} + +impl FrontToBackBuilder { + + /// Pre-allocating constructor. + pub fn with_capacity(opaque: usize, alpha: usize) -> Self { + FrontToBackBuilder { + opaque_items: Vec::with_capacity(opaque), + alpha_items: Vec::with_capacity(alpha), + } + } + + /// Add a rectangle, potentially splitting it and discarding the occluded parts if any. + /// + /// Returns true if the rectangle is at least partially visible. + pub fn add(&mut self, rect: &DeviceBox2D, is_opaque: bool, src: ItemSource) -> bool { + let mut fragments: SmallVec<[DeviceBox2D; 16]> = SmallVec::new(); + fragments.push(*rect); + + for item in &self.opaque_items { + if fragments.is_empty() { + break; + } + if item.rectangle.intersects(rect) { + apply_occluder(&item.rectangle, &mut fragments); + } + } + + let list = if is_opaque { + &mut self.opaque_items + } else { + &mut self.alpha_items + }; + + for rect in &fragments { + list.push(Item { + rectangle: *rect, + src, + }); + } + + !fragments.is_empty() + } + + /// Returns true if the provided rect is at least partially visible, without adding it. + pub fn test(&self, rect: &DeviceBox2D) -> bool { + let mut fragments: SmallVec<[DeviceBox2D; 16]> = SmallVec::new(); + fragments.push(*rect); + + for item in &self.opaque_items { + if item.rectangle.intersects(rect) { + apply_occluder(&item.rectangle, &mut fragments); + } + } + + !fragments.is_empty() + } + + /// The visible opaque rectangles (front-to-back order). + pub fn opaque_items(&self) -> &[Item] { + &self.opaque_items + } + + /// The visible non-opaque rectangles (front-to-back order). 
+ pub fn alpha_items(&self) -> &[Item] { + &self.alpha_items + } +} + + +// Subtract the occluder from every rect in the provided vector, which must not be empty. +fn apply_occluder(occluder: &DeviceBox2D, rects: &mut SmallVec<[DeviceBox2D; 16]>) { + // Iterate in reverse order so that we can push new rects at the back without + // visiting them. + let mut i = rects.len() - 1; + loop { + let r = rects[i]; + + if r.intersects(occluder) { + let top = r.min.y < occluder.min.y; + let bottom = r.max.y > occluder.max.y; + let left = r.min.x < occluder.min.x; + let right = r.max.x > occluder.max.x; + + if top { + rects.push(DeviceBox2D { + min: r.min, + max: point2(r.max.x, occluder.min.y), + }); + } + + if bottom { + rects.push(DeviceBox2D { + min: point2(r.min.x, occluder.max.y), + max: r.max, + }); + } + + if left { + let min_y = r.min.y.max(occluder.min.y); + let max_y = r.max.y.min(occluder.max.y); + rects.push(DeviceBox2D { + min: point2(r.min.x, min_y), + max: point2(occluder.min.x, max_y), + }); + } + + if right { + let min_y = r.min.y.max(occluder.min.y); + let max_y = r.max.y.min(occluder.max.y); + rects.push(DeviceBox2D { + min: point2(occluder.max.x, min_y), + max: point2(r.max.x, max_y), + }); + } + + // Remove the original rectangle, replacing it with + // one of the new ones we just added, or popping it + // if it is the last item. 
+ if i == rects.len() { + rects.pop(); + } else { + rects.swap_remove(i); + } + } + + if i == 0 { + break; + } + + i -= 1; + } +} diff --git a/gfx/wr/webrender/src/renderer/mod.rs b/gfx/wr/webrender/src/renderer/mod.rs index 946f9a7cf849..855a792a2712 100644 --- a/gfx/wr/webrender/src/renderer/mod.rs +++ b/gfx/wr/webrender/src/renderer/mod.rs @@ -54,7 +54,7 @@ use crate::render_api::{RenderApiSender, DebugCommand, ApiMsg, FrameMsg, MemoryR use crate::batch::{AlphaBatchContainer, BatchKind, BatchFeatures, BatchTextures, BrushBatchKind, ClipBatchList}; #[cfg(any(feature = "capture", feature = "replay"))] use crate::capture::{CaptureConfig, ExternalCaptureImage, PlainExternalImage}; -use crate::composite::{CompositeState, CompositeTileSurface, CompositeTile, ResolvedExternalSurface, CompositorSurfaceTransform}; +use crate::composite::{CompositeState, CompositeTileSurface, ResolvedExternalSurface, CompositorSurfaceTransform}; use crate::composite::{CompositorKind, Compositor, NativeTileId, CompositeFeatures, CompositeSurfaceFormat, ResolvedExternalSurfaceColorData}; use crate::composite::{CompositorConfig, NativeSurfaceOperationDetails, NativeSurfaceId, NativeSurfaceOperation}; use crate::c_str; @@ -95,6 +95,7 @@ use crate::texture_cache::{TextureCache, TextureCacheConfig}; use crate::tile_cache::PictureCacheDebugInfo; use crate::util::drain_filter; use crate::host_utils::{thread_started, thread_stopped}; +use crate::rectangle_occlusion as occlusion; use upload::{upload_to_texture_cache, UploadTexturePool}; use euclid::{rect, Transform3D, Scale, default}; @@ -3255,12 +3256,12 @@ impl Renderer { } /// Draw a list of tiles to the framebuffer - fn draw_tile_list<'a, I: Iterator>( + fn draw_tile_list<'a, I: Iterator>( &mut self, tiles_iter: I, + composite_state: &CompositeState, external_surfaces: &[ResolvedExternalSurface], projection: &default::Transform3D, - partial_present_mode: Option, stats: &mut RendererStats, ) { let mut current_shader_params = ( @@ -3285,29 
+3286,16 @@ impl Renderer { &mut self.renderer_errors ); - for tile in tiles_iter { - // Determine a clip rect to apply to this tile, depending on what - // the partial present mode is. - let partial_clip_rect = match partial_present_mode { - Some(PartialPresentMode::Single { dirty_rect }) => dirty_rect, - None => tile.rect, + for item in tiles_iter { + let tile = match item.src { + occ::ItemSource::Opaque(idx) => &composite_state.opaque_tiles[idx], + occ::ItemSource::Alpha(idx) => &composite_state.alpha_tiles[idx], + occ::ItemSource::Clear(..) => { + continue; + } }; - let clip_rect = match partial_clip_rect.intersection(&tile.clip_rect) { - Some(rect) => rect, - None => continue, - }; - - // Simple compositor needs the valid rect in device space to match clip rect - let valid_device_rect = tile.valid_rect.translate( - tile.rect.origin.to_vector() - ); - - // Only composite the part of the tile that contains valid pixels - let clip_rect = match clip_rect.intersection(&valid_device_rect) { - Some(rect) => rect, - None => continue, - }; + let clip_rect = item.rectangle.to_rect(); // Work out the draw params based on the tile surface let (instance, textures, shader_params) = match tile.surface { @@ -3327,22 +3315,6 @@ impl Renderer { (CompositeSurfaceFormat::Rgba, image_buffer_kind, features, None), ) } - CompositeTileSurface::Clear => { - let dummy = TextureSource::Dummy; - let image_buffer_kind = dummy.image_buffer_kind(); - let instance = CompositeInstance::new( - tile.rect, - clip_rect, - PremultipliedColorF::BLACK, - tile.z_id, - ); - let features = instance.get_rgb_features(); - ( - instance, - BatchTextures::composite_rgb(dummy), - (CompositeSurfaceFormat::Rgba, image_buffer_kind, features, None), - ) - } CompositeTileSurface::Texture { surface: ResolvedSurfaceTexture::TextureCache { texture } } => { let instance = CompositeInstance::new( tile.rect, @@ -3432,6 +3404,22 @@ impl Renderer { }, } } + CompositeTileSurface::Clear => { + let dummy = 
TextureSource::Dummy; + let image_buffer_kind = dummy.image_buffer_kind(); + let instance = CompositeInstance::new( + tile.rect, + clip_rect, + PremultipliedColorF::BLACK, + tile.z_id, + ); + let features = instance.get_rgb_features(); + ( + instance, + BatchTextures::composite_rgb(dummy), + (CompositeSurfaceFormat::Rgba, image_buffer_kind, features, None), + ) + } CompositeTileSurface::Texture { surface: ResolvedSurfaceTexture::Native { .. } } => { unreachable!("bug: found native surface in simple composite path"); } @@ -3500,8 +3488,8 @@ impl Renderer { let _timer = self.gpu_profiler.start_timer(GPU_TAG_COMPOSITE); self.device.bind_draw_target(draw_target); - self.device.enable_depth(DepthFunction::LessEqual); - self.device.enable_depth_write(); + self.device.disable_depth_write(); + self.device.disable_depth(); // If using KHR_partial_update, call eglSetDamageRegion. // This must be called exactly once per frame, and prior to any rendering to the main @@ -3514,20 +3502,76 @@ impl Renderer { } } + let cap = composite_state.opaque_tiles.len() + + composite_state.alpha_tiles.len() + + composite_state.clear_tiles.len(); + + let mut occlusion = occlusion::FrontToBackBuilder::with_capacity(cap, cap); + + let mut items = Vec::with_capacity(cap); + + // TODO: This will get simpler if we stop storing tiles in separate arrays. 
+ + for (idx, tile) in composite_state.opaque_tiles.iter().enumerate() { + items.push((tile.z_id.0, occ::ItemSource::Opaque(idx))); + } + for (idx, tile) in composite_state.alpha_tiles.iter().enumerate() { + items.push((tile.z_id.0, occ::ItemSource::Alpha(idx))); + } + for (idx, tile) in composite_state.clear_tiles.iter().enumerate() { + items.push((tile.z_id.0, occ::ItemSource::Clear(idx))); + } + + items.sort_by_key(|item| -item.0); + for &(_, src) in &items { + let tile = match src { + occ::ItemSource::Opaque(idx) => &composite_state.opaque_tiles[idx], + occ::ItemSource::Alpha(idx) => &composite_state.alpha_tiles[idx], + occ::ItemSource::Clear(idx) => &composite_state.clear_tiles[idx], + }; + + let is_opaque = !matches!(src, occ::ItemSource::Alpha(..)); + + // Determine a clip rect to apply to this tile, depending on what + // the partial present mode is. + let partial_clip_rect = match partial_present_mode { + Some(PartialPresentMode::Single { dirty_rect }) => dirty_rect.to_box2d(), + None => tile.rect.to_box2d(), + }; + + // Simple compositor needs the valid rect in device space to match clip rect + let valid_device_rect = tile.valid_rect.translate( + tile.rect.origin.to_vector() + ).to_box2d(); + + let rect = tile.rect.to_box2d() + .intersection_unchecked(&tile.clip_rect.to_box2d()) + .intersection_unchecked(&partial_clip_rect) + .intersection_unchecked(&valid_device_rect); + + if rect.is_empty() { + continue; + } + + occlusion.add(&rect, is_opaque, src); + } + // Clear the framebuffer let clear_color = self.clear_color.map(|color| color.to_array()); match partial_present_mode { Some(PartialPresentMode::Single { dirty_rect }) => { - // We have a single dirty rect, so clear only that - self.device.clear_target(clear_color, - Some(1.0), - Some(draw_target.to_framebuffer_rect(dirty_rect.to_i32()))); + if occlusion.test(&dirty_rect.to_box2d()) { + // We have a single dirty rect, so clear only that + self.device.clear_target(clear_color, + None, + 
Some(draw_target.to_framebuffer_rect(dirty_rect.to_i32()))); + } } None => { // Partial present is disabled, so clear the entire framebuffer self.device.clear_target(clear_color, - Some(1.0), + None, None); } } @@ -3538,48 +3582,29 @@ impl Renderer { + composite_state.alpha_tiles.len(); self.profile.set(profiler::PICTURE_TILES, num_tiles); - // Draw opaque tiles first, front-to-back to get maxmum - // z-reject efficiency. - if !composite_state.opaque_tiles.is_empty() { + if !occlusion.opaque_items().is_empty() { let opaque_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_OPAQUE); - self.device.enable_depth_write(); self.set_blend(false, FramebufferKind::Main); self.draw_tile_list( - composite_state.opaque_tiles.iter().rev(), + occlusion.opaque_items().iter(), + &composite_state, &composite_state.external_surfaces, projection, - partial_present_mode, &mut results.stats, ); self.gpu_profiler.finish_sampler(opaque_sampler); } - if !composite_state.clear_tiles.is_empty() { - let transparent_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT); - self.device.disable_depth_write(); - self.set_blend(true, FramebufferKind::Main); - self.device.set_blend_mode_premultiplied_dest_out(); - self.draw_tile_list( - composite_state.clear_tiles.iter(), - &composite_state.external_surfaces, - projection, - partial_present_mode, - &mut results.stats, - ); - self.gpu_profiler.finish_sampler(transparent_sampler); - } - // Draw alpha tiles - if !composite_state.alpha_tiles.is_empty() { + if !occlusion.alpha_items().is_empty() { let transparent_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT); - self.device.disable_depth_write(); self.set_blend(true, FramebufferKind::Main); self.set_blend_mode_premultiplied_alpha(FramebufferKind::Main); self.draw_tile_list( - composite_state.alpha_tiles.iter(), + occlusion.alpha_items().iter().rev(), + &composite_state, &composite_state.external_surfaces, projection, - partial_present_mode, &mut 
results.stats, ); self.gpu_profiler.finish_sampler(transparent_sampler); diff --git a/gfx/wr/webrender_api/src/units.rs b/gfx/wr/webrender_api/src/units.rs index 3b7f84c5a37d..5ec6a80e920e 100644 --- a/gfx/wr/webrender_api/src/units.rs +++ b/gfx/wr/webrender_api/src/units.rs @@ -32,6 +32,7 @@ pub type DeviceIntLength = Length; pub type DeviceIntSideOffsets = SideOffsets2D; pub type DeviceRect = Rect; +pub type DeviceBox2D = Box2D; pub type DevicePoint = Point2D; pub type DeviceVector2D = Vector2D; pub type DeviceSize = Size2D;