Bug 1696905 - CPU-side occlusion culling for picture cache tiles. r=gfx-reviewers,lsalzman,gw

This patch introduces a simple culling algorithm that splits compositor tiles into only their visible parts, removing the need for a depth buffer. This reduces the draw-call count as well as the memory usage and bandwidth associated with the depth buffer.

Differential Revision: https://phabricator.services.mozilla.com/D113532
This commit is contained in:
Nicolas Silva 2021-05-04 12:54:52 +00:00
Родитель 2e0745f710
Коммит 67286e28c2
5 изменённых файлов: 315 добавлений и 73 удалений

Просмотреть файл

@ -2133,7 +2133,7 @@ impl Device {
) {
let (fbo_id, rect, depth_available) = match target {
DrawTarget::Default { rect, .. } => {
(self.default_draw_fbo, rect, true)
(self.default_draw_fbo, rect, false)
}
DrawTarget::Texture { dimensions, fbo_id, with_depth, .. } => {
let rect = FramebufferIntRect::new(

Просмотреть файл

@ -133,6 +133,7 @@ mod visibility;
mod api_resources;
mod image_tiling;
mod image_source;
mod rectangle_occlusion;
pub mod host_utils;
///

Просмотреть файл

@ -0,0 +1,215 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
//! A simple occlusion culling algorithm for axis-aligned rectangles.
//!
//! ## Output
//!
//! Occlusion culling results in two lists of rectangles:
//!
//! - The opaque list should be rendered first. None of its rectangles overlap so order doesn't matter
//! within the opaque pass.
//! - The non-opaque list (or alpha list) which should be rendered in back-to-front order after the opaque pass.
//!
//! The output has minimal overdraw (no overdraw at all for opaque items and as little as possible for alpha ones).
//!
//! ## Algorithm overview
//!
//! The occlusion culling algorithm works in front-to-back order, accumulating rectangles in opaque and non-opaque lists.
//! Each time a rectangle is added, it is first tested against existing opaque rectangles and potentially split into visible
//! sub-rectangles, or even discarded completely. The front-to-back order ensures that once a rectangle is added it does not
//! have to be modified again, making the underlying data structure trivial (append-only).
//!
//! ## splitting
//!
//! Partially visible rectangles are split into up to 4 visible sub-rectangles by each intersecting occluder.
//!
//! ```ascii
//! +----------------------+ +----------------------+
//! | rectangle | | |
//! | | | |
//! | +-----------+ | +--+-----------+-------+
//! | |occluder | | --> | |\\\\\\\\\\\| |
//! | +-----------+ | +--+-----------+-------+
//! | | | |
//! +----------------------+ +----------------------+
//! ```
//!
//! In the example above the rectangle is split into 4 visible parts with the central occluded part left out.
//!
//! This implementation favors longer horizontal bands instead of creating nine-patches to deal with the corners.
//! The advantage is that it produces fewer rectangles which is good for the performance of the algorithm and
//! for SWGL which likes long horizontal spans, however it would cause artifacts if the resulting rectangles
//! were to be drawn with a non-axis-aligned transformation.
//!
//! ## Performance
//!
//! The cost of the algorithm grows with the number of opaque rectangles as each new rectangle is tested against
//! all previously added opaque rectangles.
//!
//! Note that opaque rectangles can either be added as opaque or non-opaque. This means a trade-off between
//! overdraw and number of rectangles can be explored to adjust performance: Small opaque rectangles, especially
//! towards the front of the scene, could be added as non-opaque to avoid causing many splits while adding only
//! a small amount of overdraw.
//!
//! This implementation is intended to be used with a small number of (opaque) items. A similar implementation
//! could use a spatial acceleration structure for opaque rectangles to perform better with a large amount of
//! occluders.
//!
use euclid::point2;
use smallvec::SmallVec;
use api::units::*;
/// Tag recording which kind of input a visible fragment was produced from.
///
/// The builder never interprets this value: it is copied unchanged into every
/// `Item` generated for the rectangle it was passed with. The `usize` payload is
/// chosen by the caller; the renderer uses it as an index into the matching tile
/// list of its composite state.
#[derive(Copy, Clone, Debug, PartialEq)]
pub enum ItemSource {
// Fragment of an opaque input.
Opaque(usize),
// Fragment of a non-opaque (alpha-blended) input.
Alpha(usize),
// Fragment of a "clear" input.
Clear(usize),
}
/// A visible part of a rectangle after occlusion culling.
///
/// One input rectangle can yield zero, one or several items, all sharing the
/// same `src`.
#[derive(Debug, PartialEq)]
pub struct Item {
// The area of the input rectangle left visible by the opaque items added before it.
pub rectangle: DeviceBox2D,
// The tag that was passed to `FrontToBackBuilder::add` together with the input rectangle.
pub src: ItemSource,
}
/// A builder that applies occlusion culling with rectangles provided in front-to-back order.
///
/// Both lists are append-only: fragments are pushed in the order rectangles are added
/// and are never modified afterwards.
pub struct FrontToBackBuilder {
// Visible fragments of rectangles added as opaque. These also act as the occluders
// that every subsequently added rectangle is tested against.
opaque_items: Vec<Item>,
// Visible fragments of rectangles added as non-opaque. They never occlude anything.
alpha_items: Vec<Item>,
}
impl FrontToBackBuilder {
    /// Pre-allocating constructor.
    ///
    /// `opaque` and `alpha` are the initial capacities of the opaque and non-opaque
    /// item lists respectively.
    pub fn with_capacity(opaque: usize, alpha: usize) -> Self {
        FrontToBackBuilder {
            opaque_items: Vec::with_capacity(opaque),
            alpha_items: Vec::with_capacity(alpha),
        }
    }

    /// Add a rectangle, potentially splitting it and discarding the occluded parts if any.
    ///
    /// Rectangles must be added in front-to-back order. The visible fragments are
    /// appended to the opaque list when `is_opaque` is true (they then occlude every
    /// rectangle added afterwards) and to the non-opaque list otherwise. Each fragment
    /// is tagged with `src`.
    ///
    /// Returns true if the rectangle is at least partially visible.
    pub fn add(&mut self, rect: &DeviceBox2D, is_opaque: bool, src: ItemSource) -> bool {
        let fragments = self.visible_fragments(rect);

        let list = if is_opaque {
            &mut self.opaque_items
        } else {
            &mut self.alpha_items
        };
        list.extend(fragments.iter().map(|fragment| Item {
            rectangle: *fragment,
            src,
        }));

        !fragments.is_empty()
    }

    /// Returns true if the provided rect is at least partially visible, without adding it.
    pub fn test(&self, rect: &DeviceBox2D) -> bool {
        !self.visible_fragments(rect).is_empty()
    }

    /// The visible opaque rectangles (front-to-back order).
    pub fn opaque_items(&self) -> &[Item] {
        &self.opaque_items
    }

    /// The visible non-opaque rectangles (front-to-back order).
    pub fn alpha_items(&self) -> &[Item] {
        &self.alpha_items
    }

    /// Computes the parts of `rect` that are not covered by the opaque items added so far.
    ///
    /// Shared by `add` and `test` so that both stop as soon as nothing is left visible:
    /// `apply_occluder` must not be handed an empty fragment list.
    fn visible_fragments(&self, rect: &DeviceBox2D) -> SmallVec<[DeviceBox2D; 16]> {
        let mut fragments: SmallVec<[DeviceBox2D; 16]> = SmallVec::new();
        fragments.push(*rect);

        for item in &self.opaque_items {
            if fragments.is_empty() {
                // Fully occluded already; later occluders cannot change the outcome.
                break;
            }
            // Cheap rejection against the whole input rectangle before looking at
            // the individual fragments.
            if item.rectangle.intersects(rect) {
                apply_occluder(&item.rectangle, &mut fragments);
            }
        }

        fragments
    }
}
/// Subtracts `occluder` from every rectangle in `rects`, in place.
///
/// Each rectangle that intersects `occluder` is removed and replaced by up to four
/// fragments covering the parts of it that lie outside the occluder:
///
/// - a top and a bottom band spanning the full width of the rectangle,
/// - a left and a right piece limited to the vertical extent shared with the occluder.
///
/// Rectangles that do not intersect `occluder` are left untouched. The order of the
/// rectangles in `rects` is not preserved. An empty `rects` is a no-op.
fn apply_occluder(occluder: &DeviceBox2D, rects: &mut SmallVec<[DeviceBox2D; 16]>) {
    // Walk the indices that exist on entry, back to front, so that fragments pushed
    // at the back are never visited. Iterating over a range (rather than starting
    // from `len() - 1`) makes an empty list a no-op instead of an integer underflow.
    for i in (0..rects.len()).rev() {
        let r = rects[i];
        if !r.intersects(occluder) {
            continue;
        }

        let top = r.min.y < occluder.min.y;
        let bottom = r.max.y > occluder.max.y;
        let left = r.min.x < occluder.min.x;
        let right = r.max.x > occluder.max.x;

        if top {
            rects.push(DeviceBox2D {
                min: r.min,
                max: point2(r.max.x, occluder.min.y),
            });
        }

        if bottom {
            rects.push(DeviceBox2D {
                min: point2(r.min.x, occluder.max.y),
                max: r.max,
            });
        }

        // The side pieces only cover the rows shared with the occluder; the rows
        // above and below are already covered by the full-width bands.
        let min_y = r.min.y.max(occluder.min.y);
        let max_y = r.max.y.min(occluder.max.y);

        if left {
            rects.push(DeviceBox2D {
                min: point2(r.min.x, min_y),
                max: point2(occluder.min.x, max_y),
            });
        }

        if right {
            rects.push(DeviceBox2D {
                min: point2(occluder.max.x, min_y),
                max: point2(r.max.x, max_y),
            });
        }

        // Remove the original rectangle. `swap_remove` moves the last element
        // (one of the fragments just pushed, or an already visited rectangle)
        // into its slot, and simply pops when `i` is the last index.
        rects.swap_remove(i);
    }
}

Просмотреть файл

@ -54,7 +54,7 @@ use crate::render_api::{RenderApiSender, DebugCommand, ApiMsg, FrameMsg, MemoryR
use crate::batch::{AlphaBatchContainer, BatchKind, BatchFeatures, BatchTextures, BrushBatchKind, ClipBatchList};
#[cfg(any(feature = "capture", feature = "replay"))]
use crate::capture::{CaptureConfig, ExternalCaptureImage, PlainExternalImage};
use crate::composite::{CompositeState, CompositeTileSurface, CompositeTile, ResolvedExternalSurface, CompositorSurfaceTransform};
use crate::composite::{CompositeState, CompositeTileSurface, ResolvedExternalSurface, CompositorSurfaceTransform};
use crate::composite::{CompositorKind, Compositor, NativeTileId, CompositeFeatures, CompositeSurfaceFormat, ResolvedExternalSurfaceColorData};
use crate::composite::{CompositorConfig, NativeSurfaceOperationDetails, NativeSurfaceId, NativeSurfaceOperation};
use crate::c_str;
@ -95,6 +95,7 @@ use crate::texture_cache::{TextureCache, TextureCacheConfig};
use crate::tile_cache::PictureCacheDebugInfo;
use crate::util::drain_filter;
use crate::host_utils::{thread_started, thread_stopped};
use crate::rectangle_occlusion as occlusion;
use upload::{upload_to_texture_cache, UploadTexturePool};
use euclid::{rect, Transform3D, Scale, default};
@ -3255,12 +3256,12 @@ impl Renderer {
}
/// Draw a list of tiles to the framebuffer
fn draw_tile_list<'a, I: Iterator<Item = &'a CompositeTile>>(
fn draw_tile_list<'a, I: Iterator<Item = &'a occlusion::Item>>(
&mut self,
tiles_iter: I,
composite_state: &CompositeState,
external_surfaces: &[ResolvedExternalSurface],
projection: &default::Transform3D<f32>,
partial_present_mode: Option<PartialPresentMode>,
stats: &mut RendererStats,
) {
let mut current_shader_params = (
@ -3285,29 +3286,16 @@ impl Renderer {
&mut self.renderer_errors
);
for tile in tiles_iter {
// Determine a clip rect to apply to this tile, depending on what
// the partial present mode is.
let partial_clip_rect = match partial_present_mode {
Some(PartialPresentMode::Single { dirty_rect }) => dirty_rect,
None => tile.rect,
for item in tiles_iter {
let tile = match item.src {
occ::ItemSource::Opaque(idx) => &composite_state.opaque_tiles[idx],
occ::ItemSource::Alpha(idx) => &composite_state.alpha_tiles[idx],
occ::ItemSource::Clear(..) => {
continue;
}
};
let clip_rect = match partial_clip_rect.intersection(&tile.clip_rect) {
Some(rect) => rect,
None => continue,
};
// Simple compositor needs the valid rect in device space to match clip rect
let valid_device_rect = tile.valid_rect.translate(
tile.rect.origin.to_vector()
);
// Only composite the part of the tile that contains valid pixels
let clip_rect = match clip_rect.intersection(&valid_device_rect) {
Some(rect) => rect,
None => continue,
};
let clip_rect = item.rectangle.to_rect();
// Work out the draw params based on the tile surface
let (instance, textures, shader_params) = match tile.surface {
@ -3327,22 +3315,6 @@ impl Renderer {
(CompositeSurfaceFormat::Rgba, image_buffer_kind, features, None),
)
}
CompositeTileSurface::Clear => {
let dummy = TextureSource::Dummy;
let image_buffer_kind = dummy.image_buffer_kind();
let instance = CompositeInstance::new(
tile.rect,
clip_rect,
PremultipliedColorF::BLACK,
tile.z_id,
);
let features = instance.get_rgb_features();
(
instance,
BatchTextures::composite_rgb(dummy),
(CompositeSurfaceFormat::Rgba, image_buffer_kind, features, None),
)
}
CompositeTileSurface::Texture { surface: ResolvedSurfaceTexture::TextureCache { texture } } => {
let instance = CompositeInstance::new(
tile.rect,
@ -3432,6 +3404,22 @@ impl Renderer {
},
}
}
CompositeTileSurface::Clear => {
let dummy = TextureSource::Dummy;
let image_buffer_kind = dummy.image_buffer_kind();
let instance = CompositeInstance::new(
tile.rect,
clip_rect,
PremultipliedColorF::BLACK,
tile.z_id,
);
let features = instance.get_rgb_features();
(
instance,
BatchTextures::composite_rgb(dummy),
(CompositeSurfaceFormat::Rgba, image_buffer_kind, features, None),
)
}
CompositeTileSurface::Texture { surface: ResolvedSurfaceTexture::Native { .. } } => {
unreachable!("bug: found native surface in simple composite path");
}
@ -3500,8 +3488,8 @@ impl Renderer {
let _timer = self.gpu_profiler.start_timer(GPU_TAG_COMPOSITE);
self.device.bind_draw_target(draw_target);
self.device.enable_depth(DepthFunction::LessEqual);
self.device.enable_depth_write();
self.device.disable_depth_write();
self.device.disable_depth();
// If using KHR_partial_update, call eglSetDamageRegion.
// This must be called exactly once per frame, and prior to any rendering to the main
@ -3514,20 +3502,76 @@ impl Renderer {
}
}
let cap = composite_state.opaque_tiles.len() +
composite_state.alpha_tiles.len() +
composite_state.clear_tiles.len();
let mut occlusion = occlusion::FrontToBackBuilder::with_capacity(cap, cap);
let mut items = Vec::with_capacity(cap);
// TODO: This will get simpler if we stop storing tiles in separate arrays.
for (idx, tile) in composite_state.opaque_tiles.iter().enumerate() {
items.push((tile.z_id.0, occ::ItemSource::Opaque(idx)));
}
for (idx, tile) in composite_state.alpha_tiles.iter().enumerate() {
items.push((tile.z_id.0, occ::ItemSource::Alpha(idx)));
}
for (idx, tile) in composite_state.clear_tiles.iter().enumerate() {
items.push((tile.z_id.0, occ::ItemSource::Clear(idx)));
}
items.sort_by_key(|item| -item.0);
for &(_, src) in &items {
let tile = match src {
occ::ItemSource::Opaque(idx) => &composite_state.opaque_tiles[idx],
occ::ItemSource::Alpha(idx) => &composite_state.alpha_tiles[idx],
occ::ItemSource::Clear(idx) => &composite_state.clear_tiles[idx],
};
let is_opaque = !matches!(src, occ::ItemSource::Alpha(..));
// Determine a clip rect to apply to this tile, depending on what
// the partial present mode is.
let partial_clip_rect = match partial_present_mode {
Some(PartialPresentMode::Single { dirty_rect }) => dirty_rect.to_box2d(),
None => tile.rect.to_box2d(),
};
// Simple compositor needs the valid rect in device space to match clip rect
let valid_device_rect = tile.valid_rect.translate(
tile.rect.origin.to_vector()
).to_box2d();
let rect = tile.rect.to_box2d()
.intersection_unchecked(&tile.clip_rect.to_box2d())
.intersection_unchecked(&partial_clip_rect)
.intersection_unchecked(&valid_device_rect);
if rect.is_empty() {
continue;
}
occlusion.add(&rect, is_opaque, src);
}
// Clear the framebuffer
let clear_color = self.clear_color.map(|color| color.to_array());
match partial_present_mode {
Some(PartialPresentMode::Single { dirty_rect }) => {
// We have a single dirty rect, so clear only that
self.device.clear_target(clear_color,
Some(1.0),
Some(draw_target.to_framebuffer_rect(dirty_rect.to_i32())));
if occlusion.test(&dirty_rect.to_box2d()) {
// We have a single dirty rect, so clear only that
self.device.clear_target(clear_color,
None,
Some(draw_target.to_framebuffer_rect(dirty_rect.to_i32())));
}
}
None => {
// Partial present is disabled, so clear the entire framebuffer
self.device.clear_target(clear_color,
Some(1.0),
None,
None);
}
}
@ -3538,48 +3582,29 @@ impl Renderer {
+ composite_state.alpha_tiles.len();
self.profile.set(profiler::PICTURE_TILES, num_tiles);
// Draw opaque tiles first, front-to-back to get maxmum
// z-reject efficiency.
if !composite_state.opaque_tiles.is_empty() {
if !occlusion.opaque_items().is_empty() {
let opaque_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_OPAQUE);
self.device.enable_depth_write();
self.set_blend(false, FramebufferKind::Main);
self.draw_tile_list(
composite_state.opaque_tiles.iter().rev(),
occlusion.opaque_items().iter(),
&composite_state,
&composite_state.external_surfaces,
projection,
partial_present_mode,
&mut results.stats,
);
self.gpu_profiler.finish_sampler(opaque_sampler);
}
if !composite_state.clear_tiles.is_empty() {
let transparent_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
self.device.disable_depth_write();
self.set_blend(true, FramebufferKind::Main);
self.device.set_blend_mode_premultiplied_dest_out();
self.draw_tile_list(
composite_state.clear_tiles.iter(),
&composite_state.external_surfaces,
projection,
partial_present_mode,
&mut results.stats,
);
self.gpu_profiler.finish_sampler(transparent_sampler);
}
// Draw alpha tiles
if !composite_state.alpha_tiles.is_empty() {
if !occlusion.alpha_items().is_empty() {
let transparent_sampler = self.gpu_profiler.start_sampler(GPU_SAMPLER_TAG_TRANSPARENT);
self.device.disable_depth_write();
self.set_blend(true, FramebufferKind::Main);
self.set_blend_mode_premultiplied_alpha(FramebufferKind::Main);
self.draw_tile_list(
composite_state.alpha_tiles.iter(),
occlusion.alpha_items().iter().rev(),
&composite_state,
&composite_state.external_surfaces,
projection,
partial_present_mode,
&mut results.stats,
);
self.gpu_profiler.finish_sampler(transparent_sampler);

Просмотреть файл

@ -32,6 +32,7 @@ pub type DeviceIntLength = Length<i32, DevicePixel>;
pub type DeviceIntSideOffsets = SideOffsets2D<i32, DevicePixel>;
pub type DeviceRect = Rect<f32, DevicePixel>;
pub type DeviceBox2D = Box2D<f32, DevicePixel>;
pub type DevicePoint = Point2D<f32, DevicePixel>;
pub type DeviceVector2D = Vector2D<f32, DevicePixel>;
pub type DeviceSize = Size2D<f32, DevicePixel>;