Bug 1516676 - Update webrender to commit 8b8ca774f555aeb8ef99743196593a192298dd60 (WR PR #3452). r=kats

https://github.com/servo/webrender/pull/3452

Differential Revision: https://phabricator.services.mozilla.com/D15494

--HG--
extra : moz-landing-system : lando
WR Updater Bot 2018-12-29 21:29:46 +00:00
Parent 17389dd116
Commit fa50bcc2e7
12 changed files with 355 additions and 158 deletions

View file

@@ -1 +1 @@
b4dfe9c4f98fdeca3814976cd075bde8ed409123
8b8ca774f555aeb8ef99743196593a192298dd60

View file

@@ -31,13 +31,28 @@ use profiler::GpuCacheProfileCounters;
use render_backend::FrameId;
use renderer::MAX_VERTEX_TEXTURE_WIDTH;
use std::{mem, u16, u32};
use std::num::NonZeroU32;
use std::ops::Add;
use std::os::raw::c_void;
use std::time::{Duration, Instant};
pub const GPU_CACHE_INITIAL_HEIGHT: i32 = 512;
/// At the time of this writing, Firefox uses about 15 GPU cache rows on
/// startup, and then gradually works its way up to the mid-30s with normal
/// browsing.
pub const GPU_CACHE_INITIAL_HEIGHT: i32 = 20;
const NEW_ROWS_PER_RESIZE: i32 = 10;
/// The number of frames an entry can go unused before being evicted.
const FRAMES_BEFORE_EVICTION: usize = 10;
const NEW_ROWS_PER_RESIZE: i32 = 512;
/// The ratio of utilized blocks to total blocks for which we start the clock
/// on reclaiming memory.
const RECLAIM_THRESHOLD: f32 = 0.2;
/// The amount of time utilization must be below the above threshold before we
/// blow away the cache and rebuild it.
const RECLAIM_DELAY_S: u64 = 5;
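
The two constants above implement a simple hysteresis: reclamation only fires once utilization has stayed below RECLAIM_THRESHOLD continuously for RECLAIM_DELAY_S seconds, and any busy frame resets the clock. A minimal standalone sketch of that logic (ReclaimClock is a name invented here for illustration; the patch stores the Option<Instant> directly on Texture as reached_reclaim_threshold):

use std::time::{Duration, Instant};

const RECLAIM_THRESHOLD: f32 = 0.2;
const RECLAIM_DELAY_S: u64 = 5;

struct ReclaimClock {
    // When utilization first dropped below the threshold, if it has
    // stayed below it since; None otherwise.
    reached_reclaim_threshold: Option<Instant>,
}

impl ReclaimClock {
    // Called once per frame with the current utilization ratio.
    fn update(&mut self, utilization: f32) {
        if utilization < RECLAIM_THRESHOLD {
            // Start the clock on the first low-utilization frame only.
            self.reached_reclaim_threshold.get_or_insert_with(Instant::now);
        } else {
            // Any frame at or above the threshold resets the clock.
            self.reached_reclaim_threshold = None;
        }
    }

    fn should_reclaim(&self) -> bool {
        self.reached_reclaim_threshold
            .map_or(false, |t| t.elapsed() > Duration::from_secs(RECLAIM_DELAY_S))
    }
}

fn main() {
    let mut clock = ReclaimClock { reached_reclaim_threshold: None };
    clock.update(0.1);                // below threshold: the clock starts...
    assert!(!clock.should_reclaim()); // ...but the 5s delay has not elapsed
    clock.update(0.5);                // utilization recovered: the clock resets
    assert!(!clock.should_reclaim());
}
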
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
#[cfg_attr(feature = "capture", derive(Serialize))]
@@ -131,7 +146,7 @@ impl GpuCacheHandle {
// A unique address in the GPU cache. These are uploaded
// as part of the primitive instances, to allow the vertex
// shader to fetch the specific data.
#[derive(Copy, Debug, Clone)]
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
pub struct GpuCacheAddress {
@@ -173,31 +188,67 @@ impl Add<usize> for GpuCacheAddress {
struct Block {
// The location in the cache of this block.
address: GpuCacheAddress,
// The current epoch (generation) of this block.
epoch: Epoch,
// Index of the next free block in the list it
// belongs to (either a free-list or the
// occupied list).
next: Option<BlockIndex>,
// The current epoch (generation) of this block.
epoch: Epoch,
// The last frame this block was referenced.
last_access_time: FrameId,
}
impl Block {
fn new(address: GpuCacheAddress, next: Option<BlockIndex>, frame_id: FrameId) -> Self {
fn new(
address: GpuCacheAddress,
next: Option<BlockIndex>,
frame_id: FrameId,
epoch: Epoch,
) -> Self {
Block {
address,
next,
last_access_time: frame_id,
epoch: Epoch(0),
epoch,
}
}
fn advance_epoch(&mut self, max_epoch: &mut Epoch) {
self.epoch.next();
if max_epoch.0 < self.epoch.0 {
max_epoch.0 = self.epoch.0;
}
}
/// An invalid dummy block, used to reserve index zero.
pub const INVALID: Block = Block {
address: GpuCacheAddress { u: 0, v: 0 },
epoch: Epoch(0),
next: None,
last_access_time: FrameId::INVALID,
};
}
/// Represents the index of a Block in the block array. We only create such
/// structs for blocks that represent the start of a chunk.
///
/// Because we use Option<BlockIndex> in a lot of places, we use a NonZeroU32
/// here and avoid ever using the index zero.
#[derive(Debug, Copy, Clone)]
#[cfg_attr(feature = "capture", derive(Serialize))]
#[cfg_attr(feature = "replay", derive(Deserialize))]
struct BlockIndex(usize);
struct BlockIndex(NonZeroU32);
impl BlockIndex {
fn new(idx: usize) -> Self {
debug_assert!(idx <= u32::MAX as usize);
BlockIndex(NonZeroU32::new(idx as u32).expect("Index zero forbidden"))
}
fn get(&self) -> usize {
self.0.get() as usize
}
}
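
Switching BlockIndex from usize to NonZeroU32 halves it on 64-bit and, more importantly, lets Option<BlockIndex> use zero as its None representation (Rust's niche optimization), so the Option costs nothing extra. A standalone demonstration (the struct names here are hypothetical, not from the patch):

use std::mem::size_of;
use std::num::NonZeroU32;

#[allow(dead_code)]
struct PlainIndex(u32);
#[allow(dead_code)]
struct NicheIndex(NonZeroU32);

fn main() {
    // With a plain u32 the Option needs a separate discriminant,
    // padding the pair out to 8 bytes on typical layouts.
    assert_eq!(size_of::<Option<PlainIndex>>(), 8);
    // NonZeroU32 guarantees 0 never occurs, so the compiler encodes
    // None as 0 and the Option is exactly as big as the index.
    assert_eq!(size_of::<Option<NicheIndex>>(), 4);
}
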
// A row in the cache texture.
#[cfg_attr(feature = "capture", derive(Serialize))]
@@ -232,10 +283,19 @@ pub enum GpuCacheUpdate {
},
}
pub struct GpuDebugChunk {
/// Command to inform the debug display in the renderer when chunks are allocated
/// or freed.
pub enum GpuCacheDebugCmd {
/// Describes an allocated chunk.
Alloc(GpuCacheDebugChunk),
/// Describes a freed chunk.
Free(GpuCacheAddress),
}
#[derive(Clone)]
pub struct GpuCacheDebugChunk {
pub address: GpuCacheAddress,
pub tag: u8,
pub size: u16,
pub size: usize,
}
#[must_use]
@@ -254,7 +314,7 @@ pub struct GpuCacheUpdateList {
pub blocks: Vec<GpuBlockData>,
/// Whole state GPU block metadata for debugging.
#[cfg_attr(feature = "serde", serde(skip))]
pub debug_chunks: Vec<GpuDebugChunk>,
pub debug_commands: Vec<GpuCacheDebugCmd>,
}
// Holds the free lists of fixed size blocks. Mostly
@@ -270,7 +330,10 @@ struct FreeBlockLists {
free_list_32: Option<BlockIndex>,
free_list_64: Option<BlockIndex>,
free_list_128: Option<BlockIndex>,
free_list_large: Option<BlockIndex>,
free_list_256: Option<BlockIndex>,
free_list_341: Option<BlockIndex>,
free_list_512: Option<BlockIndex>,
free_list_1024: Option<BlockIndex>,
}
impl FreeBlockLists {
@@ -284,7 +347,10 @@ impl FreeBlockLists {
free_list_32: None,
free_list_64: None,
free_list_128: None,
free_list_large: None,
free_list_256: None,
free_list_341: None,
free_list_512: None,
free_list_1024: None,
}
}
@@ -292,8 +358,14 @@ impl FreeBlockLists {
&mut self,
block_count: usize,
) -> (usize, &mut Option<BlockIndex>) {
// Find the appropriate free list to use
// based on the block size.
// Find the appropriate free list to use based on the block size.
//
// Note that we cheat a bit with the 341 bucket, since it's not quite
// a divisor of 1024, because purecss-francine allocates many 260-block
// chunks, and there's no reason we shouldn't pack these three to a row.
// This means the allocation statistics will under-report by one block
// for each row using 341-block buckets, which is fine.
debug_assert_eq!(MAX_VERTEX_TEXTURE_WIDTH, 1024, "Need to update bucketing");
match block_count {
0 => panic!("Can't allocate zero sized blocks!"),
1 => (1, &mut self.free_list_1),
@@ -304,7 +376,10 @@ impl FreeBlockLists {
17...32 => (32, &mut self.free_list_32),
33...64 => (64, &mut self.free_list_64),
65...128 => (128, &mut self.free_list_128),
129...MAX_VERTEX_TEXTURE_WIDTH => (MAX_VERTEX_TEXTURE_WIDTH, &mut self.free_list_large),
129...256 => (256, &mut self.free_list_256),
257...341 => (341, &mut self.free_list_341),
342...512 => (512, &mut self.free_list_512),
513...1024 => (1024, &mut self.free_list_1024),
_ => panic!("Can't allocate > MAX_VERTEX_TEXTURE_WIDTH per resource!"),
}
}
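
The bucket sizes above can be sanity-checked with a little arithmetic (a standalone check, not part of the patch): every bucket except 341 divides the 1024-block row evenly, while the 341 bucket packs three items per row with a single block of slack, which is exactly the one-block under-report the comment mentions.

fn main() {
    const ROW_WIDTH: usize = 1024; // MAX_VERTEX_TEXTURE_WIDTH
    // All power-of-two buckets tile a row exactly.
    for &bucket in [1usize, 2, 4, 8, 16, 32, 64, 128, 256, 512, 1024].iter() {
        assert_eq!(ROW_WIDTH % bucket, 0);
    }
    // The 341 bucket is the deliberate exception: three items per row,
    // wasting one block (and fitting purecss-francine's 260-block chunks).
    assert_eq!(341 * 3, 1023);
    assert_eq!(ROW_WIDTH - 341 * 3, 1);
}
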
@@ -320,6 +395,12 @@ struct Texture {
blocks: Vec<Block>,
// Metadata about each allocated row.
rows: Vec<Row>,
// The base Epoch for this texture.
base_epoch: Epoch,
// The maximum epoch reached. We track this along with the above so
// that we can rebuild the Texture and avoid collisions with handles
// allocated for the old texture.
max_epoch: Epoch,
// Free lists of available blocks for each supported
// block size in the texture. These are intrusive
// linked lists.
@@ -335,19 +416,39 @@ struct Texture {
updates: Vec<GpuCacheUpdate>,
// Profile stats
allocated_block_count: usize,
// The stamp at which we first reached our threshold for reclaiming `GpuCache`
// memory, or `None` if the threshold hasn't been reached.
#[cfg_attr(feature = "serde", serde(skip))]
reached_reclaim_threshold: Option<Instant>,
// List of debug commands to be sent to the renderer when the GPU cache
// debug display is enabled.
#[cfg_attr(feature = "serde", serde(skip))]
debug_commands: Vec<GpuCacheDebugCmd>,
// The current debug flags for the system.
debug_flags: DebugFlags,
}
impl Texture {
fn new() -> Self {
fn new(base_epoch: Epoch, debug_flags: DebugFlags) -> Self {
// Pre-fill the block array with one invalid block so that we never use
// 0 for a BlockIndex. This lets us use NonZeroU32 for BlockIndex, which
// saves memory.
let blocks = vec![Block::INVALID];
Texture {
height: GPU_CACHE_INITIAL_HEIGHT,
blocks: Vec::new(),
blocks,
rows: Vec::new(),
base_epoch,
max_epoch: base_epoch,
free_lists: FreeBlockLists::new(),
pending_blocks: Vec::new(),
updates: Vec::new(),
occupied_list_head: None,
allocated_block_count: 0,
reached_reclaim_threshold: None,
debug_commands: Vec::new(),
debug_flags,
}
}
@@ -393,8 +494,8 @@ impl Texture {
let mut prev_block_index = None;
for i in 0 .. items_per_row {
let address = GpuCacheAddress::new(i * alloc_size, row_index);
let block_index = BlockIndex(self.blocks.len());
let block = Block::new(address, prev_block_index, frame_id);
let block_index = BlockIndex::new(self.blocks.len());
let block = Block::new(address, prev_block_index, frame_id, self.base_epoch);
self.blocks.push(block);
prev_block_index = Some(block_index);
}
@@ -406,7 +507,7 @@ impl Texture {
// available in the appropriate free-list. Pull a block from the
// head of the list.
let free_block_index = free_list.take().unwrap();
let block = &mut self.blocks[free_block_index.0 as usize];
let block = &mut self.blocks[free_block_index.get()];
*free_list = block.next;
// Add the block to the occupied linked list.
@@ -425,6 +526,18 @@ impl Texture {
});
}
// If we're using the debug display, communicate the allocation to the
// renderer thread. Note that we do this regardless of whether or not
// pending_block_index is None (if it is, the renderer thread will fill
// in the data via a deferred resolve, but the block is still considered
// allocated).
if self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) {
self.debug_commands.push(GpuCacheDebugCmd::Alloc(GpuCacheDebugChunk {
address: block.address,
size: block_count,
}));
}
CacheLocation {
block_index: free_block_index,
epoch: block.epoch,
@@ -442,7 +555,7 @@ impl Texture {
while let Some(index) = current_block {
let (next_block, should_unlink) = {
let block = &mut self.blocks[index.0 as usize];
let block = &mut self.blocks[index.get()];
let next_block = block.next;
let mut should_unlink = false;
@@ -461,11 +574,16 @@ impl Texture {
let (_, free_list) = self.free_lists
.get_actual_block_count_and_free_list(row.block_count_per_item);
block.epoch.next();
block.advance_epoch(&mut self.max_epoch);
block.next = *free_list;
*free_list = Some(index);
self.allocated_block_count -= row.block_count_per_item;
if self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) {
let cmd = GpuCacheDebugCmd::Free(block.address);
self.debug_commands.push(cmd);
}
};
(next_block, should_unlink)
@@ -476,7 +594,7 @@ impl Texture {
if should_unlink {
match prev_block {
Some(prev_block) => {
self.blocks[prev_block.0 as usize].next = next_block;
self.blocks[prev_block.get()].next = next_block;
}
None => {
self.occupied_list_head = next_block;
@@ -489,6 +607,15 @@ impl Texture {
current_block = next_block;
}
}
/// Returns the ratio of utilized blocks.
fn utilization(&self) -> f32 {
let total_blocks = self.rows.len() * MAX_VERTEX_TEXTURE_WIDTH;
debug_assert!(total_blocks > 0);
let ratio = self.allocated_block_count as f32 / total_blocks as f32;
debug_assert!(0.0 <= ratio && ratio <= 1.0, "Bad ratio: {}", ratio);
ratio
}
}
@@ -546,14 +673,25 @@ pub struct GpuCache {
impl GpuCache {
pub fn new() -> Self {
let debug_flags = DebugFlags::empty();
GpuCache {
frame_id: FrameId::INVALID,
texture: Texture::new(),
texture: Texture::new(Epoch(0), debug_flags),
saved_block_count: 0,
debug_flags: DebugFlags::empty(),
debug_flags,
}
}
/// Drops everything in the GPU cache. Paired by the caller with a message
/// to the renderer thread telling it to do the same.
pub fn clear(&mut self) {
assert!(self.texture.updates.is_empty(), "Clearing with pending updates");
let mut next_base_epoch = self.texture.max_epoch;
next_base_epoch.next();
self.texture = Texture::new(next_base_epoch, self.debug_flags);
self.saved_block_count = 0;
}
/// Begin a new frame.
pub fn begin_frame(&mut self, frame_id: FrameId) {
debug_assert!(self.texture.pending_blocks.is_empty());
@@ -567,29 +705,31 @@ impl GpuCache {
// will rebuild the data and upload it to the GPU.
pub fn invalidate(&mut self, handle: &GpuCacheHandle) {
if let Some(ref location) = handle.location {
let block = &mut self.texture.blocks[location.block_index.0];
// don't invalidate blocks that are already re-assigned
if block.epoch == location.epoch {
block.epoch.next();
if let Some(block) = self.texture.blocks.get_mut(location.block_index.get()) {
if block.epoch == location.epoch {
block.advance_epoch(&mut self.texture.max_epoch);
}
}
}
}
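
The epoch scheme that makes this work: a handle is valid only while its recorded epoch matches the block's current epoch, so bumping the block's epoch invalidates every outstanding handle at once. A simplified sketch (stand-in types, not the real structs) that also shows why the patch switches from direct indexing to get_mut(): after a cache rebuild, a stale handle may point past the end of the new, shorter block array.

#[derive(Clone, Copy, PartialEq, Eq)]
struct Epoch(u32);

struct Block {
    epoch: Epoch,
}

#[derive(Clone, Copy)]
struct Handle {
    block: usize,
    epoch: Epoch,
}

fn is_valid(blocks: &[Block], h: Handle) -> bool {
    // get() rather than indexing: a stale handle must not panic.
    blocks.get(h.block).map_or(false, |b| b.epoch == h.epoch)
}

fn main() {
    let mut blocks = vec![Block { epoch: Epoch(0) }];
    let handle = Handle { block: 0, epoch: blocks[0].epoch };
    assert!(is_valid(&blocks, handle));

    // Invalidation just bumps the epoch; the handle silently goes stale.
    blocks[0].epoch.0 += 1;
    assert!(!is_valid(&blocks, handle));

    // After a rebuild the array may be shorter than the handle's index.
    blocks.clear();
    assert!(!is_valid(&blocks, handle));
}
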
// Request a resource be added to the cache. If the resource
/// Request a resource be added to the cache. If the resource
/// is already in the cache, `None` will be returned.
pub fn request<'a>(&'a mut self, handle: &'a mut GpuCacheHandle) -> Option<GpuDataRequest<'a>> {
let mut max_block_count = MAX_VERTEX_TEXTURE_WIDTH;
// Check if the allocation for this handle is still valid.
if let Some(ref location) = handle.location {
let block = &mut self.texture.blocks[location.block_index.0];
max_block_count = self.texture.rows[block.address.v as usize].block_count_per_item;
if block.epoch == location.epoch {
if block.last_access_time != self.frame_id {
// Mark last access time to avoid evicting this block.
block.last_access_time = self.frame_id;
self.saved_block_count += max_block_count;
if let Some(block) = self.texture.blocks.get_mut(location.block_index.get()) {
if block.epoch == location.epoch {
max_block_count = self.texture.rows[block.address.v as usize].block_count_per_item;
if block.last_access_time != self.frame_id {
// Mark last access time to avoid evicting this block.
block.last_access_time = self.frame_id;
self.saved_block_count += max_block_count;
}
return None;
}
return None;
}
}
@@ -631,7 +771,7 @@ impl GpuCache {
/// End the frame. Return the list of updates to apply to the
/// device specific cache texture.
pub fn end_frame(
&self,
&mut self,
profile_counters: &mut GpuCacheProfileCounters,
) -> FrameId {
profile_counters
@@ -643,28 +783,32 @@ impl GpuCache {
profile_counters
.saved_blocks
.set(self.saved_block_count);
let reached_threshold =
self.texture.rows.len() > (GPU_CACHE_INITIAL_HEIGHT as usize) &&
self.texture.utilization() < RECLAIM_THRESHOLD;
if reached_threshold {
self.texture.reached_reclaim_threshold.get_or_insert_with(Instant::now);
} else {
self.texture.reached_reclaim_threshold = None;
}
self.frame_id
}
/// Returns true if utilization has been low enough for long enough that we
/// should blow the cache away and rebuild it.
pub fn should_reclaim_memory(&self) -> bool {
self.texture.reached_reclaim_threshold
.map_or(false, |t| t.elapsed() > Duration::from_secs(RECLAIM_DELAY_S))
}
/// Extract the pending updates from the cache.
pub fn extract_updates(&mut self) -> GpuCacheUpdateList {
GpuCacheUpdateList {
frame_id: self.frame_id,
height: self.texture.height,
debug_chunks: if self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) {
self.texture.updates
.iter()
.map(|update| match *update {
GpuCacheUpdate::Copy { address, block_index: _, block_count } => GpuDebugChunk {
address,
tag: 0, //TODO
size: block_count.min(0xFFFF) as u16,
}
})
.collect()
} else {
Vec::new()
},
debug_commands: mem::replace(&mut self.texture.debug_commands, Vec::new()),
updates: mem::replace(&mut self.texture.updates, Vec::new()),
blocks: mem::replace(&mut self.texture.pending_blocks, Vec::new()),
}
@@ -673,6 +817,7 @@ impl GpuCache {
/// Sets the current debug flags for the system.
pub fn set_debug_flags(&mut self, flags: DebugFlags) {
self.debug_flags = flags;
self.texture.debug_flags = flags;
}
/// Get the actual GPU address in the texture for a given slot ID.
@@ -681,7 +826,7 @@ impl GpuCache {
/// freed or pending slot will panic!
pub fn get_address(&self, id: &GpuCacheHandle) -> GpuCacheAddress {
let location = id.location.expect("handle not requested or allocated!");
let block = &self.texture.blocks[location.block_index.0];
let block = &self.texture.blocks[location.block_index.get()];
debug_assert_eq!(block.epoch, location.epoch);
debug_assert_eq!(block.last_access_time, self.frame_id);
block.address
@@ -692,3 +837,12 @@ impl GpuCache {
self.texture.malloc_size_of(op)
}
}
#[test]
#[cfg(target_pointer_width = "64")]
fn test_struct_sizes() {
use std::mem;
// We can end up with a lot of blocks stored in the global vec, and keeping
// them small helps reduce memory overhead.
assert_eq!(mem::size_of::<Block>(), 24, "Block size changed");
}

View file

@@ -295,6 +295,7 @@ pub enum ResultMsg {
DebugOutput(DebugOutput),
RefreshShader(PathBuf),
UpdateGpuCache(GpuCacheUpdateList),
ClearGpuCache,
UpdateResources {
updates: TextureUpdateList,
memory_pressure: bool,

View file

@@ -374,7 +374,7 @@ impl IsVisible for ImageBorder {
}
#[test]
#[cfg(target_os = "linux")]
#[cfg(target_pointer_width = "64")]
fn test_struct_sizes() {
use std::mem;
// The sizes of these structures are critical for performance on a number of
@@ -384,9 +384,9 @@ fn test_struct_sizes() {
// (b) You made a structure larger. This is not necessarily a problem, but should only
// be done with care, and after checking if talos performance regresses badly.
assert_eq!(mem::size_of::<NormalBorderPrim>(), 84, "NormalBorderPrim size changed");
assert_eq!(mem::size_of::<NormalBorderTemplate>(), 240, "NormalBorderTemplate size changed");
assert_eq!(mem::size_of::<NormalBorderTemplate>(), 224, "NormalBorderTemplate size changed");
assert_eq!(mem::size_of::<NormalBorderKey>(), 112, "NormalBorderKey size changed");
assert_eq!(mem::size_of::<ImageBorder>(), 92, "ImageBorder size changed");
assert_eq!(mem::size_of::<ImageBorderTemplate>(), 104, "ImageBorderTemplate size changed");
assert_eq!(mem::size_of::<ImageBorderTemplate>(), 88, "ImageBorderTemplate size changed");
assert_eq!(mem::size_of::<ImageBorderKey>(), 120, "ImageBorderKey size changed");
}

View file

@@ -714,7 +714,7 @@ impl GradientGpuBlockBuilder {
}
#[test]
#[cfg(target_os = "linux")]
#[cfg(target_pointer_width = "64")]
fn test_struct_sizes() {
use std::mem;
// The sizes of these structures are critical for performance on a number of
@@ -724,10 +724,10 @@ fn test_struct_sizes() {
// (b) You made a structure larger. This is not necessarily a problem, but should only
// be done with care, and after checking if talos performance regresses badly.
assert_eq!(mem::size_of::<LinearGradient>(), 72, "LinearGradient size changed");
assert_eq!(mem::size_of::<LinearGradientTemplate>(), 168, "LinearGradientTemplate size changed");
assert_eq!(mem::size_of::<LinearGradientTemplate>(), 128, "LinearGradientTemplate size changed");
assert_eq!(mem::size_of::<LinearGradientKey>(), 96, "LinearGradientKey size changed");
assert_eq!(mem::size_of::<RadialGradient>(), 72, "RadialGradient size changed");
assert_eq!(mem::size_of::<RadialGradientTemplate>(), 168, "RadialGradientTemplate size changed");
assert_eq!(mem::size_of::<RadialGradientTemplate>(), 136, "RadialGradientTemplate size changed");
assert_eq!(mem::size_of::<RadialGradientKey>(), 104, "RadialGradientKey size changed");
}

View file

@@ -549,7 +549,7 @@ impl IsVisible for YuvImage {
}
#[test]
#[cfg(target_os = "linux")]
#[cfg(target_pointer_width = "64")]
fn test_struct_sizes() {
use std::mem;
// The sizes of these structures are critical for performance on a number of
@@ -559,9 +559,9 @@ fn test_struct_sizes() {
// (b) You made a structure larger. This is not necessarily a problem, but should only
// be done with care, and after checking if talos performance regresses badly.
assert_eq!(mem::size_of::<Image>(), 56, "Image size changed");
assert_eq!(mem::size_of::<ImageTemplate>(), 144, "ImageTemplate size changed");
assert_eq!(mem::size_of::<ImageTemplate>(), 124, "ImageTemplate size changed");
assert_eq!(mem::size_of::<ImageKey>(), 84, "ImageKey size changed");
assert_eq!(mem::size_of::<YuvImage>(), 36, "YuvImage size changed");
assert_eq!(mem::size_of::<YuvImageTemplate>(), 96, "YuvImageTemplate size changed");
assert_eq!(mem::size_of::<YuvImageTemplate>(), 72, "YuvImageTemplate size changed");
assert_eq!(mem::size_of::<YuvImageKey>(), 64, "YuvImageKey size changed");
}

View file

@@ -180,7 +180,7 @@ impl IsVisible for LineDecoration {
}
#[test]
#[cfg(target_os = "linux")]
#[cfg(target_pointer_width = "64")]
fn test_struct_sizes() {
use std::mem;
// The sizes of these structures are critical for performance on a number of
@@ -190,6 +190,6 @@ fn test_struct_sizes() {
// (b) You made a structure larger. This is not necessarily a problem, but should only
// be done with care, and after checking if talos performance regresses badly.
assert_eq!(mem::size_of::<LineDecoration>(), 20, "LineDecoration size changed");
assert_eq!(mem::size_of::<LineDecorationTemplate>(), 88, "LineDecorationTemplate size changed");
assert_eq!(mem::size_of::<LineDecorationTemplate>(), 68, "LineDecorationTemplate size changed");
assert_eq!(mem::size_of::<LineDecorationKey>(), 48, "LineDecorationKey size changed");
}

View file

@@ -3311,7 +3311,7 @@ fn update_opacity_binding(
}
#[test]
#[cfg(target_os = "linux")]
#[cfg(target_pointer_width = "64")]
fn test_struct_sizes() {
use std::mem;
// The sizes of these structures are critical for performance on a number of
@@ -3322,7 +3322,7 @@ fn test_struct_sizes() {
// be done with care, and after checking if talos performance regresses badly.
assert_eq!(mem::size_of::<PrimitiveInstance>(), 120, "PrimitiveInstance size changed");
assert_eq!(mem::size_of::<PrimitiveInstanceKind>(), 40, "PrimitiveInstanceKind size changed");
assert_eq!(mem::size_of::<PrimitiveTemplate>(), 80, "PrimitiveTemplate size changed");
assert_eq!(mem::size_of::<PrimitiveTemplate>(), 56, "PrimitiveTemplate size changed");
assert_eq!(mem::size_of::<PrimitiveTemplateKind>(), 20, "PrimitiveTemplateKind size changed");
assert_eq!(mem::size_of::<PrimitiveKey>(), 36, "PrimitiveKey size changed");
assert_eq!(mem::size_of::<PrimitiveKeyKind>(), 5, "PrimitiveKeyKind size changed");

View file

@@ -224,7 +224,7 @@ impl IsVisible for Picture {
}
#[test]
#[cfg(target_os = "linux")]
#[cfg(target_pointer_width = "64")]
fn test_struct_sizes() {
use std::mem;
// The sizes of these structures are critical for performance on a number of
@@ -234,6 +234,6 @@ fn test_struct_sizes() {
// (b) You made a structure larger. This is not necessarily a problem, but should only
// be done with care, and after checking if talos performance regresses badly.
assert_eq!(mem::size_of::<Picture>(), 84, "Picture size changed");
assert_eq!(mem::size_of::<PictureTemplate>(), 56, "PictureTemplate size changed");
assert_eq!(mem::size_of::<PictureTemplate>(), 36, "PictureTemplate size changed");
assert_eq!(mem::size_of::<PictureKey>(), 112, "PictureKey size changed");
}

View file

@@ -328,6 +328,7 @@ impl TextRunPrimitive {
}
}
/// These are linux only because FontInstancePlatformOptions varies in size by platform.
#[test]
#[cfg(target_os = "linux")]
fn test_struct_sizes() {
@@ -339,7 +340,7 @@ fn test_struct_sizes() {
// (b) You made a structure larger. This is not necessarily a problem, but should only
// be done with care, and after checking if talos performance regresses badly.
assert_eq!(mem::size_of::<TextRun>(), 112, "TextRun size changed");
assert_eq!(mem::size_of::<TextRunTemplate>(), 160, "TextRunTemplate size changed");
assert_eq!(mem::size_of::<TextRunTemplate>(), 144, "TextRunTemplate size changed");
assert_eq!(mem::size_of::<TextRunKey>(), 136, "TextRunKey size changed");
assert_eq!(mem::size_of::<TextRunPrimitive>(), 88, "TextRunPrimitive size changed");
}

View file

@@ -8,7 +8,7 @@
//! See the comment at the top of the `renderer` module for a description of
//! how these two pieces interact.
use api::{ApiMsg, BuiltDisplayList, ClearCache, DebugCommand};
use api::{ApiMsg, BuiltDisplayList, ClearCache, DebugCommand, DebugFlags};
#[cfg(feature = "debugger")]
use api::{BuiltDisplayListIter, SpecificDisplayItem};
use api::{DevicePixelScale, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
@@ -659,6 +659,7 @@ pub struct RenderBackend {
recorder: Option<Box<ApiRecordingReceiver>>,
sampler: Option<Box<AsyncPropertySampler + Send>>,
size_of_op: Option<VoidPtrToSizeFn>,
debug_flags: DebugFlags,
namespace_alloc_by_client: bool,
}
@@ -677,6 +678,7 @@ impl RenderBackend {
recorder: Option<Box<ApiRecordingReceiver>>,
sampler: Option<Box<AsyncPropertySampler + Send>>,
size_of_op: Option<VoidPtrToSizeFn>,
debug_flags: DebugFlags,
namespace_alloc_by_client: bool,
) -> RenderBackend {
RenderBackend {
@@ -696,6 +698,7 @@ impl RenderBackend {
recorder,
sampler,
size_of_op,
debug_flags,
namespace_alloc_by_client,
}
}
@@ -1005,6 +1008,8 @@ impl RenderBackend {
// recently used resources.
self.resource_cache.clear(ClearCache::all());
self.clear_gpu_cache();
let pending_update = self.resource_cache.pending_updates();
let msg = ResultMsg::UpdateResources {
updates: pending_update,
@@ -1104,6 +1109,22 @@ impl RenderBackend {
DebugCommand::SetFlags(flags) => {
self.resource_cache.set_debug_flags(flags);
self.gpu_cache.set_debug_flags(flags);
// If we're toggling on the GPU cache debug display, we
// need to blow away the cache. This is because we only
// send allocation/free notifications to the renderer
// thread when the debug display is enabled, and thus
// enabling it when the cache is partially populated will
// give the renderer an incomplete view of the world.
// And since we might as well drop all the debugging state
// from the renderer when we disable the debug display,
// we just clear the cache on toggle.
let changed = self.debug_flags ^ flags;
if changed.contains(DebugFlags::GPU_CACHE_DBG) {
self.clear_gpu_cache();
}
self.debug_flags = flags;
ResultMsg::DebugCommand(option)
}
_ => ResultMsg::DebugCommand(option),
@@ -1156,6 +1177,13 @@ impl RenderBackend {
&mut profile_counters.resources,
);
// If utilization has stayed below the reclaim threshold for long enough,
// drop the GPU cache and rebuild it. This needs to be done before any
// updates for this frame are made.
if self.gpu_cache.should_reclaim_memory() {
self.clear_gpu_cache();
}
for scene_msg in transaction_msg.scene_ops.drain(..) {
let _timer = profile_counters.total_time.timer();
self.process_scene_msg(
@@ -1520,6 +1548,13 @@ impl RenderBackend {
// thread waiting on the request.
self.scene_tx.send(SceneBuilderRequest::ReportMemory(report, tx)).unwrap();
}
/// Drops everything in the GPU cache. Must not be called once gpu cache entries
/// for the next frame have already been requested.
fn clear_gpu_cache(&mut self) {
self.gpu_cache.clear();
self.result_tx.send(ResultMsg::ClearGpuCache).unwrap();
}
}
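
The clear protocol spans two threads: the backend drops its CPU-side GpuCache state and sends ResultMsg::ClearGpuCache, while the renderer merely records the request and rebuilds its GpuCacheTexture lazily the next time it prepares the GPU cache. A minimal sketch of that handshake (simplified types; the real messages and structs carry much more):

use std::sync::mpsc;

enum ResultMsg {
    ClearGpuCache,
}

struct Renderer {
    pending_gpu_cache_clear: bool,
}

impl Renderer {
    fn handle(&mut self, msg: ResultMsg) {
        match msg {
            // Only record the request here; the texture lives on the
            // render thread and is reallocated in prepare_gpu_cache().
            ResultMsg::ClearGpuCache => self.pending_gpu_cache_clear = true,
        }
    }
}

fn main() {
    let (tx, rx) = mpsc::channel();

    // Backend side: after GpuCache::clear(), notify the renderer.
    tx.send(ResultMsg::ClearGpuCache).unwrap();

    // Renderer side: drain messages now, act on the flag next frame.
    let mut renderer = Renderer { pending_gpu_cache_clear: false };
    while let Ok(msg) = rx.try_recv() {
        renderer.handle(msg);
    }
    assert!(renderer.pending_gpu_cache_clear);
}
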
fn get_blob_image_updates(updates: &[ResourceUpdate]) -> Vec<BlobImageKey> {

View file

@@ -49,7 +49,7 @@ use gleam::gl;
use glyph_rasterizer::{GlyphFormat, GlyphRasterizer};
use gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
#[cfg(feature = "debug_renderer")]
use gpu_cache::GpuDebugChunk;
use gpu_cache::{GpuCacheDebugChunk, GpuCacheDebugCmd};
#[cfg(feature = "pathfinder")]
use gpu_glyph_renderer::GpuGlyphRenderer;
use gpu_types::ScalingInstance;
@@ -1017,14 +1017,21 @@ pub enum BlendMode {
SubpixelWithBgColor,
}
// Tracks the state of each row in the GPU cache texture.
/// Tracks the state of each row in the GPU cache texture.
struct CacheRow {
/// Mirrored block data on CPU for this row. We store a copy of
/// the data on the CPU side to improve upload batching.
cpu_blocks: Box<[GpuBlockData; MAX_VERTEX_TEXTURE_WIDTH]>,
/// True if this row is dirty.
is_dirty: bool,
}
impl CacheRow {
fn new() -> Self {
CacheRow { is_dirty: false }
CacheRow {
cpu_blocks: Box::new([GpuBlockData::EMPTY; MAX_VERTEX_TEXTURE_WIDTH]),
is_dirty: false,
}
}
}
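
Moving the CPU mirror from one flat Vec shared by all rows into a per-row Box<[GpuBlockData; MAX_VERTEX_TEXTURE_WIDTH]> keeps each row's blocks contiguous for upload while letting a cache clear return all of the mirror memory to the allocator (and letting the memory report account for it row by row). A simplified sketch of the layout, with stand-in types:

const WIDTH: usize = 1024; // stands in for MAX_VERTEX_TEXTURE_WIDTH

#[derive(Clone, Copy)]
struct BlockData([f32; 4]);

struct CacheRow {
    cpu_blocks: Box<[BlockData; WIDTH]>,
    is_dirty: bool,
}

impl CacheRow {
    fn new() -> Self {
        CacheRow {
            cpu_blocks: Box::new([BlockData([0.0; 4]); WIDTH]),
            is_dirty: false,
        }
    }
}

fn main() {
    let mut rows: Vec<CacheRow> = (0..4).map(|_| CacheRow::new()).collect();
    rows[2].is_dirty = true;
    // Flushing walks only dirty rows; each row's mirror is one
    // contiguous slice that can be handed to the GPU uploader.
    assert_eq!(rows.iter().filter(|r| r.is_dirty).count(), 1);
    rows.clear(); // a clear drops every row's mirror at once
}
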
@@ -1036,10 +1043,8 @@ enum GpuCacheBus {
PixelBuffer {
/// PBO used for transfers.
buffer: PBO,
/// Meta-data about the cached rows.
/// Per-row data.
rows: Vec<CacheRow>,
/// Mirrored block data on CPU.
cpu_blocks: Vec<GpuBlockData>,
},
/// Shader-based scattering updates. Currently rendered by a set
/// of points into the GPU texture, each carrying a `GpuBlockData`.
@@ -1057,16 +1062,6 @@ enum GpuCacheBus {
},
}
impl GpuCacheBus {
/// Returns true if this bus uses a render target for a texture.
fn uses_render_target(&self) -> bool {
match *self {
GpuCacheBus::Scatter { .. } => true,
GpuCacheBus::PixelBuffer { .. } => false,
}
}
}
/// The device-specific representation of the cache texture in gpu_cache.rs
struct GpuCacheTexture {
texture: Option<Texture>,
@@ -1077,38 +1072,22 @@ impl GpuCacheTexture {
/// Ensures that we have an appropriately-sized texture.
fn ensure_texture(&mut self, device: &mut Device, height: i32) -> bool {
fn ensure_texture(&mut self, device: &mut Device, height: i32) {
// If we already have a texture that works, we're done.
if self.texture.as_ref().map_or(false, |t| t.get_dimensions().height >= height) {
if GPU_CACHE_RESIZE_TEST && self.bus.uses_render_target() {
if GPU_CACHE_RESIZE_TEST {
// Special debug mode - resize the texture even though it's fine.
} else {
return false;
return;
}
}
// Compute a few parameters for the new texture. We round the height up to
// a multiple of 256 to avoid many small resizes.
let new_height = (height + 255) & !255;
let new_size = DeviceIntSize::new(MAX_VERTEX_TEXTURE_WIDTH as _, new_height);
let rt_info = if self.bus.uses_render_target() {
Some(RenderTargetInfo { has_depth: false })
} else {
None
};
// Take the old texture, if any, and deinitialize it unless we're going
// to blit its contents to the new one.
let mut blit_source = None;
if let Some(t) = self.texture.take() {
if rt_info.is_some() {
blit_source = Some(t);
} else {
device.delete_texture(t);
}
}
// Take the old texture, if any.
let blit_source = self.texture.take();
// Create the new texture.
let new_size = DeviceIntSize::new(MAX_VERTEX_TEXTURE_WIDTH as _, height);
let rt_info = Some(RenderTargetInfo { has_depth: false });
let mut texture = device.create_texture(
TextureTarget::Default,
ImageFormat::RGBAF32,
@@ -1126,7 +1105,6 @@ impl GpuCacheTexture {
}
self.texture = Some(texture);
true
}
fn new(device: &mut Device, use_scatter: bool) -> Result<Self, RendererError> {
@@ -1156,7 +1134,6 @@ impl GpuCacheTexture {
GpuCacheBus::PixelBuffer {
buffer,
rows: Vec::new(),
cpu_blocks: Vec::new(),
}
};
@@ -1193,18 +1170,9 @@ impl GpuCacheTexture {
total_block_count: usize,
max_height: i32,
) {
let allocated_new_texture = self.ensure_texture(device, max_height);
self.ensure_texture(device, max_height);
match self.bus {
GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
if allocated_new_texture {
// If we had to resize the texture, just mark all rows
// as dirty so they will be uploaded to the texture
// during the next flush.
for row in rows.iter_mut() {
row.is_dirty = true;
}
}
}
GpuCacheBus::PixelBuffer { .. } => {},
GpuCacheBus::Scatter {
ref mut buf_position,
ref mut buf_value,
@@ -1222,7 +1190,7 @@ impl GpuCacheTexture {
fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) {
match self.bus {
GpuCacheBus::PixelBuffer { ref mut rows, ref mut cpu_blocks, .. } => {
GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
for update in &updates.updates {
match *update {
GpuCacheUpdate::Copy {
@@ -1237,19 +1205,16 @@ impl GpuCacheTexture {
while rows.len() <= row {
// Add a new row.
rows.push(CacheRow::new());
// Add enough GPU blocks for this row.
cpu_blocks
.extend_from_slice(&[GpuBlockData::EMPTY; MAX_VERTEX_TEXTURE_WIDTH]);
}
// This row is dirty (needs to be updated in GPU texture).
rows[row].is_dirty = true;
// Copy the blocks from the patch array in the shadow CPU copy.
let block_offset = row * MAX_VERTEX_TEXTURE_WIDTH + address.u as usize;
let data = &mut cpu_blocks[block_offset .. (block_offset + block_count)];
let block_offset = address.u as usize;
let data = &mut rows[row].cpu_blocks;
for i in 0 .. block_count {
data[i] = updates.blocks[block_index + i];
data[block_offset + i] = updates.blocks[block_index + i];
}
}
}
@@ -1294,7 +1259,7 @@ impl GpuCacheTexture {
fn flush(&mut self, device: &mut Device) -> usize {
let texture = self.texture.as_ref().unwrap();
match self.bus {
GpuCacheBus::PixelBuffer { ref buffer, ref mut rows, ref cpu_blocks } => {
GpuCacheBus::PixelBuffer { ref buffer, ref mut rows } => {
let rows_dirty = rows
.iter()
.filter(|row| row.is_dirty)
@@ -1314,15 +1279,12 @@ impl GpuCacheTexture {
continue;
}
let block_index = row_index * MAX_VERTEX_TEXTURE_WIDTH;
let cpu_blocks =
&cpu_blocks[block_index .. (block_index + MAX_VERTEX_TEXTURE_WIDTH)];
let rect = DeviceIntRect::new(
DeviceIntPoint::new(0, row_index as i32),
DeviceIntSize::new(MAX_VERTEX_TEXTURE_WIDTH as i32, 1),
);
uploader.upload(rect, 0, None, cpu_blocks);
uploader.upload(rect, 0, None, &*row.cpu_blocks);
row.is_dirty = false;
}
@@ -1515,6 +1477,7 @@ pub struct Renderer {
pub device: Device,
pending_texture_updates: Vec<TextureUpdateList>,
pending_gpu_cache_updates: Vec<GpuCacheUpdateList>,
pending_gpu_cache_clear: bool,
pending_shader_updates: Vec<PathBuf>,
active_documents: Vec<(DocumentId, RenderedDocument)>,
@@ -1552,8 +1515,12 @@ pub struct Renderer {
transforms_texture: VertexDataTexture,
render_task_texture: VertexDataTexture,
gpu_cache_texture: GpuCacheTexture,
/// When the GPU cache debugger is enabled, we keep track of the live blocks
/// in the GPU cache so that we can use them for the debug display. This
/// member stores those live blocks, indexed by row.
#[cfg(feature = "debug_renderer")]
gpu_cache_debug_chunks: Vec<GpuDebugChunk>,
gpu_cache_debug_chunks: Vec<Vec<GpuCacheDebugChunk>>,
gpu_cache_frame_id: FrameId,
gpu_cache_overflow: bool,
@@ -1851,9 +1818,7 @@ impl Renderer {
};
let device_pixel_ratio = options.device_pixel_ratio;
// First set the flags to default and later call set_debug_flags to ensure any
// potential transition when enabling a flag is run.
let debug_flags = DebugFlags::default();
let debug_flags = options.debug_flags;
let payload_rx_for_backend = payload_rx.to_mpsc_receiver();
let recorder = options.recorder;
let thread_listener = Arc::new(options.thread_listener);
@@ -1973,6 +1938,7 @@ impl Renderer {
recorder,
sampler,
size_of_op,
debug_flags,
namespace_alloc_by_client,
);
backend.run(backend_profile_counters);
@@ -1993,11 +1959,12 @@ impl Renderer {
active_documents: Vec::new(),
pending_texture_updates: Vec::new(),
pending_gpu_cache_updates: Vec::new(),
pending_gpu_cache_clear: false,
pending_shader_updates: Vec::new(),
shaders,
#[cfg(feature = "debug_renderer")]
debug: LazyInitializedDebugRenderer::new(),
debug_flags,
debug_flags: DebugFlags::empty(),
backend_profile_counters: BackendProfileCounters::new(),
profile_counters: RendererProfileCounters::new(),
resource_upload_time: 0,
@@ -2053,7 +2020,9 @@ impl Renderer {
framebuffer_size: None,
};
renderer.set_debug_flags(options.debug_flags);
// We initially set the flags to default, and now call set_debug_flags
// to ensure any potential transition when enabling a flag is run.
renderer.set_debug_flags(debug_flags);
let sender = RenderApiSender::new(api_tx, payload_tx);
Ok((renderer, sender))
@@ -2144,10 +2113,33 @@ impl Renderer {
ResultMsg::UpdateGpuCache(mut list) => {
#[cfg(feature = "debug_renderer")]
{
self.gpu_cache_debug_chunks = mem::replace(&mut list.debug_chunks, Vec::new());
for cmd in mem::replace(&mut list.debug_commands, Vec::new()) {
match cmd {
GpuCacheDebugCmd::Alloc(chunk) => {
let row = chunk.address.v as usize;
if row >= self.gpu_cache_debug_chunks.len() {
self.gpu_cache_debug_chunks.resize(row + 1, Vec::new());
}
self.gpu_cache_debug_chunks[row].push(chunk);
},
GpuCacheDebugCmd::Free(address) => {
let chunks = &mut self.gpu_cache_debug_chunks[address.v as usize];
let pos = chunks.iter()
.position(|x| x.address == address).unwrap();
chunks.remove(pos);
},
}
}
}
self.pending_gpu_cache_updates.push(list);
}
ResultMsg::ClearGpuCache => {
#[cfg(feature = "debug_renderer")]
{
self.gpu_cache_debug_chunks = Vec::new();
}
self.pending_gpu_cache_clear = true;
}
ResultMsg::UpdateResources {
updates,
memory_pressure,
@@ -2743,7 +2735,7 @@ impl Renderer {
height: gpu_cache_height,
blocks: vec![[1f32; 4].into()],
updates: Vec::new(),
debug_chunks: Vec::new(),
debug_commands: Vec::new(),
});
}
@@ -2789,6 +2781,15 @@ impl Renderer {
}
fn prepare_gpu_cache(&mut self, frame: &Frame) {
if self.pending_gpu_cache_clear {
let use_scatter =
matches!(self.gpu_cache_texture.bus, GpuCacheBus::Scatter { .. });
let new_cache = GpuCacheTexture::new(&mut self.device, use_scatter).unwrap();
let old_cache = mem::replace(&mut self.gpu_cache_texture, new_cache);
old_cache.deinit(&mut self.device);
self.pending_gpu_cache_clear = false;
}
let deferred_update_list = self.update_deferred_resolves(&frame.deferred_resolves);
self.pending_gpu_cache_updates.extend(deferred_update_list);
@@ -3856,7 +3857,7 @@ impl Renderer {
height: self.gpu_cache_texture.get_height(),
blocks: Vec::new(),
updates: Vec::new(),
debug_chunks: Vec::new(),
debug_commands: Vec::new(),
};
for deferred_resolve in deferred_resolves {
@@ -4444,21 +4445,21 @@ impl Renderer {
};
let (x_off, y_off) = (30f32, 30f32);
//let x_end = framebuffer_size.width as f32 - x_off;
let y_end = framebuffer_size.height as f32 - y_off;
let height = self.gpu_cache_texture.texture
.as_ref().map_or(0, |t| t.get_dimensions().height)
.min(framebuffer_size.height - (y_off as i32) * 2) as usize;
debug_renderer.add_quad(
x_off,
y_off,
x_off + MAX_VERTEX_TEXTURE_WIDTH as f32,
y_end,
y_off + height as f32,
ColorU::new(80, 80, 80, 80),
ColorU::new(80, 80, 80, 80),
);
for chunk in &self.gpu_cache_debug_chunks {
let color = match chunk.tag {
_ => ColorU::new(250, 0, 0, 200),
};
let upper = self.gpu_cache_debug_chunks.len().min(height);
for chunk in self.gpu_cache_debug_chunks[0..upper].iter().flatten() {
let color = ColorU::new(250, 0, 0, 200);
debug_renderer.add_quad(
x_off + chunk.address.u as f32,
y_off + chunk.address.v as f32,
@@ -4548,8 +4549,10 @@ impl Renderer {
let mut report = MemoryReport::default();
// GPU cache CPU memory.
if let GpuCacheBus::PixelBuffer{ref cpu_blocks, ..} = self.gpu_cache_texture.bus {
report.gpu_cache_cpu_mirror += self.size_of(cpu_blocks.as_ptr());
if let GpuCacheBus::PixelBuffer{ref rows, ..} = self.gpu_cache_texture.bus {
for row in rows.iter() {
report.gpu_cache_cpu_mirror += self.size_of(&*row.cpu_blocks as *const _);
}
}
// GPU cache GPU memory.
@@ -5205,7 +5208,7 @@ impl Renderer {
);
self.gpu_cache_texture.texture = Some(t);
match self.gpu_cache_texture.bus {
GpuCacheBus::PixelBuffer { ref mut rows, ref mut cpu_blocks, .. } => {
GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
let dim = self.gpu_cache_texture.texture.as_ref().unwrap().get_dimensions();
let blocks = unsafe {
slice::from_raw_parts(
@@ -5215,9 +5218,12 @@
};
// fill up the CPU cache from the contents we just loaded
rows.clear();
cpu_blocks.clear();
rows.extend((0 .. dim.height).map(|_| CacheRow::new()));
cpu_blocks.extend_from_slice(blocks);
let chunks = blocks.chunks(MAX_VERTEX_TEXTURE_WIDTH);
debug_assert_eq!(chunks.len(), rows.len());
for (row, chunk) in rows.iter_mut().zip(chunks) {
row.cpu_blocks.copy_from_slice(chunk);
}
}
GpuCacheBus::Scatter { .. } => {}
}