Mirror of https://github.com/mozilla/gecko-dev.git
Bug 1516676 - Update webrender to commit 8b8ca774f555aeb8ef99743196593a192298dd60 (WR PR #3452). r=kats
https://github.com/servo/webrender/pull/3452 Differential Revision: https://phabricator.services.mozilla.com/D15494 --HG-- extra : moz-landing-system : lando
This commit is contained in:
Parent
17389dd116
Commit
fa50bcc2e7
|
@ -1 +1 @@
|
|||
b4dfe9c4f98fdeca3814976cd075bde8ed409123
|
||||
8b8ca774f555aeb8ef99743196593a192298dd60
|
||||
|
|
|
@ -31,13 +31,28 @@ use profiler::GpuCacheProfileCounters;
|
|||
use render_backend::FrameId;
|
||||
use renderer::MAX_VERTEX_TEXTURE_WIDTH;
|
||||
use std::{mem, u16, u32};
|
||||
use std::num::NonZeroU32;
|
||||
use std::ops::Add;
|
||||
use std::os::raw::c_void;
|
||||
use std::time::{Duration, Instant};
|
||||
|
||||
|
||||
pub const GPU_CACHE_INITIAL_HEIGHT: i32 = 512;
|
||||
/// At the time of this writing, Firefox uses about 15 GPU cache rows on
|
||||
/// startup, and then gradually works its way up to the mid-30s with normal
|
||||
/// browsing.
|
||||
pub const GPU_CACHE_INITIAL_HEIGHT: i32 = 20;
|
||||
const NEW_ROWS_PER_RESIZE: i32 = 10;
|
||||
|
||||
/// The number of frames an entry can go unused before being evicted.
|
||||
const FRAMES_BEFORE_EVICTION: usize = 10;
|
||||
const NEW_ROWS_PER_RESIZE: i32 = 512;
|
||||
|
||||
/// The ratio of utilized blocks to total blocks for which we start the clock
|
||||
/// on reclaiming memory.
|
||||
const RECLAIM_THRESHOLD: f32 = 0.2;
|
||||
|
||||
/// The amount of time utilization must be below the above threshold before we
|
||||
/// blow away the cache and rebuild it.
|
||||
const RECLAIM_DELAY_S: u64 = 5;
|
||||
|
||||
#[derive(Debug, Copy, Clone, Eq, PartialEq)]
|
||||
#[cfg_attr(feature = "capture", derive(Serialize))]
|
||||
|
@ -131,7 +146,7 @@ impl GpuCacheHandle {
|
|||
// A unique address in the GPU cache. These are uploaded
|
||||
// as part of the primitive instances, to allow the vertex
|
||||
// shader to fetch the specific data.
|
||||
#[derive(Copy, Debug, Clone)]
|
||||
#[derive(Copy, Debug, Clone, Eq, PartialEq)]
|
||||
#[cfg_attr(feature = "capture", derive(Serialize))]
|
||||
#[cfg_attr(feature = "replay", derive(Deserialize))]
|
||||
pub struct GpuCacheAddress {
|
||||
|
@ -173,31 +188,67 @@ impl Add<usize> for GpuCacheAddress {
|
|||
struct Block {
|
||||
// The location in the cache of this block.
|
||||
address: GpuCacheAddress,
|
||||
// The current epoch (generation) of this block.
|
||||
epoch: Epoch,
|
||||
// Index of the next free block in the list it
|
||||
// belongs to (either a free-list or the
|
||||
// occupied list).
|
||||
next: Option<BlockIndex>,
|
||||
// The current epoch (generation) of this block.
|
||||
epoch: Epoch,
|
||||
// The last frame this block was referenced.
|
||||
last_access_time: FrameId,
|
||||
}
|
||||
|
||||
impl Block {
|
||||
fn new(address: GpuCacheAddress, next: Option<BlockIndex>, frame_id: FrameId) -> Self {
|
||||
fn new(
|
||||
address: GpuCacheAddress,
|
||||
next: Option<BlockIndex>,
|
||||
frame_id: FrameId,
|
||||
epoch: Epoch,
|
||||
) -> Self {
|
||||
Block {
|
||||
address,
|
||||
next,
|
||||
last_access_time: frame_id,
|
||||
epoch: Epoch(0),
|
||||
epoch,
|
||||
}
|
||||
}
|
||||
|
||||
/// Advances this block's epoch via `Epoch::next` and, if the new epoch is
/// greater than `max_epoch`, raises `max_epoch` to match it.
///
/// `max_epoch` is the caller's running maximum; it is only ever increased
/// here, never lowered.
fn advance_epoch(&mut self, max_epoch: &mut Epoch) {
|
||||
self.epoch.next();
|
||||
if max_epoch.0 < self.epoch.0 {
|
||||
max_epoch.0 = self.epoch.0;
|
||||
}
|
||||
}
|
||||
|
||||
/// An invalid dummy block, used to fill slot zero of the block array.
|
||||
pub const INVALID: Block = Block {
|
||||
address: GpuCacheAddress { u: 0, v: 0 },
|
||||
epoch: Epoch(0),
|
||||
next: None,
|
||||
last_access_time: FrameId::INVALID,
|
||||
};
|
||||
}
|
||||
|
||||
/// Represents the index of a Block in the block array. We only create such
|
||||
/// structs for blocks that represent the start of a chunk.
|
||||
///
|
||||
/// Because we use Option<BlockIndex> in a lot of places, we use a NonZeroU32
|
||||
/// here and avoid ever using the index zero.
|
||||
#[derive(Debug, Copy, Clone)]
|
||||
#[cfg_attr(feature = "capture", derive(Serialize))]
|
||||
#[cfg_attr(feature = "replay", derive(Deserialize))]
|
||||
struct BlockIndex(usize);
|
||||
struct BlockIndex(NonZeroU32);
|
||||
|
||||
impl BlockIndex {
|
||||
/// Wraps a position in the block array.
///
/// Debug builds assert that `idx` fits in a `u32`. The `expect` panics with
/// "Index zero forbidden" when the converted value is zero.
fn new(idx: usize) -> Self {
|
||||
// NOTE(review): with debug assertions disabled this check is compiled out
// and the `as u32` cast below would truncate an oversized index.
debug_assert!(idx <= u32::MAX as usize);
|
||||
BlockIndex(NonZeroU32::new(idx as u32).expect("Index zero forbidden"))
|
||||
}
|
||||
|
||||
/// Returns the wrapped index as a `usize`, suitable for indexing the block array.
fn get(&self) -> usize {
|
||||
self.0.get() as usize
|
||||
}
|
||||
}
|
||||
|
||||
// A row in the cache texture.
|
||||
#[cfg_attr(feature = "capture", derive(Serialize))]
|
||||
|
@ -232,10 +283,19 @@ pub enum GpuCacheUpdate {
|
|||
},
|
||||
}
|
||||
|
||||
pub struct GpuDebugChunk {
|
||||
/// Command to inform the debug display in the renderer when chunks are allocated
|
||||
/// or freed.
|
||||
pub enum GpuCacheDebugCmd {
|
||||
/// Describes an allocated chunk.
|
||||
Alloc(GpuCacheDebugChunk),
|
||||
/// Describes a freed chunk.
|
||||
Free(GpuCacheAddress),
|
||||
}
|
||||
|
||||
#[derive(Clone)]
|
||||
pub struct GpuCacheDebugChunk {
|
||||
pub address: GpuCacheAddress,
|
||||
pub tag: u8,
|
||||
pub size: u16,
|
||||
pub size: usize,
|
||||
}
|
||||
|
||||
#[must_use]
|
||||
|
@ -254,7 +314,7 @@ pub struct GpuCacheUpdateList {
|
|||
pub blocks: Vec<GpuBlockData>,
|
||||
/// Whole state GPU block metadata for debugging.
|
||||
#[cfg_attr(feature = "serde", serde(skip))]
|
||||
pub debug_chunks: Vec<GpuDebugChunk>,
|
||||
pub debug_commands: Vec<GpuCacheDebugCmd>,
|
||||
}
|
||||
|
||||
// Holds the free lists of fixed size blocks. Mostly
|
||||
|
@ -270,7 +330,10 @@ struct FreeBlockLists {
|
|||
free_list_32: Option<BlockIndex>,
|
||||
free_list_64: Option<BlockIndex>,
|
||||
free_list_128: Option<BlockIndex>,
|
||||
free_list_large: Option<BlockIndex>,
|
||||
free_list_256: Option<BlockIndex>,
|
||||
free_list_341: Option<BlockIndex>,
|
||||
free_list_512: Option<BlockIndex>,
|
||||
free_list_1024: Option<BlockIndex>,
|
||||
}
|
||||
|
||||
impl FreeBlockLists {
|
||||
|
@ -284,7 +347,10 @@ impl FreeBlockLists {
|
|||
free_list_32: None,
|
||||
free_list_64: None,
|
||||
free_list_128: None,
|
||||
free_list_large: None,
|
||||
free_list_256: None,
|
||||
free_list_341: None,
|
||||
free_list_512: None,
|
||||
free_list_1024: None,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -292,8 +358,14 @@ impl FreeBlockLists {
|
|||
&mut self,
|
||||
block_count: usize,
|
||||
) -> (usize, &mut Option<BlockIndex>) {
|
||||
// Find the appropriate free list to use
|
||||
// based on the block size.
|
||||
// Find the appropriate free list to use based on the block size.
|
||||
//
|
||||
// Note that we cheat a bit with the 341 bucket, since it's not quite
|
||||
// a divisor of 1024, because purecss-francine allocates many 260-block
|
||||
// chunks, and there's no reason we shouldn't pack these three to a row.
|
||||
// This means the allocation statistics will under-report by one block
|
||||
// for each row using 341-block buckets, which is fine.
|
||||
debug_assert_eq!(MAX_VERTEX_TEXTURE_WIDTH, 1024, "Need to update bucketing");
|
||||
match block_count {
|
||||
0 => panic!("Can't allocate zero sized blocks!"),
|
||||
1 => (1, &mut self.free_list_1),
|
||||
|
@ -304,7 +376,10 @@ impl FreeBlockLists {
|
|||
17...32 => (32, &mut self.free_list_32),
|
||||
33...64 => (64, &mut self.free_list_64),
|
||||
65...128 => (128, &mut self.free_list_128),
|
||||
129...MAX_VERTEX_TEXTURE_WIDTH => (MAX_VERTEX_TEXTURE_WIDTH, &mut self.free_list_large),
|
||||
129...256 => (256, &mut self.free_list_256),
|
||||
257...341 => (341, &mut self.free_list_341),
|
||||
342...512 => (512, &mut self.free_list_512),
|
||||
513...1024 => (1024, &mut self.free_list_1024),
|
||||
_ => panic!("Can't allocate > MAX_VERTEX_TEXTURE_WIDTH per resource!"),
|
||||
}
|
||||
}
|
||||
|
@ -320,6 +395,12 @@ struct Texture {
|
|||
blocks: Vec<Block>,
|
||||
// Metadata about each allocated row.
|
||||
rows: Vec<Row>,
|
||||
// The base Epoch for this texture.
|
||||
base_epoch: Epoch,
|
||||
// The maximum epoch reached. We track this along with the above so
|
||||
// that we can rebuild the Texture and avoid collisions with handles
|
||||
// allocated for the old texture.
|
||||
max_epoch: Epoch,
|
||||
// Free lists of available blocks for each supported
|
||||
// block size in the texture. These are intrusive
|
||||
// linked lists.
|
||||
|
@ -335,19 +416,39 @@ struct Texture {
|
|||
updates: Vec<GpuCacheUpdate>,
|
||||
// Profile stats
|
||||
allocated_block_count: usize,
|
||||
// The stamp at which we first reached our threshold for reclaiming `GpuCache`
|
||||
// memory, or `None` if the threshold hasn't been reached.
|
||||
#[cfg_attr(feature = "serde", serde(skip))]
|
||||
reached_reclaim_threshold: Option<Instant>,
|
||||
// List of debug commands to be sent to the renderer when the GPU cache
|
||||
// debug display is enabled.
|
||||
#[cfg_attr(feature = "serde", serde(skip))]
|
||||
debug_commands: Vec<GpuCacheDebugCmd>,
|
||||
// The current debug flags for the system.
|
||||
debug_flags: DebugFlags,
|
||||
}
|
||||
|
||||
impl Texture {
|
||||
fn new() -> Self {
|
||||
fn new(base_epoch: Epoch, debug_flags: DebugFlags) -> Self {
|
||||
// Pre-fill the block array with one invalid block so that we never use
|
||||
// 0 for a BlockIndex. This lets us use NonZeroU32 for BlockIndex, which
|
||||
// saves memory.
|
||||
let blocks = vec![Block::INVALID];
|
||||
|
||||
Texture {
|
||||
height: GPU_CACHE_INITIAL_HEIGHT,
|
||||
blocks: Vec::new(),
|
||||
blocks,
|
||||
rows: Vec::new(),
|
||||
base_epoch,
|
||||
max_epoch: base_epoch,
|
||||
free_lists: FreeBlockLists::new(),
|
||||
pending_blocks: Vec::new(),
|
||||
updates: Vec::new(),
|
||||
occupied_list_head: None,
|
||||
allocated_block_count: 0,
|
||||
reached_reclaim_threshold: None,
|
||||
debug_commands: Vec::new(),
|
||||
debug_flags,
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -393,8 +494,8 @@ impl Texture {
|
|||
let mut prev_block_index = None;
|
||||
for i in 0 .. items_per_row {
|
||||
let address = GpuCacheAddress::new(i * alloc_size, row_index);
|
||||
let block_index = BlockIndex(self.blocks.len());
|
||||
let block = Block::new(address, prev_block_index, frame_id);
|
||||
let block_index = BlockIndex::new(self.blocks.len());
|
||||
let block = Block::new(address, prev_block_index, frame_id, self.base_epoch);
|
||||
self.blocks.push(block);
|
||||
prev_block_index = Some(block_index);
|
||||
}
|
||||
|
@ -406,7 +507,7 @@ impl Texture {
|
|||
// available in the appropriate free-list. Pull a block from the
|
||||
// head of the list.
|
||||
let free_block_index = free_list.take().unwrap();
|
||||
let block = &mut self.blocks[free_block_index.0 as usize];
|
||||
let block = &mut self.blocks[free_block_index.get()];
|
||||
*free_list = block.next;
|
||||
|
||||
// Add the block to the occupied linked list.
|
||||
|
@ -425,6 +526,18 @@ impl Texture {
|
|||
});
|
||||
}
|
||||
|
||||
// If we're using the debug display, communicate the allocation to the
|
||||
// renderer thread. Note that we do this regardless of whether or not
|
||||
// pending_block_index is None (if it is, the renderer thread will fill
|
||||
// in the data via a deferred resolve, but the block is still considered
|
||||
// allocated).
|
||||
if self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) {
|
||||
self.debug_commands.push(GpuCacheDebugCmd::Alloc(GpuCacheDebugChunk {
|
||||
address: block.address,
|
||||
size: block_count,
|
||||
}));
|
||||
}
|
||||
|
||||
CacheLocation {
|
||||
block_index: free_block_index,
|
||||
epoch: block.epoch,
|
||||
|
@ -442,7 +555,7 @@ impl Texture {
|
|||
|
||||
while let Some(index) = current_block {
|
||||
let (next_block, should_unlink) = {
|
||||
let block = &mut self.blocks[index.0 as usize];
|
||||
let block = &mut self.blocks[index.get()];
|
||||
|
||||
let next_block = block.next;
|
||||
let mut should_unlink = false;
|
||||
|
@ -461,11 +574,16 @@ impl Texture {
|
|||
let (_, free_list) = self.free_lists
|
||||
.get_actual_block_count_and_free_list(row.block_count_per_item);
|
||||
|
||||
block.epoch.next();
|
||||
block.advance_epoch(&mut self.max_epoch);
|
||||
block.next = *free_list;
|
||||
*free_list = Some(index);
|
||||
|
||||
self.allocated_block_count -= row.block_count_per_item;
|
||||
|
||||
if self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) {
|
||||
let cmd = GpuCacheDebugCmd::Free(block.address);
|
||||
self.debug_commands.push(cmd);
|
||||
}
|
||||
};
|
||||
|
||||
(next_block, should_unlink)
|
||||
|
@ -476,7 +594,7 @@ impl Texture {
|
|||
if should_unlink {
|
||||
match prev_block {
|
||||
Some(prev_block) => {
|
||||
self.blocks[prev_block.0 as usize].next = next_block;
|
||||
self.blocks[prev_block.get()].next = next_block;
|
||||
}
|
||||
None => {
|
||||
self.occupied_list_head = next_block;
|
||||
|
@ -489,6 +607,15 @@ impl Texture {
|
|||
current_block = next_block;
|
||||
}
|
||||
}
|
||||
|
||||
/// Returns the ratio of utilized blocks.
///
/// The ratio is `allocated_block_count` divided by the capacity of all
/// allocated rows (`rows.len() * MAX_VERTEX_TEXTURE_WIDTH`). Debug builds
/// assert that at least one row exists and that the result lies in `[0, 1]`.
|
||||
fn utilization(&self) -> f32 {
|
||||
// Total capacity: every allocated row holds MAX_VERTEX_TEXTURE_WIDTH blocks.
let total_blocks = self.rows.len() * MAX_VERTEX_TEXTURE_WIDTH;
|
||||
// Guards the division below against a texture with no rows (debug builds only).
debug_assert!(total_blocks > 0);
|
||||
let ratio = self.allocated_block_count as f32 / total_blocks as f32;
|
||||
debug_assert!(0.0 <= ratio && ratio <= 1.0, "Bad ratio: {}", ratio);
|
||||
ratio
|
||||
}
|
||||
}
|
||||
|
||||
|
||||
|
@ -546,14 +673,25 @@ pub struct GpuCache {
|
|||
|
||||
impl GpuCache {
|
||||
pub fn new() -> Self {
|
||||
let debug_flags = DebugFlags::empty();
|
||||
GpuCache {
|
||||
frame_id: FrameId::INVALID,
|
||||
texture: Texture::new(),
|
||||
texture: Texture::new(Epoch(0), debug_flags),
|
||||
saved_block_count: 0,
|
||||
debug_flags: DebugFlags::empty(),
|
||||
debug_flags,
|
||||
}
|
||||
}
|
||||
|
||||
/// Drops everything in the GPU cache. Paired by the caller with a message
|
||||
/// to the renderer thread telling it to do the same.
///
/// Panics if the texture still has pending updates. The whole `Texture` is
/// replaced with a fresh one and `saved_block_count` is reset to zero.
|
||||
pub fn clear(&mut self) {
|
||||
assert!(self.texture.updates.is_empty(), "Clearing with pending updates");
|
||||
// Start the replacement texture one epoch past the highest epoch the old
// texture reached; this is intended to keep handles issued before the
// clear from matching any block's epoch afterwards.
let mut next_base_epoch = self.texture.max_epoch;
|
||||
next_base_epoch.next();
|
||||
self.texture = Texture::new(next_base_epoch, self.debug_flags);
|
||||
self.saved_block_count = 0;
|
||||
}
|
||||
|
||||
/// Begin a new frame.
|
||||
pub fn begin_frame(&mut self, frame_id: FrameId) {
|
||||
debug_assert!(self.texture.pending_blocks.is_empty());
|
||||
|
@ -567,29 +705,31 @@ impl GpuCache {
|
|||
// will rebuild the data and upload it to the GPU.
|
||||
pub fn invalidate(&mut self, handle: &GpuCacheHandle) {
|
||||
if let Some(ref location) = handle.location {
|
||||
let block = &mut self.texture.blocks[location.block_index.0];
|
||||
// don't invalidate blocks that are already re-assigned
|
||||
if block.epoch == location.epoch {
|
||||
block.epoch.next();
|
||||
if let Some(block) = self.texture.blocks.get_mut(location.block_index.get()) {
|
||||
if block.epoch == location.epoch {
|
||||
block.advance_epoch(&mut self.texture.max_epoch);
|
||||
}
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
// Request a resource be added to the cache. If the resource
|
||||
/// Request a resource be added to the cache. If the resource
|
||||
/// is already in the cache, `None` will be returned.
|
||||
pub fn request<'a>(&'a mut self, handle: &'a mut GpuCacheHandle) -> Option<GpuDataRequest<'a>> {
|
||||
let mut max_block_count = MAX_VERTEX_TEXTURE_WIDTH;
|
||||
// Check if the allocation for this handle is still valid.
|
||||
if let Some(ref location) = handle.location {
|
||||
let block = &mut self.texture.blocks[location.block_index.0];
|
||||
max_block_count = self.texture.rows[block.address.v as usize].block_count_per_item;
|
||||
if block.epoch == location.epoch {
|
||||
if block.last_access_time != self.frame_id {
|
||||
// Mark last access time to avoid evicting this block.
|
||||
block.last_access_time = self.frame_id;
|
||||
self.saved_block_count += max_block_count;
|
||||
if let Some(block) = self.texture.blocks.get_mut(location.block_index.get()) {
|
||||
if block.epoch == location.epoch {
|
||||
max_block_count = self.texture.rows[block.address.v as usize].block_count_per_item;
|
||||
if block.last_access_time != self.frame_id {
|
||||
// Mark last access time to avoid evicting this block.
|
||||
block.last_access_time = self.frame_id;
|
||||
self.saved_block_count += max_block_count;
|
||||
}
|
||||
return None;
|
||||
}
|
||||
return None;
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -631,7 +771,7 @@ impl GpuCache {
|
|||
/// End the frame. Return the list of updates to apply to the
|
||||
/// device specific cache texture.
|
||||
pub fn end_frame(
|
||||
&self,
|
||||
&mut self,
|
||||
profile_counters: &mut GpuCacheProfileCounters,
|
||||
) -> FrameId {
|
||||
profile_counters
|
||||
|
@ -643,28 +783,32 @@ impl GpuCache {
|
|||
profile_counters
|
||||
.saved_blocks
|
||||
.set(self.saved_block_count);
|
||||
|
||||
let reached_threshold =
|
||||
self.texture.rows.len() > (GPU_CACHE_INITIAL_HEIGHT as usize) &&
|
||||
self.texture.utilization() < RECLAIM_THRESHOLD;
|
||||
if reached_threshold {
|
||||
self.texture.reached_reclaim_threshold.get_or_insert_with(Instant::now);
|
||||
} else {
|
||||
self.texture.reached_reclaim_threshold = None;
|
||||
}
|
||||
|
||||
self.frame_id
|
||||
}
|
||||
|
||||
/// Returns true if utilization has been low enough for long enough that we
|
||||
/// should blow the cache away and rebuild it.
///
/// Returns false while `reached_reclaim_threshold` is `None`. Otherwise
/// returns true once more than `RECLAIM_DELAY_S` seconds have elapsed since
/// the recorded instant.
|
||||
pub fn should_reclaim_memory(&self) -> bool {
|
||||
self.texture.reached_reclaim_threshold
|
||||
.map_or(false, |t| t.elapsed() > Duration::from_secs(RECLAIM_DELAY_S))
|
||||
}
|
||||
|
||||
/// Extract the pending updates from the cache.
|
||||
pub fn extract_updates(&mut self) -> GpuCacheUpdateList {
|
||||
GpuCacheUpdateList {
|
||||
frame_id: self.frame_id,
|
||||
height: self.texture.height,
|
||||
debug_chunks: if self.debug_flags.contains(DebugFlags::GPU_CACHE_DBG) {
|
||||
self.texture.updates
|
||||
.iter()
|
||||
.map(|update| match *update {
|
||||
GpuCacheUpdate::Copy { address, block_index: _, block_count } => GpuDebugChunk {
|
||||
address,
|
||||
tag: 0, //TODO
|
||||
size: block_count.min(0xFFFF) as u16,
|
||||
}
|
||||
})
|
||||
.collect()
|
||||
} else {
|
||||
Vec::new()
|
||||
},
|
||||
debug_commands: mem::replace(&mut self.texture.debug_commands, Vec::new()),
|
||||
updates: mem::replace(&mut self.texture.updates, Vec::new()),
|
||||
blocks: mem::replace(&mut self.texture.pending_blocks, Vec::new()),
|
||||
}
|
||||
|
@ -673,6 +817,7 @@ impl GpuCache {
|
|||
/// Sets the current debug flags for the system.
|
||||
pub fn set_debug_flags(&mut self, flags: DebugFlags) {
|
||||
self.debug_flags = flags;
|
||||
self.texture.debug_flags = flags;
|
||||
}
|
||||
|
||||
/// Get the actual GPU address in the texture for a given slot ID.
|
||||
|
@ -681,7 +826,7 @@ impl GpuCache {
|
|||
/// freed or pending slot will panic!
|
||||
pub fn get_address(&self, id: &GpuCacheHandle) -> GpuCacheAddress {
|
||||
let location = id.location.expect("handle not requested or allocated!");
|
||||
let block = &self.texture.blocks[location.block_index.0];
|
||||
let block = &self.texture.blocks[location.block_index.get()];
|
||||
debug_assert_eq!(block.epoch, location.epoch);
|
||||
debug_assert_eq!(block.last_access_time, self.frame_id);
|
||||
block.address
|
||||
|
@ -692,3 +837,12 @@ impl GpuCache {
|
|||
self.texture.malloc_size_of(op)
|
||||
}
|
||||
}
|
||||
|
||||
// Pins the in-memory size of `Block` so that accidental growth is caught.
// Only compiled for 64-bit targets (see the `cfg` attribute below).
#[test]
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
fn test_struct_sizes() {
|
||||
use std::mem;
|
||||
// We can end up with a lot of blocks stored in the global vec, and keeping
|
||||
// them small helps reduce memory overhead.
|
||||
assert_eq!(mem::size_of::<Block>(), 24, "Block size changed");
|
||||
}
|
||||
|
|
|
@ -295,6 +295,7 @@ pub enum ResultMsg {
|
|||
DebugOutput(DebugOutput),
|
||||
RefreshShader(PathBuf),
|
||||
UpdateGpuCache(GpuCacheUpdateList),
|
||||
ClearGpuCache,
|
||||
UpdateResources {
|
||||
updates: TextureUpdateList,
|
||||
memory_pressure: bool,
|
||||
|
|
|
@ -374,7 +374,7 @@ impl IsVisible for ImageBorder {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_os = "linux")]
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
fn test_struct_sizes() {
|
||||
use std::mem;
|
||||
// The sizes of these structures are critical for performance on a number of
|
||||
|
@ -384,9 +384,9 @@ fn test_struct_sizes() {
|
|||
// (b) You made a structure larger. This is not necessarily a problem, but should only
|
||||
// be done with care, and after checking if talos performance regresses badly.
|
||||
assert_eq!(mem::size_of::<NormalBorderPrim>(), 84, "NormalBorderPrim size changed");
|
||||
assert_eq!(mem::size_of::<NormalBorderTemplate>(), 240, "NormalBorderTemplate size changed");
|
||||
assert_eq!(mem::size_of::<NormalBorderTemplate>(), 224, "NormalBorderTemplate size changed");
|
||||
assert_eq!(mem::size_of::<NormalBorderKey>(), 112, "NormalBorderKey size changed");
|
||||
assert_eq!(mem::size_of::<ImageBorder>(), 92, "ImageBorder size changed");
|
||||
assert_eq!(mem::size_of::<ImageBorderTemplate>(), 104, "ImageBorderTemplate size changed");
|
||||
assert_eq!(mem::size_of::<ImageBorderTemplate>(), 88, "ImageBorderTemplate size changed");
|
||||
assert_eq!(mem::size_of::<ImageBorderKey>(), 120, "ImageBorderKey size changed");
|
||||
}
|
||||
|
|
|
@ -714,7 +714,7 @@ impl GradientGpuBlockBuilder {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_os = "linux")]
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
fn test_struct_sizes() {
|
||||
use std::mem;
|
||||
// The sizes of these structures are critical for performance on a number of
|
||||
|
@ -724,10 +724,10 @@ fn test_struct_sizes() {
|
|||
// (b) You made a structure larger. This is not necessarily a problem, but should only
|
||||
// be done with care, and after checking if talos performance regresses badly.
|
||||
assert_eq!(mem::size_of::<LinearGradient>(), 72, "LinearGradient size changed");
|
||||
assert_eq!(mem::size_of::<LinearGradientTemplate>(), 168, "LinearGradientTemplate size changed");
|
||||
assert_eq!(mem::size_of::<LinearGradientTemplate>(), 128, "LinearGradientTemplate size changed");
|
||||
assert_eq!(mem::size_of::<LinearGradientKey>(), 96, "LinearGradientKey size changed");
|
||||
|
||||
assert_eq!(mem::size_of::<RadialGradient>(), 72, "RadialGradient size changed");
|
||||
assert_eq!(mem::size_of::<RadialGradientTemplate>(), 168, "RadialGradientTemplate size changed");
|
||||
assert_eq!(mem::size_of::<RadialGradientTemplate>(), 136, "RadialGradientTemplate size changed");
|
||||
assert_eq!(mem::size_of::<RadialGradientKey>(), 104, "RadialGradientKey size changed");
|
||||
}
|
||||
|
|
|
@ -549,7 +549,7 @@ impl IsVisible for YuvImage {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_os = "linux")]
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
fn test_struct_sizes() {
|
||||
use std::mem;
|
||||
// The sizes of these structures are critical for performance on a number of
|
||||
|
@ -559,9 +559,9 @@ fn test_struct_sizes() {
|
|||
// (b) You made a structure larger. This is not necessarily a problem, but should only
|
||||
// be done with care, and after checking if talos performance regresses badly.
|
||||
assert_eq!(mem::size_of::<Image>(), 56, "Image size changed");
|
||||
assert_eq!(mem::size_of::<ImageTemplate>(), 144, "ImageTemplate size changed");
|
||||
assert_eq!(mem::size_of::<ImageTemplate>(), 124, "ImageTemplate size changed");
|
||||
assert_eq!(mem::size_of::<ImageKey>(), 84, "ImageKey size changed");
|
||||
assert_eq!(mem::size_of::<YuvImage>(), 36, "YuvImage size changed");
|
||||
assert_eq!(mem::size_of::<YuvImageTemplate>(), 96, "YuvImageTemplate size changed");
|
||||
assert_eq!(mem::size_of::<YuvImageTemplate>(), 72, "YuvImageTemplate size changed");
|
||||
assert_eq!(mem::size_of::<YuvImageKey>(), 64, "YuvImageKey size changed");
|
||||
}
|
||||
|
|
|
@ -180,7 +180,7 @@ impl IsVisible for LineDecoration {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_os = "linux")]
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
fn test_struct_sizes() {
|
||||
use std::mem;
|
||||
// The sizes of these structures are critical for performance on a number of
|
||||
|
@ -190,6 +190,6 @@ fn test_struct_sizes() {
|
|||
// (b) You made a structure larger. This is not necessarily a problem, but should only
|
||||
// be done with care, and after checking if talos performance regresses badly.
|
||||
assert_eq!(mem::size_of::<LineDecoration>(), 20, "LineDecoration size changed");
|
||||
assert_eq!(mem::size_of::<LineDecorationTemplate>(), 88, "LineDecorationTemplate size changed");
|
||||
assert_eq!(mem::size_of::<LineDecorationTemplate>(), 68, "LineDecorationTemplate size changed");
|
||||
assert_eq!(mem::size_of::<LineDecorationKey>(), 48, "LineDecorationKey size changed");
|
||||
}
|
||||
|
|
|
@ -3311,7 +3311,7 @@ fn update_opacity_binding(
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_os = "linux")]
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
fn test_struct_sizes() {
|
||||
use std::mem;
|
||||
// The sizes of these structures are critical for performance on a number of
|
||||
|
@ -3322,7 +3322,7 @@ fn test_struct_sizes() {
|
|||
// be done with care, and after checking if talos performance regresses badly.
|
||||
assert_eq!(mem::size_of::<PrimitiveInstance>(), 120, "PrimitiveInstance size changed");
|
||||
assert_eq!(mem::size_of::<PrimitiveInstanceKind>(), 40, "PrimitiveInstanceKind size changed");
|
||||
assert_eq!(mem::size_of::<PrimitiveTemplate>(), 80, "PrimitiveTemplate size changed");
|
||||
assert_eq!(mem::size_of::<PrimitiveTemplate>(), 56, "PrimitiveTemplate size changed");
|
||||
assert_eq!(mem::size_of::<PrimitiveTemplateKind>(), 20, "PrimitiveTemplateKind size changed");
|
||||
assert_eq!(mem::size_of::<PrimitiveKey>(), 36, "PrimitiveKey size changed");
|
||||
assert_eq!(mem::size_of::<PrimitiveKeyKind>(), 5, "PrimitiveKeyKind size changed");
|
||||
|
|
|
@ -224,7 +224,7 @@ impl IsVisible for Picture {
|
|||
}
|
||||
|
||||
#[test]
|
||||
#[cfg(target_os = "linux")]
|
||||
#[cfg(target_pointer_width = "64")]
|
||||
fn test_struct_sizes() {
|
||||
use std::mem;
|
||||
// The sizes of these structures are critical for performance on a number of
|
||||
|
@ -234,6 +234,6 @@ fn test_struct_sizes() {
|
|||
// (b) You made a structure larger. This is not necessarily a problem, but should only
|
||||
// be done with care, and after checking if talos performance regresses badly.
|
||||
assert_eq!(mem::size_of::<Picture>(), 84, "Picture size changed");
|
||||
assert_eq!(mem::size_of::<PictureTemplate>(), 56, "PictureTemplate size changed");
|
||||
assert_eq!(mem::size_of::<PictureTemplate>(), 36, "PictureTemplate size changed");
|
||||
assert_eq!(mem::size_of::<PictureKey>(), 112, "PictureKey size changed");
|
||||
}
|
||||
|
|
|
@ -328,6 +328,7 @@ impl TextRunPrimitive {
|
|||
}
|
||||
}
|
||||
|
||||
/// These are linux only because FontInstancePlatformOptions varies in size by platform.
|
||||
#[test]
|
||||
#[cfg(target_os = "linux")]
|
||||
fn test_struct_sizes() {
|
||||
|
@ -339,7 +340,7 @@ fn test_struct_sizes() {
|
|||
// (b) You made a structure larger. This is not necessarily a problem, but should only
|
||||
// be done with care, and after checking if talos performance regresses badly.
|
||||
assert_eq!(mem::size_of::<TextRun>(), 112, "TextRun size changed");
|
||||
assert_eq!(mem::size_of::<TextRunTemplate>(), 160, "TextRunTemplate size changed");
|
||||
assert_eq!(mem::size_of::<TextRunTemplate>(), 144, "TextRunTemplate size changed");
|
||||
assert_eq!(mem::size_of::<TextRunKey>(), 136, "TextRunKey size changed");
|
||||
assert_eq!(mem::size_of::<TextRunPrimitive>(), 88, "TextRunPrimitive size changed");
|
||||
}
|
||||
|
|
|
@ -8,7 +8,7 @@
|
|||
//! See the comment at the top of the `renderer` module for a description of
|
||||
//! how these two pieces interact.
|
||||
|
||||
use api::{ApiMsg, BuiltDisplayList, ClearCache, DebugCommand};
|
||||
use api::{ApiMsg, BuiltDisplayList, ClearCache, DebugCommand, DebugFlags};
|
||||
#[cfg(feature = "debugger")]
|
||||
use api::{BuiltDisplayListIter, SpecificDisplayItem};
|
||||
use api::{DevicePixelScale, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
|
||||
|
@ -659,6 +659,7 @@ pub struct RenderBackend {
|
|||
recorder: Option<Box<ApiRecordingReceiver>>,
|
||||
sampler: Option<Box<AsyncPropertySampler + Send>>,
|
||||
size_of_op: Option<VoidPtrToSizeFn>,
|
||||
debug_flags: DebugFlags,
|
||||
namespace_alloc_by_client: bool,
|
||||
}
|
||||
|
||||
|
@ -677,6 +678,7 @@ impl RenderBackend {
|
|||
recorder: Option<Box<ApiRecordingReceiver>>,
|
||||
sampler: Option<Box<AsyncPropertySampler + Send>>,
|
||||
size_of_op: Option<VoidPtrToSizeFn>,
|
||||
debug_flags: DebugFlags,
|
||||
namespace_alloc_by_client: bool,
|
||||
) -> RenderBackend {
|
||||
RenderBackend {
|
||||
|
@ -696,6 +698,7 @@ impl RenderBackend {
|
|||
recorder,
|
||||
sampler,
|
||||
size_of_op,
|
||||
debug_flags,
|
||||
namespace_alloc_by_client,
|
||||
}
|
||||
}
|
||||
|
@ -1005,6 +1008,8 @@ impl RenderBackend {
|
|||
// recently used resources.
|
||||
self.resource_cache.clear(ClearCache::all());
|
||||
|
||||
self.clear_gpu_cache();
|
||||
|
||||
let pending_update = self.resource_cache.pending_updates();
|
||||
let msg = ResultMsg::UpdateResources {
|
||||
updates: pending_update,
|
||||
|
@ -1104,6 +1109,22 @@ impl RenderBackend {
|
|||
DebugCommand::SetFlags(flags) => {
|
||||
self.resource_cache.set_debug_flags(flags);
|
||||
self.gpu_cache.set_debug_flags(flags);
|
||||
|
||||
// If we're toggling on the GPU cache debug display, we
|
||||
// need to blow away the cache. This is because we only
|
||||
// send allocation/free notifications to the renderer
|
||||
// thread when the debug display is enabled, and thus
|
||||
// enabling it when the cache is partially populated will
|
||||
// give the renderer an incomplete view of the world.
|
||||
// And since we might as well drop all the debugging state
|
||||
// from the renderer when we disable the debug display,
|
||||
// we just clear the cache on toggle.
|
||||
let changed = self.debug_flags ^ flags;
|
||||
if changed.contains(DebugFlags::GPU_CACHE_DBG) {
|
||||
self.clear_gpu_cache();
|
||||
}
|
||||
self.debug_flags = flags;
|
||||
|
||||
ResultMsg::DebugCommand(option)
|
||||
}
|
||||
_ => ResultMsg::DebugCommand(option),
|
||||
|
@ -1156,6 +1177,13 @@ impl RenderBackend {
|
|||
&mut profile_counters.resources,
|
||||
);
|
||||
|
||||
// If GPU cache utilization has stayed below the reclaim threshold for
|
||||
// long enough, drop the cache and rebuild it. This needs to be done before
|
||||
// any updates for this frame are made.
|
||||
if self.gpu_cache.should_reclaim_memory() {
|
||||
self.clear_gpu_cache();
|
||||
}
|
||||
|
||||
for scene_msg in transaction_msg.scene_ops.drain(..) {
|
||||
let _timer = profile_counters.total_time.timer();
|
||||
self.process_scene_msg(
|
||||
|
@ -1520,6 +1548,13 @@ impl RenderBackend {
|
|||
// thread waiting on the request.
|
||||
self.scene_tx.send(SceneBuilderRequest::ReportMemory(report, tx)).unwrap();
|
||||
}
|
||||
|
||||
/// Drops everything in the GPU cache. Must not be called once gpu cache entries
|
||||
/// for the next frame have already been requested.
///
/// Clears the backend-side `gpu_cache` and then sends
/// `ResultMsg::ClearGpuCache` over `result_tx`.
|
||||
fn clear_gpu_cache(&mut self) {
|
||||
self.gpu_cache.clear();
|
||||
// The `unwrap` panics if the message cannot be sent.
self.result_tx.send(ResultMsg::ClearGpuCache).unwrap();
|
||||
}
|
||||
}
|
||||
|
||||
fn get_blob_image_updates(updates: &[ResourceUpdate]) -> Vec<BlobImageKey> {
|
||||
|
|
|
@ -49,7 +49,7 @@ use gleam::gl;
|
|||
use glyph_rasterizer::{GlyphFormat, GlyphRasterizer};
|
||||
use gpu_cache::{GpuBlockData, GpuCacheUpdate, GpuCacheUpdateList};
|
||||
#[cfg(feature = "debug_renderer")]
|
||||
use gpu_cache::GpuDebugChunk;
|
||||
use gpu_cache::{GpuCacheDebugChunk, GpuCacheDebugCmd};
|
||||
#[cfg(feature = "pathfinder")]
|
||||
use gpu_glyph_renderer::GpuGlyphRenderer;
|
||||
use gpu_types::ScalingInstance;
|
||||
|
@ -1017,14 +1017,21 @@ pub enum BlendMode {
|
|||
SubpixelWithBgColor,
|
||||
}
|
||||
|
||||
// Tracks the state of each row in the GPU cache texture.
|
||||
/// Tracks the state of each row in the GPU cache texture.
|
||||
struct CacheRow {
|
||||
/// Mirrored block data on CPU for this row. We store a copy of
|
||||
/// the data on the CPU side to improve upload batching.
/// Holds `MAX_VERTEX_TEXTURE_WIDTH` blocks, i.e. one full texture row.
|
||||
cpu_blocks: Box<[GpuBlockData; MAX_VERTEX_TEXTURE_WIDTH]>,
|
||||
/// True if this row is dirty, i.e. its CPU-side blocks have been
/// modified and still need to be uploaded to the GPU texture.
|
||||
is_dirty: bool,
|
||||
}
|
||||
|
||||
impl CacheRow {
|
||||
fn new() -> Self {
|
||||
CacheRow { is_dirty: false }
|
||||
CacheRow {
|
||||
cpu_blocks: Box::new([GpuBlockData::EMPTY; MAX_VERTEX_TEXTURE_WIDTH]),
|
||||
is_dirty: false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
|
@ -1036,10 +1043,8 @@ enum GpuCacheBus {
|
|||
PixelBuffer {
|
||||
/// PBO used for transfers.
|
||||
buffer: PBO,
|
||||
/// Meta-data about the cached rows.
|
||||
/// Per-row data.
|
||||
rows: Vec<CacheRow>,
|
||||
/// Mirrored block data on CPU.
|
||||
cpu_blocks: Vec<GpuBlockData>,
|
||||
},
|
||||
/// Shader-based scattering updates. Currently rendered by a set
|
||||
/// of points into the GPU texture, each carrying a `GpuBlockData`.
|
||||
|
@ -1057,16 +1062,6 @@ enum GpuCacheBus {
|
|||
},
|
||||
}
|
||||
|
||||
impl GpuCacheBus {
|
||||
/// Returns true if this bus uses a render target for a texture.
|
||||
fn uses_render_target(&self) -> bool {
|
||||
match *self {
|
||||
GpuCacheBus::Scatter { .. } => true,
|
||||
GpuCacheBus::PixelBuffer { .. } => false,
|
||||
}
|
||||
}
|
||||
}
|
||||
|
||||
/// The device-specific representation of the cache texture in gpu_cache.rs
|
||||
struct GpuCacheTexture {
|
||||
texture: Option<Texture>,
|
||||
|
@ -1077,38 +1072,22 @@ impl GpuCacheTexture {
|
|||
|
||||
/// Ensures that we have an appropriately-sized texture. Returns true if a
|
||||
/// new texture was created.
|
||||
fn ensure_texture(&mut self, device: &mut Device, height: i32) -> bool {
|
||||
fn ensure_texture(&mut self, device: &mut Device, height: i32) {
|
||||
// If we already have a texture that works, we're done.
|
||||
if self.texture.as_ref().map_or(false, |t| t.get_dimensions().height >= height) {
|
||||
if GPU_CACHE_RESIZE_TEST && self.bus.uses_render_target() {
|
||||
if GPU_CACHE_RESIZE_TEST {
|
||||
// Special debug mode - resize the texture even though it's fine.
|
||||
} else {
|
||||
return false;
|
||||
return;
|
||||
}
|
||||
}
|
||||
|
||||
// Compute a few parameters for the new texture. We round the height up to
|
||||
// a multiple of 256 to avoid many small resizes.
|
||||
let new_height = (height + 255) & !255;
|
||||
let new_size = DeviceIntSize::new(MAX_VERTEX_TEXTURE_WIDTH as _, new_height);
|
||||
let rt_info = if self.bus.uses_render_target() {
|
||||
Some(RenderTargetInfo { has_depth: false })
|
||||
} else {
|
||||
None
|
||||
};
|
||||
|
||||
// Take the old texture, if any, and deinitialize it unless we're going
|
||||
// to blit its contents to the new one.
|
||||
let mut blit_source = None;
|
||||
if let Some(t) = self.texture.take() {
|
||||
if rt_info.is_some() {
|
||||
blit_source = Some(t);
|
||||
} else {
|
||||
device.delete_texture(t);
|
||||
}
|
||||
}
|
||||
// Take the old texture, if any.
|
||||
let blit_source = self.texture.take();
|
||||
|
||||
// Create the new texture.
|
||||
let new_size = DeviceIntSize::new(MAX_VERTEX_TEXTURE_WIDTH as _, height);
|
||||
let rt_info = Some(RenderTargetInfo { has_depth: false });
|
||||
let mut texture = device.create_texture(
|
||||
TextureTarget::Default,
|
||||
ImageFormat::RGBAF32,
|
||||
|
@ -1126,7 +1105,6 @@ impl GpuCacheTexture {
|
|||
}
|
||||
|
||||
self.texture = Some(texture);
|
||||
true
|
||||
}
|
||||
|
||||
fn new(device: &mut Device, use_scatter: bool) -> Result<Self, RendererError> {
|
||||
|
@ -1156,7 +1134,6 @@ impl GpuCacheTexture {
|
|||
GpuCacheBus::PixelBuffer {
|
||||
buffer,
|
||||
rows: Vec::new(),
|
||||
cpu_blocks: Vec::new(),
|
||||
}
|
||||
};
|
||||
|
||||
|
@ -1193,18 +1170,9 @@ impl GpuCacheTexture {
|
|||
total_block_count: usize,
|
||||
max_height: i32,
|
||||
) {
|
||||
let allocated_new_texture = self.ensure_texture(device, max_height);
|
||||
self.ensure_texture(device, max_height);
|
||||
match self.bus {
|
||||
GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
|
||||
if allocated_new_texture {
|
||||
// If we had to resize the texture, just mark all rows
|
||||
// as dirty so they will be uploaded to the texture
|
||||
// during the next flush.
|
||||
for row in rows.iter_mut() {
|
||||
row.is_dirty = true;
|
||||
}
|
||||
}
|
||||
}
|
||||
GpuCacheBus::PixelBuffer { .. } => {},
|
||||
GpuCacheBus::Scatter {
|
||||
ref mut buf_position,
|
||||
ref mut buf_value,
|
||||
|
@ -1222,7 +1190,7 @@ impl GpuCacheTexture {
|
|||
|
||||
fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) {
|
||||
match self.bus {
|
||||
GpuCacheBus::PixelBuffer { ref mut rows, ref mut cpu_blocks, .. } => {
|
||||
GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
|
||||
for update in &updates.updates {
|
||||
match *update {
|
||||
GpuCacheUpdate::Copy {
|
||||
|
@ -1237,19 +1205,16 @@ impl GpuCacheTexture {
|
|||
while rows.len() <= row {
|
||||
// Add a new row.
|
||||
rows.push(CacheRow::new());
|
||||
// Add enough GPU blocks for this row.
|
||||
cpu_blocks
|
||||
.extend_from_slice(&[GpuBlockData::EMPTY; MAX_VERTEX_TEXTURE_WIDTH]);
|
||||
}
|
||||
|
||||
// This row is dirty (needs to be updated in GPU texture).
|
||||
rows[row].is_dirty = true;
|
||||
|
||||
// Copy the blocks from the patch array in the shadow CPU copy.
|
||||
let block_offset = row * MAX_VERTEX_TEXTURE_WIDTH + address.u as usize;
|
||||
let data = &mut cpu_blocks[block_offset .. (block_offset + block_count)];
|
||||
let block_offset = address.u as usize;
|
||||
let data = &mut rows[row].cpu_blocks;
|
||||
for i in 0 .. block_count {
|
||||
data[i] = updates.blocks[block_index + i];
|
||||
data[block_offset + i] = updates.blocks[block_index + i];
|
||||
}
|
||||
}
|
||||
}
|
||||
|
@ -1294,7 +1259,7 @@ impl GpuCacheTexture {
|
|||
fn flush(&mut self, device: &mut Device) -> usize {
|
||||
let texture = self.texture.as_ref().unwrap();
|
||||
match self.bus {
|
||||
GpuCacheBus::PixelBuffer { ref buffer, ref mut rows, ref cpu_blocks } => {
|
||||
GpuCacheBus::PixelBuffer { ref buffer, ref mut rows } => {
|
||||
let rows_dirty = rows
|
||||
.iter()
|
||||
.filter(|row| row.is_dirty)
|
||||
|
@ -1314,15 +1279,12 @@ impl GpuCacheTexture {
|
|||
continue;
|
||||
}
|
||||
|
||||
let block_index = row_index * MAX_VERTEX_TEXTURE_WIDTH;
|
||||
let cpu_blocks =
|
||||
&cpu_blocks[block_index .. (block_index + MAX_VERTEX_TEXTURE_WIDTH)];
|
||||
let rect = DeviceIntRect::new(
|
||||
DeviceIntPoint::new(0, row_index as i32),
|
||||
DeviceIntSize::new(MAX_VERTEX_TEXTURE_WIDTH as i32, 1),
|
||||
);
|
||||
|
||||
uploader.upload(rect, 0, None, cpu_blocks);
|
||||
uploader.upload(rect, 0, None, &*row.cpu_blocks);
|
||||
|
||||
row.is_dirty = false;
|
||||
}
|
||||
|
@ -1515,6 +1477,7 @@ pub struct Renderer {
|
|||
pub device: Device,
|
||||
pending_texture_updates: Vec<TextureUpdateList>,
|
||||
pending_gpu_cache_updates: Vec<GpuCacheUpdateList>,
|
||||
pending_gpu_cache_clear: bool,
|
||||
pending_shader_updates: Vec<PathBuf>,
|
||||
active_documents: Vec<(DocumentId, RenderedDocument)>,
|
||||
|
||||
|
@ -1552,8 +1515,12 @@ pub struct Renderer {
|
|||
transforms_texture: VertexDataTexture,
|
||||
render_task_texture: VertexDataTexture,
|
||||
gpu_cache_texture: GpuCacheTexture,
|
||||
|
||||
/// When the GPU cache debugger is enabled, we keep track of the live blocks
|
||||
/// in the GPU cache so that we can use them for the debug display. This
|
||||
/// member stores those live blocks, indexed by row.
|
||||
#[cfg(feature = "debug_renderer")]
|
||||
gpu_cache_debug_chunks: Vec<GpuDebugChunk>,
|
||||
gpu_cache_debug_chunks: Vec<Vec<GpuCacheDebugChunk>>,
|
||||
|
||||
gpu_cache_frame_id: FrameId,
|
||||
gpu_cache_overflow: bool,
|
||||
|
@ -1851,9 +1818,7 @@ impl Renderer {
|
|||
};
|
||||
|
||||
let device_pixel_ratio = options.device_pixel_ratio;
|
||||
// First set the flags to default and later call set_debug_flags to ensure any
|
||||
// potential transition when enabling a flag is run.
|
||||
let debug_flags = DebugFlags::default();
|
||||
let debug_flags = options.debug_flags;
|
||||
let payload_rx_for_backend = payload_rx.to_mpsc_receiver();
|
||||
let recorder = options.recorder;
|
||||
let thread_listener = Arc::new(options.thread_listener);
|
||||
|
@ -1973,6 +1938,7 @@ impl Renderer {
|
|||
recorder,
|
||||
sampler,
|
||||
size_of_op,
|
||||
debug_flags,
|
||||
namespace_alloc_by_client,
|
||||
);
|
||||
backend.run(backend_profile_counters);
|
||||
|
@ -1993,11 +1959,12 @@ impl Renderer {
|
|||
active_documents: Vec::new(),
|
||||
pending_texture_updates: Vec::new(),
|
||||
pending_gpu_cache_updates: Vec::new(),
|
||||
pending_gpu_cache_clear: false,
|
||||
pending_shader_updates: Vec::new(),
|
||||
shaders,
|
||||
#[cfg(feature = "debug_renderer")]
|
||||
debug: LazyInitializedDebugRenderer::new(),
|
||||
debug_flags,
|
||||
debug_flags: DebugFlags::empty(),
|
||||
backend_profile_counters: BackendProfileCounters::new(),
|
||||
profile_counters: RendererProfileCounters::new(),
|
||||
resource_upload_time: 0,
|
||||
|
@ -2053,7 +2020,9 @@ impl Renderer {
|
|||
framebuffer_size: None,
|
||||
};
|
||||
|
||||
renderer.set_debug_flags(options.debug_flags);
|
||||
// We initially set the flags to default and now call set_debug_flags
|
||||
// to ensure any potential transition when enabling a flag is run.
|
||||
renderer.set_debug_flags(debug_flags);
|
||||
|
||||
let sender = RenderApiSender::new(api_tx, payload_tx);
|
||||
Ok((renderer, sender))
|
||||
|
@ -2144,10 +2113,33 @@ impl Renderer {
|
|||
ResultMsg::UpdateGpuCache(mut list) => {
|
||||
#[cfg(feature = "debug_renderer")]
|
||||
{
|
||||
self.gpu_cache_debug_chunks = mem::replace(&mut list.debug_chunks, Vec::new());
|
||||
for cmd in mem::replace(&mut list.debug_commands, Vec::new()) {
|
||||
match cmd {
|
||||
GpuCacheDebugCmd::Alloc(chunk) => {
|
||||
let row = chunk.address.v as usize;
|
||||
if row >= self.gpu_cache_debug_chunks.len() {
|
||||
self.gpu_cache_debug_chunks.resize(row + 1, Vec::new());
|
||||
}
|
||||
self.gpu_cache_debug_chunks[row].push(chunk);
|
||||
},
|
||||
GpuCacheDebugCmd::Free(address) => {
|
||||
let chunks = &mut self.gpu_cache_debug_chunks[address.v as usize];
|
||||
let pos = chunks.iter()
|
||||
.position(|x| x.address == address).unwrap();
|
||||
chunks.remove(pos);
|
||||
},
|
||||
}
|
||||
}
|
||||
}
|
||||
self.pending_gpu_cache_updates.push(list);
|
||||
}
|
||||
ResultMsg::ClearGpuCache => {
|
||||
#[cfg(feature = "debug_renderer")]
|
||||
{
|
||||
self.gpu_cache_debug_chunks = Vec::new();
|
||||
}
|
||||
self.pending_gpu_cache_clear = true;
|
||||
}
|
||||
ResultMsg::UpdateResources {
|
||||
updates,
|
||||
memory_pressure,
|
||||
|
@ -2743,7 +2735,7 @@ impl Renderer {
|
|||
height: gpu_cache_height,
|
||||
blocks: vec![[1f32; 4].into()],
|
||||
updates: Vec::new(),
|
||||
debug_chunks: Vec::new(),
|
||||
debug_commands: Vec::new(),
|
||||
});
|
||||
}
|
||||
|
||||
|
@ -2789,6 +2781,15 @@ impl Renderer {
|
|||
}
|
||||
|
||||
fn prepare_gpu_cache(&mut self, frame: &Frame) {
|
||||
if self.pending_gpu_cache_clear {
|
||||
let use_scatter =
|
||||
matches!(self.gpu_cache_texture.bus, GpuCacheBus::Scatter { .. });
|
||||
let new_cache = GpuCacheTexture::new(&mut self.device, use_scatter).unwrap();
|
||||
let old_cache = mem::replace(&mut self.gpu_cache_texture, new_cache);
|
||||
old_cache.deinit(&mut self.device);
|
||||
self.pending_gpu_cache_clear = false;
|
||||
}
|
||||
|
||||
let deferred_update_list = self.update_deferred_resolves(&frame.deferred_resolves);
|
||||
self.pending_gpu_cache_updates.extend(deferred_update_list);
|
||||
|
||||
|
@ -3856,7 +3857,7 @@ impl Renderer {
|
|||
height: self.gpu_cache_texture.get_height(),
|
||||
blocks: Vec::new(),
|
||||
updates: Vec::new(),
|
||||
debug_chunks: Vec::new(),
|
||||
debug_commands: Vec::new(),
|
||||
};
|
||||
|
||||
for deferred_resolve in deferred_resolves {
|
||||
|
@ -4444,21 +4445,21 @@ impl Renderer {
|
|||
};
|
||||
|
||||
let (x_off, y_off) = (30f32, 30f32);
|
||||
//let x_end = framebuffer_size.width as f32 - x_off;
|
||||
let y_end = framebuffer_size.height as f32 - y_off;
|
||||
let height = self.gpu_cache_texture.texture
|
||||
.as_ref().map_or(0, |t| t.get_dimensions().height)
|
||||
.min(framebuffer_size.height - (y_off as i32) * 2) as usize;
|
||||
debug_renderer.add_quad(
|
||||
x_off,
|
||||
y_off,
|
||||
x_off + MAX_VERTEX_TEXTURE_WIDTH as f32,
|
||||
y_end,
|
||||
y_off + height as f32,
|
||||
ColorU::new(80, 80, 80, 80),
|
||||
ColorU::new(80, 80, 80, 80),
|
||||
);
|
||||
|
||||
for chunk in &self.gpu_cache_debug_chunks {
|
||||
let color = match chunk.tag {
|
||||
_ => ColorU::new(250, 0, 0, 200),
|
||||
};
|
||||
let upper = self.gpu_cache_debug_chunks.len().min(height);
|
||||
for chunk in self.gpu_cache_debug_chunks[0..upper].iter().flatten() {
|
||||
let color = ColorU::new(250, 0, 0, 200);
|
||||
debug_renderer.add_quad(
|
||||
x_off + chunk.address.u as f32,
|
||||
y_off + chunk.address.v as f32,
|
||||
|
@ -4548,8 +4549,10 @@ impl Renderer {
|
|||
let mut report = MemoryReport::default();
|
||||
|
||||
// GPU cache CPU memory.
|
||||
if let GpuCacheBus::PixelBuffer{ref cpu_blocks, ..} = self.gpu_cache_texture.bus {
|
||||
report.gpu_cache_cpu_mirror += self.size_of(cpu_blocks.as_ptr());
|
||||
if let GpuCacheBus::PixelBuffer{ref rows, ..} = self.gpu_cache_texture.bus {
|
||||
for row in rows.iter() {
|
||||
report.gpu_cache_cpu_mirror += self.size_of(&*row.cpu_blocks as *const _);
|
||||
}
|
||||
}
|
||||
|
||||
// GPU cache GPU memory.
|
||||
|
@ -5205,7 +5208,7 @@ impl Renderer {
|
|||
);
|
||||
self.gpu_cache_texture.texture = Some(t);
|
||||
match self.gpu_cache_texture.bus {
|
||||
GpuCacheBus::PixelBuffer { ref mut rows, ref mut cpu_blocks, .. } => {
|
||||
GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
|
||||
let dim = self.gpu_cache_texture.texture.as_ref().unwrap().get_dimensions();
|
||||
let blocks = unsafe {
|
||||
slice::from_raw_parts(
|
||||
|
@ -5215,9 +5218,12 @@ impl Renderer {
|
|||
};
|
||||
// fill up the CPU cache from the contents we just loaded
|
||||
rows.clear();
|
||||
cpu_blocks.clear();
|
||||
rows.extend((0 .. dim.height).map(|_| CacheRow::new()));
|
||||
cpu_blocks.extend_from_slice(blocks);
|
||||
let chunks = blocks.chunks(MAX_VERTEX_TEXTURE_WIDTH);
|
||||
debug_assert_eq!(chunks.len(), rows.len());
|
||||
for (row, chunk) in rows.iter_mut().zip(chunks) {
|
||||
row.cpu_blocks.copy_from_slice(chunk);
|
||||
}
|
||||
}
|
||||
GpuCacheBus::Scatter { .. } => {}
|
||||
}
|
||||
|
|
Загрузка…
Ссылка в новой задаче