diff --git a/gfx/wr/webrender/src/device/gl.rs b/gfx/wr/webrender/src/device/gl.rs
index 210eccd98887..d4d1644a821e 100644
--- a/gfx/wr/webrender/src/device/gl.rs
+++ b/gfx/wr/webrender/src/device/gl.rs
@@ -2697,67 +2697,29 @@ impl Device {
         pbo.reserved_size = 0
     }

-    /// Returns the size in bytes required to upload an area of pixels of the specified
-    /// size, with the specified stride, to a texture of the specified format.
-    pub fn required_upload_size(size: DeviceIntSize, stride: Option<i32>, format: ImageFormat, optimal_pbo_stride: NonZeroUsize) -> usize {
-        assert!(size.width >= 0);
-        assert!(size.height >= 0);
-        if let Some(stride) = stride {
-            assert!(stride >= 0);
-        }
-
-        let bytes_pp = format.bytes_per_pixel() as usize;
-        let width_bytes = size.width as usize * bytes_pp;
-        let src_stride = stride.map_or(width_bytes, |stride| {
-            assert!(stride >= 0);
-            stride as usize
-        });
-
-        let dst_stride = round_up_to_multiple(src_stride, optimal_pbo_stride);
-
-        // The size of the chunk should only need to be (height - 1) * dst_stride + width_bytes,
-        // however, the android emulator will error unless it is height * dst_stride.
-        // See bug 1587047 for details.
-        // Using the full final row also ensures that the offset of the next chunk is
-        // optimally aligned.
-        dst_stride * size.height as usize
-    }
-
-    /// Returns a `TextureUploader` which can be used to upload texture data to `texture`.
-    /// The total size in bytes is specified by `upload_size`, and must be greater than zero
-    /// and at least as large as the sum of the `required_upload_size()` for each subsequent
-    /// call to `TextureUploader.upload()`.
     pub fn upload_texture<'a, T>(
        &'a mut self,
        texture: &'a Texture,
        pbo: &PBO,
-        upload_size: usize,
+        upload_count: usize,
    ) -> TextureUploader<'a, T> {
        debug_assert!(self.inside_frame);
-        assert_ne!(upload_size, 0, "Must specify valid upload size");
-
        self.bind_texture(DEFAULT_TEXTURE, texture, Swizzle::default());

        let buffer = match self.upload_method {
            UploadMethod::Immediate => None,
            UploadMethod::PixelBuffer(hint) => {
+                let upload_size = upload_count * mem::size_of::<T>();
                self.gl.bind_buffer(gl::PIXEL_UNPACK_BUFFER, pbo.id);
-                self.gl.buffer_data_untyped(
-                    gl::PIXEL_UNPACK_BUFFER,
-                    upload_size as _,
-                    ptr::null(),
-                    hint.to_gl(),
-                );
-                let ptr = self.gl.map_buffer_range(
-                    gl::PIXEL_UNPACK_BUFFER,
-                    0,
-                    upload_size as _,
-                    gl::MAP_WRITE_BIT | gl::MAP_INVALIDATE_BUFFER_BIT,
-                );
-                let mapping = unsafe {
-                    slice::from_raw_parts_mut(ptr as *mut _, upload_size)
-                };
-                Some(PixelBuffer::new(upload_size, mapping))
+                if upload_size != 0 {
+                    self.gl.buffer_data_untyped(
+                        gl::PIXEL_UNPACK_BUFFER,
+                        upload_size as _,
+                        ptr::null(),
+                        hint.to_gl(),
+                    );
+                }
+                Some(PixelBuffer::new(hint.to_gl(), upload_size))
            },
        };

@@ -3523,24 +3485,24 @@ struct UploadChunk {
     format_override: Option<ImageFormat>,
 }

-struct PixelBuffer<'a> {
+struct PixelBuffer {
+    usage: gl::GLenum,
     size_allocated: usize,
     size_used: usize,
     // small vector avoids heap allocation for a single chunk
     chunks: SmallVec<[UploadChunk; 1]>,
-    mapping: &'a mut [mem::MaybeUninit<u8>],
 }

-impl<'a> PixelBuffer<'a> {
+impl PixelBuffer {
     fn new(
+        usage: gl::GLenum,
         size_allocated: usize,
-        mapping: &'a mut [mem::MaybeUninit<u8>],
     ) -> Self {
         PixelBuffer {
+            usage,
             size_allocated,
             size_used: 0,
             chunks: SmallVec::new(),
-            mapping,
         }
     }
 }
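With these hunks the PBO is no longer sized and persistently mapped for each batch: PixelBuffer keeps the GL usage hint and the currently allocated size, storage is (re)allocated only when an upload does not fit, and the same allocation is reused by later TextureUploaders. A minimal sketch of that bookkeeping, assuming a hypothetical GpuBuffer type in place of the GL-backed PixelBuffer:

    // Sketch only: models the flush-then-grow accounting of the new
    // PixelBuffer; `GpuBuffer` and `stage` are illustrative names.
    struct GpuBuffer {
        size_allocated: usize,
        size_used: usize,
        pending_chunks: Vec<(usize, usize)>, // (offset, size) of staged uploads
    }

    impl GpuBuffer {
        fn new() -> Self {
            GpuBuffer { size_allocated: 0, size_used: 0, pending_chunks: Vec::new() }
        }

        /// Stage `size` bytes, flushing and/or growing the allocation as needed.
        fn stage(&mut self, size: usize) {
            if self.size_used + size > self.size_allocated {
                // Flush: in the real code this submits each chunk via update_impl().
                self.pending_chunks.clear();
                self.size_used = 0;
            }
            if size > self.size_allocated {
                // Grow: in the real code this is glBufferData(), which also
                // orphans the old storage so the driver need not stall.
                self.size_allocated = size;
            }
            self.pending_chunks.push((self.size_used, size));
            self.size_used += size;
        }
    }

    fn main() {
        let mut buf = GpuBuffer::new();
        for size in [4096, 1024, 8192, 512] {
            buf.stage(size);
            println!("used {} of {} bytes", buf.size_used, buf.size_allocated);
        }
    }

Flushing before growing matters here, presumably because staged chunks reference offsets into the storage that buffer_data_untyped would orphan.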
@@ -3554,14 +3516,13 @@ struct UploadTarget<'a> {

 pub struct TextureUploader<'a, T> {
     target: UploadTarget<'a>,
-    buffer: Option<PixelBuffer<'a>>,
+    buffer: Option<PixelBuffer>,
     marker: PhantomData<T>,
 }

 impl<'a, T> Drop for TextureUploader<'a, T> {
     fn drop(&mut self) {
         if let Some(buffer) = self.buffer.take() {
-            self.target.gl.unmap_buffer(gl::PIXEL_UNPACK_BUFFER);
             for chunk in buffer.chunks {
                 self.target.update_impl(chunk);
             }
@@ -3603,42 +3564,70 @@ impl<'a, T> TextureUploader<'a, T> {
         let src_size = (rect.size.height as usize - 1) * src_stride + width_bytes;
         assert!(src_size <= len * mem::size_of::<T>());

-        // for optimal PBO texture uploads the offset and stride of the data in
+        // for optimal PBO texture uploads the stride of the data in
         // the buffer may have to be a multiple of a certain value.
         let dst_stride = round_up_to_multiple(src_stride, self.target.optimal_pbo_stride);
-        let dst_size = Device::required_upload_size(
-            rect.size,
-            stride,
-            self.target.texture.format,
-            self.target.optimal_pbo_stride
-        );
+        // The size of the PBO should only need to be (height - 1) * dst_stride + width_bytes,
+        // however, the android emulator will error unless it is height * dst_stride.
+        // See bug 1587047 for details.
+        let dst_size = rect.size.height as usize * dst_stride;

         match self.buffer {
             Some(ref mut buffer) => {
-                assert!(buffer.size_used + dst_size <= buffer.size_allocated, "UploadBuffer is too small");
+                if buffer.size_used + dst_size > buffer.size_allocated {
+                    // flush
+                    for chunk in buffer.chunks.drain() {
+                        self.target.update_impl(chunk);
+                    }
+                    buffer.size_used = 0;
+                }

-                unsafe {
-                    let src: &[mem::MaybeUninit<u8>] = slice::from_raw_parts(data as *const _, src_size);
+                if dst_size > buffer.size_allocated {
+                    // allocate a buffer large enough
+                    self.target.gl.buffer_data_untyped(
+                        gl::PIXEL_UNPACK_BUFFER,
+                        dst_size as _,
+                        ptr::null(),
+                        buffer.usage,
+                    );
+                    buffer.size_allocated = dst_size;
+                }

-                    if src_stride == dst_stride {
-                        // the stride is already optimal, so simply copy
-                        // the data as-is in to the buffer
-                        let dst_start = buffer.size_used;
-                        let dst_end = dst_start + src_size;
+                if src_stride == dst_stride {
+                    // the stride is already optimal, so simply copy
+                    // the data as-is in to the buffer
+                    assert_eq!(src_size % mem::size_of::<T>(), 0);
+                    self.target.gl.buffer_sub_data_untyped(
+                        gl::PIXEL_UNPACK_BUFFER,
+                        buffer.size_used as isize,
+                        src_size as isize,
+                        data as *const _,
+                    );
+                } else {
+                    // copy the data line-by-line in to the buffer so
+                    // that it has an optimal stride
+                    let ptr = self.target.gl.map_buffer_range(
+                        gl::PIXEL_UNPACK_BUFFER,
+                        buffer.size_used as _,
+                        dst_size as _,
+                        gl::MAP_WRITE_BIT | gl::MAP_INVALIDATE_RANGE_BIT,
+                    );
+
+                    unsafe {
+                        let src: &[mem::MaybeUninit<u8>] = slice::from_raw_parts(data as *const _, src_size);
+                        let dst: &mut [mem::MaybeUninit<u8>] = slice::from_raw_parts_mut(ptr as *mut _, dst_size);

-                        buffer.mapping[dst_start..dst_end].copy_from_slice(src);
-                    } else {
-                        // copy the data line-by-line in to the buffer so
-                        // that it has an optimal stride
                         for y in 0..rect.size.height as usize {
                             let src_start = y * src_stride;
                             let src_end = src_start + width_bytes;
-                            let dst_start = buffer.size_used + y * dst_stride;
+                            let dst_start = y * dst_stride;
                             let dst_end = dst_start + width_bytes;

-                            buffer.mapping[dst_start..dst_end].copy_from_slice(&src[src_start..src_end])
+                            dst[dst_start..dst_end].copy_from_slice(&src[src_start..src_end])
                         }
                     }
+
+                    self.target.gl.unmap_buffer(gl::PIXEL_UNPACK_BUFFER);
                 }

                 buffer.chunks.push(UploadChunk {
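After these gl.rs changes, upload() writes into the persistent PBO one of two ways: via buffer_sub_data_untyped when the source stride is already a multiple of the optimal PBO stride, or by mapping a range and repacking row by row otherwise. A self-contained sketch of that row repacking, with illustrative names and a plain Vec standing in for the mapped range:

    // Sketch only: the real code derives dst_stride with round_up_to_multiple
    // and writes into a mapped PBO range rather than a Vec.
    fn repack_rows(
        src: &[u8],
        height: usize,
        width_bytes: usize,
        src_stride: usize,
        dst_stride: usize,
    ) -> Vec<u8> {
        assert!(src_stride >= width_bytes && dst_stride >= width_bytes);
        let mut dst = vec![0u8; height * dst_stride];
        for y in 0..height {
            let src_start = y * src_stride;
            let dst_start = y * dst_stride;
            // Copy one row of pixels; padding bytes past width_bytes stay zero.
            dst[dst_start..dst_start + width_bytes]
                .copy_from_slice(&src[src_start..src_start + width_bytes]);
        }
        dst
    }

    fn main() {
        // Three 5-byte rows, tightly packed, repacked to a hypothetical
        // 8-byte optimal stride.
        let src: Vec<u8> = (0u8..15).collect();
        let dst = repack_rows(&src, 3, 5, 5, 8);
        assert_eq!(dst.len(), 24);
        assert_eq!(&dst[8..13], &src[5..10]); // second row starts at dst_stride
    }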
Commands to alloc/realloc/free the textures. Processed first.
     pub allocations: Vec<TextureCacheAllocation>,
     /// Commands to update the contents of the textures. Processed second.
-    pub updates: FastHashMap<CacheTextureId, Vec<TextureCacheUpdate>>,
+    pub updates: Vec<TextureCacheUpdate>,
 }

 impl TextureUpdateList {
@@ -372,7 +372,7 @@
         TextureUpdateList {
             clears_shared_cache: false,
             allocations: Vec::new(),
-            updates: FastHashMap::default(),
+            updates: Vec::new(),
         }
     }

@@ -390,10 +390,7 @@
     /// Pushes an update operation onto the list.
     #[inline]
     pub fn push_update(&mut self, update: TextureCacheUpdate) {
-        self.updates
-            .entry(update.id)
-            .or_default()
-            .push(update);
+        self.updates.push(update);
     }

     /// Sends a command to the Renderer to clear the portion of the shared region
@@ -483,7 +480,7 @@
         self.debug_assert_coalesced(id);

         // Drop any unapplied updates to the to-be-freed texture.
-        self.updates.remove(&id);
+        self.updates.retain(|x| x.id != id);

         // Drop any allocations for it as well. If we happen to be allocating and
         // freeing in the same batch, we can collapse them to a no-op.
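The internal_types.rs change flattens the per-texture FastHashMap into a single Vec, so updates keep their overall submission order and freeing a texture filters its pending updates out with retain(). A reduced sketch, with simplified stand-ins for CacheTextureId and TextureCacheUpdate:

    // Sketch only: stand-in types, same push/retain semantics as the patch.
    #[derive(Debug, PartialEq, Clone, Copy)]
    struct CacheTextureId(u64);

    #[derive(Debug)]
    struct Update {
        id: CacheTextureId,
        bytes: usize,
    }

    #[derive(Default)]
    struct UpdateList {
        updates: Vec<Update>,
    }

    impl UpdateList {
        fn push_update(&mut self, update: Update) {
            self.updates.push(update);
        }

        /// Drop any unapplied updates to a texture that is about to be freed.
        fn free_texture(&mut self, id: CacheTextureId) {
            self.updates.retain(|x| x.id != id);
        }
    }

    fn main() {
        let mut list = UpdateList::default();
        list.push_update(Update { id: CacheTextureId(1), bytes: 64 });
        list.push_update(Update { id: CacheTextureId(2), bytes: 128 });
        list.push_update(Update { id: CacheTextureId(1), bytes: 32 });
        list.free_texture(CacheTextureId(1));
        assert_eq!(list.updates.len(), 1);
        assert_eq!(list.updates[0].id, CacheTextureId(2)); // only texture 2 survives
    }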
diff --git a/gfx/wr/webrender/src/renderer.rs b/gfx/wr/webrender/src/renderer.rs
index 5a91bfeb9cd8..4516df844a6a 100644
--- a/gfx/wr/webrender/src/renderer.rs
+++ b/gfx/wr/webrender/src/renderer.rs
@@ -1519,17 +1519,10 @@ impl GpuCacheTexture {
             return 0
         }

-        let upload_size = rows_dirty * Device::required_upload_size(
-            DeviceIntSize::new(MAX_VERTEX_TEXTURE_WIDTH as i32, 1),
-            None,
-            texture.get_format(),
-            device.optimal_pbo_stride(),
-        );
-
         let mut uploader = device.upload_texture(
             texture,
             buffer,
-            upload_size,
+            rows_dirty * MAX_VERTEX_TEXTURE_WIDTH,
         );

         for (row_index, row) in rows.iter_mut().enumerate() {
@@ -1670,14 +1663,8 @@ impl VertexDataTexture {
         );
         debug_assert!(len <= data.capacity(), "CPU copy will read out of bounds");

-        let upload_size = Device::required_upload_size(
-            rect.size,
-            None,
-            self.texture().get_format(),
-            device.optimal_pbo_stride(),
-        );
         device
-            .upload_texture(self.texture(), &self.pbo, upload_size)
+            .upload_texture(self.texture(), &self.pbo, 0)
             .upload(rect, 0, None, None, data.as_ptr(), len);
     }

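Call sites now pass upload_texture() a count of T elements rather than a byte size: GpuCacheTexture presizes for rows_dirty * MAX_VERTEX_TEXTURE_WIDTH texels, while VertexDataTexture passes 0 so the first upload() allocates. A small illustration of the count-to-bytes conversion done inside upload_texture(); the GpuBlockData layout and the 1024 width here are assumptions for the example, not taken from this patch:

    use std::mem;

    // Assumed 16-byte block, standing in for WebRender's gpu cache blocks.
    #[repr(C)]
    #[derive(Clone, Copy)]
    struct GpuBlockData {
        data: [f32; 4],
    }

    // Mirrors `upload_count * mem::size_of::<T>()` in upload_texture().
    fn presize_bytes<T>(upload_count: usize) -> usize {
        upload_count * mem::size_of::<T>()
    }

    fn main() {
        const MAX_VERTEX_TEXTURE_WIDTH: usize = 1024; // assumed value
        let rows_dirty = 3;
        // GpuCacheTexture-style call site: rows_dirty * width texels.
        let bytes = presize_bytes::<GpuBlockData>(rows_dirty * MAX_VERTEX_TEXTURE_WIDTH);
        assert_eq!(bytes, 3 * 1024 * 16);
        // VertexDataTexture-style call site: defer allocation to upload().
        assert_eq!(presize_bytes::<GpuBlockData>(0), 0);
    }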
@@ -3467,108 +3454,83 @@ impl Renderer {
             }
         }

-        for (texture_id, updates) in update_list.updates {
-            let texture = &self.texture_resolver.texture_cache_map[&texture_id];
-            let device = &mut self.device;
+        for update in update_list.updates {
+            let TextureCacheUpdate { id, rect, stride, offset, layer_index, format_override, source } = update;
+            let texture = &self.texture_resolver.texture_cache_map[&id];

-            // Calculate the total size of buffer required to upload all updates.
-            let required_size = updates.iter().map(|update| {
-                // Perform any debug clears now. As this requires a mutable borrow of device,
-                // it must be done before all the updates which require a TextureUploader.
-                if let TextureUpdateSource::DebugClear = update.source {
-                    let draw_target = DrawTarget::from_texture(
+            let bytes_uploaded = match source {
+                TextureUpdateSource::Bytes { data } => {
+                    let mut uploader = self.device.upload_texture(
                         texture,
-                        update.layer_index as usize,
-                        false,
+                        &self.texture_cache_upload_pbo,
+                        0,
                     );
-                    device.bind_draw_target(draw_target);
-                    device.clear_target(
-                        Some(TEXTURE_CACHE_DBG_CLEAR_COLOR),
-                        None,
-                        Some(draw_target.to_framebuffer_rect(update.rect.to_i32()))
-                    );
-
-                    0
-                } else {
-                    Device::required_upload_size(
-                        update.rect.size,
-                        update.stride,
-                        texture.get_format(),
-                        device.optimal_pbo_stride(),
+                    let data = &data[offset as usize ..];
+                    uploader.upload(
+                        rect,
+                        layer_index,
+                        stride,
+                        format_override,
+                        data.as_ptr(),
+                        data.len(),
                     )
                 }
-            }).sum();
-
-            if required_size == 0 {
-                break;
-            }
-
-            // For best performance we use a single TextureUploader for all uploads.
-            // Using individual TextureUploaders was causing performance issues on some drivers
-            // due to allocating too many PBOs.
-            let mut uploader = device.upload_texture(
-                texture,
-                &self.texture_cache_upload_pbo,
-                required_size
-            );
-
-            for update in updates {
-                let TextureCacheUpdate { id: _id, rect, stride, offset, layer_index, format_override, source } = update;
-
-                let bytes_uploaded = match source {
-                    TextureUpdateSource::Bytes { data } => {
-                        let data = &data[offset as usize ..];
-                        uploader.upload(
-                            rect,
-                            layer_index,
-                            stride,
-                            format_override,
-                            data.as_ptr(),
-                            data.len(),
-                        )
-                    }
-                    TextureUpdateSource::External { id, channel_index } => {
-                        let handler = self.external_image_handler
-                            .as_mut()
-                            .expect("Found external image, but no handler set!");
-                        // The filter is only relevant for NativeTexture external images.
-                        let dummy_data;
-                        let data = match handler.lock(id, channel_index, ImageRendering::Auto).source {
-                            ExternalImageSource::RawData(data) => {
-                                &data[offset as usize ..]
-                            }
-                            ExternalImageSource::Invalid => {
-                                // Create a local buffer to fill the pbo.
-                                let bpp = texture.get_format().bytes_per_pixel();
-                                let width = stride.unwrap_or(rect.size.width * bpp);
-                                let total_size = width * rect.size.height;
-                                // WR haven't support RGBAF32 format in texture_cache, so
-                                // we use u8 type here.
-                                dummy_data = vec![0xFFu8; total_size as usize];
-                                &dummy_data
-                            }
-                            ExternalImageSource::NativeTexture(eid) => {
-                                panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id);
-                            }
-                        };
-                        let size = uploader.upload(
-                            rect,
-                            layer_index,
-                            stride,
-                            format_override,
-                            data.as_ptr(),
-                            data.len()
-                        );
-                        handler.unlock(id, channel_index);
-                        size
-                    }
-                    TextureUpdateSource::DebugClear => {
-                        // DebugClear updates are handled separately.
-                        0
-                    }
-                };
-                self.profile_counters.texture_data_uploaded.add(bytes_uploaded >> 10);
-            }
+                TextureUpdateSource::External { id, channel_index } => {
+                    let mut uploader = self.device.upload_texture(
+                        texture,
+                        &self.texture_cache_upload_pbo,
+                        0,
+                    );
+                    let handler = self.external_image_handler
+                        .as_mut()
+                        .expect("Found external image, but no handler set!");
+                    // The filter is only relevant for NativeTexture external images.
+                    let dummy_data;
+                    let data = match handler.lock(id, channel_index, ImageRendering::Auto).source {
+                        ExternalImageSource::RawData(data) => {
+                            &data[offset as usize ..]
+                        }
+                        ExternalImageSource::Invalid => {
+                            // Create a local buffer to fill the pbo.
+                            let bpp = texture.get_format().bytes_per_pixel();
+                            let width = stride.unwrap_or(rect.size.width * bpp);
+                            let total_size = width * rect.size.height;
+                            // WR haven't support RGBAF32 format in texture_cache, so
+                            // we use u8 type here.
+                            dummy_data = vec![0xFFu8; total_size as usize];
+                            &dummy_data
+                        }
+                        ExternalImageSource::NativeTexture(eid) => {
+                            panic!("Unexpected external texture {:?} for the texture cache update of {:?}", eid, id);
+                        }
+                    };
+                    let size = uploader.upload(
+                        rect,
+                        layer_index,
+                        stride,
+                        format_override,
+                        data.as_ptr(),
+                        data.len()
+                    );
+                    handler.unlock(id, channel_index);
+                    size
+                }
+                TextureUpdateSource::DebugClear => {
+                    let draw_target = DrawTarget::from_texture(
+                        texture,
+                        layer_index as usize,
+                        false,
+                    );
+                    self.device.bind_draw_target(draw_target);
+                    self.device.clear_target(
+                        Some(TEXTURE_CACHE_DBG_CLEAR_COLOR),
+                        None,
+                        Some(draw_target.to_framebuffer_rect(rect.to_i32()))
+                    );
+                    0
+                }
+            };
+            self.profile_counters.texture_data_uploaded.add(bytes_uploaded >> 10);
         }

         if update_list.clears_shared_cache {
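Finally, note that required_upload_size() is gone but its sizing rule lives on inline in upload(): the PBO is sized to height * dst_stride even though only (height - 1) * dst_stride + width_bytes bytes are strictly read, to avoid errors on the Android emulator (bug 1587047). A worked example with illustrative numbers; round_up_to_multiple is simplified to plain usize here, where WebRender's takes a NonZeroUsize:

    fn round_up_to_multiple(val: usize, mul: usize) -> usize {
        ((val + mul - 1) / mul) * mul
    }

    fn main() {
        let (width, height, bytes_pp) = (250usize, 4usize, 4usize);
        let optimal_pbo_stride = 256;

        let width_bytes = width * bytes_pp;                                    // 1000
        let dst_stride = round_up_to_multiple(width_bytes, optimal_pbo_stride); // 1024
        let minimal = (height - 1) * dst_stride + width_bytes;                  // 4072
        let dst_size = height * dst_stride;                                     // 4096

        assert!(dst_size >= minimal);
        println!("minimal = {}, allocated = {}", minimal, dst_size);
    }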