From 1229a401486631ef35cd1d4272292b0dcb2b64e1 Mon Sep 17 00:00:00 2001 From: Jamie Nicol Date: Mon, 24 Apr 2023 16:37:07 +0000 Subject: [PATCH] Bug 1828248 - Use QCOM_tiled_rendering when rendering color and picture cache targets. r=gfx-reviewers,nical We have seen reports that various websites, including twitter, perform poorly on older Adreno devices due to backdrop filter. We previously encountered a similar issue on Mali-G710 devices in bug 1809738, and it appeared to be due to having to copy the contents of large framebuffers--required to render the backdrop filter--to and from the GPU's tile memory. On Mali we were able to avoid this penalty by ensuring we performed an unscissored clear immediately after binding the framebuffer, allowing the driver to omit initializing the contents of tile memory prior to rendering. It's plausible that older Adreno drivers are not clever enough to be able to make this optimization. However, there exists an extension QCOM_tiled_rendering, which allows us to explicitly tell the driver which subregion of a render target we are rendering to, and whether it must be pre-initialized or post-resolved. This patch makes use of this extension when rendering to color and picture cache targets. In both cases we supply the region that is being rendered and must only resolve the color attachment back to main memory. In most cases we can additionally avoid initializing tile memory prior to rendering, with the exception being in draw_color_target() when we do not perform an initial clear, in which case we must initialize the color attachment. This results in a significant performance improvement on twitter when tested on a Nexus 5 (Adreno 330) device. 
Differential Revision: https://phabricator.services.mozilla.com/D176154 --- gfx/wr/webrender/src/device/gl.rs | 5 +++++ gfx/wr/webrender/src/renderer/mod.rs | 32 ++++++++++++++++++++++++++++ 2 files changed, 37 insertions(+) diff --git a/gfx/wr/webrender/src/device/gl.rs b/gfx/wr/webrender/src/device/gl.rs index bc4a295c15bc..625ddb39c49a 100644 --- a/gfx/wr/webrender/src/device/gl.rs +++ b/gfx/wr/webrender/src/device/gl.rs @@ -985,6 +985,8 @@ pub struct Capabilities { pub supports_render_target_invalidate: bool, /// Whether the driver can reliably upload data to R8 format textures. pub supports_r8_texture_upload: bool, + /// Whether the extension QCOM_tiled_rendering is supported. + pub supports_qcom_tiled_rendering: bool, /// Whether clip-masking is supported natively by the GL implementation /// rather than emulated in shaders. pub uses_native_clip_mask: bool, @@ -1839,6 +1841,8 @@ impl Device { true }; + let supports_qcom_tiled_rendering = supports_extension(&extensions, "GL_QCOM_tiled_rendering"); + // On some Adreno 3xx devices the vertex array object must be unbound and rebound after // an attached buffer has been orphaned. 
let requires_vao_rebind_after_orphaning = is_adreno_3xx; @@ -1876,6 +1880,7 @@ impl Device { prefers_clear_scissor, supports_render_target_invalidate, supports_r8_texture_upload, + supports_qcom_tiled_rendering, uses_native_clip_mask, uses_native_antialiasing, supports_image_external_essl3, diff --git a/gfx/wr/webrender/src/renderer/mod.rs b/gfx/wr/webrender/src/renderer/mod.rs index af639e13d12f..b44c107a6636 100644 --- a/gfx/wr/webrender/src/renderer/mod.rs +++ b/gfx/wr/webrender/src/renderer/mod.rs @@ -2483,6 +2483,17 @@ impl Renderer { { let _timer = self.gpu_profiler.start_timer(GPU_TAG_SETUP_TARGET); self.device.bind_draw_target(draw_target); + + if self.device.get_capabilities().supports_qcom_tiled_rendering { + self.device.gl().start_tiling_qcom( + target.dirty_rect.min.x.max(0) as _, + target.dirty_rect.min.y.max(0) as _, + target.dirty_rect.width() as _, + target.dirty_rect.height() as _, + 0, + ); + } + self.device.enable_depth_write(); self.set_blend(false, framebuffer_kind); @@ -2583,6 +2594,9 @@ impl Renderer { } self.device.invalidate_depth_target(); + if self.device.get_capabilities().supports_qcom_tiled_rendering { + self.device.gl().end_tiling_qcom(gl::COLOR_BUFFER_BIT0_QCOM); + } } /// Draw an alpha batch container into a given draw target. 
This is used @@ -3349,6 +3363,21 @@ impl Renderer { { let _timer = self.gpu_profiler.start_timer(GPU_TAG_SETUP_TARGET); self.device.bind_draw_target(draw_target); + + if self.device.get_capabilities().supports_qcom_tiled_rendering { + let preserve_mask = match target.clear_color { + Some(_) => 0, + None => gl::COLOR_BUFFER_BIT0_QCOM, + }; + self.device.gl().start_tiling_qcom( + target.used_rect.min.x.max(0) as _, + target.used_rect.min.y.max(0) as _, + target.used_rect.width() as _, + target.used_rect.height() as _, + preserve_mask, + ); + } + self.device.disable_depth(); self.set_blend(false, framebuffer_kind); @@ -3479,6 +3508,9 @@ impl Renderer { if clear_depth.is_some() { self.device.invalidate_depth_target(); } + if self.device.get_capabilities().supports_qcom_tiled_rendering { + self.device.gl().end_tiling_qcom(gl::COLOR_BUFFER_BIT0_QCOM); + } } fn draw_blurs(