From 4f480c9072123b94c0eed17f2055388ec498c176 Mon Sep 17 00:00:00 2001
From: Lee Salzman
Date: Mon, 14 Sep 2020 02:54:53 +0000
Subject: [PATCH] Bug 1664479 - optimize cs_blur shader for SWGL. r=gw

Differential Revision: https://phabricator.services.mozilla.com/D89929
---
 gfx/wr/swgl/src/glsl.h            | 16 +++++--
 gfx/wr/webrender/res/cs_blur.glsl | 77 +++++++++++++++++++------------
 2 files changed, 58 insertions(+), 35 deletions(-)

diff --git a/gfx/wr/swgl/src/glsl.h b/gfx/wr/swgl/src/glsl.h
index 0864baa56c79..d0fd7e39d0dc 100644
--- a/gfx/wr/swgl/src/glsl.h
+++ b/gfx/wr/swgl/src/glsl.h
@@ -2,8 +2,6 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-// Some of this is copied from Skia and is governed by a BSD-style license
-// Every function in this file should be marked static and inline using SI.
 #define SI ALWAYS_INLINE static
 
 #include "vector_type.h"
@@ -612,6 +610,7 @@ Float approx_log2(Float x) {
   return e - 124.225514990f - 1.498030302f * m -
          1.725879990f / (0.3520887068f + m);
 }
+
 Float approx_pow2(Float x) {
   Float f = fract(x);
   return bit_cast(
@@ -619,14 +618,21 @@ Float approx_pow2(Float x) {
                                       27.728023300f / (4.84252568f - f)));
 }
 
-// From skia
+#define pow __glsl_pow
+
+SI float pow(float x, float y) { return powf(x, y); }
+
 Float pow(Float x, Float y) {
   return if_then_else((x == 0) | (x == 1), x, approx_pow2(approx_log2(x) * y));
 }
 
+#define exp __glsl_exp
+
+SI float exp(float x) { return expf(x); }
+
 Float exp(Float y) {
-  float x = 2.718281828459045235360287471352;
-  return approx_pow2(log2f(x) * y);
+  float l2e = 1.4426950408889634074f;
+  return approx_pow2(l2e * y);
 }
 
 struct ivec4;
diff --git a/gfx/wr/webrender/res/cs_blur.glsl b/gfx/wr/webrender/res/cs_blur.glsl
index a10fe00d7909..bfd5c906a8e3 100644
--- a/gfx/wr/webrender/res/cs_blur.glsl
+++ b/gfx/wr/webrender/res/cs_blur.glsl
@@ -4,12 +4,13 @@
 
 #include shared,prim_shared
 
-varying vec3 vUv;
+varying vec2 vUv;
+flat varying float vUvLayer;
 flat varying vec4 vUvRect;
 flat varying vec2 vOffsetScale;
-flat varying float vSigma;
 // The number of pixels on each end that we apply the blur filter over.
 flat varying int vSupport;
+flat varying vec2 vGaussCoefficients;
 
 #ifdef WR_VERTEX_SHADER
 // Applies a separable gaussian blur in one direction, as specified
@@ -40,6 +41,30 @@ BlurTask fetch_blur_task(int address) {
     return task;
 }
 
+void calculate_gauss_coefficients(float sigma) {
+    // Incremental Gaussian Coefficent Calculation (See GPU Gems 3 pp. 877 - 889)
+    vGaussCoefficients = vec2(1.0 / (sqrt(2.0 * 3.14159265) * sigma),
+                              exp(-0.5 / (sigma * sigma)));
+
+    // Pre-calculate the coefficient total in the vertex shader so that
+    // we can avoid having to do it per-fragment and also avoid division
+    // by zero in the degenerate case.
+    vec3 gauss_coefficient = vec3(vGaussCoefficients,
+                                  vGaussCoefficients.y * vGaussCoefficients.y);
+    float gauss_coefficient_total = gauss_coefficient.x;
+    for (int i = 1; i <= vSupport; i += 2) {
+        gauss_coefficient.xy *= gauss_coefficient.yz;
+        float gauss_coefficient_subtotal = gauss_coefficient.x;
+        gauss_coefficient.xy *= gauss_coefficient.yz;
+        gauss_coefficient_subtotal += gauss_coefficient.x;
+        gauss_coefficient_total += 2.0 * gauss_coefficient_subtotal;
+    }
+
+    // Scale initial coefficient by total to avoid passing the total separately
+    // to the fragment shader.
+    vGaussCoefficients.x /= gauss_coefficient_total;
+}
+
 void main(void) {
     BlurTask blur_task = fetch_blur_task(aBlurRenderTaskAddress);
     RenderTaskCommonData src_task = fetch_render_task_common_data(aBlurSourceTaskAddress);
@@ -52,8 +77,7 @@ void main(void) {
 #else
     vec2 texture_size = vec2(textureSize(sPrevPassAlpha, 0).xy);
 #endif
-    vUv.z = src_task.texture_layer_index;
-    vSigma = blur_task.blur_radius;
+    vUvLayer = src_task.texture_layer_index;
 
     // Ensure that the support is an even number of pixels to simplify the
     // fragment shader logic.
@@ -62,6 +86,13 @@ void main(void) {
     // hardware for linear filtering.
     vSupport = int(ceil(1.5 * blur_task.blur_radius)) * 2;
 
+    if (vSupport > 0) {
+        calculate_gauss_coefficients(blur_task.blur_radius);
+    } else {
+        // The gauss function gets NaNs when blur radius is zero.
+        vGaussCoefficients = vec2(1.0, 1.0);
+    }
+
     switch (aBlurDirection) {
         case DIR_HORIZONTAL:
             vOffsetScale = vec2(1.0 / texture_size.x, 0.0);
@@ -81,7 +112,7 @@ void main(void) {
 
     vec2 uv0 = src_rect.p0 / texture_size;
     vec2 uv1 = (src_rect.p0 + src_rect.size) / texture_size;
-    vUv.xy = mix(uv0, uv1, aPosition.xy);
+    vUv = mix(uv0, uv1, aPosition.xy);
 
     gl_Position = uTransform * vec4(pos, 0.0, 1.0);
 }
@@ -91,10 +122,10 @@ void main(void) {
 
 #if defined WR_FEATURE_COLOR_TARGET
 #define SAMPLE_TYPE vec4
-#define SAMPLE_TEXTURE(uv) texture(sPrevPassColor, uv)
+#define SAMPLE_TEXTURE(uv) texture(sPrevPassColor, vec3(uv, vUvLayer))
 #else
 #define SAMPLE_TYPE float
-#define SAMPLE_TEXTURE(uv) texture(sPrevPassAlpha, uv).r
+#define SAMPLE_TEXTURE(uv) texture(sPrevPassAlpha, vec3(uv, vUvLayer)).r
 #endif
 
 // TODO(gw): Write a fast path blur that handles smaller blur radii
@@ -104,23 +135,11 @@ void main(void) {
 void main(void) {
     SAMPLE_TYPE original_color = SAMPLE_TEXTURE(vUv);
 
-    // TODO(gw): The gauss function gets NaNs when blur radius
-    //           is zero. In the future, detect this earlier
-    //           and skip the blur passes completely.
-    if (vSupport == 0) {
-        oFragColor = vec4(original_color);
-        return;
-    }
-
     // Incremental Gaussian Coefficent Calculation (See GPU Gems 3 pp. 877 - 889)
-    vec3 gauss_coefficient;
-    gauss_coefficient.x = 1.0 / (sqrt(2.0 * 3.14159265) * vSigma);
-    gauss_coefficient.y = exp(-0.5 / (vSigma * vSigma));
-    gauss_coefficient.z = gauss_coefficient.y * gauss_coefficient.y;
+    vec3 gauss_coefficient = vec3(vGaussCoefficients,
+                                  vGaussCoefficients.y * vGaussCoefficients.y);
 
-    float gauss_coefficient_total = gauss_coefficient.x;
     SAMPLE_TYPE avg_color = original_color * gauss_coefficient.x;
-    gauss_coefficient.xy *= gauss_coefficient.yz;
 
     // Evaluate two adjacent texels at a time. We can do this because, if c0
     // and c1 are colors of adjacent texels and k0 and k1 are arbitrary
@@ -142,6 +161,8 @@ void main(void) {
     // Equation 1 with a single texture lookup.
 
     for (int i = 1; i <= vSupport; i += 2) {
+        gauss_coefficient.xy *= gauss_coefficient.yz;
+
         float gauss_coefficient_subtotal = gauss_coefficient.x;
         gauss_coefficient.xy *= gauss_coefficient.yz;
         gauss_coefficient_subtotal += gauss_coefficient.x;
@@ -149,16 +170,12 @@ void main(void) {
 
         vec2 offset = vOffsetScale * (float(i) + gauss_ratio);
 
-        vec2 st0 = clamp(vUv.xy - offset, vUvRect.xy, vUvRect.zw);
-        avg_color += SAMPLE_TEXTURE(vec3(st0, vUv.z)) * gauss_coefficient_subtotal;
-
-        vec2 st1 = clamp(vUv.xy + offset, vUvRect.xy, vUvRect.zw);
-        avg_color += SAMPLE_TEXTURE(vec3(st1, vUv.z)) * gauss_coefficient_subtotal;
-
-        gauss_coefficient_total += 2.0 * gauss_coefficient_subtotal;
-        gauss_coefficient.xy *= gauss_coefficient.yz;
+        vec2 st0 = max(vUv - offset, vUvRect.xy);
+        vec2 st1 = min(vUv + offset, vUvRect.zw);
+        avg_color += (SAMPLE_TEXTURE(st0) + SAMPLE_TEXTURE(st1)) *
+                     gauss_coefficient_subtotal;
     }
 
-    oFragColor = vec4(avg_color) / gauss_coefficient_total;
+    oFragColor = vec4(avg_color);
 }
 #endif