зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1664479 - optimize cs_blur shader for SWGL. r=gw
Differential Revision: https://phabricator.services.mozilla.com/D89929
This commit is contained in:
Родитель
b6dd17d5b5
Коммит
4f480c9072
|
@ -2,8 +2,6 @@
|
||||||
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
* License, v. 2.0. If a copy of the MPL was not distributed with this
|
||||||
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
|
||||||
|
|
||||||
// Some of this is copied from Skia and is governed by a BSD-style license
|
|
||||||
// Every function in this file should be marked static and inline using SI.
|
|
||||||
#define SI ALWAYS_INLINE static
|
#define SI ALWAYS_INLINE static
|
||||||
|
|
||||||
#include "vector_type.h"
|
#include "vector_type.h"
|
||||||
|
@ -612,6 +610,7 @@ Float approx_log2(Float x) {
|
||||||
return e - 124.225514990f - 1.498030302f * m -
|
return e - 124.225514990f - 1.498030302f * m -
|
||||||
1.725879990f / (0.3520887068f + m);
|
1.725879990f / (0.3520887068f + m);
|
||||||
}
|
}
|
||||||
|
|
||||||
Float approx_pow2(Float x) {
|
Float approx_pow2(Float x) {
|
||||||
Float f = fract(x);
|
Float f = fract(x);
|
||||||
return bit_cast<Float>(
|
return bit_cast<Float>(
|
||||||
|
@ -619,14 +618,21 @@ Float approx_pow2(Float x) {
|
||||||
27.728023300f / (4.84252568f - f)));
|
27.728023300f / (4.84252568f - f)));
|
||||||
}
|
}
|
||||||
|
|
||||||
// From skia
|
#define pow __glsl_pow
|
||||||
|
|
||||||
|
SI float pow(float x, float y) { return powf(x, y); }
|
||||||
|
|
||||||
Float pow(Float x, Float y) {
|
Float pow(Float x, Float y) {
|
||||||
return if_then_else((x == 0) | (x == 1), x, approx_pow2(approx_log2(x) * y));
|
return if_then_else((x == 0) | (x == 1), x, approx_pow2(approx_log2(x) * y));
|
||||||
}
|
}
|
||||||
|
|
||||||
|
#define exp __glsl_exp
|
||||||
|
|
||||||
|
SI float exp(float x) { return expf(x); }
|
||||||
|
|
||||||
Float exp(Float y) {
|
Float exp(Float y) {
|
||||||
float x = 2.718281828459045235360287471352;
|
float l2e = 1.4426950408889634074f;
|
||||||
return approx_pow2(log2f(x) * y);
|
return approx_pow2(l2e * y);
|
||||||
}
|
}
|
||||||
|
|
||||||
struct ivec4;
|
struct ivec4;
|
||||||
|
|
|
@ -4,12 +4,13 @@
|
||||||
|
|
||||||
#include shared,prim_shared
|
#include shared,prim_shared
|
||||||
|
|
||||||
varying vec3 vUv;
|
varying vec2 vUv;
|
||||||
|
flat varying float vUvLayer;
|
||||||
flat varying vec4 vUvRect;
|
flat varying vec4 vUvRect;
|
||||||
flat varying vec2 vOffsetScale;
|
flat varying vec2 vOffsetScale;
|
||||||
flat varying float vSigma;
|
|
||||||
// The number of pixels on each end that we apply the blur filter over.
|
// The number of pixels on each end that we apply the blur filter over.
|
||||||
flat varying int vSupport;
|
flat varying int vSupport;
|
||||||
|
flat varying vec2 vGaussCoefficients;
|
||||||
|
|
||||||
#ifdef WR_VERTEX_SHADER
|
#ifdef WR_VERTEX_SHADER
|
||||||
// Applies a separable gaussian blur in one direction, as specified
|
// Applies a separable gaussian blur in one direction, as specified
|
||||||
|
@ -40,6 +41,30 @@ BlurTask fetch_blur_task(int address) {
|
||||||
return task;
|
return task;
|
||||||
}
|
}
|
||||||
|
|
||||||
|
void calculate_gauss_coefficients(float sigma) {
|
||||||
|
// Incremental Gaussian Coefficient Calculation (See GPU Gems 3 pp. 877 - 889)
|
||||||
|
vGaussCoefficients = vec2(1.0 / (sqrt(2.0 * 3.14159265) * sigma),
|
||||||
|
exp(-0.5 / (sigma * sigma)));
|
||||||
|
|
||||||
|
// Pre-calculate the coefficient total in the vertex shader so that
|
||||||
|
// we can avoid having to do it per-fragment and also avoid division
|
||||||
|
// by zero in the degenerate case.
|
||||||
|
vec3 gauss_coefficient = vec3(vGaussCoefficients,
|
||||||
|
vGaussCoefficients.y * vGaussCoefficients.y);
|
||||||
|
float gauss_coefficient_total = gauss_coefficient.x;
|
||||||
|
for (int i = 1; i <= vSupport; i += 2) {
|
||||||
|
gauss_coefficient.xy *= gauss_coefficient.yz;
|
||||||
|
float gauss_coefficient_subtotal = gauss_coefficient.x;
|
||||||
|
gauss_coefficient.xy *= gauss_coefficient.yz;
|
||||||
|
gauss_coefficient_subtotal += gauss_coefficient.x;
|
||||||
|
gauss_coefficient_total += 2.0 * gauss_coefficient_subtotal;
|
||||||
|
}
|
||||||
|
|
||||||
|
// Scale initial coefficient by total to avoid passing the total separately
|
||||||
|
// to the fragment shader.
|
||||||
|
vGaussCoefficients.x /= gauss_coefficient_total;
|
||||||
|
}
|
||||||
|
|
||||||
void main(void) {
|
void main(void) {
|
||||||
BlurTask blur_task = fetch_blur_task(aBlurRenderTaskAddress);
|
BlurTask blur_task = fetch_blur_task(aBlurRenderTaskAddress);
|
||||||
RenderTaskCommonData src_task = fetch_render_task_common_data(aBlurSourceTaskAddress);
|
RenderTaskCommonData src_task = fetch_render_task_common_data(aBlurSourceTaskAddress);
|
||||||
|
@ -52,8 +77,7 @@ void main(void) {
|
||||||
#else
|
#else
|
||||||
vec2 texture_size = vec2(textureSize(sPrevPassAlpha, 0).xy);
|
vec2 texture_size = vec2(textureSize(sPrevPassAlpha, 0).xy);
|
||||||
#endif
|
#endif
|
||||||
vUv.z = src_task.texture_layer_index;
|
vUvLayer = src_task.texture_layer_index;
|
||||||
vSigma = blur_task.blur_radius;
|
|
||||||
|
|
||||||
// Ensure that the support is an even number of pixels to simplify the
|
// Ensure that the support is an even number of pixels to simplify the
|
||||||
// fragment shader logic.
|
// fragment shader logic.
|
||||||
|
@ -62,6 +86,13 @@ void main(void) {
|
||||||
// hardware for linear filtering.
|
// hardware for linear filtering.
|
||||||
vSupport = int(ceil(1.5 * blur_task.blur_radius)) * 2;
|
vSupport = int(ceil(1.5 * blur_task.blur_radius)) * 2;
|
||||||
|
|
||||||
|
if (vSupport > 0) {
|
||||||
|
calculate_gauss_coefficients(blur_task.blur_radius);
|
||||||
|
} else {
|
||||||
|
// The gauss function gets NaNs when blur radius is zero.
|
||||||
|
vGaussCoefficients = vec2(1.0, 1.0);
|
||||||
|
}
|
||||||
|
|
||||||
switch (aBlurDirection) {
|
switch (aBlurDirection) {
|
||||||
case DIR_HORIZONTAL:
|
case DIR_HORIZONTAL:
|
||||||
vOffsetScale = vec2(1.0 / texture_size.x, 0.0);
|
vOffsetScale = vec2(1.0 / texture_size.x, 0.0);
|
||||||
|
@ -81,7 +112,7 @@ void main(void) {
|
||||||
|
|
||||||
vec2 uv0 = src_rect.p0 / texture_size;
|
vec2 uv0 = src_rect.p0 / texture_size;
|
||||||
vec2 uv1 = (src_rect.p0 + src_rect.size) / texture_size;
|
vec2 uv1 = (src_rect.p0 + src_rect.size) / texture_size;
|
||||||
vUv.xy = mix(uv0, uv1, aPosition.xy);
|
vUv = mix(uv0, uv1, aPosition.xy);
|
||||||
|
|
||||||
gl_Position = uTransform * vec4(pos, 0.0, 1.0);
|
gl_Position = uTransform * vec4(pos, 0.0, 1.0);
|
||||||
}
|
}
|
||||||
|
@ -91,10 +122,10 @@ void main(void) {
|
||||||
|
|
||||||
#if defined WR_FEATURE_COLOR_TARGET
|
#if defined WR_FEATURE_COLOR_TARGET
|
||||||
#define SAMPLE_TYPE vec4
|
#define SAMPLE_TYPE vec4
|
||||||
#define SAMPLE_TEXTURE(uv) texture(sPrevPassColor, uv)
|
#define SAMPLE_TEXTURE(uv) texture(sPrevPassColor, vec3(uv, vUvLayer))
|
||||||
#else
|
#else
|
||||||
#define SAMPLE_TYPE float
|
#define SAMPLE_TYPE float
|
||||||
#define SAMPLE_TEXTURE(uv) texture(sPrevPassAlpha, uv).r
|
#define SAMPLE_TEXTURE(uv) texture(sPrevPassAlpha, vec3(uv, vUvLayer)).r
|
||||||
#endif
|
#endif
|
||||||
|
|
||||||
// TODO(gw): Write a fast path blur that handles smaller blur radii
|
// TODO(gw): Write a fast path blur that handles smaller blur radii
|
||||||
|
@ -104,23 +135,11 @@ void main(void) {
|
||||||
void main(void) {
|
void main(void) {
|
||||||
SAMPLE_TYPE original_color = SAMPLE_TEXTURE(vUv);
|
SAMPLE_TYPE original_color = SAMPLE_TEXTURE(vUv);
|
||||||
|
|
||||||
// TODO(gw): The gauss function gets NaNs when blur radius
|
|
||||||
// is zero. In the future, detect this earlier
|
|
||||||
// and skip the blur passes completely.
|
|
||||||
if (vSupport == 0) {
|
|
||||||
oFragColor = vec4(original_color);
|
|
||||||
return;
|
|
||||||
}
|
|
||||||
|
|
||||||
// Incremental Gaussian Coefficient Calculation (See GPU Gems 3 pp. 877 - 889)
|
// Incremental Gaussian Coefficient Calculation (See GPU Gems 3 pp. 877 - 889)
|
||||||
vec3 gauss_coefficient;
|
vec3 gauss_coefficient = vec3(vGaussCoefficients,
|
||||||
gauss_coefficient.x = 1.0 / (sqrt(2.0 * 3.14159265) * vSigma);
|
vGaussCoefficients.y * vGaussCoefficients.y);
|
||||||
gauss_coefficient.y = exp(-0.5 / (vSigma * vSigma));
|
|
||||||
gauss_coefficient.z = gauss_coefficient.y * gauss_coefficient.y;
|
|
||||||
|
|
||||||
float gauss_coefficient_total = gauss_coefficient.x;
|
|
||||||
SAMPLE_TYPE avg_color = original_color * gauss_coefficient.x;
|
SAMPLE_TYPE avg_color = original_color * gauss_coefficient.x;
|
||||||
gauss_coefficient.xy *= gauss_coefficient.yz;
|
|
||||||
|
|
||||||
// Evaluate two adjacent texels at a time. We can do this because, if c0
|
// Evaluate two adjacent texels at a time. We can do this because, if c0
|
||||||
// and c1 are colors of adjacent texels and k0 and k1 are arbitrary
|
// and c1 are colors of adjacent texels and k0 and k1 are arbitrary
|
||||||
|
@ -142,6 +161,8 @@ void main(void) {
|
||||||
// Equation 1 with a single texture lookup.
|
// Equation 1 with a single texture lookup.
|
||||||
|
|
||||||
for (int i = 1; i <= vSupport; i += 2) {
|
for (int i = 1; i <= vSupport; i += 2) {
|
||||||
|
gauss_coefficient.xy *= gauss_coefficient.yz;
|
||||||
|
|
||||||
float gauss_coefficient_subtotal = gauss_coefficient.x;
|
float gauss_coefficient_subtotal = gauss_coefficient.x;
|
||||||
gauss_coefficient.xy *= gauss_coefficient.yz;
|
gauss_coefficient.xy *= gauss_coefficient.yz;
|
||||||
gauss_coefficient_subtotal += gauss_coefficient.x;
|
gauss_coefficient_subtotal += gauss_coefficient.x;
|
||||||
|
@ -149,16 +170,12 @@ void main(void) {
|
||||||
|
|
||||||
vec2 offset = vOffsetScale * (float(i) + gauss_ratio);
|
vec2 offset = vOffsetScale * (float(i) + gauss_ratio);
|
||||||
|
|
||||||
vec2 st0 = clamp(vUv.xy - offset, vUvRect.xy, vUvRect.zw);
|
vec2 st0 = max(vUv - offset, vUvRect.xy);
|
||||||
avg_color += SAMPLE_TEXTURE(vec3(st0, vUv.z)) * gauss_coefficient_subtotal;
|
vec2 st1 = min(vUv + offset, vUvRect.zw);
|
||||||
|
avg_color += (SAMPLE_TEXTURE(st0) + SAMPLE_TEXTURE(st1)) *
|
||||||
vec2 st1 = clamp(vUv.xy + offset, vUvRect.xy, vUvRect.zw);
|
gauss_coefficient_subtotal;
|
||||||
avg_color += SAMPLE_TEXTURE(vec3(st1, vUv.z)) * gauss_coefficient_subtotal;
|
|
||||||
|
|
||||||
gauss_coefficient_total += 2.0 * gauss_coefficient_subtotal;
|
|
||||||
gauss_coefficient.xy *= gauss_coefficient.yz;
|
|
||||||
}
|
}
|
||||||
|
|
||||||
oFragColor = vec4(avg_color) / gauss_coefficient_total;
|
oFragColor = vec4(avg_color);
|
||||||
}
|
}
|
||||||
#endif
|
#endif
|
||||||
|
|
Загрузка…
Ссылка в новой задаче