Bug 1686244 - Accelerate radial gradients in SWGL. r=jrmuizel

The same optimization of looking for merged linear gradients can also be
applied to radial gradients by solving the quadratic equation to check
how large a span we can process within a given merged span. This allows
us to save a bunch of table lookups and some other math in the inner loops.

Differential Revision: https://phabricator.services.mozilla.com/D105858
This commit is contained in:
Lee Salzman 2021-02-22 04:14:39 +00:00
Родитель 6d46022aa7
Коммит 7d3755c3f6
7 изменённых файлов: 285 добавлений и 18 удалений

Просмотреть файл

@ -3778,6 +3778,14 @@ pub fn ast_to_hir(state: &mut State, tu: &syntax::TranslationUnit) -> Translatio
Type::new(Void),
vec![Type::new(Sampler2D), Type::new(Int), Type::new(Float), Type::new(Bool), Type::new(Float)],
);
declare_function(
state,
"swgl_commitRadialGradientRGBA8",
None,
Type::new(Void),
vec![Type::new(Sampler2D), Type::new(Int), Type::new(Float), Type::new(Bool), Type::new(Vec2),
Type::new(Float)],
);
declare_function(
state,
"swgl_commitGradientRGBA8",

Просмотреть файл

@ -1185,6 +1185,241 @@ static void commitLinearGradient(sampler2D sampler, int address, float size,
swgl_SpanLength = 0; \
} while (0)
// Computes the square root of v, where V may be a scalar or a vector type.
// On SSE2/NEON builds this uses the identity sqrt(v) = v * (1 / sqrt(v)) so
// that only inversesqrt is needed; other builds call sqrt directly.
template <typename V>
static ALWAYS_INLINE V fastSqrt(V v) {
#if USE_SSE2 || USE_NEON
  // Clamp the argument away from zero before taking the inverse sqrt.
  // NOTE(review): inversesqrt(0) presumably yields +inf on these targets, in
  // which case the unclamped form computes 0 * inf = NaN instead of 0. With
  // the clamp, v == 0 produces exactly 0 and only values below 1e-10 lose
  // any accuracy.
  return v * inversesqrt(max(v, V(1.0e-10f)));
#else
  return sqrt(v);
#endif
}
// Returns the length of v, computed as the fast square root of the dot product
// of v with itself. The return type follows whatever dot() yields for V.
template <typename V>
static ALWAYS_INLINE auto fastLength(V v) {
  const auto lengthSquared = dot(v, v);
  return fastSqrt(lengthSquared);
}
// Samples an entire span of a radial gradient by crawling the gradient table
// and looking for consecutive stops that can be merged into a single larger
// gradient, then interpolating between those larger gradients within the span
// based on the computed position relative to a radius.
//
// Parameters:
//   sampler - texture holding the gradient table; must be RGBA32F.
//   address - offset into sampler->buf at which the GradientStops table starts.
//   size    - number of interior table entries; entry 0 and entry 1 + size are
//             used for positions before and after the table respectively.
//   repeat  - if true, the gradient offset is wrapped with fract().
//   pos     - gradient-space position of the current chunk of pixels; the
//             first two lanes are used to derive the per-pixel step.
//   radius  - distance from the origin that maps to the start of the table.
//   buf     - destination pixels; written in chunks of 4.
//   span    - number of pixels to produce.
//             NOTE(review): pixels are always written 4 at a time, so span is
//             presumably a multiple of 4 - confirm against callers.
// BLEND selects whether commit_blend_span blends with the destination.
template <bool BLEND>
static void commitRadialGradient(sampler2D sampler, int address, float size,
                                 bool repeat, vec2 pos, float radius,
                                 uint32_t* buf, int span) {
  assert(sampler->format == TextureFormat::RGBA32F);
  assert(address >= 0 && address < int(sampler->height * sampler->stride));
  GradientStops* stops = (GradientStops*)&sampler->buf[address];
  // clang-format off
  // Given position p, delta d, and radius r, we need to repeatedly solve the
  // following quadratic for the pixel offset t:
  //    length(p + t*d) = r
  //    (px + t*dx)^2 + (py + t*dy)^2 = r^2
  // Rearranged into quadratic equation form (t^2*a + t*b + c = 0) this is:
  //    t^2*(dx^2+dy^2) + t*2*(dx*px+dy*py) + (px^2+py^2-r^2) = 0
  //    t^2*d.d + t*2*d.p + (p.p-r^2) = 0
  // The solution of the quadratic formula t=(-b+-sqrt(b^2-4ac))/2a reduces to:
  //    t = -d.p/d.d +- sqrt((d.p/d.d)^2 - (p.p-r^2)/d.d)
  // Note that d.p, d.d, p.p, and r^2 are constant across the gradient, and so
  // we cache them below for faster computation.
  //
  // The quadratic has two solutions, representing the span intersecting the
  // given radius of gradient, which can occur at two offsets. If there is only
  // one solution (where b^2-4ac = 0), this represents the point at which the
  // span runs tangent to the radius. This middle point is significant in that
  // before it, we walk down the gradient ramp, and after it, we walk up the
  // ramp.
  // clang-format on
  vec2_scalar pos0 = {pos.x.x, pos.y.x};
  vec2_scalar delta = {pos.x.y - pos.x.x, pos.y.y - pos.y.x};
  float deltaDelta = dot(delta, delta);
  float invDelta, middleT, middleB;
  if (deltaDelta > 0) {
    invDelta = 1.0f / deltaDelta;
    middleT = -dot(delta, pos0) * invDelta;
    middleB = middleT * middleT - dot(pos0, pos0) * invDelta;
  } else {
    // If position is invariant, just set the coefficients so the quadratic
    // always reduces to the end of the span.
    invDelta = 0.0f;
    middleT = float(span);
    middleB = 0.0f;
  }
  // We only want to search for merged gradients up to the minimum of either
  // the mid-point or the span length. Cache those offsets here as they don't
  // vary in the inner loop.
  Float middleEndRadius =
      fastLength(pos0 + delta * (Float){middleT, float(span), 0.0f, 0.0f});
  float middleRadius = span < middleT ? middleEndRadius.y : middleEndRadius.x;
  float endRadius = middleEndRadius.y;
  // Convert delta to change in position per chunk.
  delta *= 4;
  deltaDelta *= 4 * 4;
  // clang-format off
  // Given current position p and delta d, we reduce:
  //    length(p) = sqrt(dot(p,p)) = dot(p,p) * invsqrt(dot(p,p))
  // where dot(p+d,p+d) can be accumulated as:
  //    (x+dx)^2+(y+dy)^2 = (x^2+y^2) + 2(x*dx+y*dy) + (dx^2+dy^2)
  //                      = p.p + 2p.d + d.d
  // Since p increases by d every loop iteration, p.d increases by d.d, and thus
  // we can accumulate d.d to calculate 2p.d, then allowing us to get the next
  // dot-product by adding it to dot-product p.p of the prior iteration. This
  // saves us some multiplications and an expensive sqrt inside the inner loop.
  // clang-format on
  Float dotPos = dot(pos, pos);
  Float dotPosDelta = 2.0f * dot(pos, delta) + deltaDelta;
  float deltaDelta2 = 2.0f * deltaDelta;
  for (int t = 0; t < span;) {
    // Compute the gradient table offset from the current position.
    Float offset = fastSqrt(dotPos) - radius;
    float startRadius = radius;
    // If repeat is desired, we need to limit the offset to a fractional value.
    if (repeat) {
      // The non-repeating radius at which the gradient table actually starts,
      // radius + floor(offset) = radius + (offset - fract(offset)).
      startRadius += offset.x;
      offset = fract(offset);
      startRadius -= offset.x;
    }
    // We need to find the min/max index in the table of the gradient we want to
    // use as well as the intercept point where we leave this gradient.
    float intercept = -1;
    int minIndex = 0;
    int maxIndex = int(1.0f + size);
    if (offset.x < 0) {
      // If inside the inner radius of the gradient table, then use the first
      // stop. Set the intercept to advance forward to the start of the gradient
      // table.
      maxIndex = minIndex;
      if (t >= middleT) {
        intercept = radius;
      }
    } else if (offset.x < 1) {
      // Otherwise, we're inside the valid part of the gradient table. This
      // branch is tested with an explicit upper bound so that a NaN offset
      // can never be converted into a table index here; NaN fails both
      // comparisons and falls through to the clamped last-stop case below.
      minIndex = int(1.0f + offset.x * size);
      maxIndex = minIndex;
      // Find the offset in the gradient that corresponds to the search limit.
      // We only search up to the minimum of either the mid-point or the span
      // length. Get the table index that corresponds to this offset, clamped so
      // that we avoid hitting the beginning (0) or end (1 + size) of the table.
      float searchOffset =
          (t >= middleT ? endRadius : middleRadius) - startRadius;
      int searchIndex = int(clamp(1.0f + size * searchOffset, 1.0f, size));
      // If we are past the mid-point, walk up the gradient table trying to
      // merge stops. If we're below the mid-point, we need to walk down the
      // table. We note the table index at which we need to look for an
      // intercept to determine a valid span.
      if (t >= middleT) {
        while (maxIndex + 1 <= searchIndex &&
               stops[maxIndex].can_merge(stops[maxIndex + 1])) {
          maxIndex++;
        }
        intercept = maxIndex + 1;
      } else {
        while (minIndex - 1 >= searchIndex &&
               stops[minIndex - 1].can_merge(stops[minIndex])) {
          minIndex--;
        }
        intercept = minIndex;
      }
      // Convert from a table index into units of radius from the center of the
      // gradient.
      intercept = clamp((intercept - 1.0f) / size, 0.0f, 1.0f) + startRadius;
    } else {
      // If outside the outer radius of the gradient table (or the offset is
      // not a number), then use the last stop. Set the intercept to advance
      // toward the valid part of the gradient table if going in, or just run
      // to the end of the span if going away from the gradient.
      minIndex = maxIndex;
      if (t < middleT) {
        intercept = radius + 1;
      }
    }
    // Solve the quadratic for t to find where the merged gradient ends. If no
    // intercept is found, just go to the middle or end of the span.
    float endT = t >= middleT ? span : min(span, int(middleT));
    if (intercept >= 0) {
      float b = middleB + intercept * intercept * invDelta;
      if (b > 0) {
        b = fastSqrt(b);
        endT = min(endT, t >= middleT ? middleT + b : middleT - b);
      } else {
        // Due to imprecision in the offset calculations, solving the quadratic
        // may fail even though an intercept exists. Treat the discriminant as
        // 0 so the merged gradient is never extended past the intercept all
        // the way to the end of the span.
        endT = min(endT, middleT);
      }
    }
    // Figure out how many chunks are actually inside the merged gradient. The
    // comparison is done in floating point first so that the float-to-int
    // conversion below is only evaluated when at least one whole chunk fits
    // (and never for a NaN endT).
    if (t + 4.0f <= endT) {
      int inside = int(endT - t) & ~3;
      // Convert start and end colors to BGRA and scale to 0..255 range later.
      auto minColorF = stops[minIndex].startColor.zyxw * 255.0f;
      auto maxColorF = stops[maxIndex].end_color().zyxw * 255.0f;
      // Compute the change in color per change in gradient offset.
      auto deltaColorF =
          (maxColorF - minColorF) * (size / (maxIndex + 1 - minIndex));
      // Subtract off the color difference of the beginning of the current span
      // from the beginning of the gradient.
      Float colorF =
          minColorF - deltaColorF * (startRadius + (minIndex - 1) / size);
      // Finally, walk over the span accumulating the position dot product and
      // getting its sqrt as an offset into the color ramp. Since we're already
      // in BGRA format and scaled to 255, we just need to round to an integer
      // and pack down to pixel format.
      for (auto* end = buf + inside; buf < end; buf += 4) {
        Float offsetG = fastSqrt(dotPos);
        commit_blend_span<BLEND>(
            buf,
            combine(
                packRGBA8(round_pixel(colorF + deltaColorF * offsetG.x, 1),
                          round_pixel(colorF + deltaColorF * offsetG.y, 1)),
                packRGBA8(round_pixel(colorF + deltaColorF * offsetG.z, 1),
                          round_pixel(colorF + deltaColorF * offsetG.w, 1))));
        dotPos += dotPosDelta;
        dotPosDelta += deltaDelta2;
      }
      // Advance past the portion of gradient we just processed.
      t += inside;
      // If we hit the end of the span, exit out now.
      if (t >= span) {
        break;
      }
      // Otherwise, we are most likely in a transitional section of the gradient
      // between stops that will likely require doing per-sample table lookups.
      // Rather than having to redo all the searching above to figure that out,
      // just assume that to be the case and fall through below to doing the
      // table lookups to hopefully avoid an iteration.
      offset = fastSqrt(dotPos) - radius;
      if (repeat) {
        offset = fract(offset);
      }
    }
    // If we got here, that means we still have span left to process but did not
    // have any whole chunks that fell within a merged gradient. Just fall back
    // to doing a table lookup for each sample.
    Float entry = clamp(offset * size + 1.0f, 0.0f, 1.0f + size);
    commit_blend_span<BLEND>(buf, sampleGradient(sampler, address, entry));
    buf += 4;
    t += 4;
    dotPos += dotPosDelta;
    dotPosDelta += deltaDelta2;
  }
}
// Commits an entire span of a radial gradient, in the same manner as the
// linear gradient commit built on commitLinearGradient, but given a varying 2D
// position scaled to gradient-space and a radius at which the distance from
// the origin maps to the start of the gradient table.
// The macro selects the blending or non-blending instantiation of
// commitRadialGradient based on blend_key, writes through swgl_OutRGBA8 for
// swgl_SpanLength pixels, then advances swgl_OutRGBA8 past the span and sets
// swgl_SpanLength to 0 to mark the span as fully consumed.
#define swgl_commitRadialGradientRGBA8(sampler, address, size, repeat, pos, \
radius) \
do { \
if (blend_key) { \
commitRadialGradient<true>(sampler, address, size, repeat, pos, radius, \
swgl_OutRGBA8, swgl_SpanLength); \
} else { \
commitRadialGradient<false>(sampler, address, size, repeat, pos, radius, \
swgl_OutRGBA8, swgl_SpanLength); \
} \
swgl_OutRGBA8 += swgl_SpanLength; \
swgl_SpanLength = 0; \
} while (0)
// Extension to set a clip mask image to be sampled during blending. The offset
// specifies the positioning of the clip mask image relative to the viewport
// origin. The bounding box specifies the rectangle relative to the clip mask's

Просмотреть файл

@ -96,13 +96,18 @@ void swgl_drawSpanRGBA8() {
if (address < 0) {
return;
}
while (swgl_SpanLength > 0) {
float offset = get_gradient_offset(compute_repeated_pos());
if (v_gradient_repeat != 0.0) offset = fract(offset);
float entry = clamp_gradient_entry(offset);
swgl_commitGradientRGBA8(sGpuCache, address, entry);
v_pos += swgl_interpStep(v_pos);
}
#ifndef WR_FEATURE_ALPHA_PASS
swgl_commitRadialGradientRGBA8(sGpuCache, address, GRADIENT_ENTRIES, v_gradient_repeat != 0.0,
v_pos * v_repeated_size - v_center, v_start_radius);
#else
while (swgl_SpanLength > 0) {
float offset = get_gradient_offset(compute_repeated_pos());
if (v_gradient_repeat != 0.0) offset = fract(offset);
float entry = clamp_gradient_entry(offset);
swgl_commitGradientRGBA8(sGpuCache, address, entry);
v_pos += swgl_interpStep(v_pos);
}
#endif
}
#endif

Просмотреть файл

@ -922,11 +922,30 @@ impl GradientGpuBlockBuilder {
// within the segment of the gradient space represented by that entry. To lookup a gradient result,
// first the entry index is calculated to determine which two colors to interpolate between, then
// the offset within that entry bucket is used to interpolate between the two colors in that entry.
// This layout preserves hard stops, as the end color for a given entry can differ from the start
// color for the following entry, despite them being adjacent. Colors are stored within in BGRA8
// format for texture upload. This table requires the gradient color stops to be normalized to the
// range [0, 1]. The first and last entries hold the first and last color stop colors respectively,
// while the entries in between hold the interpolated color stop values for the range [0, 1].
// This layout is motivated by the fact that if one naively tries to store a single color per entry
// and interpolate directly between entries, then hard stops will become softened because the end
// color of an entry actually differs from the start color of the next entry, even though they fall
// at the same edge offset in the gradient space. Instead, the two-color-per-entry layout preserves
// hard stops, as the end color for a given entry can differ from the start color for the following
// entry.
// Colors are stored in RGBA32F format (in the GPU cache). This table requires the gradient color
// stops to be normalized to the range [0, 1]. The first and last entries hold the first and last
// color stop colors respectively, while the entries in between hold the interpolated color stop
// values for the range [0, 1].
// As a further optimization, rather than directly storing the end color, the difference of the end
// color from the start color is stored instead, so that an entry can be evaluated more cheaply
// with start+diff*offset instead of mix(start,end,offset). Further, the color difference in two
// adjacent entries will always be the same if they were generated from the same set of stops/run.
// To allow fast searching of the table, if two adjacent entries generated from different sets of
// stops (a boundary) have the same difference, the floating-point bits of the stop will be nudged
// so that they compare differently without perceptibly altering the interpolation result. This way,
// one can quickly scan the table and recover runs just by comparing the color differences of the
// current and next entry.
// For example, a table with 2 inside entries (startR,startG,startB):(diffR,diffG,diffB) might look
// like so:
// first | 0.0 | 0.5 | last
// (0,0,0):(0,0,0) | (1,0,0):(-1,1,0) | (0,0,1):(0,1,-1) | (1,1,1):(0,0,0)
// ^ solid black ^ red to green ^ blue to green ^ solid white
let mut entries = [GradientDataEntry::white(); GRADIENT_DATA_SIZE];
let mut prev_step = cur_color;
if reverse_stops {

Просмотреть файл

@ -50,8 +50,8 @@ platform(linux,mac) fuzzy(1,80000) == radial-ellipse.yaml radial-ellipse-ref.png
# this might be able to be improved
fuzzy(255,1200) == repeat-linear.yaml repeat-linear-ref.yaml
fuzzy(255,1200) == repeat-linear-reverse.yaml repeat-linear-ref.yaml
fuzzy(255,2664) == repeat-radial.yaml repeat-radial-ref.yaml
fuzzy(255,2664) == repeat-radial-negative.yaml repeat-radial-ref.yaml
fuzzy(255,2666) == repeat-radial.yaml repeat-radial-ref.yaml
fuzzy(255,2666) == repeat-radial-negative.yaml repeat-radial-ref.yaml
fuzzy(255,1652) == repeat-conic.yaml repeat-conic-ref.yaml
fuzzy(255,1652) == repeat-conic-negative.yaml repeat-conic-ref.yaml
@ -62,7 +62,7 @@ fuzzy(1,62154) == tiling-linear-3.yaml tiling-linear-3-ref.yaml
fuzzy(1,17) == tiling-radial-1.yaml tiling-radial-1-ref.yaml
fuzzy(1,1) == tiling-radial-2.yaml tiling-radial-2-ref.yaml
fuzzy(1,3) == tiling-radial-3.yaml tiling-radial-3-ref.yaml
fuzzy(1,3) fuzzy-if(platform(swgl),1,1318) == tiling-radial-3.yaml tiling-radial-3-ref.yaml
fuzzy(1,17) == tiling-radial-4.yaml tiling-radial-4-ref.yaml
fuzzy(1,17) == tiling-conic-1.yaml tiling-conic-1-ref.yaml

Просмотреть файл

@ -59,7 +59,7 @@ fuzzy-if(Android,0-19,0-4) fuzzy-if(webrender&&gtkWidget,13-13,28-32) fuzzy-if(w
fuzzy-if(Android,0-7,0-4) fuzzy-if(webrender&&gtkWidget,29-30,28-32) fuzzy-if(webrender&&cocoaWidget,19-20,44-44) skip-if(!asyncPan) == perspective-scrolling-4.html perspective-scrolling-4-ref.html # Bug 1604338
skip-if(!asyncPan) == perspective-scrolling-5.html perspective-scrolling-5-ref.html
pref(apz.disable_for_scroll_linked_effects,true) skip-if(!asyncPan) == disable-apz-for-sle-pages.html disable-apz-for-sle-pages-ref.html
fuzzy-if(browserIsRemote&&d2d,0-1,0-22) skip-if(!asyncPan) fuzzy-if(geckoview,2-2,242-242) skip-if(geckoview&&debug) == background-blend-mode-1.html background-blend-mode-1-ref.html # bug 1558286 for GV
fuzzy-if(browserIsRemote&&d2d,0-1,0-22) skip-if(!asyncPan) fuzzy-if(geckoview,2-2,242-242) skip-if(geckoview&&debug) fuzzy-if(webrender&&swgl,0-255,0-11) == background-blend-mode-1.html background-blend-mode-1-ref.html # bug 1558286 for GV
skip-if(Android||!asyncPan) != opaque-fractional-displayport-1.html about:blank
skip-if(Android||!asyncPan) != opaque-fractional-displayport-2.html about:blank
fuzzy-if(Android,0-19,0-4) fuzzy-if(webrender&&gtkWidget,19-19,28-32) fuzzy-if(webrender&&cocoaWidget,21-21,44-44) skip-if(!asyncPan) == fixed-pos-scrolled-clip-1.html fixed-pos-scrolled-clip-1-ref.html # Bug 1604338

Просмотреть файл

@ -47,8 +47,8 @@ fuzzy-if(Android,0-8,0-771) == radial-shape-farthest-corner-1a.html radial-shape
fails-if(gtkWidget&&/x86_64-/.test(xulRuntime.XPCOMABI)) fuzzy(0-1,0-1622) fuzzy-if(cocoaWidget,0-2,0-41281) fuzzy-if(Android,0-8,0-1091) fuzzy-if(skiaContent,0-2,0-500) == radial-shape-farthest-corner-1b.html radial-shape-farthest-corner-1-ref.html
fuzzy-if(Android,0-17,0-13320) == radial-shape-farthest-side-1a.html radial-shape-farthest-side-1-ref.html
fuzzy-if(Android,0-17,0-13320) == radial-shape-farthest-side-1b.html radial-shape-farthest-side-1-ref.html
fuzzy-if(webrender,0-2,0-26) == radial-size-1a.html radial-size-1-ref.html
fuzzy-if(webrender,0-1,0-26) == radial-size-1b.html radial-size-1-ref.html
fuzzy-if(webrender&&!swgl,0-2,0-26) fuzzy-if(webrender&&swgl,1-1,944-983) == radial-size-1a.html radial-size-1-ref.html
fuzzy-if(webrender&&!swgl,0-1,0-26) fuzzy-if(webrender&&swgl,1-1,944-983) == radial-size-1b.html radial-size-1-ref.html
fuzzy-if(Android,0-4,0-248) == radial-zero-length-1a.html radial-zero-length-1-ref.html
fuzzy-if(Android,0-4,0-248) == radial-zero-length-1b.html radial-zero-length-1-ref.html
fuzzy-if(Android,0-4,0-248) == radial-zero-length-1c.html radial-zero-length-1-ref.html