зеркало из https://github.com/mozilla/gecko-dev.git
Bug 1686244 - Accelerate radial gradients in SWGL. r=jrmuizel
The same optimization of looking for merged linear gradients can also be applied to radial gradients by solving the quadratic equation to check how large a span we can process within a given merged span. This allows us to save a bunch of table lookups and some other math in the inner loops. Differential Revision: https://phabricator.services.mozilla.com/D105858
This commit is contained in:
Родитель
6d46022aa7
Коммит
7d3755c3f6
|
@ -3778,6 +3778,14 @@ pub fn ast_to_hir(state: &mut State, tu: &syntax::TranslationUnit) -> Translatio
|
|||
Type::new(Void),
|
||||
vec![Type::new(Sampler2D), Type::new(Int), Type::new(Float), Type::new(Bool), Type::new(Float)],
|
||||
);
|
||||
declare_function(
|
||||
state,
|
||||
"swgl_commitRadialGradientRGBA8",
|
||||
None,
|
||||
Type::new(Void),
|
||||
vec![Type::new(Sampler2D), Type::new(Int), Type::new(Float), Type::new(Bool), Type::new(Vec2),
|
||||
Type::new(Float)],
|
||||
);
|
||||
declare_function(
|
||||
state,
|
||||
"swgl_commitGradientRGBA8",
|
||||
|
|
|
@ -1185,6 +1185,241 @@ static void commitLinearGradient(sampler2D sampler, int address, float size,
|
|||
swgl_SpanLength = 0; \
|
||||
} while (0)
|
||||
|
||||
template <typename V>
|
||||
static ALWAYS_INLINE V fastSqrt(V v) {
|
||||
#if USE_SSE2 || USE_NEON
|
||||
return v * inversesqrt(v);
|
||||
#else
|
||||
return sqrt(v);
|
||||
#endif
|
||||
}
|
||||
|
||||
template <typename V>
|
||||
static ALWAYS_INLINE auto fastLength(V v) {
|
||||
return fastSqrt(dot(v, v));
|
||||
}
|
||||
|
||||
// Samples an entire span of a radial gradient by crawling the gradient table
|
||||
// and looking for consecutive stops that can be merged into a single larger
|
||||
// gradient, then interpolating between those larger gradients within the span
|
||||
// based on the computed position relative to a radius.
|
||||
template <bool BLEND>
|
||||
static void commitRadialGradient(sampler2D sampler, int address, float size,
|
||||
bool repeat, vec2 pos, float radius,
|
||||
uint32_t* buf, int span) {
|
||||
assert(sampler->format == TextureFormat::RGBA32F);
|
||||
assert(address >= 0 && address < int(sampler->height * sampler->stride));
|
||||
GradientStops* stops = (GradientStops*)&sampler->buf[address];
|
||||
// clang-format off
|
||||
// Given position p, delta d, and radius r, we need to repeatedly solve the
|
||||
// following quadratic for the pixel offset t:
|
||||
// length(p + t*d) = r
|
||||
// (px + t*dx)^2 + (py + t*dy)^2 = r^2
|
||||
// Rearranged into quadratic equation form (t^2*a + t*b + c = 0) this is:
|
||||
// t^2*(dx^2+dy^2) + t*2*(dx*px+dy*py) + (px^2+py^2-r^2) = 0
|
||||
// t^2*d.d + t*2*d.p + (p.p-r^2) = 0
|
||||
// The solution of the quadratic formula t=(-b+-sqrt(b^2-4ac))/2a reduces to:
|
||||
// t = -d.p/d.d +- sqrt((d.p/d.d)^2 - (p.p-r^2)/d.d)
|
||||
// Note that d.p, d.d, p.p, and r^2 are constant across the gradient, and so
|
||||
// we cache them below for faster computation.
|
||||
//
|
||||
// The quadratic has two solutions, representing the span intersecting the
|
||||
// given radius of gradient, which can occur at two offsets. If there is only
|
||||
// one solution (where b^2-4ac = 0), this represents the point at which the
|
||||
// span runs tangent to the radius. This middle point is significant in that
|
||||
// before it, we walk down the gradient ramp, and after it, we walk up the
|
||||
// ramp.
|
||||
// clang-format on
|
||||
vec2_scalar pos0 = {pos.x.x, pos.y.x};
|
||||
vec2_scalar delta = {pos.x.y - pos.x.x, pos.y.y - pos.y.x};
|
||||
float deltaDelta = dot(delta, delta);
|
||||
float invDelta, middleT, middleB;
|
||||
if (deltaDelta > 0) {
|
||||
invDelta = 1.0f / deltaDelta;
|
||||
middleT = -dot(delta, pos0) * invDelta;
|
||||
middleB = middleT * middleT - dot(pos0, pos0) * invDelta;
|
||||
} else {
|
||||
// If position is invariant, just set the coefficients so the quadratic
|
||||
// always reduces to the end of the span.
|
||||
invDelta = 0.0f;
|
||||
middleT = float(span);
|
||||
middleB = 0.0f;
|
||||
}
|
||||
// We only want search for merged gradients up to the minimum of either the
|
||||
// mid-point or the span length. Cache those offsets here as they don't vary
|
||||
// in the inner loop.
|
||||
Float middleEndRadius =
|
||||
fastLength(pos0 + delta * (Float){middleT, float(span), 0.0f, 0.0f});
|
||||
float middleRadius = span < middleT ? middleEndRadius.y : middleEndRadius.x;
|
||||
float endRadius = middleEndRadius.y;
|
||||
// Convert delta to change in position per chunk.
|
||||
delta *= 4;
|
||||
deltaDelta *= 4 * 4;
|
||||
// clang-format off
|
||||
// Given current position p and delta d, we reduce:
|
||||
// length(p) = sqrt(dot(p,p)) = dot(p,p) * invsqrt(dot(p,p))
|
||||
// where dot(p+d,p+d) can be accumulated as:
|
||||
// (x+dx)^2+(y+dy)^2 = (x^2+y^2) + 2(x*dx+y*dy) + (dx^2+dy^2)
|
||||
// = p.p + 2p.d + d.d
|
||||
// Since p increases by d every loop iteration, p.d increases by d.d, and thus
|
||||
// we can accumulate d.d to calculate 2p.d, then allowing us to get the next
|
||||
// dot-product by adding it to dot-product p.p of the prior iteration. This
|
||||
// saves us some multiplications and an expensive sqrt inside the inner loop.
|
||||
// clang-format on
|
||||
Float dotPos = dot(pos, pos);
|
||||
Float dotPosDelta = 2.0f * dot(pos, delta) + deltaDelta;
|
||||
float deltaDelta2 = 2.0f * deltaDelta;
|
||||
for (int t = 0; t < span;) {
|
||||
// Compute the gradient table offset from the current position.
|
||||
Float offset = fastSqrt(dotPos) - radius;
|
||||
float startRadius = radius;
|
||||
// If repeat is desired, we need to limit the offset to a fractional value.
|
||||
if (repeat) {
|
||||
// The non-repeating radius at which the gradient table actually starts,
|
||||
// radius + floor(offset) = radius + (offset - fract(offset)).
|
||||
startRadius += offset.x;
|
||||
offset = fract(offset);
|
||||
startRadius -= offset.x;
|
||||
}
|
||||
// We need to find the min/max index in the table of the gradient we want to
|
||||
// use as well as the intercept point where we leave this gradient.
|
||||
float intercept = -1;
|
||||
int minIndex = 0;
|
||||
int maxIndex = int(1.0f + size);
|
||||
if (offset.x < 0) {
|
||||
// If inside the inner radius of the gradient table, then use the first
|
||||
// stop. Set the intercept to advance forward to the start of the gradient
|
||||
// table.
|
||||
maxIndex = minIndex;
|
||||
if (t >= middleT) {
|
||||
intercept = radius;
|
||||
}
|
||||
} else if (offset.x >= 1) {
|
||||
// If outside the outer radius of the gradient table, then use the last
|
||||
// stop. Set the intercept to advance toward the valid part of the
|
||||
// gradient table if going in, or just run to the end of the span if going
|
||||
// away from the gradient.
|
||||
minIndex = maxIndex;
|
||||
if (t < middleT) {
|
||||
intercept = radius + 1;
|
||||
}
|
||||
} else {
|
||||
// Otherwise, we're inside the valid part of the gradient table.
|
||||
minIndex = int(1.0f + offset.x * size);
|
||||
maxIndex = minIndex;
|
||||
// Find the offset in the gradient that corresponds to the search limit.
|
||||
// We only search up to the minimum of either the mid-point or the span
|
||||
// length. Get the table index that corresponds to this offset, clamped so
|
||||
// that we avoid hitting the beginning (0) or end (1 + size) of the table.
|
||||
float searchOffset =
|
||||
(t >= middleT ? endRadius : middleRadius) - startRadius;
|
||||
int searchIndex = int(clamp(1.0f + size * searchOffset, 1.0f, size));
|
||||
// If we are past the mid-point, walk up the gradient table trying to
|
||||
// merge stops. If we're below the mid-point, we need to walk down the
|
||||
// table. We note the table index at which we need to look for an
|
||||
// intercept to determine a valid span.
|
||||
if (t >= middleT) {
|
||||
while (maxIndex + 1 <= searchIndex &&
|
||||
stops[maxIndex].can_merge(stops[maxIndex + 1])) {
|
||||
maxIndex++;
|
||||
}
|
||||
intercept = maxIndex + 1;
|
||||
} else {
|
||||
while (minIndex - 1 >= searchIndex &&
|
||||
stops[minIndex - 1].can_merge(stops[minIndex])) {
|
||||
minIndex--;
|
||||
}
|
||||
intercept = minIndex;
|
||||
}
|
||||
// Convert from a table index into units of radius from the center of the
|
||||
// gradient.
|
||||
intercept = clamp((intercept - 1.0f) / size, 0.0f, 1.0f) + startRadius;
|
||||
}
|
||||
// Solve the quadratic for t to find where the merged gradient ends. If no
|
||||
// intercept is found, just go to the middle or end of the span.
|
||||
float endT = t >= middleT ? span : min(span, int(middleT));
|
||||
if (intercept >= 0) {
|
||||
float b = middleB + intercept * intercept * invDelta;
|
||||
if (b > 0) {
|
||||
b = fastSqrt(b);
|
||||
endT = min(endT, t >= middleT ? middleT + b : middleT - b);
|
||||
}
|
||||
}
|
||||
// Figure out how many chunks are actually inside the merged gradient.
|
||||
int inside = int(endT - t) & ~3;
|
||||
if (inside > 0) {
|
||||
// Convert start and end colors to BGRA and scale to 0..255 range later.
|
||||
auto minColorF = stops[minIndex].startColor.zyxw * 255.0f;
|
||||
auto maxColorF = stops[maxIndex].end_color().zyxw * 255.0f;
|
||||
// Compute the change in color per change in gradient offset.
|
||||
auto deltaColorF =
|
||||
(maxColorF - minColorF) * (size / (maxIndex + 1 - minIndex));
|
||||
// Subtract off the color difference of the beginning of the current span
|
||||
// from the beginning of the gradient.
|
||||
Float colorF =
|
||||
minColorF - deltaColorF * (startRadius + (minIndex - 1) / size);
|
||||
// Finally, walk over the span accumulating the position dot product and
|
||||
// getting its sqrt as an offset into the color ramp. Since we're already
|
||||
// in BGRA format and scaled to 255, we just need to round to an integer
|
||||
// and pack down to pixel format.
|
||||
for (auto* end = buf + inside; buf < end; buf += 4) {
|
||||
Float offsetG = fastSqrt(dotPos);
|
||||
commit_blend_span<BLEND>(
|
||||
buf,
|
||||
combine(
|
||||
packRGBA8(round_pixel(colorF + deltaColorF * offsetG.x, 1),
|
||||
round_pixel(colorF + deltaColorF * offsetG.y, 1)),
|
||||
packRGBA8(round_pixel(colorF + deltaColorF * offsetG.z, 1),
|
||||
round_pixel(colorF + deltaColorF * offsetG.w, 1))));
|
||||
dotPos += dotPosDelta;
|
||||
dotPosDelta += deltaDelta2;
|
||||
}
|
||||
// Advance past the portion of gradient we just processed.
|
||||
t += inside;
|
||||
// If we hit the end of the span, exit out now.
|
||||
if (t >= span) {
|
||||
break;
|
||||
}
|
||||
// Otherwise, we are most likely in a transitional section of the gradient
|
||||
// between stops that will likely require doing per-sample table lookups.
|
||||
// Rather than having to redo all the searching above to figure that out,
|
||||
// just assume that to be the case and fall through below to doing the
|
||||
// table lookups to hopefully avoid an iteration.
|
||||
offset = fastSqrt(dotPos) - radius;
|
||||
if (repeat) {
|
||||
offset = fract(offset);
|
||||
}
|
||||
}
|
||||
// If we got here, that means we still have span left to process but did not
|
||||
// have any whole chunks that fell within a merged gradient. Just fall back
|
||||
// to doing a table lookup for each sample.
|
||||
Float entry = clamp(offset * size + 1.0f, 0.0f, 1.0f + size);
|
||||
commit_blend_span<BLEND>(buf, sampleGradient(sampler, address, entry));
|
||||
buf += 4;
|
||||
t += 4;
|
||||
dotPos += dotPosDelta;
|
||||
dotPosDelta += deltaDelta2;
|
||||
}
|
||||
}
|
||||
|
||||
// Fills the whole remaining span with a radial gradient, in the same manner as
// the linear gradient commit (commitLinearGradient), except that it takes a
// varying 2D position scaled to gradient-space plus the radius at which the
// distance from the origin maps to the start of the gradient table. The
// blending or non-blending variant is chosen from blend_key, after which the
// output pointer is advanced past the span and the span length is zeroed.
#define swgl_commitRadialGradientRGBA8(sampler, address, size, repeat, pos, \
                                       radius)                              \
  do {                                                                      \
    if (!blend_key) {                                                       \
      commitRadialGradient<false>(sampler, address, size, repeat, pos,      \
                                  radius, swgl_OutRGBA8, swgl_SpanLength);  \
    } else {                                                                \
      commitRadialGradient<true>(sampler, address, size, repeat, pos,       \
                                 radius, swgl_OutRGBA8, swgl_SpanLength);   \
    }                                                                       \
    swgl_OutRGBA8 += swgl_SpanLength;                                       \
    swgl_SpanLength = 0;                                                    \
  } while (0)
|
||||
|
||||
// Extension to set a clip mask image to be sampled during blending. The offset
|
||||
// specifies the positioning of the clip mask image relative to the viewport
|
||||
// origin. The bounding box specifies the rectangle relative to the clip mask's
|
||||
|
|
|
@ -96,13 +96,18 @@ void swgl_drawSpanRGBA8() {
|
|||
if (address < 0) {
|
||||
return;
|
||||
}
|
||||
while (swgl_SpanLength > 0) {
|
||||
float offset = get_gradient_offset(compute_repeated_pos());
|
||||
if (v_gradient_repeat != 0.0) offset = fract(offset);
|
||||
float entry = clamp_gradient_entry(offset);
|
||||
swgl_commitGradientRGBA8(sGpuCache, address, entry);
|
||||
v_pos += swgl_interpStep(v_pos);
|
||||
}
|
||||
#ifndef WR_FEATURE_ALPHA_PASS
|
||||
swgl_commitRadialGradientRGBA8(sGpuCache, address, GRADIENT_ENTRIES, v_gradient_repeat != 0.0,
|
||||
v_pos * v_repeated_size - v_center, v_start_radius);
|
||||
#else
|
||||
while (swgl_SpanLength > 0) {
|
||||
float offset = get_gradient_offset(compute_repeated_pos());
|
||||
if (v_gradient_repeat != 0.0) offset = fract(offset);
|
||||
float entry = clamp_gradient_entry(offset);
|
||||
swgl_commitGradientRGBA8(sGpuCache, address, entry);
|
||||
v_pos += swgl_interpStep(v_pos);
|
||||
}
|
||||
#endif
|
||||
}
|
||||
#endif
|
||||
|
||||
|
|
|
@ -922,11 +922,30 @@ impl GradientGpuBlockBuilder {
|
|||
// within the segment of the gradient space represented by that entry. To lookup a gradient result,
|
||||
// first the entry index is calculated to determine which two colors to interpolate between, then
|
||||
// the offset within that entry bucket is used to interpolate between the two colors in that entry.
|
||||
// This layout preserves hard stops, as the end color for a given entry can differ from the start
|
||||
// color for the following entry, despite them being adjacent. Colors are stored within in BGRA8
|
||||
// format for texture upload. This table requires the gradient color stops to be normalized to the
|
||||
// range [0, 1]. The first and last entries hold the first and last color stop colors respectively,
|
||||
// while the entries in between hold the interpolated color stop values for the range [0, 1].
|
||||
// This layout is motivated by the fact that if one naively tries to store a single color per entry
|
||||
// and interpolate directly between entries, then hard stops will become softened because the end
|
||||
// color of an entry actually differs from the start color of the next entry, even though they fall
|
||||
// at the same edge offset in the gradient space. Instead, the two-color-per-entry layout preserves
|
||||
// hard stops, as the end color for a given entry can differ from the start color for the following
|
||||
// entry.
|
||||
// Colors are stored in RGBA32F format (in the GPU cache). This table requires the gradient color
|
||||
// stops to be normalized to the range [0, 1]. The first and last entries hold the first and last
|
||||
// color stop colors respectively, while the entries in between hold the interpolated color stop
|
||||
// values for the range [0, 1].
|
||||
// As a further optimization, rather than directly storing the end color, the difference of the end
|
||||
// color from the start color is stored instead, so that an entry can be evaluated more cheaply
|
||||
// with start+diff*offset instead of mix(start,end,offset). Further, the color difference in two
|
||||
// adjacent entries will always be the same if they were generated from the same set of stops/run.
|
||||
// To allow fast searching of the table, if two adjacent entries generated from different sets of
|
||||
// stops (a boundary) have the same difference, the floating-point bits of the stop will be nudged
|
||||
// so that they compare differently without perceptibly altering the interpolation result. This way,
|
||||
// one can quickly scan the table and recover runs just by comparing the color differences of the
|
||||
// current and next entry.
|
||||
// For example, a table with 2 inside entries (startR,startG,startB):(diffR,diffG,diffB) might look
|
||||
// like so:
|
||||
// first | 0.0 | 0.5 | last
|
||||
// (0,0,0):(0,0,0) | (1,0,0):(-1,1,0) | (0,0,1):(0,1,-1) | (1,1,1):(0,0,0)
|
||||
// ^ solid black ^ red to green ^ blue to green ^ solid white
|
||||
let mut entries = [GradientDataEntry::white(); GRADIENT_DATA_SIZE];
|
||||
let mut prev_step = cur_color;
|
||||
if reverse_stops {
|
||||
|
|
|
@ -50,8 +50,8 @@ platform(linux,mac) fuzzy(1,80000) == radial-ellipse.yaml radial-ellipse-ref.png
|
|||
# this might be able to be improved
|
||||
fuzzy(255,1200) == repeat-linear.yaml repeat-linear-ref.yaml
|
||||
fuzzy(255,1200) == repeat-linear-reverse.yaml repeat-linear-ref.yaml
|
||||
fuzzy(255,2664) == repeat-radial.yaml repeat-radial-ref.yaml
|
||||
fuzzy(255,2664) == repeat-radial-negative.yaml repeat-radial-ref.yaml
|
||||
fuzzy(255,2666) == repeat-radial.yaml repeat-radial-ref.yaml
|
||||
fuzzy(255,2666) == repeat-radial-negative.yaml repeat-radial-ref.yaml
|
||||
fuzzy(255,1652) == repeat-conic.yaml repeat-conic-ref.yaml
|
||||
fuzzy(255,1652) == repeat-conic-negative.yaml repeat-conic-ref.yaml
|
||||
|
||||
|
@ -62,7 +62,7 @@ fuzzy(1,62154) == tiling-linear-3.yaml tiling-linear-3-ref.yaml
|
|||
|
||||
fuzzy(1,17) == tiling-radial-1.yaml tiling-radial-1-ref.yaml
|
||||
fuzzy(1,1) == tiling-radial-2.yaml tiling-radial-2-ref.yaml
|
||||
fuzzy(1,3) == tiling-radial-3.yaml tiling-radial-3-ref.yaml
|
||||
fuzzy(1,3) fuzzy-if(platform(swgl),1,1318) == tiling-radial-3.yaml tiling-radial-3-ref.yaml
|
||||
fuzzy(1,17) == tiling-radial-4.yaml tiling-radial-4-ref.yaml
|
||||
|
||||
fuzzy(1,17) == tiling-conic-1.yaml tiling-conic-1-ref.yaml
|
||||
|
|
|
@ -59,7 +59,7 @@ fuzzy-if(Android,0-19,0-4) fuzzy-if(webrender&>kWidget,13-13,28-32) fuzzy-if(w
|
|||
fuzzy-if(Android,0-7,0-4) fuzzy-if(webrender&>kWidget,29-30,28-32) fuzzy-if(webrender&&cocoaWidget,19-20,44-44) skip-if(!asyncPan) == perspective-scrolling-4.html perspective-scrolling-4-ref.html # Bug 1604338
|
||||
skip-if(!asyncPan) == perspective-scrolling-5.html perspective-scrolling-5-ref.html
|
||||
pref(apz.disable_for_scroll_linked_effects,true) skip-if(!asyncPan) == disable-apz-for-sle-pages.html disable-apz-for-sle-pages-ref.html
|
||||
fuzzy-if(browserIsRemote&&d2d,0-1,0-22) skip-if(!asyncPan) fuzzy-if(geckoview,2-2,242-242) skip-if(geckoview&&debug) == background-blend-mode-1.html background-blend-mode-1-ref.html # bug 1558286 for GV
|
||||
fuzzy-if(browserIsRemote&&d2d,0-1,0-22) skip-if(!asyncPan) fuzzy-if(geckoview,2-2,242-242) skip-if(geckoview&&debug) fuzzy-if(webrender&&swgl,0-255,0-11) == background-blend-mode-1.html background-blend-mode-1-ref.html # bug 1558286 for GV
|
||||
skip-if(Android||!asyncPan) != opaque-fractional-displayport-1.html about:blank
|
||||
skip-if(Android||!asyncPan) != opaque-fractional-displayport-2.html about:blank
|
||||
fuzzy-if(Android,0-19,0-4) fuzzy-if(webrender&>kWidget,19-19,28-32) fuzzy-if(webrender&&cocoaWidget,21-21,44-44) skip-if(!asyncPan) == fixed-pos-scrolled-clip-1.html fixed-pos-scrolled-clip-1-ref.html # Bug 1604338
|
||||
|
|
|
@ -47,8 +47,8 @@ fuzzy-if(Android,0-8,0-771) == radial-shape-farthest-corner-1a.html radial-shape
|
|||
fails-if(gtkWidget&&/x86_64-/.test(xulRuntime.XPCOMABI)) fuzzy(0-1,0-1622) fuzzy-if(cocoaWidget,0-2,0-41281) fuzzy-if(Android,0-8,0-1091) fuzzy-if(skiaContent,0-2,0-500) == radial-shape-farthest-corner-1b.html radial-shape-farthest-corner-1-ref.html
|
||||
fuzzy-if(Android,0-17,0-13320) == radial-shape-farthest-side-1a.html radial-shape-farthest-side-1-ref.html
|
||||
fuzzy-if(Android,0-17,0-13320) == radial-shape-farthest-side-1b.html radial-shape-farthest-side-1-ref.html
|
||||
fuzzy-if(webrender,0-2,0-26) == radial-size-1a.html radial-size-1-ref.html
|
||||
fuzzy-if(webrender,0-1,0-26) == radial-size-1b.html radial-size-1-ref.html
|
||||
fuzzy-if(webrender&&!swgl,0-2,0-26) fuzzy-if(webrender&&swgl,1-1,944-983) == radial-size-1a.html radial-size-1-ref.html
|
||||
fuzzy-if(webrender&&!swgl,0-1,0-26) fuzzy-if(webrender&&swgl,1-1,944-983) == radial-size-1b.html radial-size-1-ref.html
|
||||
fuzzy-if(Android,0-4,0-248) == radial-zero-length-1a.html radial-zero-length-1-ref.html
|
||||
fuzzy-if(Android,0-4,0-248) == radial-zero-length-1b.html radial-zero-length-1-ref.html
|
||||
fuzzy-if(Android,0-4,0-248) == radial-zero-length-1c.html radial-zero-length-1-ref.html
|
||||
|
|
Загрузка…
Ссылка в новой задаче