diff --git a/gfx/wr/glsl-to-cxx/src/hir.rs b/gfx/wr/glsl-to-cxx/src/hir.rs
index e2e0f3a59629..700be55c3195 100644
--- a/gfx/wr/glsl-to-cxx/src/hir.rs
+++ b/gfx/wr/glsl-to-cxx/src/hir.rs
@@ -3778,6 +3778,14 @@ pub fn ast_to_hir(state: &mut State, tu: &syntax::TranslationUnit) -> Translatio
         Type::new(Void),
         vec![Type::new(Sampler2D), Type::new(Int), Type::new(Float), Type::new(Bool), Type::new(Float)],
     );
+    declare_function(
+        state,
+        "swgl_commitRadialGradientRGBA8",
+        None,
+        Type::new(Void),
+        vec![Type::new(Sampler2D), Type::new(Int), Type::new(Float), Type::new(Bool), Type::new(Vec2),
+             Type::new(Float)],
+    );
     declare_function(
         state,
         "swgl_commitGradientRGBA8",
diff --git a/gfx/wr/swgl/src/swgl_ext.h b/gfx/wr/swgl/src/swgl_ext.h
index 10d3aadf2240..a89fd807c2e4 100644
--- a/gfx/wr/swgl/src/swgl_ext.h
+++ b/gfx/wr/swgl/src/swgl_ext.h
@@ -1185,6 +1185,246 @@ static void commitLinearGradient(sampler2D sampler, int address, float size,
     swgl_SpanLength = 0;                                           \
   } while (0)
 
+// Square root that, on SSE2 and NEON builds, is formed as v * inversesqrt(v)
+// instead of calling sqrt directly.
+template <typename V>
+static ALWAYS_INLINE V fastSqrt(V v) {
+#if USE_SSE2 || USE_NEON
+  // Clamp the input away from zero first: if inversesqrt(0) yields infinity,
+  // 0 * infinity would be NaN, not 0. NOTE(review): assumes max() accepts V.
+  return v * inversesqrt(max(v, V(1.0e-10f)));
+#else
+  return sqrt(v);
+#endif
+}
+
+// Length of v, computed as fastSqrt(dot(v, v)).
+template <typename V>
+static ALWAYS_INLINE auto fastLength(V v) {
+  return fastSqrt(dot(v, v));
+}
+
+// Samples an entire span of a radial gradient by crawling the gradient table
+// and looking for consecutive stops that can be merged into a single larger
+// gradient, then interpolating between those larger gradients within the span
+// based on the computed position relative to a radius.
+template <bool BLEND>
+static void commitRadialGradient(sampler2D sampler, int address, float size,
+                                 bool repeat, vec2 pos, float radius,
+                                 uint32_t* buf, int span) {
+  assert(sampler->format == TextureFormat::RGBA32F);
+  assert(address >= 0 && address < int(sampler->height * sampler->stride));
+  GradientStops* stops = (GradientStops*)&sampler->buf[address];
+  // clang-format off
+  // Given position p, delta d, and radius r, we need to repeatedly solve the
+  // following quadratic for the pixel offset t:
+  //    length(p + t*d) = r
+  //    (px + t*dx)^2 + (py + t*dy)^2 = r^2
+  // Rearranged into quadratic equation form (t^2*a + t*b + c = 0) this is:
+  //    t^2*(dx^2+dy^2) + t*2*(dx*px+dy*py) + (px^2+py^2-r^2) = 0
+  //    t^2*d.d + t*2*d.p + (p.p-r^2) = 0
+  // The solution of the quadratic formula t=(-b+-sqrt(b^2-4ac))/2a reduces to:
+  //    t = -d.p/d.d +- sqrt((d.p/d.d)^2 - (p.p-r^2)/d.d)
+  // Note that d.p, d.d, p.p, and r^2 are constant across the gradient, and so
+  // we cache them below for faster computation.
+  //
+  // The quadratic has two solutions, representing the span intersecting the
+  // given radius of gradient, which can occur at two offsets. If there is only
+  // one solution (where b^2-4ac = 0), this represents the point at which the
+  // span runs tangent to the radius. This middle point is significant in that
+  // before it, we walk down the gradient ramp, and after it, we walk up the
+  // ramp.
+  // clang-format on
+  vec2_scalar pos0 = {pos.x.x, pos.y.x};
+  vec2_scalar delta = {pos.x.y - pos.x.x, pos.y.y - pos.y.x};
+  float deltaDelta = dot(delta, delta);
+  float invDelta, middleT, middleB;
+  if (deltaDelta > 0) {
+    invDelta = 1.0f / deltaDelta;
+    middleT = -dot(delta, pos0) * invDelta;
+    middleB = middleT * middleT - dot(pos0, pos0) * invDelta;
+  } else {
+    // If position is invariant, just set the coefficients so the quadratic
+    // always reduces to the end of the span.
+    invDelta = 0.0f;
+    middleT = float(span);
+    middleB = 0.0f;
+  }
+  // We only want to search for merged gradients up to the minimum of either the
+  // mid-point or the span length. Cache those offsets here as they don't vary
+  // in the inner loop.
+  Float middleEndRadius =
+      fastLength(pos0 + delta * (Float){middleT, float(span), 0.0f, 0.0f});
+  float middleRadius = span < middleT ? middleEndRadius.y : middleEndRadius.x;
+  float endRadius = middleEndRadius.y;
+  // Convert delta to change in position per chunk.
+  delta *= 4;
+  deltaDelta *= 4 * 4;
+  // clang-format off
+  // Given current position p and delta d, we reduce:
+  //    length(p) = sqrt(dot(p,p)) = dot(p,p) * invsqrt(dot(p,p))
+  // where dot(p+d,p+d) can be accumulated as:
+  //    (x+dx)^2+(y+dy)^2 = (x^2+y^2) + 2(x*dx+y*dy) + (dx^2+dy^2)
+  //                      = p.p + 2p.d + d.d
+  // Since p increases by d every loop iteration, p.d increases by d.d, and thus
+  // we can accumulate d.d to calculate 2p.d, then allowing us to get the next
+  // dot-product by adding it to dot-product p.p of the prior iteration. This
+  // saves us some multiplications and an expensive sqrt inside the inner loop.
+  // clang-format on
+  Float dotPos = dot(pos, pos);
+  Float dotPosDelta = 2.0f * dot(pos, delta) + deltaDelta;
+  float deltaDelta2 = 2.0f * deltaDelta;
+  for (int t = 0; t < span;) {
+    // Compute the gradient table offset from the current position.
+    Float offset = fastSqrt(dotPos) - radius;
+    float startRadius = radius;
+    // If repeat is desired, we need to limit the offset to a fractional value.
+    if (repeat) {
+      // The non-repeating radius at which the gradient table actually starts,
+      // radius + floor(offset) = radius + (offset - fract(offset)).
+      startRadius += offset.x;
+      offset = fract(offset);
+      startRadius -= offset.x;
+    }
+    // We need to find the min/max index in the table of the gradient we want to
+    // use as well as the intercept point where we leave this gradient.
+    float intercept = -1;
+    int minIndex = 0;
+    int maxIndex = int(1.0f + size);
+    if (offset.x < 0) {
+      // If inside the inner radius of the gradient table, then use the first
+      // stop. Set the intercept to advance forward to the start of the gradient
+      // table.
+      maxIndex = minIndex;
+      if (t >= middleT) {
+        intercept = radius;
+      }
+    } else if (offset.x >= 1) {
+      // If outside the outer radius of the gradient table, then use the last
+      // stop. Set the intercept to advance toward the valid part of the
+      // gradient table if going in, or just run to the end of the span if going
+      // away from the gradient.
+      minIndex = maxIndex;
+      if (t < middleT) {
+        intercept = radius + 1;
+      }
+    } else {
+      // Otherwise, we're inside the valid part of the gradient table.
+      minIndex = int(1.0f + offset.x * size);
+      maxIndex = minIndex;
+      // Find the offset in the gradient that corresponds to the search limit.
+      // We only search up to the minimum of either the mid-point or the span
+      // length. Get the table index that corresponds to this offset, clamped so
+      // that we avoid hitting the beginning (0) or end (1 + size) of the table.
+      float searchOffset =
+          (t >= middleT ? endRadius : middleRadius) - startRadius;
+      int searchIndex = int(clamp(1.0f + size * searchOffset, 1.0f, size));
+      // If we are past the mid-point, walk up the gradient table trying to
+      // merge stops. If we're below the mid-point, we need to walk down the
+      // table. We note the table index at which we need to look for an
+      // intercept to determine a valid span.
+      if (t >= middleT) {
+        while (maxIndex + 1 <= searchIndex &&
+               stops[maxIndex].can_merge(stops[maxIndex + 1])) {
+          maxIndex++;
+        }
+        intercept = maxIndex + 1;
+      } else {
+        while (minIndex - 1 >= searchIndex &&
+               stops[minIndex - 1].can_merge(stops[minIndex])) {
+          minIndex--;
+        }
+        intercept = minIndex;
+      }
+      // Convert from a table index into units of radius from the center of the
+      // gradient.
+      intercept = clamp((intercept - 1.0f) / size, 0.0f, 1.0f) + startRadius;
+    }
+    // Solve the quadratic for t to find where the merged gradient ends. If no
+    // intercept is found, just go to the middle or end of the span.
+    float endT = t >= middleT ? span : min(span, int(middleT));
+    if (intercept >= 0) {
+      float b = middleB + intercept * intercept * invDelta;
+      if (b > 0) {
+        b = fastSqrt(b);
+        endT = min(endT, t >= middleT ? middleT + b : middleT - b);
+      }
+    }
+    // Figure out how many chunks are actually inside the merged gradient.
+    int inside = int(endT - t) & ~3;
+    if (inside > 0) {
+      // Convert start and end colors to BGRA and scale to 0..255 range later.
+      auto minColorF = stops[minIndex].startColor.zyxw * 255.0f;
+      auto maxColorF = stops[maxIndex].end_color().zyxw * 255.0f;
+      // Compute the change in color per change in gradient offset.
+      auto deltaColorF =
+          (maxColorF - minColorF) * (size / (maxIndex + 1 - minIndex));
+      // Subtract off the color difference of the beginning of the current span
+      // from the beginning of the gradient.
+      Float colorF =
+          minColorF - deltaColorF * (startRadius + (minIndex - 1) / size);
+      // Finally, walk over the span accumulating the position dot product and
+      // getting its sqrt as an offset into the color ramp. Since we're already
+      // in BGRA format and scaled to 255, we just need to round to an integer
+      // and pack down to pixel format.
+      for (auto* end = buf + inside; buf < end; buf += 4) {
+        Float offsetG = fastSqrt(dotPos);
+        commit_blend_span<BLEND>(
+            buf,
+            combine(
+                packRGBA8(round_pixel(colorF + deltaColorF * offsetG.x, 1),
+                          round_pixel(colorF + deltaColorF * offsetG.y, 1)),
+                packRGBA8(round_pixel(colorF + deltaColorF * offsetG.z, 1),
+                          round_pixel(colorF + deltaColorF * offsetG.w, 1))));
+        dotPos += dotPosDelta;
+        dotPosDelta += deltaDelta2;
+      }
+      // Advance past the portion of gradient we just processed.
+      t += inside;
+      // If we hit the end of the span, exit out now.
+      if (t >= span) {
+        break;
+      }
+      // Otherwise, we are most likely in a transitional section of the gradient
+      // between stops that will likely require doing per-sample table lookups.
+      // Rather than having to redo all the searching above to figure that out,
+      // just assume that to be the case and fall through below to doing the
+      // table lookups to hopefully avoid an iteration.
+      offset = fastSqrt(dotPos) - radius;
+      if (repeat) {
+        offset = fract(offset);
+      }
+    }
+    // If we got here, that means we still have span left to process but did not
+    // have any whole chunks that fell within a merged gradient. Just fall back
+    // to doing a table lookup for each sample.
+    Float entry = clamp(offset * size + 1.0f, 0.0f, 1.0f + size);
+    commit_blend_span<BLEND>(buf, sampleGradient(sampler, address, entry));
+    buf += 4;
+    t += 4;
+    dotPos += dotPosDelta;
+    dotPosDelta += deltaDelta2;
+  }
+}
+
+// Commits an entire span of a radial gradient similar to
+// commitLinearGradient, but given a varying 2D position scaled to
+// gradient-space and a radius at which the distance from the origin maps to the
+// start of the gradient table.
+#define swgl_commitRadialGradientRGBA8(sampler, address, size, repeat, pos, \
+                                       radius)                              \
+  do {                                                                      \
+    if (blend_key) {                                                        \
+      commitRadialGradient<true>(sampler, address, size, repeat, pos,       \
+                                 radius, swgl_OutRGBA8, swgl_SpanLength);   \
+    } else {                                                                \
+      commitRadialGradient<false>(sampler, address, size, repeat, pos,      \
+                                  radius, swgl_OutRGBA8, swgl_SpanLength);  \
+    }                                                                       \
+    swgl_OutRGBA8 += swgl_SpanLength;                                       \
+    swgl_SpanLength = 0;                                                    \
+  } while (0)
+
 // Extension to set a clip mask image to be sampled during blending. The offset
 // specifies the positioning of the clip mask image relative to the viewport
 // origin.
The bounding box specifies the rectangle relative to the clip mask's diff --git a/gfx/wr/webrender/res/brush_radial_gradient.glsl b/gfx/wr/webrender/res/brush_radial_gradient.glsl index 681493ea438e..f0c0a91d2fef 100644 --- a/gfx/wr/webrender/res/brush_radial_gradient.glsl +++ b/gfx/wr/webrender/res/brush_radial_gradient.glsl @@ -96,13 +96,18 @@ void swgl_drawSpanRGBA8() { if (address < 0) { return; } - while (swgl_SpanLength > 0) { - float offset = get_gradient_offset(compute_repeated_pos()); - if (v_gradient_repeat != 0.0) offset = fract(offset); - float entry = clamp_gradient_entry(offset); - swgl_commitGradientRGBA8(sGpuCache, address, entry); - v_pos += swgl_interpStep(v_pos); - } + #ifndef WR_FEATURE_ALPHA_PASS + swgl_commitRadialGradientRGBA8(sGpuCache, address, GRADIENT_ENTRIES, v_gradient_repeat != 0.0, + v_pos * v_repeated_size - v_center, v_start_radius); + #else + while (swgl_SpanLength > 0) { + float offset = get_gradient_offset(compute_repeated_pos()); + if (v_gradient_repeat != 0.0) offset = fract(offset); + float entry = clamp_gradient_entry(offset); + swgl_commitGradientRGBA8(sGpuCache, address, entry); + v_pos += swgl_interpStep(v_pos); + } + #endif } #endif diff --git a/gfx/wr/webrender/src/prim_store/gradient.rs b/gfx/wr/webrender/src/prim_store/gradient.rs index a22416b2bb7a..7b07ca608fab 100644 --- a/gfx/wr/webrender/src/prim_store/gradient.rs +++ b/gfx/wr/webrender/src/prim_store/gradient.rs @@ -922,11 +922,30 @@ impl GradientGpuBlockBuilder { // within the segment of the gradient space represented by that entry. To lookup a gradient result, // first the entry index is calculated to determine which two colors to interpolate between, then // the offset within that entry bucket is used to interpolate between the two colors in that entry. - // This layout preserves hard stops, as the end color for a given entry can differ from the start - // color for the following entry, despite them being adjacent. 
Colors are stored within in BGRA8 - // format for texture upload. This table requires the gradient color stops to be normalized to the - // range [0, 1]. The first and last entries hold the first and last color stop colors respectively, - // while the entries in between hold the interpolated color stop values for the range [0, 1]. + // This layout is motivated by the fact that if one naively tries to store a single color per entry + // and interpolate directly between entries, then hard stops will become softened because the end + // color of an entry actually differs from the start color of the next entry, even though they fall + // at the same edge offset in the gradient space. Instead, the two-color-per-entry layout preserves + // hard stops, as the end color for a given entry can differ from the start color for the following + // entry. + // Colors are stored in RGBA32F format (in the GPU cache). This table requires the gradient color + // stops to be normalized to the range [0, 1]. The first and last entries hold the first and last + // color stop colors respectively, while the entries in between hold the interpolated color stop + // values for the range [0, 1]. + // As a further optimization, rather than directly storing the end color, the difference of the end + // color from the start color is stored instead, so that an entry can be evaluated more cheaply + // with start+diff*offset instead of mix(start,end,offset). Further, the color difference in two + // adjacent entries will always be the same if they were generated from the same set of stops/run. + // To allow fast searching of the table, if two adjacent entries generated from different sets of + // stops (a boundary) have the same difference, the floating-point bits of the stop will be nudged + // so that they compare differently without perceptibly altering the interpolation result. 
This way, + // one can quickly scan the table and recover runs just by comparing the color differences of the + // current and next entry. + // For example, a table with 2 inside entries (startR,startG,startB):(diffR,diffG,diffB) might look + // like so: + // first | 0.0 | 0.5 | last + // (0,0,0):(0,0,0) | (1,0,0):(-1,1,0) | (0,0,1):(0,1,-1) | (1,1,1):(0,0,0) + // ^ solid black ^ red to green ^ blue to green ^ solid white let mut entries = [GradientDataEntry::white(); GRADIENT_DATA_SIZE]; let mut prev_step = cur_color; if reverse_stops { diff --git a/gfx/wr/wrench/reftests/gradient/reftest.list b/gfx/wr/wrench/reftests/gradient/reftest.list index 0441182ecef1..83e3956478e5 100644 --- a/gfx/wr/wrench/reftests/gradient/reftest.list +++ b/gfx/wr/wrench/reftests/gradient/reftest.list @@ -50,8 +50,8 @@ platform(linux,mac) fuzzy(1,80000) == radial-ellipse.yaml radial-ellipse-ref.png # this might be able to be improved fuzzy(255,1200) == repeat-linear.yaml repeat-linear-ref.yaml fuzzy(255,1200) == repeat-linear-reverse.yaml repeat-linear-ref.yaml -fuzzy(255,2664) == repeat-radial.yaml repeat-radial-ref.yaml -fuzzy(255,2664) == repeat-radial-negative.yaml repeat-radial-ref.yaml +fuzzy(255,2666) == repeat-radial.yaml repeat-radial-ref.yaml +fuzzy(255,2666) == repeat-radial-negative.yaml repeat-radial-ref.yaml fuzzy(255,1652) == repeat-conic.yaml repeat-conic-ref.yaml fuzzy(255,1652) == repeat-conic-negative.yaml repeat-conic-ref.yaml @@ -62,7 +62,7 @@ fuzzy(1,62154) == tiling-linear-3.yaml tiling-linear-3-ref.yaml fuzzy(1,17) == tiling-radial-1.yaml tiling-radial-1-ref.yaml fuzzy(1,1) == tiling-radial-2.yaml tiling-radial-2-ref.yaml -fuzzy(1,3) == tiling-radial-3.yaml tiling-radial-3-ref.yaml +fuzzy(1,3) fuzzy-if(platform(swgl),1,1318) == tiling-radial-3.yaml tiling-radial-3-ref.yaml fuzzy(1,17) == tiling-radial-4.yaml tiling-radial-4-ref.yaml fuzzy(1,17) == tiling-conic-1.yaml tiling-conic-1-ref.yaml diff --git a/layout/reftests/async-scrolling/reftest.list 
b/layout/reftests/async-scrolling/reftest.list index eaa4768772dc..c21bd6f9bbb3 100644 --- a/layout/reftests/async-scrolling/reftest.list +++ b/layout/reftests/async-scrolling/reftest.list @@ -59,7 +59,7 @@ fuzzy-if(Android,0-19,0-4) fuzzy-if(webrender&&gtkWidget,13-13,28-32) fuzzy-if(w fuzzy-if(Android,0-7,0-4) fuzzy-if(webrender&&gtkWidget,29-30,28-32) fuzzy-if(webrender&&cocoaWidget,19-20,44-44) skip-if(!asyncPan) == perspective-scrolling-4.html perspective-scrolling-4-ref.html # Bug 1604338 skip-if(!asyncPan) == perspective-scrolling-5.html perspective-scrolling-5-ref.html pref(apz.disable_for_scroll_linked_effects,true) skip-if(!asyncPan) == disable-apz-for-sle-pages.html disable-apz-for-sle-pages-ref.html -fuzzy-if(browserIsRemote&&d2d,0-1,0-22) skip-if(!asyncPan) fuzzy-if(geckoview,2-2,242-242) skip-if(geckoview&&debug) == background-blend-mode-1.html background-blend-mode-1-ref.html # bug 1558286 for GV +fuzzy-if(browserIsRemote&&d2d,0-1,0-22) skip-if(!asyncPan) fuzzy-if(geckoview,2-2,242-242) skip-if(geckoview&&debug) fuzzy-if(webrender&&swgl,0-255,0-11) == background-blend-mode-1.html background-blend-mode-1-ref.html # bug 1558286 for GV skip-if(Android||!asyncPan) != opaque-fractional-displayport-1.html about:blank skip-if(Android||!asyncPan) != opaque-fractional-displayport-2.html about:blank fuzzy-if(Android,0-19,0-4) fuzzy-if(webrender&&gtkWidget,19-19,28-32) fuzzy-if(webrender&&cocoaWidget,21-21,44-44) skip-if(!asyncPan) == fixed-pos-scrolled-clip-1.html fixed-pos-scrolled-clip-1-ref.html # Bug 1604338 diff --git a/layout/reftests/css-gradients/reftest.list b/layout/reftests/css-gradients/reftest.list index ada9ccf96437..8e922a512066 100644 --- a/layout/reftests/css-gradients/reftest.list +++ b/layout/reftests/css-gradients/reftest.list @@ -47,8 +47,8 @@ fuzzy-if(Android,0-8,0-771) == radial-shape-farthest-corner-1a.html radial-shape fails-if(gtkWidget&&/x86_64-/.test(xulRuntime.XPCOMABI)) fuzzy(0-1,0-1622) fuzzy-if(cocoaWidget,0-2,0-41281) 
fuzzy-if(Android,0-8,0-1091) fuzzy-if(skiaContent,0-2,0-500) == radial-shape-farthest-corner-1b.html radial-shape-farthest-corner-1-ref.html fuzzy-if(Android,0-17,0-13320) == radial-shape-farthest-side-1a.html radial-shape-farthest-side-1-ref.html fuzzy-if(Android,0-17,0-13320) == radial-shape-farthest-side-1b.html radial-shape-farthest-side-1-ref.html -fuzzy-if(webrender,0-2,0-26) == radial-size-1a.html radial-size-1-ref.html -fuzzy-if(webrender,0-1,0-26) == radial-size-1b.html radial-size-1-ref.html +fuzzy-if(webrender&&!swgl,0-2,0-26) fuzzy-if(webrender&&swgl,1-1,944-983) == radial-size-1a.html radial-size-1-ref.html +fuzzy-if(webrender&&!swgl,0-1,0-26) fuzzy-if(webrender&&swgl,1-1,944-983) == radial-size-1b.html radial-size-1-ref.html fuzzy-if(Android,0-4,0-248) == radial-zero-length-1a.html radial-zero-length-1-ref.html fuzzy-if(Android,0-4,0-248) == radial-zero-length-1b.html radial-zero-length-1-ref.html fuzzy-if(Android,0-4,0-248) == radial-zero-length-1c.html radial-zero-length-1-ref.html