Backed out 4 changesets (bug 1621454) for causing bustage in src/swgl_ext.h. a=backout

Backed out changeset d0d03d5a81a8 (bug 1621454)
Backed out changeset 6675f76d6f11 (bug 1621454)
Backed out changeset 29943d5348df (bug 1621454)
Backed out changeset 80c0aaa81c2e (bug 1621454)
Butkovits Atila 2020-10-31 12:28:47 +02:00
Parent 94d1893299
Commit c4646c9aa3
30 changed files: 631 additions and 884 deletions

View file

@@ -44,7 +44,7 @@ skip-if(Android) == 649134-2.html 649134-2-ref.html
# (Fuzzy necessary due to pixel-wise comparison of different JPEGs.
# The vast majority of the fuzziness comes from Linux and WinXP.)
skip-if(isCoverageBuild) pref(layout.css.image-orientation.initial-from-image,true) fuzzy(0-2,0-830) == bug917595-iframe-1.html bug917595-1-ref.html
fuzzy(0-3,0-640) fuzzy-if(skiaContent,0-3,0-7544) fuzzy-if(webrender&&!geckoview,2-3,50-7544) == bug917595-exif-rotated.jpg bug917595-pixel-rotated.jpg # bug 1060869
fuzzy(0-3,0-640) fuzzy-if(skiaContent,0-3,0-7544) fuzzy-if(webrender&&!geckoview,2-3,52-7544) == bug917595-exif-rotated.jpg bug917595-pixel-rotated.jpg # bug 1060869
# Test support for SVG-as-image in <picture> elements.
== bug1106522-1.html bug1106522-ref.html

View file

@@ -4,10 +4,10 @@
skip-if(!asyncPan) pref(apz.allow_zooming,true) != async-scrollbar-1-v.html about:blank
skip-if(!asyncPan) pref(apz.allow_zooming,true) != async-scrollbar-1-v-ref.html about:blank
fuzzy-if(Android,0-1,0-2) fuzzy-if(webrender&&gtkWidget,7-8,24-32) fuzzy-if(webrender&&cocoaWidget,22-22,44-44) skip-if(!asyncPan) pref(apz.allow_zooming,true) == async-scrollbar-1-v.html async-scrollbar-1-v-ref.html
fuzzy-if(Android,0-4,0-5) fuzzy-if(webrender&&gtkWidget,28-29,30-32) fuzzy-if(webrender&&cocoaWidget,22-22,44-44) skip-if(!asyncPan) pref(apz.allow_zooming,true) == async-scrollbar-1-h.html async-scrollbar-1-h-ref.html
fuzzy-if(Android,0-4,0-5) fuzzy-if(webrender&&gtkWidget,28-28,30-32) fuzzy-if(webrender&&cocoaWidget,22-22,44-44) skip-if(!asyncPan) pref(apz.allow_zooming,true) == async-scrollbar-1-h.html async-scrollbar-1-h-ref.html
fuzzy-if(Android,0-6,0-6) fuzzy-if(webrender&&gtkWidget,2-2,19-20) fuzzy-if(webrender&&cocoaWidget,17-17,88-88) skip-if(!asyncPan) pref(apz.allow_zooming,true) == async-scrollbar-1-vh.html async-scrollbar-1-vh-ref.html
fuzzy-if(Android,0-1,0-2) fuzzy-if(webrender&&gtkWidget,7-8,24-32) fuzzy-if(webrender&&cocoaWidget,22-22,44-44) skip-if(!asyncPan) pref(apz.allow_zooming,true) == async-scrollbar-1-v-rtl.html async-scrollbar-1-v-rtl-ref.html
fuzzy-if(Android,0-14,0-5) fuzzy-if(webrender&&gtkWidget,28-29,30-32) fuzzy-if(webrender&&cocoaWidget,22-22,44-44) skip-if(!asyncPan) pref(apz.allow_zooming,true) == async-scrollbar-1-h-rtl.html async-scrollbar-1-h-rtl-ref.html
fuzzy-if(Android,0-14,0-5) fuzzy-if(webrender&&gtkWidget,28-28,30-32) fuzzy-if(webrender&&cocoaWidget,22-22,44-44) skip-if(!asyncPan) pref(apz.allow_zooming,true) == async-scrollbar-1-h-rtl.html async-scrollbar-1-h-rtl-ref.html
fuzzy-if(Android,0-8,0-8) fuzzy-if(webrender&&gtkWidget,13-13,32-32) fuzzy-if(webrender&&cocoaWidget,17-17,50-54) skip-if(!asyncPan) pref(apz.allow_zooming,true) == async-scrollbar-1-vh-rtl.html async-scrollbar-1-vh-rtl-ref.html
# Different async zoom levels. Since the scrollthumb gets async-scaled in the

View file

@@ -862,7 +862,7 @@ impl RunClass {
#[derive(Debug, Clone, PartialEq)]
pub enum SymDecl {
NativeFunction(FunctionType, Option<&'static str>, RunClass),
NativeFunction(FunctionType, Option<&'static str>),
UserFunction(Rc<FunctionDefinition>, RunClass),
Local(StorageClass, Type, RunClass),
Global(
@@ -1993,7 +1993,7 @@ pub fn is_output(expr: &Expr, state: &State) -> Option<SymRef> {
match &expr.kind {
ExprKind::Variable(i) => match state.sym(*i).decl {
SymDecl::Global(storage, ..) => match storage {
StorageClass::In | StorageClass::Out => return Some(*i),
StorageClass::Out => return Some(*i),
_ => {}
},
SymDecl::Local(..) => {}
@@ -2146,23 +2146,28 @@ fn translate_expression(state: &mut State, e: &syntax::Expr) -> Expr {
syntax::Expr::Binary(op, lhs, rhs) => {
let lhs = Box::new(translate_expression(state, lhs));
let rhs = Box::new(translate_expression(state, rhs));
let ty = match op {
BinaryOp::Equal | BinaryOp::NonEqual | BinaryOp::GT | BinaryOp::GTE | BinaryOp::LT | BinaryOp::LTE => {
let ty = if op == &BinaryOp::Mult {
if lhs.ty.kind == TypeKind::Mat3 && rhs.ty.kind == TypeKind::Vec3 {
rhs.ty.clone()
} else if lhs.ty.kind == TypeKind::Mat4 && rhs.ty.kind == TypeKind::Vec4 {
rhs.ty.clone()
} else if lhs.ty.kind == TypeKind::Mat2 && rhs.ty.kind == TypeKind::Vec2 {
rhs.ty.clone()
} else if lhs.ty.kind == TypeKind::Mat2 && rhs.ty.kind == TypeKind::Float {
lhs.ty.clone()
} else {
promoted_type(&lhs.ty, &rhs.ty)
}
} else {
promoted_type(&lhs.ty, &rhs.ty)
};
// comparison operators have a bool result
let ty = match op {
BinaryOp::Equal | BinaryOp::GT | BinaryOp::GTE | BinaryOp::LT | BinaryOp::LTE => {
Type::new(TypeKind::Bool)
}
BinaryOp::Mult => {
match (lhs.ty.kind, rhs.ty.kind) {
(TypeKind::Mat2, TypeKind::Vec2) |
(TypeKind::Mat3, TypeKind::Vec3) |
(TypeKind::Mat4, TypeKind::Vec4) => rhs.ty.clone(),
(TypeKind::Mat2, TypeKind::Float) |
(TypeKind::Mat3, TypeKind::Float) |
(TypeKind::Mat4, TypeKind::Float) => lhs.ty.clone(),
_ => promoted_type(&lhs.ty, &rhs.ty),
}
}
_ => promoted_type(&lhs.ty, &rhs.ty),
_ => ty,
};
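// For reference (not part of the patch): both versions above encode GLSL's
// linear-algebra typing rules, e.g. mat3 * vec3 yields vec3 (a transformed
// vector) and mat2 * float yields mat2 (a scaled matrix), while all other
// operand combinations fall through to ordinary arithmetic promotion via
// promoted_type().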
Expr {
@@ -2240,7 +2245,7 @@ fn translate_expression(state: &mut State, e: &syntax::Expr) -> Expr {
}
}
match &state.sym(sym).decl {
SymDecl::NativeFunction(fn_ty, _, _) => {
SymDecl::NativeFunction(fn_ty, _) => {
let mut ret = None;
for sig in &fn_ty.signatures {
let mut matching = true;
@@ -2764,13 +2769,12 @@ fn translate_external_declaration(
}
}
fn declare_function_ext(
fn declare_function(
state: &mut State,
name: &str,
cxx_name: Option<&'static str>,
ret: Type,
params: Vec<Type>,
run_class: RunClass,
) {
let sig = FunctionSignature { ret, params };
match state.lookup_sym_mut(name) {
@@ -2786,7 +2790,6 @@ fn declare_function_ext(
signatures: NonEmpty::new(sig),
},
cxx_name,
run_class,
),
);
}
@@ -2795,16 +2798,6 @@ fn declare_function_ext(
//state.declare(name, Type::Function(FunctionType{ v}))
}
fn declare_function(
state: &mut State,
name: &str,
cxx_name: Option<&'static str>,
ret: Type,
params: Vec<Type>,
) {
declare_function_ext(state, name, cxx_name, ret, params, RunClass::Unknown)
}
pub fn ast_to_hir(state: &mut State, tu: &syntax::TranslationUnit) -> TranslationUnit {
// global scope
state.push_scope("global".into());
@@ -2816,13 +2809,6 @@ pub fn ast_to_hir(state: &mut State, tu: &syntax::TranslationUnit) -> Translatio
Type::new(Vec2),
vec![Type::new(Float)],
);
declare_function(
state,
"vec2",
Some("make_vec2"),
Type::new(Vec2),
vec![Type::new(Float), Type::new(Float)],
);
declare_function(
state,
"vec2",
@@ -2858,13 +2844,6 @@ pub fn ast_to_hir(state: &mut State, tu: &syntax::TranslationUnit) -> Translatio
Type::new(Vec3),
vec![Type::new(Vec2), Type::new(Float)],
);
declare_function(
state,
"vec4",
Some("make_vec4"),
Type::new(Vec4),
vec![Type::new(Float)],
);
declare_function(
state,
"vec4",
@@ -3574,154 +3553,6 @@ pub fn ast_to_hir(state: &mut State, tu: &syntax::TranslationUnit) -> Translatio
SymDecl::Global(StorageClass::Out, None, Type::new(Vec4), RunClass::Vector),
);
state.declare(
"swgl_SpanLength",
SymDecl::Global(StorageClass::In, None, Type::new(Int), RunClass::Scalar),
);
state.declare(
"swgl_StepSize",
SymDecl::Global(StorageClass::Const, None, Type::new(Int), RunClass::Scalar),
);
for t in &[Float, Vec2, Vec3, Vec4, Int, IVec2, IVec3, IVec4, Mat3, Mat4] {
declare_function_ext(
state,
"swgl_forceScalar",
None,
Type::new(*t),
vec![Type::new(*t)],
RunClass::Scalar,
);
}
declare_function(
state,
"swgl_stepInterp",
None,
Type::new(Void),
vec![],
);
for t in &[Float, Vec2, Vec3, Vec4] {
declare_function_ext(
state,
"swgl_interpStep",
None,
Type::new(*t),
vec![Type::new(*t)],
RunClass::Scalar,
);
}
declare_function(
state,
"swgl_commitSolidRGBA8",
None,
Type::new(Void),
vec![Type::new(Vec4)],
);
declare_function(
state,
"swgl_commitSolidR8",
None,
Type::new(Void),
vec![Type::new(Float)],
);
declare_function(
state,
"swgl_commitColorRGBA8",
None,
Type::new(Void),
vec![Type::new(Vec4), Type::new(Float)],
);
declare_function(
state,
"swgl_commitColorR8",
None,
Type::new(Void),
vec![Type::new(Float), Type::new(Float)],
);
for s in &[Sampler2D, Sampler2DRect, Sampler2DArray] {
declare_function(
state,
"swgl_isTextureLinear",
None,
Type::new(Bool),
vec![Type::new(*s)],
);
declare_function(
state,
"swgl_isTextureRGBA8",
None,
Type::new(Bool),
vec![Type::new(*s)],
);
declare_function(
state,
"swgl_isTextureR8",
None,
Type::new(Bool),
vec![Type::new(*s)],
);
declare_function(
state,
"swgl_textureLayerOffset",
None,
Type::new(Int),
vec![Type::new(*s), Type::new(Float)],
);
declare_function(
state,
"swgl_linearQuantize",
None,
Type::new(Vec2),
vec![Type::new(*s), Type::new(Vec2)],
);
declare_function(
state,
"swgl_linearQuantizeStep",
None,
Type::new(Vec2),
vec![Type::new(*s), Type::new(Vec2)],
);
declare_function(
state,
"swgl_commitTextureLinearRGBA8",
None,
Type::new(Void),
vec![Type::new(*s), Type::new(Vec2), Type::new(Int)],
);
declare_function(
state,
"swgl_commitTextureLinearR8",
None,
Type::new(Void),
vec![Type::new(*s), Type::new(Vec2), Type::new(Int)],
);
declare_function(
state,
"swgl_commitTextureLinearColorRGBA8",
None,
Type::new(Void),
vec![Type::new(*s), Type::new(Vec2), Type::new(Vec4), Type::new(Int)],
);
declare_function(
state,
"swgl_commitTextureLinearColorRGBA8",
None,
Type::new(Void),
vec![Type::new(*s), Type::new(Vec2), Type::new(Float), Type::new(Int)],
);
declare_function(
state,
"swgl_commitTextureLinearColorR8",
None,
Type::new(Void),
vec![Type::new(*s), Type::new(Vec2), Type::new(Float), Type::new(Int)],
);
}
TranslationUnit(tu.0.map(state, translate_external_declaration))
}
@@ -3772,13 +3603,7 @@ fn infer_expr_inner(state: &mut State, expr: &Expr, assign: &mut SymRef) -> RunC
};
match fun {
FunIdentifier::Identifier(ref sym) => match &state.sym(*sym).decl {
SymDecl::NativeFunction(_, _, ref ret_class) => {
if *ret_class != RunClass::Unknown {
*ret_class
} else {
run_class
}
}
SymDecl::NativeFunction(..) => run_class,
SymDecl::UserFunction(ref fd, ref run_class) => {
for (&(mut arg_class, assign), param) in
arg_classes.iter().zip(fd.prototype.parameters.iter())

View file

@@ -61,6 +61,8 @@ pub fn translate(args: &mut dyn Iterator<Item = String>) -> String {
.to_string_lossy()
.to_string();
let frag_include = args.next();
let (vs_state, vs_hir, vs_is_frag) = parse_shader(vertex_file);
let (fs_state, fs_hir, fs_is_frag) = parse_shader(frag_file);
@@ -77,6 +79,7 @@ pub fn translate(args: &mut dyn Iterator<Item = String>) -> String {
vs_hir,
vs_is_frag,
&uniform_indices,
None,
);
result += "\n";
result += &translate_shader(
@@ -85,6 +88,7 @@ pub fn translate(args: &mut dyn Iterator<Item = String>) -> String {
fs_hir,
fs_is_frag,
&uniform_indices,
frag_include,
);
result
}
@@ -116,6 +120,7 @@ fn translate_shader(
hir: hir::TranslationUnit,
is_frag: bool,
uniform_indices: &UniformIndices,
include_file: Option<String>,
) -> String {
//println!("{:#?}", state);
@@ -180,6 +185,8 @@ fn translate_shader(
uses_discard: false,
used_fragcoord: Cell::new(0),
use_perspective: false,
has_draw_span_rgba8: false,
has_draw_span_r8: false,
used_globals: RefCell::new(Vec::new()),
texel_fetches: RefCell::new(Vec::new()),
};
@@ -208,6 +215,10 @@ fn translate_shader(
show_translation_unit(&mut state, &hir);
if let Some(include_file) = include_file {
write_include_file(&mut state, include_file);
}
let pruned_inputs: Vec<_> = inputs
.iter()
.filter(|i| state.used_globals.borrow().contains(i))
@@ -726,6 +737,35 @@ fn write_read_inputs(state: &mut OutputState, inputs: &[hir::SymRef]) {
}
}
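// Note (added for clarity): the include file is scanned for C++ draw_span
// prototypes; a parameter list mentioning uint32_t marks an RGBA8 span shader
// and uint8_t an R8 span shader, which toggles emission of the matching
// draw_span_* dispatch stubs in write_abi.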
fn write_include_file(state: &mut OutputState, include_file: String) {
let include_contents = std::fs::read_to_string(&include_file).unwrap();
let mut offset = 0;
while offset < include_contents.len() {
let s = &include_contents[offset ..];
if let Some(start_proto) = s.find("draw_span") {
let s = &s[start_proto ..];
if let Some(end_proto) = s.find(')') {
let proto = &s[.. end_proto];
if proto.contains("uint32_t") {
state.has_draw_span_rgba8 = true;
} else if proto.contains("uint8_t") {
state.has_draw_span_r8 = true;
}
offset += start_proto + end_proto;
continue;
}
}
break;
}
let include_name = std::path::Path::new(&include_file)
.file_name()
.unwrap()
.to_string_lossy();
write!(state, "\n#include \"{}\"\n\n", include_name);
}
pub struct OutputState {
hir: hir::State,
output: String,
@@ -748,6 +788,8 @@ pub struct OutputState {
uses_discard: bool,
used_fragcoord: Cell<i32>,
use_perspective: bool,
has_draw_span_rgba8: bool,
has_draw_span_r8: bool,
used_globals: RefCell<Vec<hir::SymRef>>,
texel_fetches: RefCell<Vec<(hir::SymRef, hir::SymRef, hir::TexelFetchOffsets)>>,
}
@@ -816,7 +858,7 @@ fn add_used_global(state: &OutputState, i: &hir::SymRef) {
pub fn show_sym(state: &OutputState, i: &hir::SymRef) {
let sym = state.hir.sym(*i);
match &sym.decl {
hir::SymDecl::NativeFunction(_, ref cxx_name, _) => {
hir::SymDecl::NativeFunction(_, ref cxx_name) => {
let mut name = sym.name.as_str();
if state.output_cxx {
name = cxx_name.unwrap_or(name);
@@ -2677,45 +2719,6 @@ pub fn show_function_definition(
}
if state.output_cxx {
match fd.prototype.name.as_str() {
"swgl_drawSpanRGBA8" |
"swgl_drawSpanR8" => {
// Partial spans are not drawn using span shaders; instead, the fragment shader
// picks up where the span shader left off. We need to undo any changes to the interpolants made by
// the span shaders so that we can reset the interpolants to where the fragment shader
// expects them. We do this by saving them in an _Undo_ struct on entry to the span shader,
// and then restore them in the _Undo_ struct destructor.
let mut needs_undo = vec![];
for global in &fd.globals {
let sym = state.hir.sym(*global);
match &sym.decl {
hir::SymDecl::Global(hir::StorageClass::In, _, ty, hir::RunClass::Vector) => {
if needs_undo.is_empty() {
state.write("struct _Undo_ {\nSelf* self;\n");
}
show_type(state, ty);
write!(state, " {};\n", sym.name);
needs_undo.push(sym.name.clone());
}
_ => {}
}
}
if !needs_undo.is_empty() {
state.write("explicit _Undo_(Self* self) : self(self)");
for name in &needs_undo {
write!(state, ", {0}(self->{0})", name);
}
state.write(" {}\n");
state.write("~_Undo_() {\n");
for name in &needs_undo {
write!(state, "self->{0} = {0};\n", name);
}
state.write("}} _undo_(this);\n");
}
}
_ => {}
}
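// For a span shader with a single vector `in` varying (hypothetically named
// vUv), the code emitted above looks roughly like:
//   struct _Undo_ {
//     Self* self;
//     vec2 vUv;
//     explicit _Undo_(Self* self) : self(self), vUv(self->vUv) {}
//     ~_Undo_() { self->vUv = vUv; }
//   } _undo_(this);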
let mut texel_fetches = state.texel_fetches.borrow_mut();
texel_fetches.clear();
for ((sampler, base), offsets) in fd.texel_fetches.iter() {
@@ -3230,7 +3233,7 @@ pub fn show_jump_statement(state: &mut OutputState, j: &hir::JumpStatement) {
if state.output_cxx {
state.uses_discard = true;
if let Some(mask) = &state.mask {
state.write("swgl_IsPixelDiscarded |= (");
state.write("isPixelDiscarded |= (");
show_hir_expr(state, mask);
state.write(")");
if state.return_declared {
@@ -3238,7 +3241,7 @@ pub fn show_jump_statement(state: &mut OutputState, j: &hir::JumpStatement) {
}
state.write(";\n");
} else {
state.write("swgl_IsPixelDiscarded = true;\n");
state.write("isPixelDiscarded = true;\n");
}
} else {
state.write("discard;\n");
@@ -3543,13 +3546,11 @@ pub fn show_translation_unit(state: &mut OutputState, tu: &hir::TranslationUnit)
state.flush_buffer();
}
if state.output_cxx {
for name in &["main", "swgl_drawSpanRGBA8", "swgl_drawSpanR8"] {
if let Some(sym) = state.hir.lookup(name) {
show_cxx_function_definition(state, sym, 0);
if let Some(name) = state.hir.lookup("main") {
show_cxx_function_definition(state, name, 0);
state.flush_buffer();
}
}
}
}
fn write_abi(state: &mut OutputState) {
@@ -3557,7 +3558,7 @@ fn write_abi(state: &mut OutputState) {
ShaderKind::Fragment => {
state.write("static void run(Self *self) {\n");
if state.uses_discard {
state.write(" self->swgl_IsPixelDiscarded = false;\n");
state.write(" self->isPixelDiscarded = false;\n");
}
state.write(" self->main();\n");
state.write(" self->step_interp_inputs();\n");
@@ -3568,7 +3569,7 @@ fn write_abi(state: &mut OutputState) {
if state.use_perspective {
state.write("static void run_perspective(Self *self) {\n");
if state.uses_discard {
state.write(" self->swgl_IsPixelDiscarded = false;\n");
state.write(" self->isPixelDiscarded = false;\n");
}
state.write(" self->main();\n");
state.write(" self->step_perspective_inputs();\n");
@@ -3577,13 +3578,15 @@ fn write_abi(state: &mut OutputState) {
state.write(" self->step_perspective_inputs(steps);\n");
state.write("}\n");
}
if state.hir.lookup("swgl_drawSpanRGBA8").is_some() {
if state.has_draw_span_rgba8 {
state.write(
"static void draw_span_RGBA8(Self* self) { DISPATCH_DRAW_SPAN(self, RGBA8); }\n");
"static void draw_span_RGBA8(Self* self, uint32_t* buf, int len) { \
DISPATCH_DRAW_SPAN(self, buf, len); }\n");
}
if state.hir.lookup("swgl_drawSpanR8").is_some() {
if state.has_draw_span_r8 {
state.write(
"static void draw_span_R8(Self* self) { DISPATCH_DRAW_SPAN(self, R8); }\n");
"static void draw_span_R8(Self* self, uint8_t* buf, int len) { \
DISPATCH_DRAW_SPAN(self, buf, len); }\n");
}
write!(state, "public:\n{}_frag() {{\n", state.name);
@@ -3605,10 +3608,10 @@ fn write_abi(state: &mut OutputState) {
state.write(" init_span_func = (InitSpanFunc)&read_interp_inputs;\n");
state.write(" run_func = (RunFunc)&run;\n");
state.write(" skip_func = (SkipFunc)&skip;\n");
if state.hir.lookup("swgl_drawSpanRGBA8").is_some() {
if state.has_draw_span_rgba8 {
state.write(" draw_span_RGBA8_func = (DrawSpanRGBA8Func)&draw_span_RGBA8;\n");
}
if state.hir.lookup("swgl_drawSpanR8").is_some() {
if state.has_draw_span_r8 {
state.write(" draw_span_R8_func = (DrawSpanR8Func)&draw_span_R8;\n");
}
if state.uses_discard {

View file

@@ -90,11 +90,16 @@ fn translate_shader(shader_key: &str, shader_dir: &str) {
std::fs::write(&vs_name, vs).unwrap();
std::fs::write(&fs_name, fs).unwrap();
let args = vec![
let mut args = vec![
"glsl_to_cxx".to_string(),
vs_name,
fs_name,
];
let frag_include = format!("{}/{}.frag.h", shader_dir, shader);
if std::path::Path::new(&frag_include).exists() {
println!("cargo:rerun-if-changed={}/{}.frag.h", shader_dir, shader);
args.push(frag_include);
}
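// Example (hypothetical shader key "brush_image"): if brush_image.frag.h
// exists in shader_dir, it is appended as the optional trailing argument that
// glsl_to_cxx::translate() reads as frag_include and #include's into the
// generated fragment shader.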
let result = glsl_to_cxx::translate(&mut args.into_iter());
std::fs::write(format!("{}/{}.h", out_dir, shader), result).unwrap();
}
@@ -128,7 +133,6 @@ fn main() {
println!("cargo:rerun-if-changed=src/gl_defs.h");
println!("cargo:rerun-if-changed=src/glsl.h");
println!("cargo:rerun-if-changed=src/program.h");
println!("cargo:rerun-if-changed=src/swgl_ext.h");
println!("cargo:rerun-if-changed=src/texture.h");
println!("cargo:rerun-if-changed=src/vector_type.h");
println!("cargo:rerun-if-changed=src/gl.cc");

View file

@@ -129,13 +129,13 @@ static void linear_row_blit(uint8_t* dest, int span, const vec2_scalar& srcUV,
vec2 uv = init_interp(srcUV, vec2_scalar(srcDU, 0.0f));
for (; span >= 4; span -= 4) {
auto srcpx = textureLinearPackedR8(sampler, ivec2(uv), srcZOffset);
unaligned_store(dest, srcpx);
unaligned_store(dest, pack(srcpx));
dest += 4;
uv.x += 4 * srcDU;
}
if (span > 0) {
auto srcpx = textureLinearPackedR8(sampler, ivec2(uv), srcZOffset);
partial_store_span(dest, srcpx, span);
partial_store_span(dest, pack(srcpx), span);
}
}
@@ -446,9 +446,7 @@ void Composite(LockedTexture* lockedDst, LockedTexture* lockedSrc, GLint srcX,
IntRect dstReq = {dstX, dstY, dstX + dstWidth, dstY + dstHeight};
if (opaque) {
// Ensure we have rows of at least 2 pixels when using the linear filter
// to avoid overreading the row.
if (!srcReq.same_size(dstReq) && srctex.width >= 2 && filter == GL_LINEAR) {
if (!srcReq.same_size(dstReq) && filter == GL_LINEAR) {
linear_blit(srctex, srcReq, 0, dsttex, dstReq, 0, flip, bandOffset,
bandHeight);
} else {
@@ -456,7 +454,7 @@ void Composite(LockedTexture* lockedDst, LockedTexture* lockedSrc, GLint srcX,
bandHeight);
}
} else {
if (!srcReq.same_size(dstReq) && srctex.width >= 2) {
if (!srcReq.same_size(dstReq)) {
linear_composite(srctex, srcReq, dsttex, dstReq, flip, bandOffset,
bandHeight);
} else {

View file

@@ -43,7 +43,6 @@
#include "gl_defs.h"
#include "glsl.h"
#include "program.h"
#include "texture.h"
using namespace glsl;
@@ -907,11 +906,7 @@ static inline void init_depth(S* s, Texture& t) {
template <typename S>
static inline void init_filter(S* s, Texture& t) {
// If the width is not at least 2 pixels, then we can't safely sample the end
// of the row with a linear filter. In that case, just punt to using nearest
// filtering instead.
s->filter = t.width >= 2 ? gl_filter_to_texture_filter(t.mag_filter)
: TextureFilter::NEAREST;
s->filter = gl_filter_to_texture_filter(t.mag_filter);
}
template <typename S>
@@ -2613,6 +2608,83 @@ void CopyTexSubImage2D(GLenum target, UNUSED GLint level, GLint xoffset,
} // extern "C"
using PackedRGBA8 = V16<uint8_t>;
using WideRGBA8 = V16<uint16_t>;
using HalfRGBA8 = V8<uint16_t>;
static inline WideRGBA8 unpack(PackedRGBA8 p) { return CONVERT(p, WideRGBA8); }
template <int N>
UNUSED static ALWAYS_INLINE VectorType<uint8_t, N> genericPackWide(
VectorType<uint16_t, N> p) {
typedef VectorType<uint8_t, N> packed_type;
// Generic conversions only mask off the low byte without actually clamping
// like a real pack. First force the word to all 1s if it overflows, and then
// add on the sign bit to cause it to roll over to 0 if it was negative.
p = (p | (p > 255)) + (p >> 15);
return CONVERT(p, packed_type);
}
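// Worked example per 16-bit lane: 100 -> (100 | 0) + 0 = 100; 300 ->
// (300 | 0xFFFF) + 0 = 0xFFFF, whose low byte is 255; an underflowed 0xFFFE
// ("-2") -> (0xFFFE | 0xFFFF) + 1 wraps to 0, matching a saturating pack.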
static inline PackedRGBA8 pack(WideRGBA8 p) {
#if USE_SSE2
return _mm_packus_epi16(lowHalf(p), highHalf(p));
#elif USE_NEON
return vcombine_u8(vqmovn_u16(lowHalf(p)), vqmovn_u16(highHalf(p)));
#else
return genericPackWide(p);
#endif
}
static inline HalfRGBA8 packRGBA8(I32 a, I32 b) {
#if USE_SSE2
return _mm_packs_epi32(a, b);
#elif USE_NEON
return vcombine_u16(vqmovun_s32(a), vqmovun_s32(b));
#else
return CONVERT(combine(a, b), HalfRGBA8);
#endif
}
using PackedR8 = V4<uint8_t>;
using WideR8 = V4<uint16_t>;
static inline WideR8 unpack(PackedR8 p) { return CONVERT(p, WideR8); }
static inline WideR8 packR8(I32 a) {
#if USE_SSE2
return lowHalf(bit_cast<V8<uint16_t>>(_mm_packs_epi32(a, a)));
#elif USE_NEON
return vqmovun_s32(a);
#else
return CONVERT(a, WideR8);
#endif
}
static inline PackedR8 pack(WideR8 p) {
#if USE_SSE2
auto m = expand(p);
auto r = bit_cast<V16<uint8_t>>(_mm_packus_epi16(m, m));
return SHUFFLE(r, r, 0, 1, 2, 3);
#elif USE_NEON
return lowHalf(bit_cast<V8<uint8_t>>(vqmovn_u16(expand(p))));
#else
return genericPackWide(p);
#endif
}
using PackedRG8 = V8<uint8_t>;
using WideRG8 = V8<uint16_t>;
static inline PackedRG8 pack(WideRG8 p) {
#if USE_SSE2
return lowHalf(bit_cast<V16<uint8_t>>(_mm_packus_epi16(p, p)));
#elif USE_NEON
return bit_cast<V8<uint8_t>>(vqmovn_u16(p));
#else
return genericPackWide(p);
#endif
}
using ZMask = I32;
static inline PackedRGBA8 convert_zmask(ZMask mask, uint32_t*) {
@@ -2676,21 +2748,11 @@ static ALWAYS_INLINE void discard_depth(Z z, DepthRun* zbuf, I32 mask) {
if (ctx->depthmask) {
I32 src = I32(z);
I32 dest = unaligned_load<I32>(zbuf);
mask |= fragment_shader->swgl_IsPixelDiscarded;
mask |= fragment_shader->isPixelDiscarded;
unaligned_store(zbuf, (mask & dest) | (~mask & src));
}
}
static inline HalfRGBA8 packRGBA8(I32 a, I32 b) {
#if USE_SSE2
return _mm_packs_epi32(a, b);
#elif USE_NEON
return vcombine_u16(vqmovun_s32(a), vqmovun_s32(b));
#else
return CONVERT(combine(a, b), HalfRGBA8);
#endif
}
static inline WideRGBA8 pack_pixels_RGBA8(const vec4& v) {
ivec4 i = round_pixel(v);
HalfRGBA8 xz = packRGBA8(i.z, i.x);
@@ -2702,7 +2764,7 @@ static inline WideRGBA8 pack_pixels_RGBA8(const vec4& v) {
return combine(lo, hi);
}
UNUSED static inline WideRGBA8 pack_pixels_RGBA8(const vec4_scalar& v) {
static inline WideRGBA8 pack_pixels_RGBA8(const vec4_scalar& v) {
I32 i = round_pixel((Float){v.z, v.y, v.x, v.w});
HalfRGBA8 c = packRGBA8(i, i);
return combine(c, c);
@@ -2712,6 +2774,15 @@ static inline WideRGBA8 pack_pixels_RGBA8() {
return pack_pixels_RGBA8(fragment_shader->gl_FragColor);
}
template <typename V>
static inline PackedRGBA8 pack_span(uint32_t*, const V& v) {
return pack(pack_pixels_RGBA8(v));
}
static inline PackedRGBA8 pack_span(uint32_t*) {
return pack(pack_pixels_RGBA8());
}
// (x*y + x) >> 8, cheap approximation of (x*y) / 255
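// e.g. muldiv255(255, 255) = (65025 + 255) >> 8 = 255 and
// muldiv255(255, 128) = (32640 + 255) >> 8 = 128, exact at the endpoints and
// at most 1 off elsewhere.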
template <typename T>
static inline T muldiv255(T x, T y) {
@@ -2797,8 +2868,7 @@ static inline void discard_output(uint32_t* buf, PackedRGBA8 mask) {
PackedRGBA8 dst = unaligned_load<PackedRGBA8>(buf);
WideRGBA8 r = pack_pixels_RGBA8();
if (blend_key) r = blend_pixels_RGBA8(dst, r);
if (DISCARD)
mask |= bit_cast<PackedRGBA8>(fragment_shader->swgl_IsPixelDiscarded);
if (DISCARD) mask |= bit_cast<PackedRGBA8>(fragment_shader->isPixelDiscarded);
unaligned_store(buf, (mask & dst) | (~mask & pack(r)));
}
@@ -2822,22 +2892,19 @@ static inline PackedRGBA8 span_mask(uint32_t*, int span) {
return span_mask_RGBA8(span);
}
static inline WideR8 packR8(I32 a) {
#if USE_SSE2
return lowHalf(bit_cast<V8<uint16_t>>(_mm_packs_epi32(a, a)));
#elif USE_NEON
return vqmovun_s32(a);
#else
return CONVERT(a, WideR8);
#endif
}
static inline WideR8 pack_pixels_R8(Float c) { return packR8(round_pixel(c)); }
static inline WideR8 pack_pixels_R8() {
return pack_pixels_R8(fragment_shader->gl_FragColor.x);
}
template <typename C>
static inline PackedR8 pack_span(uint8_t*, C c) {
return pack(pack_pixels_R8(c));
}
static inline PackedR8 pack_span(uint8_t*) { return pack(pack_pixels_R8()); }
static inline WideR8 blend_pixels_R8(WideR8 dst, WideR8 src) {
switch (blend_key) {
case BLEND_KEY_NONE:
@@ -2859,7 +2926,7 @@ static inline void discard_output(uint8_t* buf, WideR8 mask) {
WideR8 dst = unpack(unaligned_load<PackedR8>(buf));
WideR8 r = pack_pixels_R8();
if (blend_key) r = blend_pixels_R8(dst, r);
if (DISCARD) mask |= packR8(fragment_shader->swgl_IsPixelDiscarded);
if (DISCARD) mask |= packR8(fragment_shader->isPixelDiscarded);
unaligned_store(buf, pack((mask & dst) | (~mask & r)));
}
@@ -2928,7 +2995,67 @@ static inline void commit_output(P* buf, Z z, DepthRun* zbuf, int span) {
}
}
#include "swgl_ext.h"
static inline void commit_span(uint32_t* buf, PackedRGBA8 r) {
if (blend_key)
r = pack(blend_pixels_RGBA8(unaligned_load<PackedRGBA8>(buf), unpack(r)));
unaligned_store(buf, r);
}
UNUSED static inline void commit_solid_span(uint32_t* buf, PackedRGBA8 r,
int len) {
if (blend_key) {
auto src = unpack(r);
for (uint32_t* end = &buf[len]; buf < end; buf += 4) {
unaligned_store(
buf, pack(blend_pixels_RGBA8(unaligned_load<PackedRGBA8>(buf), src)));
}
} else {
fill_n(buf, len, bit_cast<U32>(r).x);
}
}
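// Note: with blending disabled, the packed chunk holds four identical pixels,
// so fill_n just replicates its first 32-bit lane across the span.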
UNUSED static inline void commit_texture_span(uint32_t* buf, uint32_t* src,
int len) {
if (blend_key) {
for (uint32_t* end = &buf[len]; buf < end; buf += 4, src += 4) {
PackedRGBA8 r = unaligned_load<PackedRGBA8>(src);
unaligned_store(buf, pack(blend_pixels_RGBA8(
unaligned_load<PackedRGBA8>(buf), unpack(r))));
}
} else {
memcpy(buf, src, len * sizeof(uint32_t));
}
}
static inline void commit_span(uint8_t* buf, PackedR8 r) {
if (blend_key)
r = pack(blend_pixels_R8(unpack(unaligned_load<PackedR8>(buf)), unpack(r)));
unaligned_store(buf, r);
}
UNUSED static inline void commit_solid_span(uint8_t* buf, PackedR8 r, int len) {
if (blend_key) {
auto src = unpack(r);
for (uint8_t* end = &buf[len]; buf < end; buf += 4) {
unaligned_store(buf, pack(blend_pixels_R8(
unpack(unaligned_load<PackedR8>(buf)), src)));
}
} else {
fill_n((uint32_t*)buf, len / 4, bit_cast<uint32_t>(r));
}
}
#define DISPATCH_DRAW_SPAN(self, buf, len) \
do { \
int drawn = self->draw_span(buf, len); \
if (drawn) self->step_interp_inputs(drawn); \
for (buf += drawn; drawn < len; drawn += 4, buf += 4) { \
run(self); \
commit_span(buf, pack_span(buf)); \
} \
} while (0)
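// Contract sketch: draw_span returns how many pixels the span shader fully
// committed; interpolants are advanced past those, and any remainder is
// rendered by running the fragment shader one 4-pixel chunk at a time.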
#include "texture.h"
#pragma GCC diagnostic push
#pragma GCC diagnostic ignored "-Wuninitialized"
@@ -3553,7 +3680,7 @@ static inline void draw_perspective_spans(int nump, Point3D* p,
// cancel out the 1/w baked into the interpolants.
fragment_shader->gl_FragCoord.z = init_interp(zw.x, stepZW.x);
fragment_shader->gl_FragCoord.w = init_interp(zw.y, stepZW.y);
fragment_shader->swgl_StepZW = stepZW;
fragment_shader->stepZW = stepZW;
// Change in interpolants is difference between current right and left
// edges per the change in right and left X. The left and right
// interpolant values were previously multiplied by 1/w, so the step and

View file

@@ -1532,14 +1532,6 @@ struct vec4_scalar {
w /= a.w;
return *this;
}
friend bool operator==(const vec4_scalar& l, const vec4_scalar& r) {
return l.x == r.x && l.y == r.y && l.z == r.z && l.w == r.w;
}
friend bool operator!=(const vec4_scalar& l, const vec4_scalar& r) {
return l.x != r.x || l.y != r.y || l.z != r.z || l.w != r.w;
}
};
vec4_scalar vec2_scalar::sel(XYZW c1, XYZW c2, XYZW c3, XYZW c4) {

View file

@@ -82,8 +82,9 @@ struct FragmentShaderImpl {
const void* step);
typedef void (*RunWFunc)(FragmentShaderImpl*);
typedef void (*SkipWFunc)(FragmentShaderImpl*, int steps);
typedef void (*DrawSpanRGBA8Func)(FragmentShaderImpl*);
typedef void (*DrawSpanR8Func)(FragmentShaderImpl*);
typedef void (*DrawSpanRGBA8Func)(FragmentShaderImpl*, uint32_t* buf,
int len);
typedef void (*DrawSpanR8Func)(FragmentShaderImpl*, uint8_t* buf, int len);
InitSpanFunc init_span_func = nullptr;
RunFunc run_func = nullptr;
@@ -107,24 +108,16 @@ struct FragmentShaderImpl {
}
vec4 gl_FragCoord;
vec2_scalar stepZW;
Bool isPixelDiscarded = false;
vec4 gl_FragColor;
vec4 gl_SecondaryFragColor;
vec2_scalar swgl_StepZW;
Bool swgl_IsPixelDiscarded = false;
// The current buffer position for committing span output.
uint32_t* swgl_OutRGBA8 = nullptr;
uint8_t* swgl_OutR8 = nullptr;
// The remaining number of pixels in the span.
int32_t swgl_SpanLength = 0;
// The number of pixels in a step.
enum : int32_t { swgl_StepSize = 4 };
ALWAYS_INLINE void step_fragcoord(int steps = 4) { gl_FragCoord.x += steps; }
ALWAYS_INLINE void step_perspective(int steps = 4) {
gl_FragCoord.z += swgl_StepZW.x * steps;
gl_FragCoord.w += swgl_StepZW.y * steps;
gl_FragCoord.z += stepZW.x * steps;
gl_FragCoord.w += stepZW.y * steps;
}
template <bool W = false>
@@ -143,9 +136,7 @@ struct FragmentShaderImpl {
}
ALWAYS_INLINE void draw_span(uint32_t* buf, int len) {
swgl_OutRGBA8 = buf;
swgl_SpanLength = len;
(*draw_span_RGBA8_func)(this);
(*draw_span_RGBA8_func)(this, buf, len);
}
ALWAYS_INLINE bool has_draw_span(uint32_t*) {
@@ -153,9 +144,7 @@ struct FragmentShaderImpl {
}
ALWAYS_INLINE void draw_span(uint8_t* buf, int len) {
swgl_OutR8 = buf;
swgl_SpanLength = len;
(*draw_span_R8_func)(this);
(*draw_span_R8_func)(this, buf, len);
}
ALWAYS_INLINE bool has_draw_span(uint8_t*) {

View file

@@ -1,196 +0,0 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
static inline void commit_span(uint32_t* buf, PackedRGBA8 r) {
if (blend_key)
r = pack(blend_pixels_RGBA8(unaligned_load<PackedRGBA8>(buf), unpack(r)));
unaligned_store(buf, r);
}
UNUSED static inline void commit_solid_span(uint32_t* buf, PackedRGBA8 r,
int len) {
if (blend_key) {
auto src = unpack(r);
for (uint32_t* end = &buf[len]; buf < end; buf += 4) {
unaligned_store(
buf, pack(blend_pixels_RGBA8(unaligned_load<PackedRGBA8>(buf), src)));
}
} else {
fill_n(buf, len, bit_cast<U32>(r).x);
}
}
UNUSED static inline void commit_texture_span(uint32_t* buf, uint32_t* src,
int len) {
if (blend_key) {
for (uint32_t* end = &buf[len]; buf < end; buf += 4, src += 4) {
PackedRGBA8 r = unaligned_load<PackedRGBA8>(src);
unaligned_store(buf, pack(blend_pixels_RGBA8(
unaligned_load<PackedRGBA8>(buf), unpack(r))));
}
} else {
memcpy(buf, src, len * sizeof(uint32_t));
}
}
static inline void commit_span(uint8_t* buf, PackedR8 r) {
if (blend_key)
r = pack(blend_pixels_R8(unpack(unaligned_load<PackedR8>(buf)), unpack(r)));
unaligned_store(buf, r);
}
UNUSED static inline void commit_solid_span(uint8_t* buf, PackedR8 r, int len) {
if (blend_key) {
auto src = unpack(r);
for (uint8_t* end = &buf[len]; buf < end; buf += 4) {
unaligned_store(buf, pack(blend_pixels_R8(
unpack(unaligned_load<PackedR8>(buf)), src)));
}
} else {
fill_n((uint32_t*)buf, len / 4, bit_cast<uint32_t>(r));
}
}
template <typename V>
static inline PackedRGBA8 pack_span(uint32_t*, const V& v) {
return pack(pack_pixels_RGBA8(v));
}
static inline PackedRGBA8 pack_span(uint32_t*) {
return pack(pack_pixels_RGBA8());
}
template <typename C>
static inline PackedR8 pack_span(uint8_t*, C c) {
return pack(pack_pixels_R8(c));
}
static inline PackedR8 pack_span(uint8_t*) { return pack(pack_pixels_R8()); }
// Forces a value with vector run-class to have scalar run-class.
template <typename T>
static ALWAYS_INLINE auto swgl_forceScalar(T v) -> decltype(force_scalar(v)) {
return force_scalar(v);
}
// Advance all varying interpolants by a single chunk
#define swgl_stepInterp() step_interp_inputs()
// Pseudo-intrinsic that accesses the interpolation step for a given varying
#define swgl_interpStep(v) (interp_step.v)
// Commit an entire span of a solid color
#define swgl_commitSolid(format, v) \
do { \
commit_solid_span(swgl_Out##format, pack_span(swgl_Out##format, (v)), \
swgl_SpanLength); \
swgl_Out##format += swgl_SpanLength; \
swgl_SpanLength = 0; \
} while (0)
#define swgl_commitSolidRGBA8(v) swgl_commitSolid(RGBA8, v)
#define swgl_commitSolidR8(v) swgl_commitSolid(R8, v)
#define swgl_commitChunk(format, chunk) \
do { \
commit_span(swgl_Out##format, chunk); \
swgl_Out##format += swgl_StepSize; \
swgl_SpanLength -= swgl_StepSize; \
} while (0)
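// Usage sketch (mirroring the generated span shaders): a swgl_drawSpan* body
// drains the span chunk by chunk, e.g.
//   while (swgl_SpanLength > 0) {
//     swgl_commitTextureLinearRGBA8(sColor0, swgl_linearQuantize(sColor0, uv), layer);
//     uv += step_uv;
//   }
// whereas swgl_commitSolid* commits the entire remaining span in one call.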
static inline WideRGBA8 pack_pixels_RGBA8(Float alpha) {
I32 i = round_pixel(alpha);
HalfRGBA8 c = packRGBA8(zipLow(i, i), zipHigh(i, i));
return combine(zipLow(c, c), zipHigh(c, c));
}
// Commit a single chunk of a color scaled by an alpha weight
#define swgl_commitColor(format, color, alpha) \
swgl_commitChunk(format, pack(muldiv255(pack_pixels_##format(color), \
pack_pixels_##format(alpha))))
#define swgl_commitColorRGBA8(color, alpha) \
swgl_commitColor(RGBA8, color, alpha)
#define swgl_commitColorR8(color, alpha) swgl_commitColor(R8, color, alpha)
template <typename S>
static ALWAYS_INLINE bool swgl_isTextureLinear(S s) {
return s->filter == TextureFilter::LINEAR;
}
template <typename S>
static ALWAYS_INLINE bool swgl_isTextureRGBA8(S s) {
return s->format == TextureFormat::RGBA8;
}
template <typename S>
static ALWAYS_INLINE bool swgl_isTextureR8(S s) {
return s->format == TextureFormat::R8;
}
// Returns the offset into the texture buffer for the given layer index. If not
// a texture array or 3D texture, this will always access the first layer.
template <typename S>
static ALWAYS_INLINE int swgl_textureLayerOffset(S s, float layer) {
return 0;
}
UNUSED static ALWAYS_INLINE int swgl_textureLayerOffset(sampler2DArray s,
float layer) {
return clampCoord(int(layer), s->depth) * s->height_stride;
}
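// e.g. layer 2.0 of a texture array addresses buf + 2 * height_stride, while
// plain 2D samplers ignore the layer and use offset 0.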
// Use the default linear quantization scale of 128. This gives 7 bits of
// fractional precision, which when multiplied with a signed 9 bit value
// still fits in a 16 bit integer.
const int swgl_LinearQuantizeScale = 128;
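// e.g. a texel coordinate of 3.5 quantizes to 3.5 * 128 = 448; i >> 7 then
// recovers the integer texel (3) and i & 0x7F the fraction (64, i.e. 0.5).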
// Quantizes UVs for access into a linear texture.
template <typename S, typename T>
static ALWAYS_INLINE T swgl_linearQuantize(S s, T p) {
return linearQuantize(p, swgl_LinearQuantizeScale, s);
}
// Quantizes an interpolation step for UVs for access into a linear texture.
template <typename S, typename T>
static ALWAYS_INLINE T swgl_linearQuantizeStep(S s, T p) {
return samplerScale(s, p) * swgl_LinearQuantizeScale;
}
// Commit a single chunk from a linear texture fetch
#define swgl_commitTextureLinear(format, s, p, ...) \
swgl_commitChunk(format, \
textureLinearPacked##format(s, ivec2(p), __VA_ARGS__))
#define swgl_commitTextureLinearRGBA8(s, p, ...) \
swgl_commitTextureLinear(RGBA8, s, p, __VA_ARGS__)
#define swgl_commitTextureLinearR8(s, p, ...) \
swgl_commitTextureLinear(R8, s, p, __VA_ARGS__)
// Commit a single chunk from a linear texture fetch that is scaled by a color
#define swgl_commitTextureLinearColor(format, s, p, color, ...) \
swgl_commitChunk( \
format, \
pack(muldiv255(textureLinearUnpacked##format(s, ivec2(p), __VA_ARGS__), \
pack_pixels_##format(color))))
#define swgl_commitTextureLinearColorRGBA8(s, p, color, ...) \
swgl_commitTextureLinearColor(RGBA8, s, p, color, __VA_ARGS__)
#define swgl_commitTextureLinearColorR8(s, p, color, ...) \
swgl_commitTextureLinearColor(R8, s, p, color, __VA_ARGS__)
// Dispatch helper used by the GLSL translator to call the swgl_drawSpan functions.
// The number of pixels committed is tracked by checking for the difference in
// swgl_SpanLength. Any varying interpolants used will be advanced past the
// committed part of the span in case the fragment shader must be executed for
// any remaining pixels that were not committed by the span shader.
#define DISPATCH_DRAW_SPAN(self, format) \
do { \
int total = self->swgl_SpanLength; \
self->swgl_drawSpan##format(); \
int drawn = total - self->swgl_SpanLength; \
if (drawn) self->step_interp_inputs(drawn); \
while (self->swgl_SpanLength > 0) { \
run(self); \
commit_span(self->swgl_Out##format, pack_span(self->swgl_Out##format)); \
self->swgl_Out##format += swgl_StepSize; \
self->swgl_SpanLength -= swgl_StepSize; \
} \
} while (0)

View file

@@ -4,62 +4,6 @@
namespace glsl {
using PackedRGBA8 = V16<uint8_t>;
using WideRGBA8 = V16<uint16_t>;
using HalfRGBA8 = V8<uint16_t>;
SI WideRGBA8 unpack(PackedRGBA8 p) { return CONVERT(p, WideRGBA8); }
template <int N>
UNUSED SI VectorType<uint8_t, N> genericPackWide(VectorType<uint16_t, N> p) {
typedef VectorType<uint8_t, N> packed_type;
// Generic conversions only mask off the low byte without actually clamping
// like a real pack. First force the word to all 1s if it overflows, and then
// add on the sign bit to cause it to roll over to 0 if it was negative.
p = (p | (p > 255)) + (p >> 15);
return CONVERT(p, packed_type);
}
SI PackedRGBA8 pack(WideRGBA8 p) {
#if USE_SSE2
return _mm_packus_epi16(lowHalf(p), highHalf(p));
#elif USE_NEON
return vcombine_u8(vqmovn_u16(lowHalf(p)), vqmovn_u16(highHalf(p)));
#else
return genericPackWide(p);
#endif
}
using PackedR8 = V4<uint8_t>;
using WideR8 = V4<uint16_t>;
SI WideR8 unpack(PackedR8 p) { return CONVERT(p, WideR8); }
SI PackedR8 pack(WideR8 p) {
#if USE_SSE2
auto m = expand(p);
auto r = bit_cast<V16<uint8_t>>(_mm_packus_epi16(m, m));
return SHUFFLE(r, r, 0, 1, 2, 3);
#elif USE_NEON
return lowHalf(bit_cast<V8<uint8_t>>(vqmovn_u16(expand(p))));
#else
return genericPackWide(p);
#endif
}
using PackedRG8 = V8<uint8_t>;
using WideRG8 = V8<uint16_t>;
SI PackedRG8 pack(WideRG8 p) {
#if USE_SSE2
return lowHalf(bit_cast<V16<uint8_t>>(_mm_packus_epi16(p, p)));
#elif USE_NEON
return bit_cast<V8<uint8_t>>(vqmovn_u16(p));
#else
return genericPackWide(p);
#endif
}
SI I32 clampCoord(I32 coord, int limit, int base = 0) {
#if USE_SSE2
return _mm_min_epi16(_mm_max_epi16(coord, _mm_set1_epi32(base)),
@@ -463,58 +407,135 @@ SI T linearQuantize(T P, float scale) {
// Helper version that also scales normalized texture coords for sampler
template <typename T, typename S>
SI T samplerScale(S sampler, T P) {
SI T linearQuantize(T P, float scale, S sampler) {
P.x *= sampler->width;
P.y *= sampler->height;
return P;
return linearQuantize(P, scale);
}
template <typename T>
SI T samplerScale(sampler2DRect sampler, T P) {
return P;
SI T linearQuantize(T P, float scale, sampler2DRect sampler) {
return linearQuantize(P, scale);
}
template <typename T, typename S>
SI T linearQuantize(T P, float scale, S sampler) {
return linearQuantize(samplerScale(sampler, P), scale);
}
// Compute clamped offset of first row for linear interpolation
template <typename S>
SI I32 computeRow(S sampler, ivec2 i, int32_t zoffset, size_t margin = 1) {
return clampCoord(i.x, sampler->width - margin) +
clampCoord(i.y, sampler->height) * sampler->stride + zoffset;
}
// Compute clamped offset of second row for linear interpolation from first row
template <typename S>
SI I32 computeNextRowOffset(S sampler, ivec2 i) {
return (i.y >= 0 && i.y < int32_t(sampler->height) - 1) &
I32(sampler->stride);
}
// Convert X coordinate to a 2^7 scale fraction for interpolation
template <typename S>
SI I16 computeFracX(S sampler, ivec2 i, ivec2 frac) {
auto overread = i.x > int32_t(sampler->width) - 2;
return CONVERT((((frac.x & (i.x >= 0)) | overread) & 0x7F) - overread, I16);
}
// Convert Y coordinate to a 2^7 scale fraction for interpolation
SI I16 computeFracY(ivec2 frac) { return CONVERT(frac.y & 0x7F, I16); }
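// The fractions drive a 2^7 fixed-point lerp between adjacent texels:
// filtered = a0 + (((a1 - a0) * frac) >> 7), applied across rows with fracy
// and across columns with fracx.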
template <typename S>
vec4 textureLinearRGBA8(S sampler, vec2 P, int32_t zoffset = 0) {
assert(sampler->format == TextureFormat::RGBA8);
#if USE_SSE2
ivec2 i(linearQuantize(P, 256, sampler));
ivec2 frac = i & (I32)0xFF;
i >>= 8;
// Pack coords so they get clamped into range, and also for later bounding
// of fractional coords. Store Y as low-bits for easier access, X as high.
__m128i yx = _mm_packs_epi32(i.y, i.x);
__m128i hw = _mm_packs_epi32(_mm_set1_epi32(sampler->height - 1),
_mm_set1_epi32(sampler->width - 1));
// Clamp coords to valid range to prevent sampling outside texture.
__m128i clampyx = _mm_min_epi16(_mm_max_epi16(yx, _mm_setzero_si128()), hw);
// Multiply clamped Y by stride and add X offset without overflowing 2^15
// stride and accidentally yielding signed result.
__m128i row0 =
_mm_madd_epi16(_mm_unpacklo_epi16(clampyx, clampyx),
_mm_set1_epi32((sampler->stride - 1) | 0x10000));
row0 = _mm_add_epi32(row0, _mm_unpackhi_epi16(clampyx, _mm_setzero_si128()));
// Add in layer offset if available
row0 = _mm_add_epi32(row0, _mm_set1_epi32(zoffset));
// Check if fractional coords are all zero, in which case skip filtering.
__m128i fracyx = _mm_packs_epi32(frac.y, frac.x);
if (!_mm_movemask_epi8(_mm_cmpgt_epi16(fracyx, _mm_setzero_si128()))) {
return fetchOffsetsRGBA8(sampler, row0);
}
// Check if coords were clamped at all above. If so, need to adjust fractions
// to avoid sampling outside the texture on the edges.
__m128i yxinside = _mm_andnot_si128(_mm_cmplt_epi16(yx, _mm_setzero_si128()),
_mm_cmplt_epi16(yx, hw));
// Set fraction to zero when outside.
fracyx = _mm_and_si128(fracyx, yxinside);
// Store two side-by-side copies of X fraction, as below each pixel value
// will be interleaved to be next to the pixel value for the next row.
__m128i fracx = _mm_unpackhi_epi16(fracyx, fracyx);
// For Y fraction, we need to store 1-fraction before each fraction, as a
// madd will be used to weight and collapse all results as last step.
__m128i fracy =
_mm_unpacklo_epi16(_mm_sub_epi16(_mm_set1_epi16(256), fracyx), fracyx);
// Ensure we don't sample row off end of texture from added stride.
__m128i row1 = _mm_and_si128(yxinside, _mm_set1_epi16(sampler->stride));
// Load two adjacent pixels on each row and interleave them.
// r0,g0,b0,a0,r1,g1,b1,a1 \/ R0,G0,B0,A0,R1,G1,B1,A1
// r0,R0,g0,G0,b0,B0,a0,A0,r1,R1,g1,G1,b1,B1,a1,A1
# define LOAD_LANE(out, idx) \
{ \
uint32_t* buf = &sampler->buf[_mm_cvtsi128_si32( \
_mm_shuffle_epi32(row0, _MM_SHUFFLE(idx, idx, idx, idx)))]; \
out = _mm_unpacklo_epi8( \
_mm_loadl_epi64((__m128i*)buf), \
_mm_loadl_epi64((__m128i*)(buf + _mm_extract_epi16(row1, idx)))); \
}
__m128i x, y, z, w;
LOAD_LANE(x, 0)
LOAD_LANE(y, 1)
LOAD_LANE(z, 2)
LOAD_LANE(w, 3)
# undef LOAD_LANE
// Need to transpose the data from AoS to SoA format. Best to do this here
// while the data is still packed into 8-bit components, requiring fewer
// insns.
// r0,R0,g0,G0,b0,B0,a0,A0,r1,R1,g1,G1,b1,B1,a1,A1 \/
// r2,R2,g2,G2,b2,B2,a2,A2,r3,R3,g3,G3,b3,B3,a3,A3
// ... r0,R0,r2,R2,g0,G0,g2,G2,b0,B0,b2,B2,a0,A0,a2,A2
// ... r1,R1,r3,R3,g1,G1,g3,G3,b1,B1,b3,B3,a1,A1,a3,A3
__m128i xy0 = _mm_unpacklo_epi16(x, y);
__m128i xy1 = _mm_unpackhi_epi16(x, y);
__m128i zw0 = _mm_unpacklo_epi16(z, w);
__m128i zw1 = _mm_unpackhi_epi16(z, w);
// r0,R0,r2,R2,g0,G0,g2,G2,b0,B0,b2,B2,a0,A0,a2,A2 \/
// r4,R4,r6,R6,g4,G4,g6,G6,b4,B4,b6,B6,a4,A4,a6,A6
// ... r0,R0,r2,R2,r4,R4,r6,R6,g0,G0,g2,G2,g4,G4,g6,G6
// ... b0,B0,b2,B2,b4,B4,b6,B6,a0,A0,a2,A2,a4,A4,a6,A6
__m128i rg0 = _mm_unpacklo_epi32(xy0, zw0);
__m128i ba0 = _mm_unpackhi_epi32(xy0, zw0);
__m128i rg1 = _mm_unpacklo_epi32(xy1, zw1);
__m128i ba1 = _mm_unpackhi_epi32(xy1, zw1);
// Expand packed SoA pixels for each column. Multiply then add columns with
// 8-bit precision so we don't carry to high byte of word accidentally. Use
// final madd insn to blend interleaved rows and expand result to 32 bits.
# define FILTER_COMPONENT(out, unpack, src0, src1) \
{ \
__m128i cc0 = unpack(src0, _mm_setzero_si128()); \
__m128i cc1 = unpack(src1, _mm_setzero_si128()); \
cc0 = _mm_add_epi8( \
cc0, \
_mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(cc1, cc0), fracx), 8)); \
out = _mm_cvtepi32_ps(_mm_madd_epi16(cc0, fracy)); \
}
__m128 fr, fg, fb, fa;
FILTER_COMPONENT(fr, _mm_unpacklo_epi8, rg0, rg1);
FILTER_COMPONENT(fg, _mm_unpackhi_epi8, rg0, rg1);
FILTER_COMPONENT(fb, _mm_unpacklo_epi8, ba0, ba1);
FILTER_COMPONENT(fa, _mm_unpackhi_epi8, ba0, ba1);
# undef FILTER_COMPONENT
return vec4(fb, fg, fr, fa) * (1.0f / 0xFF00);
#else
ivec2 i(linearQuantize(P, 128, sampler));
ivec2 frac = i;
ivec2 frac = i & (I32)0x7F;
i >>= 7;
I32 row0 = computeRow(sampler, i, zoffset);
I32 row1 = row0 + computeNextRowOffset(sampler, i);
I16 fracx = computeFracX(sampler, i, frac);
I16 fracy = computeFracY(frac);
I32 row0 = clampCoord(i.x, sampler->width) +
clampCoord(i.y, sampler->height) * sampler->stride + zoffset;
I32 row1 = row0 + ((i.y >= 0 && i.y < int32_t(sampler->height) - 1) &
I32(sampler->stride));
I16 fracx =
CONVERT(frac.x & (i.x >= 0 && i.x < int32_t(sampler->width) - 1), I16);
I16 fracy = CONVERT(frac.y, I16);
auto a0 =
CONVERT(unaligned_load<V8<uint8_t>>(&sampler->buf[row0.x]), V8<int16_t>);
@@ -556,19 +577,22 @@ vec4 textureLinearRGBA8(S sampler, vec2 P, int32_t zoffset = 0) {
auto b = lowHalf(ba);
auto a = highHalf(ba);
return vec4(b, g, r, a) * (1.0f / 255.0f);
#endif
}
template <typename S>
static inline U16 textureLinearUnpackedR8(S sampler, ivec2 i,
int32_t zoffset = 0) {
static inline U16 textureLinearPackedR8(S sampler, ivec2 i, int32_t zoffset) {
assert(sampler->format == TextureFormat::R8);
ivec2 frac = i;
ivec2 frac = i & (I32)0x7F;
i >>= 7;
I32 row0 = computeRow(sampler, i, zoffset);
I32 row1 = row0 + computeNextRowOffset(sampler, i);
I16 fracx = computeFracX(sampler, i, frac);
I16 fracy = computeFracY(frac);
I32 row0 = clampCoord(i.x, sampler->width) +
clampCoord(i.y, sampler->height) * sampler->stride + zoffset;
I32 row1 = row0 + ((i.y >= 0 && i.y < int32_t(sampler->height) - 1) &
I32(sampler->stride));
I16 fracx =
CONVERT(frac.x & (i.x >= 0 && i.x < int32_t(sampler->width) - 1), I16);
I16 fracy = CONVERT(frac.y, I16);
uint8_t* buf = (uint8_t*)sampler->buf;
auto a0 = unaligned_load<V2<uint8_t>>(&buf[row0.x]);
@@ -597,9 +621,81 @@ template <typename S>
vec4 textureLinearR8(S sampler, vec2 P, int32_t zoffset = 0) {
assert(sampler->format == TextureFormat::R8);
#if USE_SSE2
ivec2 i(linearQuantize(P, 256, sampler));
ivec2 frac = i & (I32)0xFF;
i >>= 8;
// Pack coords so they get clamped into range, and also for later bounding
// of fractional coords. Store Y as low-bits for easier access, X as high.
__m128i yx = _mm_packs_epi32(i.y, i.x);
__m128i hw = _mm_packs_epi32(_mm_set1_epi32(sampler->height - 1),
_mm_set1_epi32(sampler->width - 1));
// Clamp coords to valid range to prevent sampling outside texture.
__m128i clampyx = _mm_min_epi16(_mm_max_epi16(yx, _mm_setzero_si128()), hw);
// Multiply clamped Y by stride and add X offset without overflowing 2^15
// stride and accidentally yielding signed result.
__m128i row0 =
_mm_madd_epi16(_mm_unpacklo_epi16(clampyx, clampyx),
_mm_set1_epi32((sampler->stride - 1) | 0x10000));
row0 = _mm_add_epi32(row0, _mm_unpackhi_epi16(clampyx, _mm_setzero_si128()));
// Add in layer offset if available
row0 = _mm_add_epi32(row0, _mm_set1_epi32(zoffset));
__m128i fracyx = _mm_packs_epi32(frac.y, frac.x);
// Check if coords were clamped at all above. If so, need to adjust fractions
// to avoid sampling outside the texture on the edges.
__m128i yxinside = _mm_andnot_si128(_mm_cmplt_epi16(yx, _mm_setzero_si128()),
_mm_cmplt_epi16(yx, hw));
// Set fraction to zero when outside.
fracyx = _mm_and_si128(fracyx, yxinside);
// For X fraction, we need to store 1-fraction before each fraction, as a
// madd will be used to weight and collapse all results as last step.
__m128i fracx =
_mm_unpackhi_epi16(_mm_sub_epi16(_mm_set1_epi16(256), fracyx), fracyx);
// Store two side-by-side copies of Y fraction, as below each pixel value
// will be interleaved to be next to the pixel value for the next column.
__m128i fracy = _mm_unpacklo_epi16(fracyx, fracyx);
// Ensure we don't sample row off end of texture from added stride.
__m128i row1 = _mm_and_si128(yxinside, _mm_set1_epi16(sampler->stride));
// Calculate pointers for first row in each lane
uint8_t* buf = (uint8_t*)sampler->buf;
uint8_t* buf0 =
buf + _mm_cvtsi128_si32(_mm_shuffle_epi32(row0, _MM_SHUFFLE(0, 0, 0, 0)));
uint8_t* buf1 =
buf + _mm_cvtsi128_si32(_mm_shuffle_epi32(row0, _MM_SHUFFLE(1, 1, 1, 1)));
uint8_t* buf2 =
buf + _mm_cvtsi128_si32(_mm_shuffle_epi32(row0, _MM_SHUFFLE(2, 2, 2, 2)));
uint8_t* buf3 =
buf + _mm_cvtsi128_si32(_mm_shuffle_epi32(row0, _MM_SHUFFLE(3, 3, 3, 3)));
// Load adjacent columns from first row, pack into register, then expand.
__m128i cc0 = _mm_unpacklo_epi8(
_mm_setr_epi16(*(uint16_t*)buf0, *(uint16_t*)buf1, *(uint16_t*)buf2,
*(uint16_t*)buf3, 0, 0, 0, 0),
_mm_setzero_si128());
// Load adjacent columns from next row, pack into register, then expand.
__m128i cc1 = _mm_unpacklo_epi8(
_mm_setr_epi16(*(uint16_t*)(buf0 + _mm_extract_epi16(row1, 0)),
*(uint16_t*)(buf1 + _mm_extract_epi16(row1, 1)),
*(uint16_t*)(buf2 + _mm_extract_epi16(row1, 2)),
*(uint16_t*)(buf3 + _mm_extract_epi16(row1, 3)), 0, 0, 0,
0),
_mm_setzero_si128());
// Multiply then add rows with 8-bit precision so we don't carry to high byte
// of word accidentally. Use final madd insn to blend interleaved columns and
// expand result to 32 bits.
__m128i cc = _mm_add_epi8(
cc0, _mm_srli_epi16(_mm_mullo_epi16(_mm_sub_epi16(cc1, cc0), fracy), 8));
__m128 r = _mm_cvtepi32_ps(_mm_madd_epi16(cc, fracx));
return vec4((Float)r * (1.0f / 0xFF00), 0.0f, 0.0f, 1.0f);
#else
ivec2 i(linearQuantize(P, 128, sampler));
Float r = CONVERT(textureLinearUnpackedR8(sampler, i, zoffset), Float);
Float r = CONVERT(textureLinearPackedR8(sampler, i, zoffset), Float);
return vec4(r * (1.0f / 255.0f), 0.0f, 0.0f, 1.0f);
#endif
}
template <typename S>
@@ -607,13 +703,16 @@ vec4 textureLinearRG8(S sampler, vec2 P, int32_t zoffset = 0) {
assert(sampler->format == TextureFormat::RG8);
ivec2 i(linearQuantize(P, 128, sampler));
ivec2 frac = i;
ivec2 frac = i & (I32)0x7F;
i >>= 7;
I32 row0 = computeRow(sampler, i, zoffset);
I32 row1 = row0 + computeNextRowOffset(sampler, i);
I16 fracx = computeFracX(sampler, i, frac);
I16 fracy = computeFracY(frac);
I32 row0 = clampCoord(i.x, sampler->width) +
clampCoord(i.y, sampler->height) * sampler->stride + zoffset;
I32 row1 = row0 + ((i.y >= 0 && i.y < int32_t(sampler->height) - 1) &
I32(sampler->stride));
I16 fracx =
CONVERT(frac.x & (i.x >= 0 && i.x < int32_t(sampler->width) - 1), I16);
I16 fracy = CONVERT(frac.y, I16);
uint16_t* buf = (uint16_t*)sampler->buf;
@@ -665,18 +764,18 @@ static inline I16 textureLinearPackedR16(S sampler, ivec2 i,
int32_t zoffset = 0) {
assert(sampler->format == TextureFormat::R16);
ivec2 frac = i;
ivec2 frac = i & (I32)0x7F;
i >>= 7;
I32 row0 = computeRow(sampler, i, zoffset);
I32 row1 = row0 + computeNextRowOffset(sampler, i);
I32 row0 = clampCoord(i.x, sampler->width) +
clampCoord(i.y, sampler->height) * sampler->stride + zoffset;
I32 row1 = row0 + ((i.y >= 0 && i.y < int32_t(sampler->height) - 1) &
I32(sampler->stride));
I16 fracx =
CONVERT(
((frac.x & (i.x >= 0)) | (i.x > int32_t(sampler->width) - 2)) & 0x7F,
I16)
CONVERT(frac.x & (i.x >= 0 && i.x < int32_t(sampler->width) - 1), I16)
<< 8;
I16 fracy = computeFracY(frac) << 8;
I16 fracy = CONVERT(frac.y, I16) << 8;
// Sample the 16 bit data for both rows
uint16_t* buf = (uint16_t*)sampler->buf;
@@ -745,17 +844,17 @@ vec4 textureLinearR16(S sampler, vec2 P, int32_t zoffset = 0) {
template <typename S>
vec4 textureLinearRGBA32F(S sampler, vec2 P, int32_t zoffset = 0) {
assert(sampler->format == TextureFormat::RGBA32F);
P = samplerScale(sampler, P);
P.x *= sampler->width;
P.y *= sampler->height;
P -= 0.5f;
vec2 f = floor(P);
vec2 r = P - f;
ivec2 i(f);
ivec2 c(clampCoord(i.x, sampler->width - 1),
clampCoord(i.y, sampler->height));
r.x = if_then_else(i.x >= 0, if_then_else(i.x < sampler->width - 1, r.x, 1.0),
0.0f);
ivec2 c = clamp2D(i, sampler);
r.x = if_then_else(i.x >= 0 && i.x < sampler->width - 1, r.x, 0.0f);
I32 offset0 = c.x * 4 + c.y * sampler->stride + zoffset;
I32 offset1 = offset0 + computeNextRowOffset(sampler, i);
I32 offset1 = offset0 + ((i.y >= 0 && i.y < int32_t(sampler->height) - 1) &
I32(sampler->stride));
Float c0 = mix(mix(*(Float*)&sampler->buf[offset0.x],
*(Float*)&sampler->buf[offset0.x + 4], r.x),
@@ -785,26 +884,27 @@ vec4 textureLinearYUV422(S sampler, vec2 P, int32_t zoffset = 0) {
assert(sampler->format == TextureFormat::YUV422);
ivec2 i(linearQuantize(P, 128, sampler));
ivec2 frac = i;
ivec2 frac = i & (I32)0x7F;
i >>= 7;
I32 row0 = computeRow(sampler, i, zoffset, 2);
I32 row0 = clampCoord(i.x, sampler->width) +
clampCoord(i.y, sampler->height) * sampler->stride + zoffset;
// Layout is 2 pixel chunks (occupying 4 bytes) organized as: G0, B, G1, R.
// Get the selector for the pixel within the chunk.
I32 selector = row0 & 1;
// Align the row index to the chunk.
row0 &= ~1;
I32 row1 = row0 + computeNextRowOffset(sampler, i);
I32 row1 = row0 + ((i.y >= 0 && i.y < int32_t(sampler->height) - 1) &
I32(sampler->stride));
// G only needs to be clamped to a pixel boundary for safe interpolation,
// whereas the BR fraction needs to be clamped 1 extra pixel inside to a chunk
// boundary.
frac.x &= (i.x >= 0);
auto fracx =
CONVERT(combine(frac.x | (i.x > int32_t(sampler->width) - 3),
(frac.x >> 1) | (i.x > int32_t(sampler->width) - 3)) &
0x7F,
auto fracx = CONVERT(combine(frac.x & (i.x < int32_t(sampler->width) - 1),
((frac.x >> 1) | (selector << 6)) &
(i.x < int32_t(sampler->width) - 2)),
V8<int16_t>);
I16 fracy = computeFracY(frac);
I16 fracy = CONVERT(frac.y, I16);
uint16_t* buf = (uint16_t*)sampler->buf;
@@ -963,16 +1063,18 @@ ivec2_scalar textureSize(sampler2DRect sampler) {
}
template <typename S>
static WideRGBA8 textureLinearUnpackedRGBA8(S sampler, ivec2 i,
int zoffset = 0) {
static WideRGBA8 textureLinearUnpackedRGBA8(S sampler, ivec2 i, int zoffset) {
assert(sampler->format == TextureFormat::RGBA8);
ivec2 frac = i;
ivec2 frac = i & 0x7F;
i >>= 7;
I32 row0 = computeRow(sampler, i, zoffset);
I32 row1 = row0 + computeNextRowOffset(sampler, i);
I16 fracx = computeFracX(sampler, i, frac);
I16 fracy = computeFracY(frac);
I32 row0 = clampCoord(i.x, sampler->width) +
clampCoord(i.y, sampler->height) * sampler->stride + zoffset;
I32 row1 = row0 + ((i.y >= 0 && i.y < int32_t(sampler->height) - 1) &
I32(sampler->stride));
I16 fracx =
CONVERT(frac.x & (i.x >= 0 && i.x < int32_t(sampler->width) - 1), I16);
I16 fracy = CONVERT(frac.y, I16);
auto a0 =
CONVERT(unaligned_load<V8<uint8_t>>(&sampler->buf[row0.x]), V8<int16_t>);
@@ -1010,26 +1112,98 @@ static WideRGBA8 textureLinearUnpackedRGBA8(S sampler, ivec2 i,
}
template <typename S>
static PackedRGBA8 textureLinearPackedRGBA8(S sampler, ivec2 i,
int zoffset = 0) {
static PackedRGBA8 textureLinearPackedRGBA8(S sampler, ivec2 i, int zoffset) {
return pack(textureLinearUnpackedRGBA8(sampler, i, zoffset));
}
template <typename S>
static PackedR8 textureLinearPackedR8(S sampler, ivec2 i, int zoffset = 0) {
return pack(textureLinearUnpackedR8(sampler, i, zoffset));
static inline void textureLinearCommit4(S sampler, ivec2 i, int zoffset,
uint32_t* buf) {
commit_span(buf, textureLinearPackedRGBA8(sampler, i, zoffset));
}
template <typename S>
static PackedRG8 textureLinearPackedRG8(S sampler, ivec2 i, int zoffset = 0) {
static void textureLinearCommit8(S sampler, ivec2_scalar i, int zoffset,
uint32_t* buf) {
assert(sampler->format == TextureFormat::RGBA8);
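// Fast path committing eight horizontally adjacent pixels from a scalar start
// coordinate: two rows of nine texels are loaded (pix0/pix0n plus the
// row0[8]/row1[8] edge texels) and blended in 2^7 fixed point, rows first and
// then columns, before committing two 4-pixel chunks.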
ivec2_scalar frac = i & 0x7F;
i >>= 7;
uint32_t* row0 =
&sampler
->buf[clampCoord(i.x, sampler->width) +
clampCoord(i.y, sampler->height) * sampler->stride + zoffset];
uint32_t* row1 =
row0 +
((i.y >= 0 && i.y < int32_t(sampler->height) - 1) ? sampler->stride : 0);
int16_t fracx = i.x >= 0 && i.x < int32_t(sampler->width) - 1 ? frac.x : 0;
int16_t fracy = frac.y;
U32 pix0 = unaligned_load<U32>(row0);
U32 pix0n = unaligned_load<U32>(row0 + 4);
uint32_t pix0x = row0[8];
U32 pix1 = unaligned_load<U32>(row1);
U32 pix1n = unaligned_load<U32>(row1 + 4);
uint32_t pix1x = row1[8];
{
auto ab0 = CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix0, pix0, 0, 1, 1, 2)),
V16<int16_t>);
auto ab1 = CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix1, pix1, 0, 1, 1, 2)),
V16<int16_t>);
ab0 += ((ab1 - ab0) * fracy) >> 7;
auto cd0 = CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix0, pix0n, 2, 3, 3, 4)),
V16<int16_t>);
auto cd1 = CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix1, pix1n, 2, 3, 3, 4)),
V16<int16_t>);
cd0 += ((cd1 - cd0) * fracy) >> 7;
auto abcdl = combine(lowHalf(ab0), lowHalf(cd0));
auto abcdh = combine(highHalf(ab0), highHalf(cd0));
abcdl += ((abcdh - abcdl) * fracx) >> 7;
commit_span(buf, pack(WideRGBA8(abcdl)));
}
{
auto ab0 =
CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix0n, pix0n, 0, 1, 1, 2)),
V16<int16_t>);
auto ab1 =
CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix1n, pix1n, 0, 1, 1, 2)),
V16<int16_t>);
ab0 += ((ab1 - ab0) * fracy) >> 7;
auto cd0 =
CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix0n, U32(pix0x), 2, 3, 3, 4)),
V16<int16_t>);
auto cd1 =
CONVERT(bit_cast<V16<uint8_t>>(SHUFFLE(pix1n, U32(pix1x), 2, 3, 3, 4)),
V16<int16_t>);
cd0 += ((cd1 - cd0) * fracy) >> 7;
auto abcdl = combine(lowHalf(ab0), lowHalf(cd0));
auto abcdh = combine(highHalf(ab0), highHalf(cd0));
abcdl += ((abcdh - abcdl) * fracx) >> 7;
commit_span(buf + 4, pack(WideRGBA8(abcdl)));
}
}
template <typename S>
static PackedRG8 textureLinearPackedRG8(S sampler, ivec2 i, int zoffset) {
assert(sampler->format == TextureFormat::RG8);
ivec2 frac = i & 0x7F;
i >>= 7;
I32 row0 = computeRow(sampler, i, zoffset);
I32 row1 = row0 + computeNextRowOffset(sampler, i);
I16 fracx = computeFracX(sampler, i, frac);
I16 fracy = computeFracY(frac);
I32 row0 = clampCoord(i.x, sampler->width) +
clampCoord(i.y, sampler->height) * sampler->stride + zoffset;
I32 row1 = row0 + ((i.y >= 0 && i.y < int32_t(sampler->height) - 1) &
I32(sampler->stride));
I16 fracx =
CONVERT(frac.x & (i.x >= 0 && i.x < int32_t(sampler->width) - 1), I16);
I16 fracy = CONVERT(frac.y, I16);
uint16_t* buf = (uint16_t*)sampler->buf;
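
(Note on the arithmetic in the samplers above, sketch only, not part of the diff: texture coordinates enter these functions in fixed point with a 7-bit fraction, so `ivec2 frac = i & 0x7F; i >>= 7;` splits them into a texel index and a fraction in [0, 127], and every `x += ((y - x) * frac) >> 7` line is a lerp in that 0.7 format. A minimal scalar sketch of the idiom, illustrative only rather than the SIMD code in the diff:)

#include <cstdint>

// Lerp two channel values with a 7-bit fixed-point fraction, mirroring the
// vectorized ((b - a) * frac) >> 7 pattern used throughout the samplers.
static inline int lerp7(int a, int b, int frac) {
  return a + (((b - a) * frac) >> 7);  // frac in [0, 127]
}

uint8_t sample_bilinear(const uint8_t* row0, const uint8_t* row1, int ix,
                        int fracx, int fracy) {
  int left = lerp7(row0[ix], row1[ix], fracy);           // blend rows (y) first,
  int right = lerp7(row0[ix + 1], row1[ix + 1], fracy);  // as the SIMD code does
  return uint8_t(lerp7(left, right, fracx));             // then blend along x
}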

View file

@@ -333,88 +333,4 @@ Fragment brush_fs() {
return frag;
}
#if defined(SWGL) && (!defined(WR_FEATURE_ALPHA_PASS) || !defined(WR_FEATURE_DUAL_SOURCE_BLENDING))
void swgl_drawSpanRGBA8() {
if (!swgl_isTextureRGBA8(sColor0) || !swgl_isTextureLinear(sColor0)) {
return;
}
#ifdef WR_FEATURE_ALPHA_PASS
if (v_mask_swizzle != vec2(1.0, 0.0)) {
return;
}
#endif
int layer = swgl_textureLayerOffset(sColor0, v_layer_and_perspective.x);
float perspective_divisor = mix(swgl_forceScalar(gl_FragCoord.w), 1.0, v_layer_and_perspective.y);
#ifndef WR_FEATURE_REPETITION
vec2 uv = swgl_linearQuantize(sColor0, v_uv * perspective_divisor + v_uv_bounds.xy);
vec2 min_uv = swgl_linearQuantize(sColor0, v_uv_sample_bounds.xy);
vec2 max_uv = swgl_linearQuantize(sColor0, v_uv_sample_bounds.zw);
vec2 step_uv = swgl_linearQuantizeStep(sColor0, swgl_interpStep(v_uv)) * perspective_divisor;
#endif
#ifdef WR_FEATURE_ALPHA_PASS
if (needs_clip()) {
while (swgl_SpanLength > 0) {
vec4 color = v_color * do_clip();
#ifdef WR_FEATURE_ANTIALIASING
color *= init_transform_fs(v_local_pos);
v_local_pos += swgl_interpStep(v_local_pos);
#endif
#ifdef WR_FEATURE_REPETITION
vec2 repeated_uv = compute_repeated_uvs(perspective_divisor);
vec2 uv = clamp(repeated_uv, v_uv_sample_bounds.xy, v_uv_sample_bounds.zw);
swgl_commitTextureLinearColorRGBA8(sColor0, swgl_linearQuantize(sColor0, uv), color, layer);
v_uv += swgl_interpStep(v_uv);
#else
swgl_commitTextureLinearColorRGBA8(sColor0, clamp(uv, min_uv, max_uv), color, layer);
uv += step_uv;
#endif
vClipMaskUv += swgl_interpStep(vClipMaskUv);
}
return;
#ifdef WR_FEATURE_ANTIALIASING
} else {
#else
} else if (v_color != vec4(1.0)) {
#endif
while (swgl_SpanLength > 0) {
vec4 color = v_color;
#ifdef WR_FEATURE_ANTIALIASING
color *= init_transform_fs(v_local_pos);
v_local_pos += swgl_interpStep(v_local_pos);
#endif
#ifdef WR_FEATURE_REPETITION
vec2 repeated_uv = compute_repeated_uvs(perspective_divisor);
vec2 uv = clamp(repeated_uv, v_uv_sample_bounds.xy, v_uv_sample_bounds.zw);
swgl_commitTextureLinearColorRGBA8(sColor0, swgl_linearQuantize(sColor0, uv), color, layer);
v_uv += swgl_interpStep(v_uv);
#else
swgl_commitTextureLinearColorRGBA8(sColor0, clamp(uv, min_uv, max_uv), color, layer);
uv += step_uv;
#endif
}
return;
}
// No clip or color scaling required, so just fall through to a normal textured span...
#endif
while (swgl_SpanLength > 0) {
#ifdef WR_FEATURE_REPETITION
vec2 repeated_uv = compute_repeated_uvs(perspective_divisor);
vec2 uv = clamp(repeated_uv, v_uv_sample_bounds.xy, v_uv_sample_bounds.zw);
swgl_commitTextureLinearRGBA8(sColor0, swgl_linearQuantize(sColor0, uv), layer);
v_uv += swgl_interpStep(v_uv);
#else
swgl_commitTextureLinearRGBA8(sColor0, clamp(uv, min_uv, max_uv), layer);
uv += step_uv;
#endif
}
}
#endif
#endif
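
(The removed `swgl_drawSpanRGBA8` above is a software-WebRender span fast path: rather than shading fragments one at a time, it loops over a whole horizontal span, committing pixels and stepping the interpolated varyings itself. A rough scalar model of that control flow, with assumed stand-in names for the SWGL intrinsics:)

#include <algorithm>

struct Vec2 { float x, y; };  // stand-in for the shader's vec2

// Walk a span: clamp the UV into the sample bounds, commit one pixel (or
// chunk), then step the interpolant, mirroring `uv += step_uv` above.
void draw_textured_span(int span_len, Vec2 uv, Vec2 step_uv, Vec2 min_uv,
                        Vec2 max_uv, void (*commit)(Vec2 uv)) {
  while (span_len-- > 0) {
    Vec2 clamped{std::clamp(uv.x, min_uv.x, max_uv.x),
                 std::clamp(uv.y, min_uv.y, max_uv.y)};
    commit(clamped);
    uv.x += step_uv.x;
    uv.y += step_uv.y;
  }
}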

View file

@@ -76,45 +76,4 @@ Fragment brush_fs() {
// Pre-multiply the contribution of the opacity factor.
return Fragment(alpha * color);
}
#if defined(SWGL) && !defined(WR_FEATURE_DUAL_SOURCE_BLENDING)
void swgl_drawSpanRGBA8() {
if (!swgl_isTextureRGBA8(sColor0)) {
return;
}
int layer = swgl_textureLayerOffset(sColor0, v_layer_and_perspective.x);
float perspective_divisor = mix(swgl_forceScalar(gl_FragCoord.w), 1.0, v_layer_and_perspective.y);
vec2 uv = swgl_linearQuantize(sColor0, v_uv * perspective_divisor);
vec2 min_uv = swgl_linearQuantize(sColor0, v_uv_sample_bounds.xy);
vec2 max_uv = swgl_linearQuantize(sColor0, v_uv_sample_bounds.zw);
vec2 step_uv = swgl_linearQuantizeStep(sColor0, swgl_interpStep(v_uv)) * perspective_divisor;
if (needs_clip()) {
while (swgl_SpanLength > 0) {
float alpha = v_opacity * do_clip();
#ifdef WR_FEATURE_ANTIALIASING
alpha *= init_transform_fs(v_local_pos);
v_local_pos += swgl_interpStep(v_local_pos);
#endif
swgl_commitTextureLinearColorRGBA8(sColor0, clamp(uv, min_uv, max_uv), alpha, layer);
uv += step_uv;
vClipMaskUv += swgl_interpStep(vClipMaskUv);
}
} else {
while (swgl_SpanLength > 0) {
float alpha = v_opacity;
#ifdef WR_FEATURE_ANTIALIASING
alpha *= init_transform_fs(v_local_pos);
v_local_pos += swgl_interpStep(v_local_pos);
#endif
swgl_commitTextureLinearColorRGBA8(sColor0, clamp(uv, min_uv, max_uv), alpha, layer);
uv += step_uv;
}
}
}
#endif
#endif

View file

@@ -0,0 +1,15 @@
/* This Source Code Form is subject to the terms of the Mozilla Public
* License, v. 2.0. If a copy of the MPL was not distributed with this
* file, You can obtain one at http://mozilla.org/MPL/2.0/. */
ALWAYS_INLINE int draw_span(uint32_t* buf, int len) {
auto color = pack_span(buf, v_color);
commit_solid_span(buf, color, len);
return len;
}
ALWAYS_INLINE int draw_span(uint8_t* buf, int len) {
auto color = pack_span(buf, v_color.x);
commit_solid_span(buf, color, len);
return len;
}
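
(The two restored `draw_span` overloads are the solid-color fast path: the color does not vary across the span, so it is packed once and the destination row is filled directly. A self-contained sketch of the same idea, where the packing layout and names are assumptions rather than SWGL's actual internals:)

#include <cstdint>

// One possible RGBA8 packing; the real byte order is backend-defined.
static inline uint32_t pack_rgba(float r, float g, float b, float a) {
  auto q = [](float v) { return uint32_t(v * 255.0f + 0.5f); };
  return (q(a) << 24) | (q(b) << 16) | (q(g) << 8) | q(r);
}

// Analogue of draw_span(uint32_t*, int): pack v_color once, fill the span.
int draw_solid_span(uint32_t* buf, int len, float r, float g, float b,
                    float a) {
  uint32_t color = pack_rgba(r, g, b, a);        // pack_span analogue
  for (int i = 0; i < len; ++i) buf[i] = color;  // commit_solid_span analogue
  return len;                                    // pixels written
}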

View file

@@ -54,55 +54,4 @@ Fragment brush_fs() {
#endif
return Fragment(color);
}
#if defined(SWGL) && (!defined(WR_FEATURE_ALPHA_PASS) || !defined(WR_FEATURE_DUAL_SOURCE_BLENDING))
void swgl_drawSpanRGBA8() {
#ifdef WR_FEATURE_ALPHA_PASS
if (needs_clip()) {
while (swgl_SpanLength > 0) {
float alpha = init_transform_fs(v_local_pos) * do_clip();
swgl_commitColorRGBA8(v_color, alpha);
v_local_pos += swgl_interpStep(v_local_pos);
vClipMaskUv += swgl_interpStep(vClipMaskUv);
}
return;
} else if (has_valid_transform_bounds()) {
while (swgl_SpanLength > 0) {
float alpha = init_transform_fs(v_local_pos);
swgl_commitColorRGBA8(v_color, alpha);
v_local_pos += swgl_interpStep(v_local_pos);
}
return;
}
// No clip or transform, so just fall through to a solid span...
#endif
swgl_commitSolidRGBA8(v_color);
}
void swgl_drawSpanR8() {
#ifdef WR_FEATURE_ALPHA_PASS
if (needs_clip()) {
while (swgl_SpanLength > 0) {
float alpha = init_transform_fs(v_local_pos) * do_clip();
swgl_commitColorR8(v_color.x, alpha);
v_local_pos += swgl_interpStep(v_local_pos);
vClipMaskUv += swgl_interpStep(vClipMaskUv);
}
return;
} else if (has_valid_transform_bounds()) {
while (swgl_SpanLength > 0) {
float alpha = init_transform_fs(v_local_pos);
swgl_commitColorR8(v_color.x, alpha);
v_local_pos += swgl_interpStep(v_local_pos);
}
return;
}
// No clip or transform, so just fall through to a solid span...
#endif
swgl_commitSolidR8(v_color.x);
}
#endif
#endif

View file

@@ -248,9 +248,6 @@ struct Fragment {
#endif
};
bool needs_clip() {
return vClipMaskUvBounds.xy != vClipMaskUvBounds.zw;
}
float do_clip() {
// check for the dummy bounds, which are given to the opaque objects

View file

@@ -93,11 +93,6 @@ vec4 get_node_pos(vec2 pos, Transform transform) {
#ifdef WR_FRAGMENT_SHADER
// Assume transform bounds are set to a large scale to signal they are invalid.
bool has_valid_transform_bounds() {
return vTransformBounds.w < 1.0e15;
}
float signed_distance_rect(vec2 pos, vec2 p0, vec2 p1) {
vec2 d = max(p0 - pos, pos - p1);
return length(max(vec2(0.0), d)) + min(0.0, max(d.x, d.y));
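
(`signed_distance_rect` above is the standard signed-distance function for an axis-aligned rectangle: negative inside [p0, p1], zero on the boundary, positive outside. A direct scalar C++ transcription of the GLSL, for reference:)

#include <algorithm>
#include <cmath>

float signed_distance_rect(float px, float py, float x0, float y0, float x1,
                           float y1) {
  // d = max(p0 - pos, pos - p1): positive components point outside the rect.
  float dx = std::max(x0 - px, px - x1);
  float dy = std::max(y0 - py, py - y1);
  float ox = std::max(dx, 0.0f), oy = std::max(dy, 0.0f);
  // length(max(vec2(0.0), d)) outside, plus the (negative) interior term.
  return std::sqrt(ox * ox + oy * oy) + std::min(0.0f, std::max(dx, dy));
}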

View file

@@ -5,7 +5,7 @@
platform(linux,mac) == inset-alpha.yaml inset-alpha.png
platform(linux,mac) == boxshadow-spread-only.yaml boxshadow-spread-only-ref.png
== box-shadow-clip.yaml box-shadow-clip-ref.yaml
fuzzy(1,402) == inset-large-offset.yaml inset-large-offset-ref.png
fuzzy(1,396) == inset-large-offset.yaml inset-large-offset-ref.png
platform(linux,mac) == inset-border-radius.yaml inset-border-radius.png
platform(linux,mac) == inset-offset.yaml inset-offset.png
platform(linux,mac) == inset-neg-offset.yaml inset-neg-offset.png
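
(A word on the `fuzzy(...)` annotations being retuned throughout these manifests: as used here, `fuzzy(d,n)` and `fuzzy(dmin-dmax,nmin-nmax)` bound both the maximum per-channel pixel difference and the number of differing pixels between the test and reference renderings. A hedged sketch of that comparison, illustrative only and not the actual reftest harness code:)

#include <algorithm>
#include <cstdint>
#include <cstdlib>

struct FuzzyRange { int minDiff, maxDiff, minCount, maxCount; };

// Compare two RGBA8 images; pass when the observed max channel difference
// and differing-pixel count both fall inside the annotated ranges.
bool fuzzy_matches(const uint8_t* a, const uint8_t* b, size_t bytes,
                   FuzzyRange f) {
  int maxDiff = 0, count = 0;
  for (size_t i = 0; i < bytes; i += 4) {  // one RGBA pixel per iteration
    int d = 0;
    for (int c = 0; c < 4; ++c)
      d = std::max(d, std::abs(int(a[i + c]) - int(b[i + c])));
    if (d > 0) { ++count; maxDiff = std::max(maxDiff, d); }
  }
  return maxDiff >= f.minDiff && maxDiff <= f.maxDiff &&
         count >= f.minCount && count <= f.maxCount;
}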

View file

@@ -2,7 +2,7 @@
platform(linux,mac) == draw_calls(7) color_targets(7) alpha_targets(0) filter-blur.yaml filter-blur.png
== isolated.yaml isolated-ref.yaml
== invisible.yaml invisible-ref.yaml
fuzzy-if(platform(swgl),1,10000) == opacity.yaml opacity-ref.yaml
== opacity.yaml opacity-ref.yaml
fuzzy-range(<=1,*10000) == opacity-combined.yaml opacity-combined-ref.yaml
== opacity-overlap.yaml opacity-overlap-ref.yaml
== filter-brightness.yaml filter-brightness-ref.yaml
@@ -30,7 +30,7 @@ skip_on(android,device) fuzzy(1,12) == draw_calls(6) color_targets(6) alpha_targ
== filter-saturate-blue-alpha-1.yaml filter-saturate-blue-alpha-1-ref.yaml
fuzzy(1,14) == filter-hue-rotate-1.yaml filter-hue-rotate-1-ref.yaml
skip_on(android,device) == filter-hue-rotate-alpha-1.yaml filter-hue-rotate-alpha-1-ref.yaml # Fails on Pixel2
skip_on(android,device) fuzzy(2,9072) fuzzy-if(platform(swgl),5,71071) == filter-long-chain.yaml filter-long-chain.png # fails on Pixel2
skip_on(android,device) fuzzy(2,9072) fuzzy-if(platform(swgl),2,35827) == filter-long-chain.yaml filter-long-chain.png # fails on Pixel2
platform(linux,mac) == filter-drop-shadow.yaml filter-drop-shadow.png
platform(linux,mac) == filter-drop-shadow-on-viewport-edge.yaml filter-drop-shadow-on-viewport-edge.png
platform(linux,mac) == blend-clipped.yaml blend-clipped.png

View file

@@ -91,7 +91,7 @@ fuzzy(1,115) == conic-color-wheel.yaml conic-color-wheel.png
# replaces a computed gradient by a sampled texture, so a lot of off-by-one
# variation from interpolation, which is fine:
fuzzy-range(<=1,*195000) == gradient_cache_5stops.yaml gradient_cache_5stops_ref.yaml
fuzzy-range(<=1,*171840) == gradient_cache_5stops_vertical.yaml gradient_cache_5stops_vertical_ref.yaml
fuzzy-range(<=1,*169000) == gradient_cache_5stops_vertical.yaml gradient_cache_5stops_vertical_ref.yaml
== gradient_cache_hardstop.yaml gradient_cache_hardstop_ref.yaml
== gradient_cache_hardstop_clip.yaml gradient_cache_hardstop_clip_ref.yaml
== gradient_cache_clamp.yaml gradient_cache_clamp_ref.yaml

View file

@@ -4,7 +4,7 @@
== tile-with-spacing.yaml tile-with-spacing-ref.yaml
skip_on(android,device) fuzzy(1,331264) == tile-repeat-prim-or-decompose.yaml tile-repeat-prim-or-decompose-ref.yaml
platform(linux,mac) options(allow-mipmaps) == downscale.yaml downscale.png
skip_on(android,device) fuzzy-if(platform(swgl),1,20) == segments.yaml segments.png
skip_on(android,device) == segments.yaml segments.png
platform(linux,mac) == yuv.yaml yuv.png
skip_on(android,device) == tiled-clip-chain.yaml tiled-clip-chain-ref.yaml
skip_on(android,device) == tiled-complex-clip.yaml tiled-complex-clip-ref.yaml

View file

@@ -14,7 +14,7 @@ fuzzy(1,1) == shadow-huge.yaml shadow-huge-ref.yaml
!= shadow-clipped-text.yaml blank.yaml
!= non-opaque.yaml non-opaque-notref.yaml
== decorations.yaml decorations-ref.yaml
skip_on(android,device) fuzzy(1,3001) fuzzy-if(platform(swgl),2,2658) == decorations-suite.yaml decorations-suite.png # Fails on Pixel2
skip_on(android,device) fuzzy(1,3001) == decorations-suite.yaml decorations-suite.png # Fails on Pixel2
== 1658.yaml 1658-ref.yaml
fuzzy(1,6) fuzzy-if(platform(swgl),1,391) == split-batch.yaml split-batch-ref.yaml
# Next 3 tests affected by bug 1548099 on Android

View file

@@ -178,7 +178,7 @@ fuzzy(0-20,0-999) != downscale-2f.html?205,53,bottom about:blank
# Skip on WinXP with skia content
# Skip on Android because it runs reftests via http, and moz-icon isn't
# accessible from http/https origins anymore.
fuzzy(0-53,0-6391) fuzzy-if(gtkWidget&&webrender,18-19,5502-5568) fails-if(/^Windows\x20NT\x205\.1/.test(http.oscpu)) skip-if(Android) == downscale-moz-icon-1.html downscale-moz-icon-1-ref.html # gtkWidget Bug 1592059
fuzzy(0-53,0-6391) fuzzy-if(gtkWidget&&webrender,19-19,5502-5568) fails-if(/^Windows\x20NT\x205\.1/.test(http.oscpu)) skip-if(Android) == downscale-moz-icon-1.html downscale-moz-icon-1-ref.html # gtkWidget Bug 1592059
== downscale-png.html?16,16,interlaced downscale-png.html?16,16,normal
== downscale-png.html?24,24,interlaced downscale-png.html?24,24,normal

View file

@@ -5,7 +5,7 @@ skip-if(!asyncPan) == bg-fixed-cover-3.html bg-fixed-cover-3-ref.html
skip-if(!asyncPan) == bg-fixed-child.html bg-fixed-child-ref.html
skip-if(!asyncPan) == bg-fixed-child-clip-1.html bg-fixed-child-clip-ref.html
skip-if(!asyncPan) == bg-fixed-child-clip-2.html bg-fixed-child-clip-ref.html
fuzzy(0-1,0-246) fuzzy-if(skiaContent,0-2,0-170) fuzzy-if(browserIsRemote&&d2d,0-59,0-187) fuzzy-if(webrender,41-42,166-176) skip-if(!asyncPan) == bg-fixed-child-mask.html bg-fixed-child-mask-ref.html
fuzzy(0-1,0-246) fuzzy-if(skiaContent,0-2,0-170) fuzzy-if(browserIsRemote&&d2d,0-59,0-187) fuzzy-if(webrender,41-41,166-176) skip-if(!asyncPan) == bg-fixed-child-mask.html bg-fixed-child-mask-ref.html
skip-if(!asyncPan) == bg-fixed-in-opacity.html bg-fixed-in-opacity-ref.html
# Passing the test below without WebRender would require implementing CSS filters in the Gecko compositor.
fails-if(!webrender) skip-if(!asyncPan) fuzzy-if(webrender&&gtkWidget,0-1,0-87) fuzzy-if(webrender&&!gtkWidget,0-1,0-3951) == bg-fixed-in-css-filter.html bg-fixed-in-css-filter-ref.html # bug 1454794 for webrender fuzziness
@@ -24,7 +24,7 @@ fuzzy-if(/^Windows\x20NT\x2010\.0/.test(http.oscpu),0-1,0-3120) skip-if(!asyncPa
skip-if(!asyncPan) == position-fixed-cover-1.html position-fixed-cover-1-ref.html
skip-if(!asyncPan) == position-fixed-cover-2.html position-fixed-cover-2-ref.html
skip-if(!asyncPan) == position-fixed-cover-3.html position-fixed-cover-3-ref.html
fuzzy-if(Android,0-8,0-4) fuzzy-if(webrender&&gtkWidget,32-32,30-32) fuzzy-if(webrender&&cocoaWidget,21-21,44-44) skip-if(!asyncPan) == position-fixed-transformed-1.html position-fixed-transformed-1-ref.html # Bug 1604338
fuzzy-if(Android,0-8,0-4) fuzzy-if(webrender&&gtkWidget,32-32,32-32) fuzzy-if(webrender&&cocoaWidget,21-21,44-44) skip-if(!asyncPan) == position-fixed-transformed-1.html position-fixed-transformed-1-ref.html # Bug 1604338
skip-if(!asyncPan) == split-layers-1.html split-layers-1-ref.html
skip-if(!asyncPan) == split-layers-multi-scrolling-1.html split-layers-multi-scrolling-1-ref.html
fuzzy-if(skiaContent,0-2,0-240000) fuzzy-if(browserIsRemote&&!skiaContent&&(cocoaWidget||winWidget),0-1,0-240000) skip-if(!asyncPan) == split-opacity-layers-1.html split-opacity-layers-1-ref.html
@@ -49,14 +49,14 @@ skip-if(!asyncPan) fails-if(!webrender) == sticky-inside-transform-1.html sticky
fuzzy(0-1,0-60000) skip-if(!asyncPan) == group-opacity-surface-size-1.html group-opacity-surface-size-1-ref.html
fuzzy-if(Android,0-1,0-197) fuzzy-if(webrender,0-9,0-99) skip-if(!asyncPan) == position-sticky-transformed.html position-sticky-transformed-ref.html
skip-if(!asyncPan) fuzzy-if(webrender&&cocoaWidget,1-1,396-396) fuzzy-if(webrender&&winWidget,0-1,0-396) == offscreen-prerendered-active-opacity.html offscreen-prerendered-active-opacity-ref.html
fuzzy-if(Android,0-6,0-4) fuzzy-if(skiaContent&&!Android,0-1,0-34) fuzzy-if(webrender&&gtkWidget,34-34,30-32) fuzzy-if(webrender&&cocoaWidget,7-7,38-39) skip-if(!asyncPan) == offscreen-clipped-blendmode-1.html offscreen-clipped-blendmode-ref.html # Bug 1604338
fuzzy-if(Android,0-6,0-4) fuzzy-if(webrender&&gtkWidget,34-34,30-32) fuzzy-if(webrender&&cocoaWidget,7-7,38-39) skip-if(!asyncPan) == offscreen-clipped-blendmode-2.html offscreen-clipped-blendmode-ref.html # Bug 1604338
fuzzy-if(Android,0-6,0-4) fuzzy-if(skiaContent&&!Android,0-1,0-34) fuzzy-if(webrender&&gtkWidget,34-34,32-32) fuzzy-if(webrender&&cocoaWidget,7-7,38-39) skip-if(!asyncPan) == offscreen-clipped-blendmode-1.html offscreen-clipped-blendmode-ref.html # Bug 1604338
fuzzy-if(Android,0-6,0-4) fuzzy-if(webrender&&gtkWidget,34-34,32-32) fuzzy-if(webrender&&cocoaWidget,7-7,38-39) skip-if(!asyncPan) == offscreen-clipped-blendmode-2.html offscreen-clipped-blendmode-ref.html # Bug 1604338
fuzzy-if(Android,0-6,0-4) skip == offscreen-clipped-blendmode-3.html offscreen-clipped-blendmode-ref.html # bug 1251588 - wrong AGR on mix-blend-mode item
fuzzy-if(Android,0-6,0-4) fuzzy-if(webrender&&gtkWidget,34-34,30-32) fuzzy-if(webrender&&cocoaWidget,7-7,38-39) skip-if(!asyncPan) == offscreen-clipped-blendmode-4.html offscreen-clipped-blendmode-ref.html # Bug 1604338
fuzzy-if(Android,0-6,0-4) fuzzy-if(webrender&&gtkWidget,34-34,32-32) fuzzy-if(webrender&&cocoaWidget,7-7,38-39) skip-if(!asyncPan) == offscreen-clipped-blendmode-4.html offscreen-clipped-blendmode-ref.html # Bug 1604338
fuzzy-if(Android,0-7,0-1600) fuzzy-if(webrender&&gtkWidget,1-1,10-20) fuzzy-if(webrender&&cocoaWidget,1-2,16-18) skip-if(!asyncPan) == perspective-scrolling-1.html perspective-scrolling-1-ref.html # Bug 1604338
fuzzy-if(Android,0-7,0-4) skip-if(!asyncPan) == perspective-scrolling-2.html perspective-scrolling-2-ref.html
fuzzy-if(Android,0-19,0-4) fuzzy-if(webrender&&gtkWidget,13-13,28-32) fuzzy-if(webrender&&cocoaWidget,13-13,44-44) skip-if(!asyncPan) == perspective-scrolling-3.html perspective-scrolling-3-ref.html # Bug 1604338
fuzzy-if(Android,0-7,0-4) fuzzy-if(webrender&&gtkWidget,29-30,30-32) fuzzy-if(webrender&&cocoaWidget,19-20,44-44) skip-if(!asyncPan) == perspective-scrolling-4.html perspective-scrolling-4-ref.html # Bug 1604338
fuzzy-if(Android,0-7,0-4) fuzzy-if(webrender&&gtkWidget,29-30,32-32) fuzzy-if(webrender&&cocoaWidget,19-20,44-44) skip-if(!asyncPan) == perspective-scrolling-4.html perspective-scrolling-4-ref.html # Bug 1604338
skip-if(!asyncPan) == perspective-scrolling-5.html perspective-scrolling-5-ref.html
pref(apz.disable_for_scroll_linked_effects,true) skip-if(!asyncPan) == disable-apz-for-sle-pages.html disable-apz-for-sle-pages-ref.html
fuzzy-if(browserIsRemote&&d2d,0-1,0-22) skip-if(!asyncPan) fuzzy-if(geckoview,2-2,242-242) skip-if(geckoview&&debug) == background-blend-mode-1.html background-blend-mode-1-ref.html # bug 1558286 for GV
@@ -68,14 +68,14 @@ fuzzy-if(Android,0-6,0-8) fuzzy-if(webrender&&gtkWidget,28-28,56-60) fuzzy-if(we
fuzzy-if(Android,0-6,0-8) fuzzy-if(webrender&&gtkWidget,28-28,56-60) fuzzy-if(webrender&&cocoaWidget,18-19,70-75) skip-if(!asyncPan) == fixed-pos-scrolled-clip-4.html fixed-pos-scrolled-clip-4-ref.html # Bug 1604338
skip-if(!asyncPan) == fixed-pos-scrolled-clip-5.html fixed-pos-scrolled-clip-5-ref.html
skip-if(!asyncPan) == position-sticky-bug1434250.html position-sticky-bug1434250-ref.html
fuzzy-if(Android,0-8,0-4) fuzzy-if(webrender&&gtkWidget,25-25,30-32) fuzzy-if(webrender&&cocoaWidget,16-16,44-44) skip-if(!asyncPan) == position-sticky-scrolled-clip-1.html position-sticky-scrolled-clip-1-ref.html # Bug 1604338
fuzzy-if(Android,0-8,0-4) fuzzy-if(webrender&&gtkWidget,25-25,32-32) fuzzy-if(webrender&&cocoaWidget,16-16,44-44) skip-if(!asyncPan) == position-sticky-scrolled-clip-1.html position-sticky-scrolled-clip-1-ref.html # Bug 1604338
fuzzy-if(Android,0-6,0-4) skip == position-sticky-scrolled-clip-2.html position-sticky-scrolled-clip-2-ref.html # bug ?????? - incorrectly applying clip to sticky contents
fuzzy-if(Android,0-8,0-27) fuzzy-if(webrender&&cocoaWidget,10-11,44-44) skip-if(!asyncPan) == curtain-effect-1.html curtain-effect-1-ref.html
fuzzy-if(Android,0-6,0-4) fuzzy-if(webrender&&gtkWidget,15-15,28-32) fuzzy-if(webrender&&cocoaWidget,8-8,38-42) skip-if(!asyncPan) == transformed-1.html transformed-1-ref.html # Bug 1604338
fuzzy-if(Android&&!webrender,2-2,4-4) fuzzy-if(Android&&webrender,7-7,4-4) fuzzy-if(webrender&&gtkWidget,4-5,27-28) fuzzy-if(webrender&&cocoaWidget,6-6,37-38) skip-if(!asyncPan) == position-sticky-transformed-in-scrollframe-1.html position-sticky-transformed-in-scrollframe-1-ref.html # Bug 1604338
fuzzy-if(Android&&!webrender,3-3,4-4) fuzzy-if(Android&&webrender,10-10,4-4) fuzzy-if(webrender&&gtkWidget,20-20,32-32) fuzzy-if(webrender&&cocoaWidget,15-16,44-44) skip-if(!asyncPan) == position-sticky-transformed-in-scrollframe-2.html position-sticky-transformed-in-scrollframe-2-ref.html # Bug 1604338
fuzzy-if(Android&&!webrender,3-3,4-4) fuzzy-if(Android&&webrender,13-13,4-4) fuzzy-if(webrender&&gtkWidget,26-27,30-32) fuzzy-if(webrender&&cocoaWidget,16-16,44-44) skip-if(!asyncPan) == position-sticky-in-transformed-scrollframe-1.html position-sticky-in-transformed-scrollframe-ref.html # Bug 1604338
fuzzy-if(Android&&!webrender,3-3,4-4) fuzzy-if(Android&&webrender,13-13,4-4) fuzzy-if(webrender&&gtkWidget,26-27,30-32) fuzzy-if(webrender&&cocoaWidget,16-16,44-44) skip-if(!asyncPan) == position-sticky-in-transformed-scrollframe-2.html position-sticky-in-transformed-scrollframe-ref.html # Bug 1604338
fuzzy-if(Android&&!webrender,3-3,4-4) fuzzy-if(Android&&webrender,13-13,4-4) fuzzy-if(webrender&&gtkWidget,26-27,32-32) fuzzy-if(webrender&&cocoaWidget,16-16,44-44) skip-if(!asyncPan) == position-sticky-in-transformed-scrollframe-1.html position-sticky-in-transformed-scrollframe-ref.html # Bug 1604338
fuzzy-if(Android&&!webrender,3-3,4-4) fuzzy-if(Android&&webrender,13-13,4-4) fuzzy-if(webrender&&gtkWidget,26-27,32-32) fuzzy-if(webrender&&cocoaWidget,16-16,44-44) skip-if(!asyncPan) == position-sticky-in-transformed-scrollframe-2.html position-sticky-in-transformed-scrollframe-ref.html # Bug 1604338
# for the following tests, we want to disable the low-precision buffer
# as it will expand the displayport beyond what the test specifies in

View file

@@ -922,10 +922,10 @@ fuzzy-if(Android,0-13,0-9) == 407111-1.html 407111-1-ref.html # Bug 1128229
== 409089-1.html 409089-1-ref.html
== 409089-2.html 409089-2-ref.html
== 409089-3.html 409089-3-ref.html
fuzzy-if(winWidget,0-123,0-1600) fuzzy-if(webrender&&swgl,1-1,39-39) == 409659-1a.html 409659-1-ref.html # Bug 1128229
fuzzy-if(winWidget,0-123,0-1600) == 409659-1a.html 409659-1-ref.html # Bug 1128229
!= 409659-1b.html 409659-1-ref.html
!= 409659-1c.html 409659-1-ref.html
fuzzy-if(winWidget,0-123,0-1900) fuzzy-if(webrender&&swgl,1-1,39-39) == 409659-1d.html 409659-1-ref.html # Bug 1128229
fuzzy-if(winWidget,0-123,0-1900) == 409659-1d.html 409659-1-ref.html # Bug 1128229
== 410621-1.html 410621-1-ref.html
random-if(/^Windows\x20NT\x206\.1/.test(http.oscpu)) == 411059-1.html 411059-1-ref.html # Bug 1392106
fuzzy-if(webrender&&winWidget,127-129,652-770) == 411334-1.xml 411334-1-ref.xml
@@ -1214,7 +1214,7 @@ fuzzy-if(skiaContent||webrender,0-1,0-31200) == 461512-1.html 461512-1-ref.html
== 462844-3.html 462844-ref.html
== 462844-4.html 462844-ref.html
== 463204-1.html 463204-1-ref.html
fuzzy-if(webrender,16-16,3345-4020) == chrome://reftest/content/bugs/463217-1.xhtml chrome://reftest/content/bugs/463217-1-ref.xhtml
fuzzy-if(webrender,16-16,3391-4020) == chrome://reftest/content/bugs/463217-1.xhtml chrome://reftest/content/bugs/463217-1-ref.xhtml
== 463952-1.html 463952-1-ref.html
== 464811-1.html 464811-1-ref.html
== 465574-1.html 465574-1-ref.html # bug 421436
@@ -1785,7 +1785,7 @@ fuzzy-if(skiaContent,0-1,0-5) == 956513-1.svg 956513-1-ref.svg
== 957770-1.svg 957770-1-ref.svg
== 960277-1.html 960277-1-ref.html
fuzzy-if(skiaContent,0-1,0-80) == 961887-1.html 961887-1-ref.html
fuzzy-if(gtkWidget&&webrender&&swgl,1-1,42-42) == 961887-2.html 961887-2-ref.html
fuzzy-if(gtkWidget&&webrender&&swgl,1-1,22-22) == 961887-2.html 961887-2-ref.html
== 961887-3.html 961887-3-ref.html
pref(layout.css.overflow-clip-box.enabled,true) fuzzy(0-50,0-145) fuzzy-if(asyncPan&&!layersGPUAccelerated,0-102,0-3712) fuzzy-if(webrender,0-255,0-180) random-if(/^Windows\x20NT\x206\.1/.test(http.oscpu)) == 966992-1.html 966992-1-ref.html # Bug 1392106
== 966510-1.html 966510-1-ref.html
@@ -1838,7 +1838,7 @@ fails-if(webrender) == 1059498-3.html 1059498-1-ref.html # WebRender: see bug 14
== 1069716-1.html 1069716-1-ref.html
skip-if(geckoview&&!webrender) == 1078262-1.html about:blank # bug 1656792
test-pref(layout.testing.overlay-scrollbars.always-visible,false) == 1081072-1.html 1081072-1-ref.html
fuzzy-if(webrender,63-65,359-845) == 1081185-1.html 1081185-1-ref.html
fuzzy-if(webrender,63-64,359-845) == 1081185-1.html 1081185-1-ref.html
== 1097437-1.html 1097437-1-ref.html
== 1103258-1.html 1103258-1-ref.html # assertion crash test with layers culling test
== 1105137-1.html 1105137-1-ref.html
@@ -1973,7 +1973,7 @@ fuzzy-if(Android,0-27,0-874) fuzzy-if(!Android,0-14,0-43) == 1313772.xhtml 13137
fuzzy(0-3,0-320000) == 1315113-1.html 1315113-1-ref.html
fuzzy(0-3,0-20000) == 1315113-2.html 1315113-2-ref.html
== 1315632-1.html 1315632-1-ref.html
fuzzy(0-2,0-40000) fuzzy-if(/^Windows\x20NT\x2010\.0/.test(http.oscpu),0-13,0-40000) fuzzy-if(gtkWidget&&webrender&&swgl,8-8,330-330) == 1316719-1a.html 1316719-1-ref.html
fuzzy(0-2,0-40000) fuzzy-if(/^Windows\x20NT\x2010\.0/.test(http.oscpu),0-13,0-40000) fuzzy-if(gtkWidget&&webrender&&swgl,7-7,410-410) == 1316719-1a.html 1316719-1-ref.html
fuzzy(0-13,0-40000) fuzzy-if(/^Windows\x20NT\x2010\.0/.test(http.oscpu),0-13,0-40000) == 1316719-1b.html 1316719-1-ref.html
fuzzy(0-13,0-40000) fuzzy-if(/^Windows\x20NT\x2010\.0/.test(http.oscpu),0-13,0-40000) == 1316719-1c.html 1316719-1-ref.html
!= 1318769-1.html 1318769-1-ref.html
@@ -2047,7 +2047,7 @@ fuzzy(0-255,0-4054) == 1415987-1.html 1415987-1-ref.html # this is a large fuzz,
== 1424680.html 1424680-ref.html
== 1424798-1.html 1424798-ref.html
fuzzy-if(!webrender,0-74,0-2234) == 1425243-1.html 1425243-1-ref.html
fuzzy-if(Android,0-66,0-574) fuzzy-if(d2d,0-89,0-777) fuzzy-if(!Android&&!d2d,0-1,0-31341) fuzzy-if(webrender&&winWidget,1-1,31284-31320) fuzzy-if(webrender&&swgl,1-1,31620-31620) == 1425243-2.html 1425243-2-ref.html
fuzzy-if(Android,0-66,0-574) fuzzy-if(d2d,0-89,0-777) fuzzy-if(!Android&&!d2d,0-1,0-31341) fuzzy-if(webrender&&winWidget,1-1,31284-31320) == 1425243-2.html 1425243-2-ref.html
== 1430869.html 1430869-ref.html
== 1432541.html 1432541-ref.html
== 1446470.html 1035091-ref.html

View file

@@ -15,15 +15,15 @@ fuzzy(0-1,0-800000) == linear-flipped-1.html linear-flipped-1-ref.html
== linear-repeat-1e.html linear-repeat-1-ref.html
== linear-repeat-1f.html linear-repeat-1-ref.html
fails-if(d2d&&!webrender) == linear-repeat-1g.html linear-repeat-1-ref.html # bug 582236
fuzzy-if(skiaContent,0-1,0-500) fuzzy-if(webrender&&winWidget,0-2,0-100) == linear-rotated-1.html linear-rotated-1-ref.html
fuzzy-if(skiaContent,0-1,0-500) fuzzy-if(webrender&&winWidget,0-2,0-100) fuzzy-if(webrender&&gtkWidget&&swgl,1-1,7600-7600) == linear-rotated-1.html linear-rotated-1-ref.html
fuzzy-if(winWidget,0-1,0-37800) fuzzy-if(skiaContent,0-1,0-45000) == linear-size-1a.html linear-size-1-ref.html
== linear-stops-1a.html linear-stops-1-ref.html
== linear-stops-1b.html linear-stops-1-ref.html
== linear-stops-1c.html linear-stops-1-ref.html
# these gradients will be cached (in WebRender) and thus exhibit off-by-1 texture interpolation differences:
fuzzy(0-1,0-35400) == linear-stops-1d.html linear-stops-1-ref.html
fuzzy(0-1,0-35400) == linear-stops-1e.html linear-stops-1-ref.html
fuzzy(0-1,0-35400) == linear-stops-1f.html linear-stops-1-ref.html
fuzzy(0-1,0-35100) == linear-stops-1d.html linear-stops-1-ref.html
fuzzy(0-1,0-35100) == linear-stops-1e.html linear-stops-1-ref.html
fuzzy(0-1,0-35100) == linear-stops-1f.html linear-stops-1-ref.html
fuzzy-if(!contentSameGfxBackendAsCanvas,0-3,0-88500) fuzzy-if(azureSkiaGL||skiaContent,0-3,0-89700) == linear-vertical-1a.html linear-vertical-1-ref.html
fuzzy-if(!contentSameGfxBackendAsCanvas,0-3,0-88500) fuzzy-if(azureSkiaGL||skiaContent,0-3,0-89700) == linear-vertical-1b.html linear-vertical-1-ref.html
fuzzy-if(!contentSameGfxBackendAsCanvas,0-3,0-88500) fuzzy-if(azureSkiaGL||skiaContent,0-3,0-89700) == linear-vertical-1c.html linear-vertical-1-ref.html

View file

@@ -111,7 +111,7 @@ fuzzy-if(OSX,23-23,1-1) == table-nested-1308876-1.xhtml table-nested-1308876-1-r
== 115199-2a.html 115199-2-ref.html
== 115199-2b.html 115199-2-ref.html
== 652178-1.html 652178-1-ref2.html
fuzzy-if(cocoaWidget,0-1,0-5000) fuzzy-if(webrender&&swgl,1-1,9-9) == 745025-1.html 745025-1-ref.html
fuzzy-if(cocoaWidget,0-1,0-5000) == 745025-1.html 745025-1-ref.html
== 820496-1.html 820496-1-ref.html
# NOTE: These tests don't yet rigorously test what they're

View file

@@ -51,5 +51,5 @@ fuzzy-if(Android,0-8,0-630) fuzzy-if(OSX,0-1,0-11) fuzzy-if(skiaContent,0-1,0-22
== iframe-1.html iframe-1-ref.html
== transformed-1.html transformed-1-ref.html
fuzzy-if(Android,0-4,0-4) fuzzy-if(webrender&&gtkWidget,16-17,28-32) fuzzy-if(webrender&&cocoaWidget,8-8,38-42) skip-if(!asyncPan) == transformed-2.html transformed-2-ref.html # Bug 1604644
skip-if(!asyncPan) fuzzy-if(Android,0-10,0-4) fuzzy-if(webrender&&gtkWidget,29-30,30-32) fuzzy-if(webrender&&cocoaWidget,15-16,44-44) == nested-sticky-1.html nested-sticky-1-ref.html # Bug 1604644
skip-if(!asyncPan) fuzzy-if(Android,0-10,0-4) fuzzy-if(webrender&&gtkWidget,29-30,30-32) fuzzy-if(webrender&&cocoaWidget,15-16,44-44) fuzzy-if(/^Windows\x20NT\x206\.1/.test(http.oscpu),0-4,0-104) == nested-sticky-2.html nested-sticky-2-ref.html # Bug 1604644
skip-if(!asyncPan) fuzzy-if(Android,0-10,0-4) fuzzy-if(webrender&&gtkWidget,29-30,32-32) fuzzy-if(webrender&&cocoaWidget,15-16,44-44) == nested-sticky-1.html nested-sticky-1-ref.html # Bug 1604644
skip-if(!asyncPan) fuzzy-if(Android,0-10,0-4) fuzzy-if(webrender&&gtkWidget,29-30,32-32) fuzzy-if(webrender&&cocoaWidget,15-16,44-44) fuzzy-if(/^Windows\x20NT\x206\.1/.test(http.oscpu),0-4,0-104) == nested-sticky-2.html nested-sticky-2-ref.html # Bug 1604644

View file

@@ -2,6 +2,6 @@
# e.g. filter: blur(3px) grayscale(0.5) invert(0.2);
# Some platforms render this complex filter chain a little differently, and that's ok.
fuzzy(0-5,0-13638) fuzzy-if(/^Windows\x20NT\x2010\.0/.test(http.oscpu)&&layersGPUAccelerated,0-35,0-13638) fuzzy-if(webrender,4-6,12000-19484) fuzzy-if(webrender&&swgl,9-9,19336-19336) == long-chain.html long-chain-ref.html # Win10: Bug 1258241
fuzzy(0-5,0-13638) fuzzy-if(/^Windows\x20NT\x2010\.0/.test(http.oscpu)&&layersGPUAccelerated,0-35,0-13638) fuzzy-if(webrender,4-6,12000-19484) == long-chain.html long-chain-ref.html # Win10: Bug 1258241
== moz-element.html moz-element-ref.html
fuzzy-if(webrender,13-15,7676-7980) == same-filter.html same-filter-ref.html
fuzzy-if(webrender,13-15,7676-7966) == same-filter.html same-filter-ref.html

View file

@@ -611,5 +611,5 @@ fails-if(Android) != mask-resource.html about:blank # The image the test uses is
!= bug-1562091.html bug-1562091-ref.html
== 1570363-1.html 1570363-1-ref.html
fuzzy-if(webrender,0-1,0-92) fuzzy-if(!webrender,0-2,0-7882) fuzzy-if(webrender&&swgl,1-1,172-172) == mask-opacity-invalidation-1.html mask-opacity-invalidation-1-ref.html # clip-path mask/opacity optimization
fuzzy-if(webrender,0-1,0-92) fuzzy-if(!webrender,0-2,0-7882) == mask-opacity-invalidation-1.html mask-opacity-invalidation-1-ref.html # clip-path mask/opacity optimization
== transform-animation-on-path.html transform-animation-on-path-ref.html