diff --git a/gfx/webrender/res/brush.glsl b/gfx/webrender/res/brush.glsl
index f828e6cae1fe..8eaba3d3a8b7 100644
--- a/gfx/webrender/res/brush.glsl
+++ b/gfx/webrender/res/brush.glsl
@@ -37,7 +37,7 @@ void main(void) {
                           VECS_PER_SPECIFIC_BRUSH +
                           segment_index * VECS_PER_SEGMENT;
 
-    vec4[2] segment_data = fetch_from_resource_cache_2(segment_address);
+    vec4[2] segment_data = fetch_from_gpu_cache_2(segment_address);
     RectWithSize local_segment_rect = RectWithSize(segment_data[0].xy, segment_data[0].zw);
 
     VertexInfo vi;
diff --git a/gfx/webrender/res/brush_blend.glsl b/gfx/webrender/res/brush_blend.glsl
index a8dd5854b2bc..9117d6c3f621 100644
--- a/gfx/webrender/res/brush_blend.glsl
+++ b/gfx/webrender/res/brush_blend.glsl
@@ -101,8 +101,8 @@ void brush_vs(
         }
         case 10: {
             // Color Matrix
-            vec4 mat_data[3] = fetch_from_resource_cache_3(user_data.z);
-            vec4 offset_data = fetch_from_resource_cache_1(user_data.z + 4);
+            vec4 mat_data[3] = fetch_from_gpu_cache_3(user_data.z);
+            vec4 offset_data = fetch_from_gpu_cache_1(user_data.z + 4);
             vColorMat = mat3(mat_data[0].xyz, mat_data[1].xyz, mat_data[2].xyz);
             vColorOffset = offset_data.rgb;
             break;
diff --git a/gfx/webrender/res/brush_image.glsl b/gfx/webrender/res/brush_image.glsl
index 6fd0dc074acf..10cd961acd22 100644
--- a/gfx/webrender/res/brush_image.glsl
+++ b/gfx/webrender/res/brush_image.glsl
@@ -33,7 +33,7 @@ struct ImageBrushData {
 };
 
 ImageBrushData fetch_image_data(int address) {
-    vec4[3] raw_data = fetch_from_resource_cache_3(address);
+    vec4[3] raw_data = fetch_from_gpu_cache_3(address);
     ImageBrushData data = ImageBrushData(
         raw_data[0],
         raw_data[1],
diff --git a/gfx/webrender/res/brush_linear_gradient.glsl b/gfx/webrender/res/brush_linear_gradient.glsl
index 7499c7171548..dc986a04d387 100644
--- a/gfx/webrender/res/brush_linear_gradient.glsl
+++ b/gfx/webrender/res/brush_linear_gradient.glsl
@@ -32,7 +32,7 @@ struct Gradient {
 };
 
 Gradient fetch_gradient(int address) {
-    vec4 data[2] = fetch_from_resource_cache_2(address);
+    vec4 data[2] = fetch_from_gpu_cache_2(address);
     return Gradient(
         data[0],
         int(data[1].x),
diff --git a/gfx/webrender/res/brush_mix_blend.glsl b/gfx/webrender/res/brush_mix_blend.glsl
index aec357a54b74..9b09f146960e 100644
--- a/gfx/webrender/res/brush_mix_blend.glsl
+++ b/gfx/webrender/res/brush_mix_blend.glsl
@@ -24,7 +24,7 @@ void brush_vs(
     vec4 unused
 ) {
     vec2 snapped_device_pos = snap_device_pos(vi);
-    vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
+    vec2 texture_size = vec2(textureSize(sPrevPassColor, 0));
     vOp = user_data.x;
 
     PictureTask src_task = fetch_picture_task(user_data.z);
@@ -200,8 +200,8 @@ const int MixBlendMode_Color       = 14;
 const int MixBlendMode_Luminosity  = 15;
 
 Fragment brush_fs() {
-    vec4 Cb = textureLod(sCacheRGBA8, vBackdropUv, 0.0);
-    vec4 Cs = textureLod(sCacheRGBA8, vSrcUv, 0.0);
+    vec4 Cb = textureLod(sPrevPassColor, vBackdropUv, 0.0);
+    vec4 Cs = textureLod(sPrevPassColor, vSrcUv, 0.0);
 
     if (Cb.a == 0.0) {
         return Fragment(Cs);
diff --git a/gfx/webrender/res/brush_radial_gradient.glsl b/gfx/webrender/res/brush_radial_gradient.glsl
index 6169213bc1b1..39fcc64aebf0 100644
--- a/gfx/webrender/res/brush_radial_gradient.glsl
+++ b/gfx/webrender/res/brush_radial_gradient.glsl
@@ -31,7 +31,7 @@ struct RadialGradient {
 };
 
 RadialGradient fetch_radial_gradient(int address) {
-    vec4 data[2] = fetch_from_resource_cache_2(address);
+    vec4 data[2] = fetch_from_gpu_cache_2(address);
     return RadialGradient(
         data[0],
         data[1].x,
diff --git a/gfx/webrender/res/brush_solid.glsl b/gfx/webrender/res/brush_solid.glsl
index 6b66282a4c0e..41d02448e25d 100644
--- a/gfx/webrender/res/brush_solid.glsl
+++ b/gfx/webrender/res/brush_solid.glsl
@@ -19,7 +19,7 @@ struct SolidBrush {
 };
 
 SolidBrush fetch_solid_primitive(int address) {
-    vec4 data = fetch_from_resource_cache_1(address);
+    vec4 data = fetch_from_gpu_cache_1(address);
     return SolidBrush(data);
 }
 
diff --git a/gfx/webrender/res/brush_yuv_image.glsl b/gfx/webrender/res/brush_yuv_image.glsl
index 366279140558..48cd3f1e6645 100644
--- a/gfx/webrender/res/brush_yuv_image.glsl
+++ b/gfx/webrender/res/brush_yuv_image.glsl
@@ -77,7 +77,7 @@ struct YuvPrimitive {
 };
 
 YuvPrimitive fetch_yuv_primitive(int address) {
-    vec4 data = fetch_from_resource_cache_1(address);
+    vec4 data = fetch_from_gpu_cache_1(address);
     return YuvPrimitive(data.x);
 }
 
diff --git a/gfx/webrender/res/clip_shared.glsl b/gfx/webrender/res/clip_shared.glsl
index 543ace282ada..7a0ffd79adc9 100644
--- a/gfx/webrender/res/clip_shared.glsl
+++ b/gfx/webrender/res/clip_shared.glsl
@@ -2,7 +2,7 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#include rect,render_task,resource_cache,snap,transform
+#include rect,render_task,gpu_cache,snap,transform
 
 #ifdef WR_VERTEX_SHADER
 
diff --git a/gfx/webrender/res/cs_blur.glsl b/gfx/webrender/res/cs_blur.glsl
index af7121c76c89..f6fbd79ad7aa 100644
--- a/gfx/webrender/res/cs_blur.glsl
+++ b/gfx/webrender/res/cs_blur.glsl
@@ -46,9 +46,9 @@ void main(void) {
     RectWithSize target_rect = blur_task.common_data.task_rect;
 
 #if defined WR_FEATURE_COLOR_TARGET
-    vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0).xy);
+    vec2 texture_size = vec2(textureSize(sPrevPassColor, 0).xy);
 #else
-    vec2 texture_size = vec2(textureSize(sCacheA8, 0).xy);
+    vec2 texture_size = vec2(textureSize(sPrevPassAlpha, 0).xy);
 #endif
     vUv.z = src_task.texture_layer_index;
     vSigma = blur_task.blur_radius;
@@ -89,10 +89,10 @@ void main(void) {
 
 #if defined WR_FEATURE_COLOR_TARGET
 #define SAMPLE_TYPE vec4
-#define SAMPLE_TEXTURE(uv)  texture(sCacheRGBA8, uv)
+#define SAMPLE_TEXTURE(uv)  texture(sPrevPassColor, uv)
 #else
 #define SAMPLE_TYPE float
-#define SAMPLE_TEXTURE(uv)  texture(sCacheA8, uv).r
+#define SAMPLE_TEXTURE(uv)  texture(sPrevPassAlpha, uv).r
 #endif
 
 // TODO(gw): Write a fast path blur that handles smaller blur radii
diff --git a/gfx/webrender/res/cs_clip_box_shadow.glsl b/gfx/webrender/res/cs_clip_box_shadow.glsl
index 5ed7d69278d6..4419241313c4 100644
--- a/gfx/webrender/res/cs_clip_box_shadow.glsl
+++ b/gfx/webrender/res/cs_clip_box_shadow.glsl
@@ -26,7 +26,7 @@ struct BoxShadowData {
 };
 
 BoxShadowData fetch_data(ivec2 address) {
-    vec4 data[3] = fetch_from_resource_cache_3_direct(address);
+    vec4 data[3] = fetch_from_gpu_cache_3_direct(address);
     RectWithSize dest_rect = RectWithSize(data[2].xy, data[2].zw);
     BoxShadowData bs_data = BoxShadowData(
         data[0].xy,
diff --git a/gfx/webrender/res/cs_clip_image.glsl b/gfx/webrender/res/cs_clip_image.glsl
index 7c5425aae4e9..42b3869902d9 100644
--- a/gfx/webrender/res/cs_clip_image.glsl
+++ b/gfx/webrender/res/cs_clip_image.glsl
@@ -17,7 +17,7 @@ struct ImageMaskData {
 };
 
 ImageMaskData fetch_mask_data(ivec2 address) {
-    vec4 data = fetch_from_resource_cache_1_direct(address);
+    vec4 data = fetch_from_gpu_cache_1_direct(address);
     RectWithSize local_rect = RectWithSize(data.xy, data.zw);
     ImageMaskData mask_data = ImageMaskData(local_rect);
     return mask_data;
diff --git a/gfx/webrender/res/cs_clip_line.glsl b/gfx/webrender/res/cs_clip_line.glsl
index 4e2c151fc90f..1fc59dbd00b6 100644
--- a/gfx/webrender/res/cs_clip_line.glsl
+++ b/gfx/webrender/res/cs_clip_line.glsl
@@ -28,7 +28,7 @@ struct LineDecorationData {
 };
 
 LineDecorationData fetch_data(ivec2 address) {
-    vec4 data[2] = fetch_from_resource_cache_2_direct(address);
+    vec4 data[2] = fetch_from_gpu_cache_2_direct(address);
     RectWithSize local_rect = RectWithSize(data[0].xy, data[0].zw);
     LineDecorationData line_data = LineDecorationData(
         local_rect,
diff --git a/gfx/webrender/res/cs_clip_rectangle.glsl b/gfx/webrender/res/cs_clip_rectangle.glsl
index 3232f7f8105e..3918c8634698 100644
--- a/gfx/webrender/res/cs_clip_rectangle.glsl
+++ b/gfx/webrender/res/cs_clip_rectangle.glsl
@@ -18,7 +18,7 @@ struct ClipRect {
 };
 
 ClipRect fetch_clip_rect(ivec2 address) {
-    vec4 data[2] = fetch_from_resource_cache_2_direct(address);
+    vec4 data[2] = fetch_from_gpu_cache_2_direct(address);
     ClipRect rect = ClipRect(RectWithSize(data[0].xy, data[0].zw), data[1]);
     return rect;
 }
@@ -32,7 +32,7 @@ struct ClipCorner {
 // miscompilations with a macOS 10.12 Intel driver.
 ClipCorner fetch_clip_corner(ivec2 address, float index) {
     address += ivec2(2 + 2 * int(index), 0);
-    vec4 data[2] = fetch_from_resource_cache_2_direct(address);
+    vec4 data[2] = fetch_from_gpu_cache_2_direct(address);
     ClipCorner corner = ClipCorner(RectWithSize(data[0].xy, data[0].zw), data[1]);
     return corner;
 }
diff --git a/gfx/webrender/res/cs_scale.glsl b/gfx/webrender/res/cs_scale.glsl
index 13070d05c9a0..03d9f96be3c5 100644
--- a/gfx/webrender/res/cs_scale.glsl
+++ b/gfx/webrender/res/cs_scale.glsl
@@ -32,9 +32,9 @@ void main(void) {
     RectWithSize target_rect = scale_task.common_data.task_rect;
 
 #if defined WR_FEATURE_COLOR_TARGET
-    vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0).xy);
+    vec2 texture_size = vec2(textureSize(sPrevPassColor, 0).xy);
 #else
-    vec2 texture_size = vec2(textureSize(sCacheA8, 0).xy);
+    vec2 texture_size = vec2(textureSize(sPrevPassAlpha, 0).xy);
 #endif
 
     vUv.z = src_task.texture_layer_index;
@@ -54,10 +54,10 @@ void main(void) {
 
 #if defined WR_FEATURE_COLOR_TARGET
 #define SAMPLE_TYPE vec4
-#define SAMPLE_TEXTURE(uv)  texture(sCacheRGBA8, uv)
+#define SAMPLE_TEXTURE(uv)  texture(sPrevPassColor, uv)
 #else
 #define SAMPLE_TYPE float
-#define SAMPLE_TEXTURE(uv)  texture(sCacheA8, uv).r
+#define SAMPLE_TEXTURE(uv)  texture(sPrevPassAlpha, uv).r
 #endif
 
 void main(void) {
diff --git a/gfx/webrender/res/gpu_cache.glsl b/gfx/webrender/res/gpu_cache.glsl
new file mode 100644
index 000000000000..a4c391721dc4
--- /dev/null
+++ b/gfx/webrender/res/gpu_cache.glsl
@@ -0,0 +1,137 @@
+/* This Source Code Form is subject to the terms of the Mozilla Public
+ * License, v. 2.0. If a copy of the MPL was not distributed with this
+ * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
+
+uniform HIGHP_SAMPLER_FLOAT sampler2D sGpuCache;
+
+#define VECS_PER_IMAGE_RESOURCE     2
+
+// TODO(gw): This is here temporarily while we have
+//           both GPU store and cache. When the GPU
+//           store code is removed, we can change the
+//           PrimitiveInstance instance structure to
+//           use 2x unsigned shorts as vertex attributes
+//           instead of an int, and encode the UV directly
+//           in the vertices.
+ivec2 get_gpu_cache_uv(int address) {
+    return ivec2(uint(address) % WR_MAX_VERTEX_TEXTURE_WIDTH,
+                 uint(address) / WR_MAX_VERTEX_TEXTURE_WIDTH);
+}
+
+vec4[2] fetch_from_gpu_cache_2_direct(ivec2 address) {
+    return vec4[2](
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0))
+    );
+}
+
+vec4[2] fetch_from_gpu_cache_2(int address) {
+    ivec2 uv = get_gpu_cache_uv(address);
+    return vec4[2](
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0))
+    );
+}
+
+#ifdef WR_VERTEX_SHADER
+
+vec4[8] fetch_from_gpu_cache_8(int address) {
+    ivec2 uv = get_gpu_cache_uv(address);
+    return vec4[8](
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(3, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(4, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(5, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(6, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(7, 0))
+    );
+}
+
+vec4[3] fetch_from_gpu_cache_3(int address) {
+    ivec2 uv = get_gpu_cache_uv(address);
+    return vec4[3](
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0))
+    );
+}
+
+vec4[3] fetch_from_gpu_cache_3_direct(ivec2 address) {
+    return vec4[3](
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(2, 0))
+    );
+}
+
+vec4[4] fetch_from_gpu_cache_4_direct(ivec2 address) {
+    return vec4[4](
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(2, 0)),
+        TEXEL_FETCH(sGpuCache, address, 0, ivec2(3, 0))
+    );
+}
+
+vec4[4] fetch_from_gpu_cache_4(int address) {
+    ivec2 uv = get_gpu_cache_uv(address);
+    return vec4[4](
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0)),
+        TEXEL_FETCH(sGpuCache, uv, 0, ivec2(3, 0))
+    );
+}
+
+vec4 fetch_from_gpu_cache_1_direct(ivec2 address) {
+    return texelFetch(sGpuCache, address, 0);
+}
+
+vec4 fetch_from_gpu_cache_1(int address) {
+    ivec2 uv = get_gpu_cache_uv(address);
+    return texelFetch(sGpuCache, uv, 0);
+}
+
+//TODO: image resource is too specific for this module
+
+struct ImageResource {
+    RectWithEndpoint uv_rect;
+    float layer;
+    vec3 user_data;
+};
+
+ImageResource fetch_image_resource(int address) {
+    //Note: number of blocks has to match `renderer::BLOCKS_PER_UV_RECT`
+    vec4 data[2] = fetch_from_gpu_cache_2(address);
+    RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw);
+    return ImageResource(uv_rect, data[1].x, data[1].yzw);
+}
+
+ImageResource fetch_image_resource_direct(ivec2 address) {
+    vec4 data[2] = fetch_from_gpu_cache_2_direct(address);
+    RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw);
+    return ImageResource(uv_rect, data[1].x, data[1].yzw);
+}
+
+// Fetch optional extra data for a texture cache resource. This can contain
+// a polygon defining a UV rect within the texture cache resource.
+struct ImageResourceExtra {
+    vec2 st_tl;
+    vec2 st_tr;
+    vec2 st_bl;
+    vec2 st_br;
+};
+
+ImageResourceExtra fetch_image_resource_extra(int address) {
+    vec4 data[2] = fetch_from_gpu_cache_2(address + VECS_PER_IMAGE_RESOURCE);
+    return ImageResourceExtra(
+        data[0].xy,
+        data[0].zw,
+        data[1].xy,
+        data[1].zw
+    );
+}
+
+#endif //WR_VERTEX_SHADER
diff --git a/gfx/webrender/res/prim_shared.glsl b/gfx/webrender/res/prim_shared.glsl
index e600ab8769b6..9e446b6f1b4d 100644
--- a/gfx/webrender/res/prim_shared.glsl
+++ b/gfx/webrender/res/prim_shared.glsl
@@ -2,7 +2,7 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-#include rect,render_task,resource_cache,snap,transform
+#include rect,render_task,gpu_cache,snap,transform
 
 #define EXTEND_MODE_CLAMP  0
 #define EXTEND_MODE_REPEAT 1
@@ -15,11 +15,8 @@
 #define RASTER_LOCAL            0
 #define RASTER_SCREEN           1
 
-uniform sampler2DArray sCacheA8;
-uniform sampler2DArray sCacheRGBA8;
-
-// An A8 target for standalone tasks that is available to all passes.
-uniform sampler2DArray sSharedCacheA8;
+uniform sampler2DArray sPrevPassAlpha;
+uniform sampler2DArray sPrevPassColor;
 
 vec2 clamp_rect(vec2 pt, RectWithSize rect) {
     return clamp(pt, rect.p0, rect.p0 + rect.size);
@@ -121,14 +118,12 @@ VertexInfo write_vertex(RectWithSize instance_rect,
     vec4 world_pos = transform.m * vec4(clamped_local_pos, 0.0, 1.0);
 
     // Convert the world positions to device pixel space.
-    vec2 device_pos = world_pos.xy / world_pos.w * uDevicePixelRatio;
+    vec2 device_pos = world_pos.xy * uDevicePixelRatio;
 
     // Apply offsets for the render task to get correct screen location.
-    vec2 final_pos = device_pos + snap_offset -
-                     task.content_origin +
-                     task.common_data.task_rect.p0;
+    vec2 final_offset = snap_offset - task.content_origin + task.common_data.task_rect.p0;
 
-    gl_Position = uTransform * vec4(final_pos, z, 1.0);
+    gl_Position = uTransform * vec4(device_pos + final_offset * world_pos.w, z * world_pos.w, world_pos.w);
 
     VertexInfo vi = VertexInfo(
         clamped_local_pos,
@@ -254,7 +249,7 @@ float do_clip() {
     // is still interpolated and becomes a subject of precision-caused
     // fluctuations, see https://bugzilla.mozilla.org/show_bug.cgi?id=1491911
     ivec3 tc = ivec3(mask_uv, vClipMaskUv.z + 0.5);
-    return texelFetch(sCacheA8, tc, 0).r;
+    return texelFetch(sPrevPassAlpha, tc, 0).r;
 }
 
 #ifdef WR_FEATURE_DITHERING
@@ -297,7 +292,7 @@ vec4 sample_gradient(int address, float offset, float gradient_repeat) {
     lut_offset = clamp(lut_offset, 0, 2 * (GRADIENT_ENTRIES + 1));
 
     // Fetch the start and end color.
-    vec4 texels[2] = fetch_from_resource_cache_2(address + lut_offset);
+    vec4 texels[2] = fetch_from_gpu_cache_2(address + lut_offset);
 
     // Finally interpolate and apply dithering
     return dither(mix(texels[0], texels[1], fract(x)));
diff --git a/gfx/webrender/res/ps_split_composite.glsl b/gfx/webrender/res/ps_split_composite.glsl
index 7896b6643362..361cd1d06f42 100644
--- a/gfx/webrender/res/ps_split_composite.glsl
+++ b/gfx/webrender/res/ps_split_composite.glsl
@@ -14,11 +14,11 @@ struct SplitGeometry {
 };
 
 SplitGeometry fetch_split_geometry(int address) {
-    ivec2 uv = get_resource_cache_uv(address);
+    ivec2 uv = get_gpu_cache_uv(address);
 
-    vec4 data0 = TEXEL_FETCH(sResourceCache, uv, 0, ivec2(0, 0));
-    vec4 data1 = TEXEL_FETCH(sResourceCache, uv, 0, ivec2(1, 0));
-    vec4 data2 = TEXEL_FETCH(sResourceCache, uv, 0, ivec2(2, 0));
+    vec4 data0 = TEXEL_FETCH(sGpuCache, uv, 0, ivec2(0, 0));
+    vec4 data1 = TEXEL_FETCH(sGpuCache, uv, 0, ivec2(1, 0));
+    vec4 data2 = TEXEL_FETCH(sGpuCache, uv, 0, ivec2(2, 0));
 
     SplitGeometry geo;
     geo.local = vec2[4](
@@ -86,7 +86,7 @@ void main(void) {
 
     gl_Position = uTransform * final_pos;
 
-    vec2 texture_size = vec2(textureSize(sCacheRGBA8, 0));
+    vec2 texture_size = vec2(textureSize(sPrevPassColor, 0));
     vec2 uv0 = res.uv_rect.p0;
     vec2 uv1 = res.uv_rect.p1;
 
@@ -115,6 +115,6 @@ void main(void) {
 void main(void) {
     float alpha = do_clip();
     vec2 uv = clamp(vUv.xy, vUvSampleBounds.xy, vUvSampleBounds.zw);
-    oFragColor = alpha * textureLod(sCacheRGBA8, vec3(uv, vUv.z), 0.0);
+    oFragColor = alpha * textureLod(sPrevPassColor, vec3(uv, vUv.z), 0.0);
 }
 #endif
diff --git a/gfx/webrender/res/ps_text_run.glsl b/gfx/webrender/res/ps_text_run.glsl
index 468663eeed43..e6b7a379a857 100644
--- a/gfx/webrender/res/ps_text_run.glsl
+++ b/gfx/webrender/res/ps_text_run.glsl
@@ -28,7 +28,7 @@ Glyph fetch_glyph(int specific_prim_address,
     int glyph_address = specific_prim_address +
                         VECS_PER_TEXT_RUN +
                         int(uint(glyph_index) / GLYPHS_PER_GPU_BLOCK);
-    vec4 data = fetch_from_resource_cache_1(glyph_address);
+    vec4 data = fetch_from_gpu_cache_1(glyph_address);
     // Select XY or ZW based on glyph index.
     // We use "!= 0" instead of "== 1" here in order to work around a driver
     // bug with equality comparisons on integers.
@@ -46,7 +46,7 @@ struct GlyphResource {
 };
 
 GlyphResource fetch_glyph_resource(int address) {
-    vec4 data[2] = fetch_from_resource_cache_2(address);
+    vec4 data[2] = fetch_from_gpu_cache_2(address);
     return GlyphResource(data[0], data[1].x, data[1].yz, data[1].w);
 }
 
@@ -57,7 +57,7 @@ struct TextRun {
 };
 
 TextRun fetch_text_run(int address) {
-    vec4 data[3] = fetch_from_resource_cache_3(address);
+    vec4 data[3] = fetch_from_gpu_cache_3(address);
     return TextRun(data[0], data[1], data[2].xy);
 }
 
diff --git a/gfx/webrender/res/resource_cache.glsl b/gfx/webrender/res/resource_cache.glsl
deleted file mode 100644
index ca659770843e..000000000000
--- a/gfx/webrender/res/resource_cache.glsl
+++ /dev/null
@@ -1,137 +0,0 @@
-/* This Source Code Form is subject to the terms of the Mozilla Public
- * License, v. 2.0. If a copy of the MPL was not distributed with this
- * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
-
-uniform HIGHP_SAMPLER_FLOAT sampler2D sResourceCache;
-
-#define VECS_PER_IMAGE_RESOURCE     2
-
-// TODO(gw): This is here temporarily while we have
-//           both GPU store and cache. When the GPU
-//           store code is removed, we can change the
-//           PrimitiveInstance instance structure to
-//           use 2x unsigned shorts as vertex attributes
-//           instead of an int, and encode the UV directly
-//           in the vertices.
-ivec2 get_resource_cache_uv(int address) {
-    return ivec2(uint(address) % WR_MAX_VERTEX_TEXTURE_WIDTH,
-                 uint(address) / WR_MAX_VERTEX_TEXTURE_WIDTH);
-}
-
-vec4[2] fetch_from_resource_cache_2_direct(ivec2 address) {
-    return vec4[2](
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(1, 0))
-    );
-}
-
-vec4[2] fetch_from_resource_cache_2(int address) {
-    ivec2 uv = get_resource_cache_uv(address);
-    return vec4[2](
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(1, 0))
-    );
-}
-
-#ifdef WR_VERTEX_SHADER
-
-vec4[8] fetch_from_resource_cache_8(int address) {
-    ivec2 uv = get_resource_cache_uv(address);
-    return vec4[8](
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(1, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(2, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(3, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(4, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(5, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(6, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(7, 0))
-    );
-}
-
-vec4[3] fetch_from_resource_cache_3(int address) {
-    ivec2 uv = get_resource_cache_uv(address);
-    return vec4[3](
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(1, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(2, 0))
-    );
-}
-
-vec4[3] fetch_from_resource_cache_3_direct(ivec2 address) {
-    return vec4[3](
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(1, 0)),
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(2, 0))
-    );
-}
-
-vec4[4] fetch_from_resource_cache_4_direct(ivec2 address) {
-    return vec4[4](
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(1, 0)),
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(2, 0)),
-        TEXEL_FETCH(sResourceCache, address, 0, ivec2(3, 0))
-    );
-}
-
-vec4[4] fetch_from_resource_cache_4(int address) {
-    ivec2 uv = get_resource_cache_uv(address);
-    return vec4[4](
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(0, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(1, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(2, 0)),
-        TEXEL_FETCH(sResourceCache, uv, 0, ivec2(3, 0))
-    );
-}
-
-vec4 fetch_from_resource_cache_1_direct(ivec2 address) {
-    return texelFetch(sResourceCache, address, 0);
-}
-
-vec4 fetch_from_resource_cache_1(int address) {
-    ivec2 uv = get_resource_cache_uv(address);
-    return texelFetch(sResourceCache, uv, 0);
-}
-
-//TODO: image resource is too specific for this module
-
-struct ImageResource {
-    RectWithEndpoint uv_rect;
-    float layer;
-    vec3 user_data;
-};
-
-ImageResource fetch_image_resource(int address) {
-    //Note: number of blocks has to match `renderer::BLOCKS_PER_UV_RECT`
-    vec4 data[2] = fetch_from_resource_cache_2(address);
-    RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw);
-    return ImageResource(uv_rect, data[1].x, data[1].yzw);
-}
-
-ImageResource fetch_image_resource_direct(ivec2 address) {
-    vec4 data[2] = fetch_from_resource_cache_2_direct(address);
-    RectWithEndpoint uv_rect = RectWithEndpoint(data[0].xy, data[0].zw);
-    return ImageResource(uv_rect, data[1].x, data[1].yzw);
-}
-
-// Fetch optional extra data for a texture cache resource. This can contain
-// a polygon defining a UV rect within the texture cache resource.
-struct ImageResourceExtra {
-    vec2 st_tl;
-    vec2 st_tr;
-    vec2 st_bl;
-    vec2 st_br;
-};
-
-ImageResourceExtra fetch_image_resource_extra(int address) {
-    vec4 data[2] = fetch_from_resource_cache_2(address + VECS_PER_IMAGE_RESOURCE);
-    return ImageResourceExtra(
-        data[0].xy,
-        data[0].zw,
-        data[1].xy,
-        data[1].zw
-    );
-}
-
-#endif //WR_VERTEX_SHADER
diff --git a/gfx/webrender/src/batch.rs b/gfx/webrender/src/batch.rs
index 985ac27de31c..92d4a535a9d2 100644
--- a/gfx/webrender/src/batch.rs
+++ b/gfx/webrender/src/batch.rs
@@ -14,7 +14,7 @@ use gpu_types::{BrushFlags, BrushInstance, PrimitiveHeaders};
 use gpu_types::{ClipMaskInstance, SplitCompositeInstance};
 use gpu_types::{PrimitiveInstanceData, RasterizationSpace, GlyphInstance};
 use gpu_types::{PrimitiveHeader, PrimitiveHeaderIndex, TransformPaletteId, TransformPalette};
-use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
+use internal_types::{FastHashMap, SavedTargetIndex, TextureSource};
 use picture::{PictureCompositeMode, PicturePrimitive, PictureSurface};
 use plane_split::{BspSplitter, Clipper, Polygon, Splitter};
 use prim_store::{BrushKind, BrushPrimitive, BrushSegmentTaskId, DeferredResolve};
@@ -66,29 +66,29 @@ pub enum BatchKind {
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct BatchTextures {
-    pub colors: [SourceTexture; 3],
+    pub colors: [TextureSource; 3],
 }
 
 impl BatchTextures {
     pub fn no_texture() -> Self {
         BatchTextures {
-            colors: [SourceTexture::Invalid; 3],
+            colors: [TextureSource::Invalid; 3],
         }
     }
 
     pub fn render_target_cache() -> Self {
         BatchTextures {
             colors: [
-                SourceTexture::CacheRGBA8,
-                SourceTexture::CacheA8,
-                SourceTexture::Invalid,
+                TextureSource::PrevPassColor,
+                TextureSource::PrevPassAlpha,
+                TextureSource::Invalid,
             ],
         }
     }
 
-    pub fn color(texture: SourceTexture) -> Self {
+    pub fn color(texture: TextureSource) -> Self {
         BatchTextures {
-            colors: [texture, texture, SourceTexture::Invalid],
+            colors: [texture, texture, TextureSource::Invalid],
         }
     }
 }
@@ -120,8 +120,8 @@ impl BatchKey {
 }
 
 #[inline]
-fn textures_compatible(t1: SourceTexture, t2: SourceTexture) -> bool {
-    t1 == SourceTexture::Invalid || t2 == SourceTexture::Invalid || t1 == t2
+fn textures_compatible(t1: TextureSource, t2: TextureSource) -> bool {
+    t1 == TextureSource::Invalid || t2 == TextureSource::Invalid || t1 == t2
 }
 
 pub struct AlphaBatchList {
@@ -761,9 +761,9 @@ impl AlphaBatchBuilder {
                                                 let shadow_textures = BatchTextures::render_target_cache();
                                                 let content_textures = BatchTextures {
                                                     colors: [
-                                                        SourceTexture::RenderTaskCache(saved_index),
-                                                        SourceTexture::Invalid,
-                                                        SourceTexture::Invalid,
+                                                        TextureSource::RenderTaskCache(saved_index),
+                                                        TextureSource::Invalid,
+                                                        TextureSource::Invalid,
                                                     ],
                                                 };
 
@@ -1089,7 +1089,7 @@ impl AlphaBatchBuilder {
                     glyph_fetch_buffer,
                     gpu_cache,
                     |texture_id, mut glyph_format, glyphs| {
-                        debug_assert_ne!(texture_id, SourceTexture::Invalid);
+                        debug_assert_ne!(texture_id, TextureSource::Invalid);
 
                         // Ignore color and only sample alpha when shadowing.
                         if text_cpu.shadow {
@@ -1101,8 +1101,8 @@ impl AlphaBatchBuilder {
                         let textures = BatchTextures {
                             colors: [
                                 texture_id,
-                                SourceTexture::Invalid,
-                                SourceTexture::Invalid,
+                                TextureSource::Invalid,
+                                TextureSource::Invalid,
                             ],
                         };
 
@@ -1344,7 +1344,7 @@ fn get_image_tile_params(
         deferred_resolves,
     );
 
-    if cache_item.texture_id == SourceTexture::Invalid {
+    if cache_item.texture_id == TextureSource::Invalid {
         None
     } else {
         let textures = BatchTextures::color(cache_item.texture_id);
@@ -1393,7 +1393,7 @@ impl BrushPrimitive {
                     println!("\tsource {:?}", cache_item);
                 }
 
-                if cache_item.texture_id == SourceTexture::Invalid {
+                if cache_item.texture_id == TextureSource::Invalid {
                     None
                 } else {
                     let textures = BatchTextures::color(cache_item.texture_id);
@@ -1431,7 +1431,7 @@ impl BrushPrimitive {
                     }
                 };
 
-                if cache_item.texture_id == SourceTexture::Invalid {
+                if cache_item.texture_id == TextureSource::Invalid {
                     None
                 } else {
                     let textures = BatchTextures::color(cache_item.texture_id);
@@ -1508,7 +1508,7 @@ impl BrushPrimitive {
                         deferred_resolves,
                     );
 
-                    if cache_item.texture_id == SourceTexture::Invalid {
+                    if cache_item.texture_id == TextureSource::Invalid {
                         warn!("Warnings: skip a PrimitiveKind::YuvImage");
                         return None;
                     }
@@ -1664,7 +1664,7 @@ pub fn resolve_image(
                     // the render thread...
                     let cache_handle = gpu_cache.push_deferred_per_frame_blocks(BLOCKS_PER_UV_RECT);
                     let cache_item = CacheItem {
-                        texture_id: SourceTexture::External(external_image),
+                        texture_id: TextureSource::External(external_image),
                         uv_rect_handle: cache_handle,
                         uv_rect: DeviceUintRect::new(
                             DeviceUintPoint::zero(),
@@ -1706,8 +1706,8 @@ pub struct ClipBatcher {
     /// Rectangle draws fill up the rectangles with rounded corners.
     pub rectangles: Vec<ClipMaskInstance>,
     /// Image draws apply the image masking.
-    pub images: FastHashMap<SourceTexture, Vec<ClipMaskInstance>>,
-    pub box_shadows: FastHashMap<SourceTexture, Vec<ClipMaskInstance>>,
+    pub images: FastHashMap<TextureSource, Vec<ClipMaskInstance>>,
+    pub box_shadows: FastHashMap<TextureSource, Vec<ClipMaskInstance>>,
     pub line_decorations: Vec<ClipMaskInstance>,
 }
 
@@ -1815,7 +1815,7 @@ impl ClipBatcher {
                         .get_cached_render_task(rt_handle);
                     let cache_item = resource_cache
                         .get_texture_cache_item(&rt_cache_entry.handle);
-                    debug_assert_ne!(cache_item.texture_id, SourceTexture::Invalid);
+                    debug_assert_ne!(cache_item.texture_id, TextureSource::Invalid);
 
                     self.box_shadows
                         .entry(cache_item.texture_id)
@@ -1846,9 +1846,9 @@ impl ClipBatcher {
     }
 }
 
-fn get_buffer_kind(texture: SourceTexture) -> ImageBufferKind {
+fn get_buffer_kind(texture: TextureSource) -> ImageBufferKind {
     match texture {
-        SourceTexture::External(ext_image) => {
+        TextureSource::External(ext_image) => {
             match ext_image.image_type {
                 ExternalImageType::TextureHandle(target) => {
                     target.into()
diff --git a/gfx/webrender/src/device/gl.rs b/gfx/webrender/src/device/gl.rs
index cbc341670f0e..18008542ebee 100644
--- a/gfx/webrender/src/device/gl.rs
+++ b/gfx/webrender/src/device/gl.rs
@@ -434,6 +434,11 @@ impl ExternalTexture {
     }
 }
 
+/// WebRender interface to an OpenGL texture.
+///
+/// Because freeing a texture requires various device handles that are not
+/// reachable from this struct, manual destruction via `Device` is required.
+/// Our `Drop` implementation asserts that this has happened.
 pub struct Texture {
     id: gl::GLuint,
     target: gl::GLuint,
diff --git a/gfx/webrender/src/internal_types.rs b/gfx/webrender/src/internal_types.rs
index 2870379d0798..f36b22bc33e6 100644
--- a/gfx/webrender/src/internal_types.rs
+++ b/gfx/webrender/src/internal_types.rs
@@ -25,20 +25,23 @@ use tiling;
 pub type FastHashMap<K, V> = HashMap<K, V, BuildHasherDefault<FxHasher>>;
 pub type FastHashSet<K> = HashSet<K, BuildHasherDefault<FxHasher>>;
 
-// An ID for a texture that is owned by the
-// texture cache module. This can include atlases
-// or standalone textures allocated via the
-// texture cache (e.g. if an image is too large
-// to be added to an atlas). The texture cache
-// manages the allocation and freeing of these
-// IDs, and the rendering thread maintains a
-// map from cache texture ID to native texture.
-
+/// An ID for a texture that is owned by the `texture_cache` module.
+///
+/// This can include atlases or standalone textures allocated via the texture
+/// cache (e.g.  if an image is too large to be added to an atlas). The texture
+/// cache manages the allocation and freeing of these IDs, and the rendering
+/// thread maintains a map from cache texture ID to native texture.
 #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct CacheTextureId(pub usize);
 
+/// Identifies a render pass target that is persisted until the end of the frame.
+///
+/// By default, only the targets of the immediately-preceding pass are bound as
+/// inputs to the next pass. However, tasks can opt into having their target
+/// preserved in a list until the end of the frame, and this type specifies the
+/// index in that list.
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
@@ -48,21 +51,24 @@ impl SavedTargetIndex {
     pub const PENDING: Self = SavedTargetIndex(!0);
 }
 
-// Represents the source for a texture.
-// These are passed from throughout the
-// pipeline until they reach the rendering
-// thread, where they are resolved to a
-// native texture ID.
-
+/// Identifies the source of an input texture to a shader.
 #[derive(Copy, Clone, Debug, Eq, PartialEq, Hash)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
-pub enum SourceTexture {
+pub enum TextureSource {
+    /// Equivalent to `None`, allowing us to avoid using `Option`s everywhere.
     Invalid,
+    /// An entry in the texture cache.
     TextureCache(CacheTextureId),
+    /// An external image texture, mananged by the embedding.
     External(ExternalImageData),
-    CacheA8,
-    CacheRGBA8,
+    /// The alpha target of the immediately-preceding pass.
+    PrevPassAlpha,
+    /// The color target of the immediately-preceding pass.
+    PrevPassColor,
+    /// A render target from an earlier pass. Unlike the immediately-preceding
+    /// passes, these are not made available automatically, but are instead
+    /// opt-in by the `RenderTask` (see `mark_for_saving()`).
     RenderTaskCache(SavedTargetIndex),
 }
 
diff --git a/gfx/webrender/src/picture.rs b/gfx/webrender/src/picture.rs
index 546f8abd765f..e5a318e4ffcf 100644
--- a/gfx/webrender/src/picture.rs
+++ b/gfx/webrender/src/picture.rs
@@ -279,10 +279,19 @@ impl PicturePrimitive {
         // a surface, and we have perspective or local raster
         // space request.
         let raster_space = self.requested_raster_space;
-        let local_scale = raster_space.local_scale();
 
-        let wants_raster_root = xf.has_perspective_component() ||
-                                local_scale.is_some();
+        // TODO(gw): A temporary hack here to revert behavior to
+        //           always raster in screen-space. This is not
+        //           a problem yet, since we're not taking advantage
+        //           of this for caching yet. This is a workaround
+        //           for some existing issues with handling scale
+        //           when rasterizing in local space mode. Once
+        //           the fixes for those are in-place, we can
+        //           remove this hack!
+        //let local_scale = raster_space.local_scale();
+        // let wants_raster_root = xf.has_perspective_component() ||
+        //                         local_scale.is_some();
+        let wants_raster_root = xf.has_perspective_component();
 
         let establishes_raster_root = has_surface && wants_raster_root;
 
diff --git a/gfx/webrender/src/render_backend.rs b/gfx/webrender/src/render_backend.rs
index 53c6f6f9617d..dd537abc9873 100644
--- a/gfx/webrender/src/render_backend.rs
+++ b/gfx/webrender/src/render_backend.rs
@@ -2,6 +2,12 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+//! The high-level module responsible for managing the pipeline and preparing
+//! commands to be issued by the `Renderer`.
+//!
+//! See the comment at the top of the `renderer` module for a description of
+//! how these two pieces interact.
+
 use api::{ApiMsg, BuiltDisplayList, ClearCache, DebugCommand};
 #[cfg(feature = "debugger")]
 use api::{BuiltDisplayListIter, SpecificDisplayItem};
@@ -661,6 +667,10 @@ impl RenderBackend {
                     SceneBuilderResult::ExternalEvent(evt) => {
                         self.notifier.external_event(evt);
                     }
+                    SceneBuilderResult::ClearNamespace(id) => {
+                        self.resource_cache.clear_namespace(id);
+                        self.documents.retain(|doc_id, _doc| doc_id.0 != id);
+                    }
                     SceneBuilderResult::Stopped => {
                         panic!("We haven't sent a Stop yet, how did we get a Stopped back?");
                     }
@@ -770,9 +780,8 @@ impl RenderBackend {
             ApiMsg::ExternalEvent(evt) => {
                 self.low_priority_scene_tx.send(SceneBuilderRequest::ExternalEvent(evt)).unwrap();
             }
-            ApiMsg::ClearNamespace(namespace_id) => {
-                self.resource_cache.clear_namespace(namespace_id);
-                self.documents.retain(|did, _doc| did.0 != namespace_id);
+            ApiMsg::ClearNamespace(id) => {
+                self.low_priority_scene_tx.send(SceneBuilderRequest::ClearNamespace(id)).unwrap();
             }
             ApiMsg::MemoryPressure => {
                 // This is drastic. It will basically flush everything out of the cache,
diff --git a/gfx/webrender/src/render_task.rs b/gfx/webrender/src/render_task.rs
index 1f0fab652da9..6d470df8252a 100644
--- a/gfx/webrender/src/render_task.rs
+++ b/gfx/webrender/src/render_task.rs
@@ -16,7 +16,7 @@ use freelist::{FreeList, FreeListHandle, WeakFreeListHandle};
 use glyph_rasterizer::GpuGlyphCacheKey;
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
 use gpu_types::{BorderInstance, ImageSource, RasterizationSpace, UvRectKind};
-use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
+use internal_types::{CacheTextureId, FastHashMap, SavedTargetIndex};
 #[cfg(feature = "pathfinder")]
 use pathfinder_partitioner::mesh::Mesh;
 use picture::PictureCacheKey;
@@ -116,16 +116,6 @@ impl RenderTaskTree {
             }
         }
 
-        // If this task can be shared between multiple
-        // passes, render it in the first pass so that
-        // it is available to all subsequent passes.
-        let pass_index = if task.is_shared() {
-            debug_assert!(task.children.is_empty());
-            0
-        } else {
-            pass_index
-        };
-
         let pass = &mut passes[pass_index];
         pass.add_render_task(id, task.get_dynamic_size(), task.target_kind());
     }
@@ -174,13 +164,27 @@ impl ops::IndexMut<RenderTaskId> for RenderTaskTree {
     }
 }
 
+/// Identifies the output buffer location for a given `RenderTask`.
 #[derive(Debug)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum RenderTaskLocation {
+    /// The `RenderTask` should be drawn to a fixed region in a specific render
+    /// target. This is used for the root `RenderTask`, where the main
+    /// framebuffer is used as the render target.
     Fixed(DeviceIntRect),
+    /// The `RenderTask` should be drawn to a target provided by the atlas
+    /// allocator. This is the most common case.
+    ///
+    /// The second member specifies the width and height of the task
+    /// output, and the first member is initially left as `None`. During the
+    /// build phase, we invoke `RenderTargetList::alloc()` and store the
+    /// resulting location in the first member. That location identifies the
+    /// render target and the offset of the allocated region within that target.
     Dynamic(Option<(DeviceIntPoint, RenderTargetIndex)>, DeviceIntSize),
-    TextureCache(SourceTexture, i32, DeviceIntRect),
+    /// The output of the `RenderTask` will be persisted beyond this frame, and
+    /// thus should be drawn into the `TextureCache`.
+    TextureCache(CacheTextureId, i32, DeviceIntRect),
 }
 
 #[derive(Debug)]
@@ -871,33 +875,6 @@ impl RenderTask {
         }
     }
 
-    // Check if this task wants to be made available as an input
-    // to all passes (except the first) in the render task tree.
-    // To qualify for this, the task needs to have no children / dependencies.
-    // Currently, this is only supported for A8 targets, but it can be
-    // trivially extended to also support RGBA8 targets in the future
-    // if we decide that is useful.
-    pub fn is_shared(&self) -> bool {
-        match self.kind {
-            RenderTaskKind::Picture(..) |
-            RenderTaskKind::VerticalBlur(..) |
-            RenderTaskKind::Readback(..) |
-            RenderTaskKind::HorizontalBlur(..) |
-            RenderTaskKind::Scaling(..) |
-            RenderTaskKind::ClipRegion(..) |
-            RenderTaskKind::Blit(..) |
-            RenderTaskKind::Border(..) |
-            RenderTaskKind::Glyph(..) => false,
-
-            // TODO(gw): For now, we've disabled the shared clip mask
-            //           optimization. It's of dubious value in the
-            //           future once we start to cache clip tasks anyway.
-            //           I have left shared texture support here though,
-            //           just in case we want it in the future.
-            RenderTaskKind::CacheMask(..) => false,
-        }
-    }
-
     // Optionally, prepare the render task for drawing. This is executed
     // after all resource cache items (textures and glyphs) have been
     // resolved and can be queried. It also allows certain render tasks
diff --git a/gfx/webrender/src/renderer.rs b/gfx/webrender/src/renderer.rs
index 2a49d6129f08..c47b11345e53 100644
--- a/gfx/webrender/src/renderer.rs
+++ b/gfx/webrender/src/renderer.rs
@@ -2,12 +2,25 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
-//! The webrender API.
+//! The high-level module responsible for interfacing with the GPU.
 //!
-//! The `webrender::renderer` module provides the interface to webrender, which
-//! is accessible through [`Renderer`][renderer]
+//! Much of WebRender's design is driven by separating work into different
+//! threads. To avoid the complexities of multi-threaded GPU access, we restrict
+//! all communication with the GPU to one thread, the render thread. But since
+//! issuing GPU commands is often a bottleneck, we move everything else (i.e.
+//! the computation of what commands to issue) to another thread, the
+//! RenderBackend thread. The RenderBackend, in turn, may delegate work to other
+//! thread (like the SceneBuilder threads or Rayon workers), but the
+//! Render-vs-RenderBackend distinction is the most important.
 //!
-//! [renderer]: struct.Renderer.html
+//! The consumer is responsible for initializing the render thread before
+//! calling into WebRender, which means that this module also serves as the
+//! initial entry point into WebRender, and is responsible for spawning the
+//! various other threads discussed above. That said, WebRender initialization
+//! returns both the `Renderer` instance as well as a channel for communicating
+//! directly with the `RenderBackend`. Aside from a few high-level operations
+//! like 'render now', most of interesting commands from the consumer go over
+//! that channel and operate on the `RenderBackend`.
 
 use api::{BlobImageHandler, ColorF, DeviceIntPoint, DeviceIntRect, DeviceIntSize};
 use api::{DeviceUintPoint, DeviceUintRect, DeviceUintSize, DocumentId, Epoch, ExternalImageId};
@@ -39,7 +52,7 @@ use gpu_cache::GpuDebugChunk;
 #[cfg(feature = "pathfinder")]
 use gpu_glyph_renderer::GpuGlyphRenderer;
 use gpu_types::ScalingInstance;
-use internal_types::{SourceTexture, ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE, ResourceCacheError};
+use internal_types::{TextureSource, ORTHO_FAR_PLANE, ORTHO_NEAR_PLANE, ResourceCacheError};
 use internal_types::{CacheTextureId, DebugOutput, FastHashMap, RenderedDocument, ResultMsg};
 use internal_types::{TextureUpdateList, TextureUpdateOp, TextureUpdateSource};
 use internal_types::{RenderTargetInfo, SavedTargetIndex};
@@ -274,21 +287,22 @@ impl From<GlyphFormat> for ShaderColorMode {
     }
 }
 
+/// Enumeration of the texture samplers used across the various WebRender shaders.
+///
+/// Each variant corresponds to a uniform declared in shader source. We only bind
+/// the variants we need for a given shader, so not every variant is bound for every
+/// batch.
 #[derive(Debug, Copy, Clone, PartialEq, Eq)]
 pub(crate) enum TextureSampler {
     Color0,
     Color1,
     Color2,
-    CacheA8,
-    CacheRGBA8,
-    ResourceCache,
+    PrevPassAlpha,
+    PrevPassColor,
+    GpuCache,
     TransformPalette,
     RenderTasks,
     Dither,
-    // A special sampler that is bound to the A8 output of
-    // the *first* pass. Items rendered in this target are
-    // available as inputs to tasks in any subsequent pass.
-    SharedCacheA8,
     PrimitiveHeadersF,
     PrimitiveHeadersI,
 }
@@ -312,15 +326,14 @@ impl Into<TextureSlot> for TextureSampler {
             TextureSampler::Color0 => TextureSlot(0),
             TextureSampler::Color1 => TextureSlot(1),
             TextureSampler::Color2 => TextureSlot(2),
-            TextureSampler::CacheA8 => TextureSlot(3),
-            TextureSampler::CacheRGBA8 => TextureSlot(4),
-            TextureSampler::ResourceCache => TextureSlot(5),
+            TextureSampler::PrevPassAlpha => TextureSlot(3),
+            TextureSampler::PrevPassColor => TextureSlot(4),
+            TextureSampler::GpuCache => TextureSlot(5),
             TextureSampler::TransformPalette => TextureSlot(6),
             TextureSampler::RenderTasks => TextureSlot(7),
             TextureSampler::Dither => TextureSlot(8),
-            TextureSampler::SharedCacheA8 => TextureSlot(9),
-            TextureSampler::PrimitiveHeadersF => TextureSlot(10),
-            TextureSampler::PrimitiveHeadersI => TextureSlot(11),
+            TextureSampler::PrimitiveHeadersF => TextureSlot(9),
+            TextureSampler::PrimitiveHeadersI => TextureSlot(10),
         }
     }
 }
@@ -703,48 +716,64 @@ impl GpuGlyphRenderer {
 #[cfg(not(feature = "pathfinder"))]
 struct StenciledGlyphPage;
 
+/// A Texture that has been initialized by the `device` module and is ready to
+/// be used.
 struct ActiveTexture {
     texture: Texture,
     saved_index: Option<SavedTargetIndex>,
-    is_shared: bool,
 }
 
-struct SourceTextureResolver {
-    /// A vector for fast resolves of texture cache IDs to
-    /// native texture IDs. This maps to a free-list managed
-    /// by the backend thread / texture cache. We free the
-    /// texture memory associated with a TextureId when its
-    /// texture cache ID is freed by the texture cache, but
-    /// reuse the TextureId when the texture caches's free
-    /// list reuses the texture cache ID. This saves having to
-    /// use a hashmap, and allows a flat vector for performance.
-    cache_texture_map: Vec<Texture>,
+/// Helper struct for resolving device Textures for use during rendering passes.
+///
+/// Manages the mapping between the at-a-distance texture handles used by the
+/// `RenderBackend` (which does not directly interface with the GPU) and actual
+/// device texture handles.
+struct TextureResolver {
+    /// A vector for fast resolves of texture cache IDs to native texture IDs.
+    /// This maps to a free-list managed by the backend thread / texture cache.
+    /// We free the texture memory associated with a TextureId when its texture
+    /// cache ID is freed by the texture cache, but reuse the TextureId when the
+    /// texture caches's free list reuses the texture cache ID. This saves
+    /// having to use a hashmap, and allows a flat vector for performance.
+    texture_cache_map: Vec<Texture>,
 
     /// Map of external image IDs to native textures.
     external_images: FastHashMap<(ExternalImageId, u8), ExternalTexture>,
 
-    /// A special 1x1 dummy cache texture used for shaders that expect to work
-    /// with the cache but are actually running in the first pass
-    /// when no target is yet provided as a cache texture input.
+    /// A special 1x1 dummy texture used for shaders that expect to work with
+    /// the output of the previous pass but are actually running in the first
+    /// pass.
     dummy_cache_texture: Texture,
 
-    /// The current cache textures.
-    cache_rgba8_texture: Option<ActiveTexture>,
-    cache_a8_texture: Option<ActiveTexture>,
+    /// The outputs of the previous pass, if applicable.
+    prev_pass_color: Option<ActiveTexture>,
+    prev_pass_alpha: Option<ActiveTexture>,
 
-    /// An alpha texture shared between all passes.
-    //TODO: just use the standard texture saving logic instead.
-    shared_alpha_texture: Option<Texture>,
+    /// Saved render targets from previous passes. This is used when a pass
+    /// needs access to the result of a pass other than the immediately-preceding
+    /// one. In this case, the `RenderTask` will get a a non-`None` `saved_index`,
+    /// which will cause the resulting render target to be persisted in this list
+    /// (at that index) until the end of the frame.
+    saved_targets: Vec<Texture>,
 
-    /// Saved cache textures that are to be re-used.
-    saved_textures: Vec<Texture>,
-
-    /// General pool of render targets.
+    /// Pool of idle render target textures ready for re-use.
+    ///
+    /// Naively, it would seem like we only ever need two pairs of (color,
+    /// alpha) render targets: one for the output of the previous pass (serving
+    /// as input to the current pass), and one for the output of the current
+    /// pass. However, there are cases where the output of one pass is used as
+    /// the input to multiple future passes. For example, drop-shadows draw the
+    /// picture in pass X, then reference it in pass X+1 to create the blurred
+    /// shadow, and pass the results of both X and X+1 to pass X+2 draw the
+    /// actual content.
+    ///
+    /// See the comments in `allocate_target_texture` for more insight on why
+    /// reuse is a win.
     render_target_pool: Vec<Texture>,
 }
 
-impl SourceTextureResolver {
-    fn new(device: &mut Device) -> SourceTextureResolver {
+impl TextureResolver {
+    fn new(device: &mut Device) -> TextureResolver {
         let mut dummy_cache_texture = device
             .create_texture(TextureTarget::Array, ImageFormat::BGRA8);
         device.init_texture::<u8>(
@@ -757,14 +786,13 @@ impl SourceTextureResolver {
             None,
         );
 
-        SourceTextureResolver {
-            cache_texture_map: Vec::new(),
+        TextureResolver {
+            texture_cache_map: Vec::new(),
             external_images: FastHashMap::default(),
             dummy_cache_texture,
-            cache_a8_texture: None,
-            cache_rgba8_texture: None,
-            shared_alpha_texture: None,
-            saved_textures: Vec::default(),
+            prev_pass_alpha: None,
+            prev_pass_color: None,
+            saved_targets: Vec::default(),
             render_target_pool: Vec::new(),
         }
     }
@@ -772,7 +800,7 @@ impl SourceTextureResolver {
     fn deinit(self, device: &mut Device) {
         device.delete_texture(self.dummy_cache_texture);
 
-        for texture in self.cache_texture_map {
+        for texture in self.texture_cache_map {
             device.delete_texture(texture);
         }
 
@@ -782,18 +810,16 @@ impl SourceTextureResolver {
     }
 
     fn begin_frame(&mut self) {
-        assert!(self.cache_rgba8_texture.is_none());
-        assert!(self.cache_a8_texture.is_none());
-        assert!(self.saved_textures.is_empty());
+        assert!(self.prev_pass_color.is_none());
+        assert!(self.prev_pass_alpha.is_none());
+        assert!(self.saved_targets.is_empty());
     }
 
     fn end_frame(&mut self, device: &mut Device, frame_id: FrameId) {
         // return the cached targets to the pool
         self.end_pass(None, None);
-        // return the global alpha texture
-        self.render_target_pool.extend(self.shared_alpha_texture.take());
         // return the saved targets as well
-        self.render_target_pool.extend(self.saved_textures.drain(..));
+        self.render_target_pool.extend(self.saved_targets.drain(..));
 
         // GC the render target pool.
         //
@@ -830,23 +856,18 @@ impl SourceTextureResolver {
         // Also assign the pool index of those cache textures to last pass's index because this is
         // the result of last pass.
         // Note: the order here is important, needs to match the logic in `RenderPass::build()`.
-        if let Some(at) = self.cache_rgba8_texture.take() {
-            assert!(!at.is_shared);
+        if let Some(at) = self.prev_pass_color.take() {
             if let Some(index) = at.saved_index {
-                assert_eq!(self.saved_textures.len(), index.0);
-                self.saved_textures.push(at.texture);
+                assert_eq!(self.saved_targets.len(), index.0);
+                self.saved_targets.push(at.texture);
             } else {
                 self.render_target_pool.push(at.texture);
             }
         }
-        if let Some(at) = self.cache_a8_texture.take() {
+        if let Some(at) = self.prev_pass_alpha.take() {
             if let Some(index) = at.saved_index {
-                assert!(!at.is_shared);
-                assert_eq!(self.saved_textures.len(), index.0);
-                self.saved_textures.push(at.texture);
-            } else if at.is_shared {
-                assert!(self.shared_alpha_texture.is_none());
-                self.shared_alpha_texture = Some(at.texture);
+                assert_eq!(self.saved_targets.len(), index.0);
+                self.saved_targets.push(at.texture);
             } else {
                 self.render_target_pool.push(at.texture);
             }
@@ -854,40 +875,40 @@ impl SourceTextureResolver {
 
         // We have another pass to process, make these textures available
         // as inputs to the next pass.
-        self.cache_rgba8_texture = rgba8_texture;
-        self.cache_a8_texture = a8_texture;
+        self.prev_pass_color = rgba8_texture;
+        self.prev_pass_alpha = a8_texture;
     }
 
     // Bind a source texture to the device.
-    fn bind(&self, texture_id: &SourceTexture, sampler: TextureSampler, device: &mut Device) {
+    fn bind(&self, texture_id: &TextureSource, sampler: TextureSampler, device: &mut Device) {
         match *texture_id {
-            SourceTexture::Invalid => {}
-            SourceTexture::CacheA8 => {
-                let texture = match self.cache_a8_texture {
+            TextureSource::Invalid => {}
+            TextureSource::PrevPassAlpha => {
+                let texture = match self.prev_pass_alpha {
                     Some(ref at) => &at.texture,
                     None => &self.dummy_cache_texture,
                 };
                 device.bind_texture(sampler, texture);
             }
-            SourceTexture::CacheRGBA8 => {
-                let texture = match self.cache_rgba8_texture {
+            TextureSource::PrevPassColor => {
+                let texture = match self.prev_pass_color {
                     Some(ref at) => &at.texture,
                     None => &self.dummy_cache_texture,
                 };
                 device.bind_texture(sampler, texture);
             }
-            SourceTexture::External(external_image) => {
+            TextureSource::External(external_image) => {
                 let texture = self.external_images
                     .get(&(external_image.id, external_image.channel_index))
                     .expect(&format!("BUG: External image should be resolved by now"));
                 device.bind_external_texture(sampler, texture);
             }
-            SourceTexture::TextureCache(index) => {
-                let texture = &self.cache_texture_map[index.0];
+            TextureSource::TextureCache(index) => {
+                let texture = &self.texture_cache_map[index.0];
                 device.bind_texture(sampler, texture);
             }
-            SourceTexture::RenderTaskCache(saved_index) => {
-                let texture = &self.saved_textures[saved_index.0];
+            TextureSource::RenderTaskCache(saved_index) => {
+                let texture = &self.saved_targets[saved_index.0];
                 device.bind_texture(sampler, texture)
             }
         }
@@ -896,29 +917,29 @@ impl SourceTextureResolver {
     // Get the real (OpenGL) texture ID for a given source texture.
     // For a texture cache texture, the IDs are stored in a vector
     // map for fast access.
-    fn resolve(&self, texture_id: &SourceTexture) -> Option<&Texture> {
+    fn resolve(&self, texture_id: &TextureSource) -> Option<&Texture> {
         match *texture_id {
-            SourceTexture::Invalid => None,
-            SourceTexture::CacheA8 => Some(
-                match self.cache_a8_texture {
+            TextureSource::Invalid => None,
+            TextureSource::PrevPassAlpha => Some(
+                match self.prev_pass_alpha {
                     Some(ref at) => &at.texture,
                     None => &self.dummy_cache_texture,
                 }
             ),
-            SourceTexture::CacheRGBA8 => Some(
-                match self.cache_rgba8_texture {
+            TextureSource::PrevPassColor => Some(
+                match self.prev_pass_color {
                     Some(ref at) => &at.texture,
                     None => &self.dummy_cache_texture,
                 }
             ),
-            SourceTexture::External(..) => {
+            TextureSource::External(..) => {
                 panic!("BUG: External textures cannot be resolved, they can only be bound.");
             }
-            SourceTexture::TextureCache(index) => {
-                Some(&self.cache_texture_map[index.0])
+            TextureSource::TextureCache(index) => {
+                Some(&self.texture_cache_map[index.0])
             }
-            SourceTexture::RenderTaskCache(saved_index) => {
-                Some(&self.saved_textures[saved_index.0])
+            TextureSource::RenderTaskCache(saved_index) => {
+                Some(&self.saved_targets[saved_index.0])
             }
         }
     }
@@ -928,7 +949,7 @@ impl SourceTextureResolver {
 
         // We're reporting GPU memory rather than heap-allocations, so we don't
         // use size_of_op.
-        for t in self.cache_texture_map.iter() {
+        for t in self.texture_cache_map.iter() {
             report.texture_cache_textures += t.size_in_bytes();
         }
         for t in self.render_target_pool.iter() {
@@ -963,9 +984,9 @@ impl CacheRow {
     }
 }
 
-/// The bus over which CPU and GPU versions of the cache
+/// The bus over which CPU and GPU versions of the GPU cache
 /// get synchronized.
-enum CacheBus {
+enum GpuCacheBus {
     /// PBO-based updates, currently operate on a row granularity.
     /// Therefore, are subject to fragmentation issues.
     PixelBuffer {
@@ -993,12 +1014,12 @@ enum CacheBus {
 }
 
 /// The device-specific representation of the cache texture in gpu_cache.rs
-struct CacheTexture {
+struct GpuCacheTexture {
     texture: Texture,
-    bus: CacheBus,
+    bus: GpuCacheBus,
 }
 
-impl CacheTexture {
+impl GpuCacheTexture {
     fn new(device: &mut Device, use_scatter: bool) -> Result<Self, RendererError> {
         let texture = device.create_texture(TextureTarget::Default, ImageFormat::RGBAF32);
 
@@ -1013,7 +1034,7 @@ impl CacheTexture {
                 buf_position.stream_with(&desc::GPU_CACHE_UPDATE.vertex_attributes[0..1]),
                 buf_value   .stream_with(&desc::GPU_CACHE_UPDATE.vertex_attributes[1..2]),
             ]);
-            CacheBus::Scatter {
+            GpuCacheBus::Scatter {
                 program,
                 vao,
                 buf_position,
@@ -1022,14 +1043,14 @@ impl CacheTexture {
             }
         } else {
             let buffer = device.create_pbo();
-            CacheBus::PixelBuffer {
+            GpuCacheBus::PixelBuffer {
                 buffer,
                 rows: Vec::new(),
                 cpu_blocks: Vec::new(),
             }
         };
 
-        Ok(CacheTexture {
+        Ok(GpuCacheTexture {
             texture,
             bus,
         })
@@ -1038,10 +1059,10 @@ impl CacheTexture {
     fn deinit(self, device: &mut Device) {
         device.delete_texture(self.texture);
         match self.bus {
-            CacheBus::PixelBuffer { buffer, ..} => {
+            GpuCacheBus::PixelBuffer { buffer, ..} => {
                 device.delete_pbo(buffer);
             }
-            CacheBus::Scatter { program, vao, buf_position, buf_value, ..} => {
+            GpuCacheBus::Scatter { program, vao, buf_position, buf_value, ..} => {
                 device.delete_program(program);
                 device.delete_custom_vao(vao);
                 device.delete_vbo(buf_position);
@@ -1065,7 +1086,7 @@ impl CacheTexture {
         let new_size = DeviceUintSize::new(MAX_VERTEX_TEXTURE_WIDTH as _, max_height);
 
         match self.bus {
-            CacheBus::PixelBuffer { ref mut rows, .. } => {
+            GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                 if max_height > old_size.height {
                     // Create a f32 texture that can be used for the vertex shader
                     // to fetch data from.
@@ -1087,7 +1108,7 @@ impl CacheTexture {
                     }
                 }
             }
-            CacheBus::Scatter {
+            GpuCacheBus::Scatter {
                 ref mut buf_position,
                 ref mut buf_value,
                 ref mut count,
@@ -1122,7 +1143,7 @@ impl CacheTexture {
 
     fn update(&mut self, device: &mut Device, updates: &GpuCacheUpdateList) {
         match self.bus {
-            CacheBus::PixelBuffer { ref mut rows, ref mut cpu_blocks, .. } => {
+            GpuCacheBus::PixelBuffer { ref mut rows, ref mut cpu_blocks, .. } => {
                 for update in &updates.updates {
                     match *update {
                         GpuCacheUpdate::Copy {
@@ -1155,7 +1176,7 @@ impl CacheTexture {
                     }
                 }
             }
-            CacheBus::Scatter {
+            GpuCacheBus::Scatter {
                 ref buf_position,
                 ref buf_value,
                 ref mut count,
@@ -1193,7 +1214,7 @@ impl CacheTexture {
 
     fn flush(&mut self, device: &mut Device) -> usize {
         match self.bus {
-            CacheBus::PixelBuffer { ref buffer, ref mut rows, ref cpu_blocks } => {
+            GpuCacheBus::PixelBuffer { ref buffer, ref mut rows, ref cpu_blocks } => {
                 let rows_dirty = rows
                     .iter()
                     .filter(|row| row.is_dirty)
@@ -1228,7 +1249,7 @@ impl CacheTexture {
 
                 rows_dirty
             }
-            CacheBus::Scatter { ref program, ref vao, count, .. } => {
+            GpuCacheBus::Scatter { ref program, ref vao, count, .. } => {
                 device.disable_depth();
                 device.set_blend(false);
                 device.bind_program(program);
@@ -1397,6 +1418,9 @@ pub struct RendererVAOs {
 
 /// The renderer is responsible for submitting to the GPU the work prepared by the
 /// RenderBackend.
+///
+/// We have a separate `Renderer` instance for each instance of WebRender (generally
+/// one per OS window), and all instances share the same thread.
 pub struct Renderer {
     result_rx: Receiver<ResultMsg>,
     debug_server: DebugServer,
@@ -1436,7 +1460,7 @@ pub struct Renderer {
     prim_header_i_texture: VertexDataTexture,
     transforms_texture: VertexDataTexture,
     render_task_texture: VertexDataTexture,
-    gpu_cache_texture: CacheTexture,
+    gpu_cache_texture: GpuCacheTexture,
     #[cfg(feature = "debug_renderer")]
     gpu_cache_debug_chunks: Vec<GpuDebugChunk>,
 
@@ -1446,7 +1470,7 @@ pub struct Renderer {
     pipeline_info: PipelineInfo,
 
     // Manages and resolves source textures IDs to real texture IDs.
-    texture_resolver: SourceTextureResolver,
+    texture_resolver: TextureResolver,
 
     // A PBO used to do asynchronous texture cache uploads.
     texture_cache_upload_pbo: PBO,
@@ -1698,14 +1722,14 @@ impl Renderer {
         let scale_vao = device.create_vao_with_new_instances(&desc::SCALE, &prim_vao);
         let texture_cache_upload_pbo = device.create_pbo();
 
-        let texture_resolver = SourceTextureResolver::new(&mut device);
+        let texture_resolver = TextureResolver::new(&mut device);
 
         let prim_header_f_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAF32);
         let prim_header_i_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAI32);
         let transforms_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAF32);
         let render_task_texture = VertexDataTexture::new(&mut device, ImageFormat::RGBAF32);
 
-        let gpu_cache_texture = CacheTexture::new(
+        let gpu_cache_texture = GpuCacheTexture::new(
             &mut device,
             options.scatter_gpu_cache_updates,
         )?;
@@ -2300,13 +2324,13 @@ impl Renderer {
             | DebugCommand::SimulateLongLowPrioritySceneBuild(_) => {}
             DebugCommand::InvalidateGpuCache => {
                 match self.gpu_cache_texture.bus {
-                    CacheBus::PixelBuffer { ref mut rows, .. } => {
+                    GpuCacheBus::PixelBuffer { ref mut rows, .. } => {
                         info!("Invalidating GPU caches");
                         for row in rows {
                             row.is_dirty = true;
                         }
                     }
-                    CacheBus::Scatter { .. } => {
+                    GpuCacheBus::Scatter { .. } => {
                         warn!("Unable to invalidate scattered GPU cache");
                     }
                 }
@@ -2651,7 +2675,7 @@ impl Renderer {
         // Note: the texture might have changed during the `update`,
         // so we need to bind it here.
         self.device.bind_texture(
-            TextureSampler::ResourceCache,
+            TextureSampler::GpuCache,
             &self.gpu_cache_texture.texture,
         );
     }
@@ -2672,13 +2696,13 @@ impl Renderer {
                         render_target,
                     } => {
                         let CacheTextureId(cache_texture_index) = update.id;
-                        if self.texture_resolver.cache_texture_map.len() == cache_texture_index {
+                        if self.texture_resolver.texture_cache_map.len() == cache_texture_index {
                             // Create a new native texture, as requested by the texture cache.
                             let texture = self.device.create_texture(TextureTarget::Array, format);
-                            self.texture_resolver.cache_texture_map.push(texture);
+                            self.texture_resolver.texture_cache_map.push(texture);
                         }
                         let texture =
-                            &mut self.texture_resolver.cache_texture_map[cache_texture_index];
+                            &mut self.texture_resolver.texture_cache_map[cache_texture_index];
                         assert_eq!(texture.get_format(), format);
 
                         // Ensure no PBO is bound when creating the texture storage,
@@ -2700,7 +2724,7 @@ impl Renderer {
                         layer_index,
                         offset,
                     } => {
-                        let texture = &self.texture_resolver.cache_texture_map[update.id.0];
+                        let texture = &self.texture_resolver.texture_cache_map[update.id.0];
                         let mut uploader = self.device.upload_texture(
                             texture,
                             &self.texture_cache_upload_pbo,
@@ -2748,7 +2772,7 @@ impl Renderer {
                         self.profile_counters.texture_data_uploaded.add(bytes_uploaded >> 10);
                     }
                     TextureUpdateOp::Free => {
-                        let texture = &mut self.texture_resolver.cache_texture_map[update.id.0];
+                        let texture = &mut self.texture_resolver.texture_cache_map[update.id.0];
                         self.device.free_texture_storage(texture);
                     }
                 }
@@ -2831,7 +2855,7 @@ impl Renderer {
         }
 
         let cache_texture = self.texture_resolver
-            .resolve(&SourceTexture::CacheRGBA8)
+            .resolve(&TextureSource::PrevPassColor)
             .unwrap();
 
         // Before submitting the composite batch, do the
@@ -2909,7 +2933,7 @@ impl Renderer {
                     // TODO(gw): Support R8 format here once we start
                     //           creating mips for alpha masks.
                     let src_texture = self.texture_resolver
-                        .resolve(&SourceTexture::CacheRGBA8)
+                        .resolve(&TextureSource::PrevPassColor)
                         .expect("BUG: invalid source texture");
                     let source = &render_tasks[task_id];
                     let (source_rect, layer) = source.get_target_rect();
@@ -2928,7 +2952,7 @@ impl Renderer {
     fn handle_scaling(
         &mut self,
         scalings: &[ScalingInstance],
-        source: SourceTexture,
+        source: TextureSource,
         projection: &Transform3D<f32>,
         stats: &mut RendererStats,
     ) {
@@ -2937,12 +2961,12 @@ impl Renderer {
         }
 
         match source {
-            SourceTexture::CacheRGBA8 => {
+            TextureSource::PrevPassColor => {
                 self.shaders.cs_scale_rgba8.bind(&mut self.device,
                                                  &projection,
                                                  &mut self.renderer_errors);
             }
-            SourceTexture::CacheA8 => {
+            TextureSource::PrevPassAlpha => {
                 self.shaders.cs_scale_a8.bind(&mut self.device,
                                               &projection,
                                               &mut self.renderer_errors);
@@ -3064,7 +3088,7 @@ impl Renderer {
             }
         }
 
-        self.handle_scaling(&target.scalings, SourceTexture::CacheRGBA8, projection, stats);
+        self.handle_scaling(&target.scalings, TextureSource::PrevPassColor, projection, stats);
 
         //TODO: record the pixel count for cached primitives
 
@@ -3356,7 +3380,7 @@ impl Renderer {
             }
         }
 
-        self.handle_scaling(&target.scalings, SourceTexture::CacheA8, projection, stats);
+        self.handle_scaling(&target.scalings, TextureSource::PrevPassAlpha, projection, stats);
 
         // Draw the clip items into the tiled alpha mask.
         {
@@ -3387,8 +3411,8 @@ impl Renderer {
                 let textures = BatchTextures {
                     colors: [
                         mask_texture_id.clone(),
-                        SourceTexture::Invalid,
-                        SourceTexture::Invalid,
+                        TextureSource::Invalid,
+                        TextureSource::Invalid,
                     ],
                 };
                 self.shaders.cs_clip_box_shadow
@@ -3423,8 +3447,8 @@ impl Renderer {
                 let textures = BatchTextures {
                     colors: [
                         mask_texture_id.clone(),
-                        SourceTexture::Invalid,
-                        SourceTexture::Invalid,
+                        TextureSource::Invalid,
+                        TextureSource::Invalid,
                     ],
                 };
                 self.shaders.cs_clip_image
@@ -3443,15 +3467,16 @@ impl Renderer {
 
     fn draw_texture_cache_target(
         &mut self,
-        texture: &SourceTexture,
+        texture: &CacheTextureId,
         layer: i32,
         target: &TextureCacheRenderTarget,
         render_tasks: &RenderTaskTree,
         stats: &mut RendererStats,
     ) {
+        let texture_source = TextureSource::TextureCache(*texture);
         let (target_size, projection) = {
             let texture = self.texture_resolver
-                .resolve(texture)
+                .resolve(&texture_source)
                 .expect("BUG: invalid target texture");
             let target_size = texture.get_dimensions();
             let projection = Transform3D::ortho(
@@ -3475,7 +3500,7 @@ impl Renderer {
 
         {
             let texture = self.texture_resolver
-                .resolve(texture)
+                .resolve(&texture_source)
                 .expect("BUG: invalid target texture");
             self.device
                 .bind_draw_target(Some((texture, layer)), Some(target_size));
@@ -3660,6 +3685,18 @@ impl Renderer {
         }
     }
 
+    /// Allocates a texture to be used as the output for a rendering pass.
+    ///
+    /// We make an effort to reuse render targe textures across passes and
+    /// across frames. Reusing a texture with the same dimensions (width,
+    /// height, and layer-count) and format is obviously ideal. Reusing a
+    /// texture with different dimensions but the same format can be faster
+    /// than allocating a new texture, since it basically boils down to
+    /// a realloc in GPU memory, which can be very cheap if the existing
+    /// region can be resized. However, some drivers/GPUs require textures
+    /// with different formats to be allocated in different arenas,
+    /// reinitializing with a different format can force a large copy. As
+    /// such, we just allocate a new texture in that case.
     fn allocate_target_texture<T: RenderTarget>(
         &mut self,
         list: &mut RenderTargetList<T>,
@@ -3725,7 +3762,6 @@ impl Renderer {
         Some(ActiveTexture {
             texture,
             saved_index: list.saved_index.clone(),
-            is_shared: list.is_shared,
         })
     }
 
@@ -3767,8 +3803,8 @@ impl Renderer {
             &self.render_task_texture.texture,
         );
 
-        debug_assert!(self.texture_resolver.cache_a8_texture.is_none());
-        debug_assert!(self.texture_resolver.cache_rgba8_texture.is_none());
+        debug_assert!(self.texture_resolver.prev_pass_alpha.is_none());
+        debug_assert!(self.texture_resolver.prev_pass_color.is_none());
     }
 
     fn draw_tile_frame(
@@ -3797,13 +3833,13 @@ impl Renderer {
             self.gpu_profile.place_marker(&format!("pass {}", pass_index));
 
             self.texture_resolver.bind(
-                &SourceTexture::CacheA8,
-                TextureSampler::CacheA8,
+                &TextureSource::PrevPassAlpha,
+                TextureSampler::PrevPassAlpha,
                 &mut self.device,
             );
             self.texture_resolver.bind(
-                &SourceTexture::CacheRGBA8,
-                TextureSampler::CacheRGBA8,
+                &TextureSource::PrevPassColor,
+                TextureSampler::PrevPassColor,
                 &mut self.device,
             );
 
@@ -3909,12 +3945,6 @@ impl Renderer {
                 }
             };
 
-            //Note: the `end_pass` will make sure this texture is not recycled this frame
-            if let Some(ActiveTexture { ref texture, is_shared: true, .. }) = cur_alpha {
-                self.device
-                    .bind_texture(TextureSampler::SharedCacheA8, texture);
-            }
-
             self.texture_resolver.end_pass(
                 cur_alpha,
                 cur_color,
@@ -4039,7 +4069,7 @@ impl Renderer {
         let mut size = 512;
         let fb_width = framebuffer_size.width as i32;
         let num_layers: i32 = self.texture_resolver
-            .cache_texture_map
+            .texture_cache_map
             .iter()
             .map(|texture| texture.get_layer_count())
             .sum();
@@ -4051,7 +4081,7 @@ impl Renderer {
         }
 
         let mut i = 0;
-        for texture in &self.texture_resolver.cache_texture_map {
+        for texture in &self.texture_resolver.texture_cache_map {
             let y = spacing + if self.debug_flags.contains(DebugFlags::RENDER_TARGET_DBG) {
                 528
             } else {
@@ -4230,7 +4260,7 @@ impl Renderer {
         let mut report = MemoryReport::default();
 
         // GPU cache CPU memory.
-        if let CacheBus::PixelBuffer{ref cpu_blocks, ..} = self.gpu_cache_texture.bus {
+        if let GpuCacheBus::PixelBuffer{ref cpu_blocks, ..} = self.gpu_cache_texture.bus {
             report.gpu_cache_cpu_mirror += self.size_of(cpu_blocks.as_ptr());
         }
 
@@ -4740,7 +4770,7 @@ impl Renderer {
             };
 
             info!("saving cached textures");
-            for texture in &self.texture_resolver.cache_texture_map {
+            for texture in &self.texture_resolver.texture_cache_map {
                 let file_name = format!("cache-{}", plain_self.textures.len() + 1);
                 info!("\t{}", file_name);
                 let plain = Self::save_texture(texture, &file_name, &config.root, &mut self.device);
@@ -4794,14 +4824,14 @@ impl Renderer {
             info!("loading cached textures");
             self.device.begin_frame();
 
-            for texture in self.texture_resolver.cache_texture_map.drain(..) {
+            for texture in self.texture_resolver.texture_cache_map.drain(..) {
                 self.device.delete_texture(texture);
             }
             for texture in renderer.textures {
                 info!("\t{}", texture.data);
                 let mut t = self.device.create_texture(TextureTarget::Array, texture.format);
                 Self::load_texture(&mut t, &texture, &root, &mut self.device);
-                self.texture_resolver.cache_texture_map.push(t);
+                self.texture_resolver.texture_cache_map.push(t);
             }
 
             info!("loading gpu cache");
@@ -4812,7 +4842,7 @@ impl Renderer {
                 &mut self.device,
             );
             match self.gpu_cache_texture.bus {
-                CacheBus::PixelBuffer { ref mut rows, ref mut cpu_blocks, .. } => {
+                GpuCacheBus::PixelBuffer { ref mut rows, ref mut cpu_blocks, .. } => {
                     let dim = self.gpu_cache_texture.texture.get_dimensions();
                     let blocks = unsafe {
                         slice::from_raw_parts(
@@ -4826,7 +4856,7 @@ impl Renderer {
                     rows.extend((0 .. dim.height).map(|_| CacheRow::new()));
                     cpu_blocks.extend_from_slice(blocks);
                 }
-                CacheBus::Scatter { .. } => {}
+                GpuCacheBus::Scatter { .. } => {}
             }
             self.gpu_cache_frame_id = renderer.gpu_cache_frame_id;
 
diff --git a/gfx/webrender/src/resource_cache.rs b/gfx/webrender/src/resource_cache.rs
index 3ee4f82a5fc0..324ee80663e1 100644
--- a/gfx/webrender/src/resource_cache.rs
+++ b/gfx/webrender/src/resource_cache.rs
@@ -28,7 +28,7 @@ use glyph_rasterizer::{FontInstance, GlyphFormat, GlyphKey, GlyphRasterizer};
 use gpu_cache::{GpuCache, GpuCacheAddress, GpuCacheHandle};
 use gpu_types::UvRectKind;
 use image::{compute_tile_range, for_each_tile_in_range};
-use internal_types::{FastHashMap, FastHashSet, SourceTexture, TextureUpdateList};
+use internal_types::{FastHashMap, FastHashSet, TextureSource, TextureUpdateList};
 use profiler::{ResourceProfileCounters, TextureCacheProfileCounters};
 use render_backend::FrameId;
 use render_task::{RenderTaskCache, RenderTaskCacheKey, RenderTaskId};
@@ -67,7 +67,7 @@ pub struct GlyphFetchResult {
 // various CPU-side structures.
 #[derive(Debug, Clone)]
 pub struct CacheItem {
-    pub texture_id: SourceTexture,
+    pub texture_id: TextureSource,
     pub uv_rect_handle: GpuCacheHandle,
     pub uv_rect: DeviceUintRect,
     pub texture_layer: i32,
@@ -76,7 +76,7 @@ pub struct CacheItem {
 impl CacheItem {
     pub fn invalid() -> Self {
         CacheItem {
-            texture_id: SourceTexture::Invalid,
+            texture_id: TextureSource::Invalid,
             uv_rect_handle: GpuCacheHandle::new(),
             uv_rect: DeviceUintRect::zero(),
             texture_layer: 0,
@@ -1232,13 +1232,13 @@ impl ResourceCache {
         gpu_cache: &mut GpuCache,
         mut f: F,
     ) where
-        F: FnMut(SourceTexture, GlyphFormat, &[GlyphFetchResult]),
+        F: FnMut(TextureSource, GlyphFormat, &[GlyphFetchResult]),
     {
         debug_assert_eq!(self.state, State::QueryResources);
 
         self.glyph_rasterizer.prepare_font(&mut font);
 
-        let mut current_texture_id = SourceTexture::Invalid;
+        let mut current_texture_id = TextureSource::Invalid;
         let mut current_glyph_format = GlyphFormat::Subpixel;
         debug_assert!(fetch_buffer.is_empty());
 
@@ -1282,14 +1282,14 @@ impl ResourceCache {
         gpu_cache: &mut GpuCache,
         mut f: F,
     ) where
-        F: FnMut(SourceTexture, GlyphFormat, &[GlyphFetchResult]),
+        F: FnMut(TextureSource, GlyphFormat, &[GlyphFetchResult]),
     {
         debug_assert_eq!(self.state, State::QueryResources);
 
         self.glyph_rasterizer.prepare_font(&mut font);
         let glyph_key_cache = self.cached_glyphs.get_glyph_key_cache_for_font(&font);
 
-        let mut current_texture_id = SourceTexture::Invalid;
+        let mut current_texture_id = TextureSource::Invalid;
         let mut current_glyph_format = GlyphFormat::Subpixel;
         debug_assert!(fetch_buffer.is_empty());
 
@@ -1626,6 +1626,10 @@ impl ResourceCache {
         self.cached_images
             .clear_keys(|key| key.0 == namespace);
 
+        self.blob_image_templates.retain(|key, _| key.0 != namespace);
+
+        self.rasterized_blob_images.retain(|key, _| key.0 != namespace);
+
         self.resources.font_instances
             .write()
             .unwrap()
diff --git a/gfx/webrender/src/scene_builder.rs b/gfx/webrender/src/scene_builder.rs
index 8d513576cd7e..627b4375ec1a 100644
--- a/gfx/webrender/src/scene_builder.rs
+++ b/gfx/webrender/src/scene_builder.rs
@@ -4,7 +4,7 @@
 
 use api::{AsyncBlobImageRasterizer, BlobImageRequest, BlobImageParams, BlobImageResult};
 use api::{DocumentId, PipelineId, ApiMsg, FrameMsg, ResourceUpdate, ExternalEvent, Epoch};
-use api::{BuiltDisplayList, ColorF, LayoutSize, NotificationRequest, Checkpoint};
+use api::{BuiltDisplayList, ColorF, LayoutSize, NotificationRequest, Checkpoint, IdNamespace};
 use api::channel::MsgSender;
 #[cfg(feature = "capture")]
 use capture::CaptureConfig;
@@ -122,6 +122,7 @@ pub enum SceneBuilderRequest {
     DeleteDocument(DocumentId),
     WakeUp,
     Flush(MsgSender<()>),
+    ClearNamespace(IdNamespace),
     SetFrameBuilderConfig(FrameBuilderConfig),
     SimulateLongSceneBuild(u32),
     SimulateLongLowPrioritySceneBuild(u32),
@@ -137,6 +138,7 @@ pub enum SceneBuilderResult {
     Transaction(Box<BuiltTransaction>, Option<Sender<SceneSwapResult>>),
     ExternalEvent(ExternalEvent),
     FlushComplete(MsgSender<()>),
+    ClearNamespace(IdNamespace),
     Stopped,
 }
 
@@ -221,6 +223,15 @@ impl SceneBuilder {
         )
     }
 
+    /// Send a message to the render backend thread.
+    ///
+    /// We first put something in the result queue and then send a wake-up
+    /// message to the api queue that the render backend is blocking on.
+    pub fn send(&self, msg: SceneBuilderResult) {
+        self.tx.send(msg).unwrap();
+        let _ = self.api_tx.send(ApiMsg::WakeUp);
+    }
+
     /// The scene builder thread's event loop.
     pub fn run(&mut self) {
         if let Some(ref hooks) = self.hooks {
@@ -231,8 +242,7 @@ impl SceneBuilder {
             match self.rx.recv() {
                 Ok(SceneBuilderRequest::WakeUp) => {}
                 Ok(SceneBuilderRequest::Flush(tx)) => {
-                    self.tx.send(SceneBuilderResult::FlushComplete(tx)).unwrap();
-                    let _ = self.api_tx.send(ApiMsg::WakeUp);
+                    self.send(SceneBuilderResult::FlushComplete(tx));
                 }
                 Ok(SceneBuilderRequest::Transaction(mut txn)) => {
                     let built_txn = self.process_transaction(&mut txn);
@@ -244,6 +254,10 @@ impl SceneBuilder {
                 Ok(SceneBuilderRequest::SetFrameBuilderConfig(cfg)) => {
                     self.config = cfg;
                 }
+                Ok(SceneBuilderRequest::ClearNamespace(id)) => {
+                    self.documents.retain(|doc_id, _doc| doc_id.0 != id);
+                    self.send(SceneBuilderResult::ClearNamespace(id));
+                }
                 #[cfg(feature = "replay")]
                 Ok(SceneBuilderRequest::LoadScenes(msg)) => {
                     self.load_scenes(msg);
@@ -253,8 +267,7 @@ impl SceneBuilder {
                     self.save_scene(config);
                 }
                 Ok(SceneBuilderRequest::ExternalEvent(evt)) => {
-                    self.tx.send(SceneBuilderResult::ExternalEvent(evt)).unwrap();
-                    self.api_tx.send(ApiMsg::WakeUp).unwrap();
+                    self.send(SceneBuilderResult::ExternalEvent(evt));
                 }
                 Ok(SceneBuilderRequest::Stop) => {
                     self.tx.send(SceneBuilderResult::Stopped).unwrap();
diff --git a/gfx/webrender/src/shade.rs b/gfx/webrender/src/shade.rs
index df22d1bb9f2c..df342f9eac07 100644
--- a/gfx/webrender/src/shade.rs
+++ b/gfx/webrender/src/shade.rs
@@ -382,12 +382,11 @@ fn create_prim_shader(
                 ("sColor1", TextureSampler::Color1),
                 ("sColor2", TextureSampler::Color2),
                 ("sDither", TextureSampler::Dither),
-                ("sCacheA8", TextureSampler::CacheA8),
-                ("sCacheRGBA8", TextureSampler::CacheRGBA8),
+                ("sPrevPassAlpha", TextureSampler::PrevPassAlpha),
+                ("sPrevPassColor", TextureSampler::PrevPassColor),
                 ("sTransformPalette", TextureSampler::TransformPalette),
                 ("sRenderTasks", TextureSampler::RenderTasks),
-                ("sResourceCache", TextureSampler::ResourceCache),
-                ("sSharedCacheA8", TextureSampler::SharedCacheA8),
+                ("sGpuCache", TextureSampler::GpuCache),
                 ("sPrimitiveHeadersF", TextureSampler::PrimitiveHeadersF),
                 ("sPrimitiveHeadersI", TextureSampler::PrimitiveHeadersI),
             ],
@@ -399,8 +398,7 @@ fn create_prim_shader(
 
 fn create_clip_shader(name: &'static str, device: &mut Device) -> Result<Program, ShaderError> {
     let prefix = format!(
-        "#define WR_MAX_VERTEX_TEXTURE_WIDTH {}U\n
-        #define WR_FEATURE_TRANSFORM\n",
+        "#define WR_MAX_VERTEX_TEXTURE_WIDTH {}U\n",
         MAX_VERTEX_TEXTURE_WIDTH
     );
 
@@ -415,8 +413,7 @@ fn create_clip_shader(name: &'static str, device: &mut Device) -> Result<Program
                 ("sColor0", TextureSampler::Color0),
                 ("sTransformPalette", TextureSampler::TransformPalette),
                 ("sRenderTasks", TextureSampler::RenderTasks),
-                ("sResourceCache", TextureSampler::ResourceCache),
-                ("sSharedCacheA8", TextureSampler::SharedCacheA8),
+                ("sGpuCache", TextureSampler::GpuCache),
                 ("sPrimitiveHeadersF", TextureSampler::PrimitiveHeadersF),
                 ("sPrimitiveHeadersI", TextureSampler::PrimitiveHeadersI),
             ],
diff --git a/gfx/webrender/src/spatial_node.rs b/gfx/webrender/src/spatial_node.rs
index 529b8b8ad60e..75361be38ab1 100644
--- a/gfx/webrender/src/spatial_node.rs
+++ b/gfx/webrender/src/spatial_node.rs
@@ -256,12 +256,20 @@ impl SpatialNode {
                     .pre_mul(&source_transform.into())
                     .pre_mul(&info.source_perspective);
 
-                // The transformation for this viewport in world coordinates is the transformation for
-                // our parent reference frame, plus any accumulated scrolling offsets from nodes
-                // between our reference frame and this node. Finally, we also include
-                // whatever local transformation this reference frame provides.
+                // In order to compute a transformation to world coordinates, we need to apply the
+                // following transforms in order:
+                //   state.parent_accumulated_scroll_offset
+                //   info.source_perspective
+                //   info.source_transform
+                //   info.origin_in_parent_reference_frame
+                //   state.parent_reference_frame_transform
+                // The first one incorporates the scrolling effect of any scrollframes/sticky nodes
+                // between this reference frame and the parent reference frame. The middle three
+                // transforms (which are combined into info.resolved_transform) do the conversion
+                // into the parent reference frame's coordinate space, and then the last one
+                // applies the parent reference frame's transform to the world space.
                 let relative_transform = info.resolved_transform
-                    .post_translate(state.parent_accumulated_scroll_offset)
+                    .pre_translate(&state.parent_accumulated_scroll_offset)
                     .to_transform()
                     .with_destination::<LayoutPixel>();
                 self.world_viewport_transform =
diff --git a/gfx/webrender/src/texture_cache.rs b/gfx/webrender/src/texture_cache.rs
index 2cc3e79a6006..fb80c468c82a 100644
--- a/gfx/webrender/src/texture_cache.rs
+++ b/gfx/webrender/src/texture_cache.rs
@@ -10,7 +10,7 @@ use freelist::{FreeList, FreeListHandle, UpsertResult, WeakFreeListHandle};
 use gpu_cache::{GpuCache, GpuCacheHandle};
 use gpu_types::{ImageSource, UvRectKind};
 use internal_types::{CacheTextureId, FastHashMap, TextureUpdateList, TextureUpdateSource};
-use internal_types::{RenderTargetInfo, SourceTexture, TextureUpdate, TextureUpdateOp};
+use internal_types::{RenderTargetInfo, TextureSource, TextureUpdate, TextureUpdateOp};
 use profiler::{ResourceProfileCounter, TextureCacheProfileCounters};
 use render_backend::FrameId;
 use resource_cache::CacheItem;
@@ -575,7 +575,7 @@ impl TextureCache {
                 };
                 CacheItem {
                     uv_rect_handle: entry.uv_rect_handle,
-                    texture_id: SourceTexture::TextureCache(entry.texture_id),
+                    texture_id: TextureSource::TextureCache(entry.texture_id),
                     uv_rect: DeviceUintRect::new(origin, entry.size),
                     texture_layer: layer_index as i32,
                 }
@@ -589,7 +589,7 @@ impl TextureCache {
     pub fn get_cache_location(
         &self,
         handle: &TextureCacheHandle,
-    ) -> (SourceTexture, i32, DeviceUintRect) {
+    ) -> (CacheTextureId, i32, DeviceUintRect) {
         let handle = handle
             .entry
             .as_ref()
@@ -609,7 +609,7 @@ impl TextureCache {
                 ..
             } => (layer_index, origin),
         };
-        (SourceTexture::TextureCache(entry.texture_id),
+        (entry.texture_id,
          layer_index as i32,
          DeviceUintRect::new(origin, entry.size))
     }
diff --git a/gfx/webrender/src/tiling.rs b/gfx/webrender/src/tiling.rs
index 0172938d188f..3af6741157a1 100644
--- a/gfx/webrender/src/tiling.rs
+++ b/gfx/webrender/src/tiling.rs
@@ -14,7 +14,7 @@ use euclid::{TypedPoint2D, TypedVector2D};
 use gpu_cache::{GpuCache};
 use gpu_types::{BorderInstance, BlurDirection, BlurInstance, PrimitiveHeaders, ScalingInstance};
 use gpu_types::{TransformData, TransformPalette};
-use internal_types::{FastHashMap, SavedTargetIndex, SourceTexture};
+use internal_types::{CacheTextureId, FastHashMap, SavedTargetIndex, TextureSource};
 #[cfg(feature = "pathfinder")]
 use pathfinder_partitioner::mesh::Mesh;
 use prim_store::{PrimitiveStore, DeferredResolve};
@@ -32,6 +32,7 @@ const MIN_TARGET_SIZE: u32 = 2048;
 const STYLE_SOLID: i32 = ((BorderStyle::Solid as i32) << 8) | ((BorderStyle::Solid as i32) << 16);
 const STYLE_MASK: i32 = 0x00FF_FF00;
 
+/// Identifies a given `RenderTarget` in a `RenderTargetList`.
 #[derive(Debug, Copy, Clone)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
@@ -86,12 +87,35 @@ impl TextureAllocator {
     }
 }
 
+/// Represents a number of rendering operations on a surface.
+///
+/// In graphics parlance, a "render target" usually means "a surface (texture or
+/// framebuffer) bound to the output of a shader". This trait has a slightly
+/// different meaning, in that it represents the operations on that surface
+/// _before_ it's actually bound and rendered. So a `RenderTarget` is built by
+/// the `RenderBackend` by inserting tasks, and then shipped over to the
+/// `Renderer` where a device surface is resolved and the tasks are transformed
+/// into draw commands on that surface.
+///
+/// We express this as a trait to generalize over color and alpha surfaces.
+/// a given `RenderTask` will draw to one or the other, depending on its type
+/// and sometimes on its parameters. See `RenderTask::target_kind`.
 pub trait RenderTarget {
+    /// Creates a new RenderTarget of the given type.
     fn new(
         size: Option<DeviceUintSize>,
         screen_size: DeviceIntSize,
     ) -> Self;
+
+    /// Allocates a region of the given size in this target, and returns either
+    /// the offset of that region or `None` if it won't fit.
+    ///
+    /// If a non-`None` result is returned, that value is generally stored in
+    /// a task which is then added to this target via `add_task()`.
     fn allocate(&mut self, size: DeviceUintSize) -> Option<DeviceUintPoint>;
+
+    /// Optional hook to provide additional processing for the target at the
+    /// end of the build phase.
     fn build(
         &mut self,
         _ctx: &mut RenderTargetContext,
@@ -102,13 +126,16 @@ pub trait RenderTarget {
         _transforms: &mut TransformPalette,
     ) {
     }
-    // TODO(gw): It's a bit odd that we need the deferred resolves and mutable
-    //           GPU cache here. They are typically used by the build step
-    //           above. They are used for the blit jobs to allow resolve_image
-    //           to be called. It's a bit of extra overhead to store the image
-    //           key here and the resolve them in the build step separately.
-    //           BUT: if/when we add more texture cache target jobs, we might
-    //           want to tidy this up.
+
+    /// Associates a `RenderTask` with this target. That task must be assigned
+    /// to a region returned by invoking `allocate()` on this target.
+    ///
+    /// TODO(gw): It's a bit odd that we need the deferred resolves and mutable
+    /// GPU cache here. They are typically used by the build step above. They
+    /// are used for the blit jobs to allow resolve_image to be called. It's a
+    /// bit of extra overhead to store the image key here and the resolve them
+    /// in the build step separately.  BUT: if/when we add more texture cache
+    /// target jobs, we might want to tidy this up.
     fn add_task(
         &mut self,
         task_id: RenderTaskId,
@@ -123,14 +150,40 @@ pub trait RenderTarget {
     fn needs_depth(&self) -> bool;
 }
 
+/// A tag used to identify the output format of a `RenderTarget`.
 #[derive(Debug, Copy, Clone, Eq, PartialEq, Hash)]
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum RenderTargetKind {
-    Color, // RGBA32
+    Color, // RGBA8
     Alpha, // R8
 }
 
+/// A series of `RenderTarget` instances, serving as the high-level container
+/// into which `RenderTasks` are assigned.
+///
+/// During the build phase, we iterate over the tasks in each `RenderPass`. For
+/// each task, we invoke `allocate()` on the `RenderTargetList`, which in turn
+/// attempts to allocate an output region in the last `RenderTarget` in the
+/// list. If allocation fails (or if the list is empty), a new `RenderTarget` is
+/// created and appended to the list. The build phase then assign the task into
+/// the target associated with the final allocation.
+///
+/// The result is that each `RenderPass` is associated with one or two
+/// `RenderTargetLists`, depending on whether we have all our tasks have the
+/// same `RenderTargetKind`. The lists are then shipped to the `Renderer`, which
+/// allocates a device texture array, with one slice per render target in the
+/// list.
+///
+/// The upshot of this scheme is that it maximizes batching. In a given pass,
+/// we need to do a separate batch for each individual render target. But with
+/// the texture array, we can expose the entirety of the previous pass to each
+/// task in the current pass in a single batch, which generally allows each
+/// task to be drawn in a single batch regardless of how many results from the
+/// previous pass it depends on.
+///
+/// Note that in some cases (like drop-shadows), we can depend on the output of
+/// a pass earlier than the immediately-preceding pass. See `SavedTargetIndex`.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct RenderTargetList<T> {
@@ -139,7 +192,6 @@ pub struct RenderTargetList<T> {
     pub max_size: DeviceUintSize,
     pub targets: Vec<T>,
     pub saved_index: Option<SavedTargetIndex>,
-    pub is_shared: bool,
 }
 
 impl<T: RenderTarget> RenderTargetList<T> {
@@ -153,7 +205,6 @@ impl<T: RenderTarget> RenderTargetList<T> {
             max_size: DeviceUintSize::new(MIN_TARGET_SIZE, MIN_TARGET_SIZE),
             targets: Vec::new(),
             saved_index: None,
-            is_shared: false,
         }
     }
 
@@ -256,7 +307,7 @@ pub struct FrameOutput {
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum BlitJobSource {
-    Texture(SourceTexture, i32, DeviceIntRect),
+    Texture(TextureSource, i32, DeviceIntRect),
     RenderTask(RenderTaskId),
 }
 
@@ -285,7 +336,10 @@ pub struct GlyphJob {
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct GlyphJob;
 
-/// A render target represents a number of rendering operations on a surface.
+/// Contains the work (in the form of instance arrays) needed to fill a color
+/// color output surface (RGBA8).
+///
+/// See `RenderTarget`.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct ColorRenderTarget {
@@ -500,6 +554,10 @@ impl RenderTarget for ColorRenderTarget {
     }
 }
 
+/// Contains the work (in the form of instance arrays) needed to fill an alpha
+/// output surface (R8).
+///
+/// See `RenderTarget`.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct AlphaRenderTarget {
@@ -727,14 +785,18 @@ impl TextureCacheRenderTarget {
     fn add_glyph_task(&mut self, _: &mut GlyphTask, _: DeviceIntRect) {}
 }
 
+/// Contains the set of `RenderTarget`s specific to the kind of pass.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub enum RenderPassKind {
+    /// The final pass to the main frame buffer, where we have a single color
+    /// target for display to the user.
     MainFramebuffer(ColorRenderTarget),
+    /// An intermediate pass, where we may have multiple targets.
     OffScreen {
         alpha: RenderTargetList<AlphaRenderTarget>,
         color: RenderTargetList<ColorRenderTarget>,
-        texture_cache: FastHashMap<(SourceTexture, i32), TextureCacheRenderTarget>,
+        texture_cache: FastHashMap<(CacheTextureId, i32), TextureCacheRenderTarget>,
     },
 }
 
@@ -742,15 +804,21 @@ pub enum RenderPassKind {
 /// another.
 ///
 /// A render pass can have several render targets if there wasn't enough space in one
-/// target to do all of the rendering for that pass.
+/// target to do all of the rendering for that pass. See `RenderTargetList`.
 #[cfg_attr(feature = "capture", derive(Serialize))]
 #[cfg_attr(feature = "replay", derive(Deserialize))]
 pub struct RenderPass {
+    /// The kind of pass, as well as the set of targets associated with that
+    /// kind of pass.
     pub kind: RenderPassKind,
+    /// The set of tasks to be performed in this pass, as indices into the
+    /// `RenderTaskTree`.
     tasks: Vec<RenderTaskId>,
 }
 
 impl RenderPass {
+    /// Creates a pass for the main framebuffer. There is only one of these, and
+    /// it is always the last pass.
     pub fn new_main_framebuffer(screen_size: DeviceIntSize) -> Self {
         let target = ColorRenderTarget::new(None, screen_size);
         RenderPass {
@@ -759,6 +827,7 @@ impl RenderPass {
         }
     }
 
+    /// Creates an intermediate off-screen pass.
     pub fn new_off_screen(screen_size: DeviceIntSize) -> Self {
         RenderPass {
             kind: RenderPassKind::OffScreen {
@@ -770,6 +839,7 @@ impl RenderPass {
         }
     }
 
+    /// Adds a task to this pass.
     pub fn add_render_task(
         &mut self,
         task_id: RenderTaskId,
@@ -788,6 +858,11 @@ impl RenderPass {
         self.tasks.push(task_id);
     }
 
+    /// Processes this pass to prepare it for rendering.
+    ///
+    /// Among other things, this allocates output regions for each of our tasks
+    /// (added via `add_render_task`) in a RenderTarget and assigns it into that
+    /// target.
     pub fn build(
         &mut self,
         ctx: &mut RenderTargetContext,
@@ -824,11 +899,6 @@ impl RenderPass {
                 );
             }
             RenderPassKind::OffScreen { ref mut color, ref mut alpha, ref mut texture_cache } => {
-                let is_shared_alpha = self.tasks.iter().any(|&task_id| {
-                    let task = &render_tasks[task_id];
-                    task.is_shared() &&
-                        task.target_kind() == RenderTargetKind::Alpha
-                });
                 let saved_color = if self.tasks.iter().any(|&task_id| {
                     let t = &render_tasks[task_id];
                     t.target_kind() == RenderTargetKind::Color && t.saved_index.is_some()
@@ -940,7 +1010,6 @@ impl RenderPass {
                     prim_headers,
                     transforms,
                 );
-                alpha.is_shared = is_shared_alpha;
             }
         }
     }
diff --git a/gfx/webrender/src/util.rs b/gfx/webrender/src/util.rs
index 84b25937acf0..231418484158 100644
--- a/gfx/webrender/src/util.rs
+++ b/gfx/webrender/src/util.rs
@@ -540,6 +540,7 @@ impl<Src, Dst> FastTransform<Src, Dst> {
         }
     }
 
+    #[allow(dead_code)]
     pub fn post_translate(&self, new_offset: TypedVector2D<f32, Dst>) -> Self {
         match *self {
             FastTransform::Offset(offset) => {
diff --git a/gfx/webrender_api/src/api.rs b/gfx/webrender_api/src/api.rs
index 2c3aa2f460db..ea7c44a74f44 100644
--- a/gfx/webrender_api/src/api.rs
+++ b/gfx/webrender_api/src/api.rs
@@ -11,7 +11,6 @@ use std::fmt;
 use std::marker::PhantomData;
 use std::os::raw::c_void;
 use std::path::PathBuf;
-use std::sync::Arc;
 use std::u32;
 use {BuiltDisplayList, BuiltDisplayListDescriptor, ColorF, DeviceIntPoint, DeviceUintRect};
 use {DeviceUintSize, ExternalScrollId, FontInstanceKey, FontInstanceOptions};
@@ -1269,34 +1268,47 @@ pub trait NotificationHandler : Send + Sync {
     fn notify(&self, when: Checkpoint);
 }
 
-#[derive(Clone)]
 pub struct NotificationRequest {
-    handler: Arc<NotificationHandler>,
+    handler: Option<Box<NotificationHandler>>,
     when: Checkpoint,
-    done: bool,
 }
 
 impl NotificationRequest {
-    pub fn new(when: Checkpoint, handler: Arc<NotificationHandler>) -> Self {
+    pub fn new(when: Checkpoint, handler: Box<NotificationHandler>) -> Self {
         NotificationRequest {
-            handler,
+            handler: Some(handler),
             when,
-            done: false,
         }
     }
 
     pub fn when(&self) -> Checkpoint { self.when }
 
     pub fn notify(mut self) {
-        self.handler.notify(self.when);
-        self.done = true;
+        if let Some(handler) = self.handler.take() {
+            handler.notify(self.when);
+        }
     }
 }
 
 impl Drop for NotificationRequest {
     fn drop(&mut self) {
-        if !self.done {
-            self.handler.notify(Checkpoint::TransactionDropped);
+        if let Some(ref mut handler) = self.handler {
+            handler.notify(Checkpoint::TransactionDropped);
+        }
+    }
+}
+
+// This Clone impl yields an "empty" request because we don't want the requests
+// to be notified twice so the request is owned by only one of the API messages
+// (the original one) after the clone.
+// This works in practice because the notifications requests are used for
+// synchronization so we don't need to include them in the recording mechanism
+// in wrench that clones the messages.
+impl Clone for NotificationRequest {
+    fn clone(&self) -> Self {
+        NotificationRequest {
+            when: self.when,
+            handler: None,
         }
     }
 }
diff --git a/gfx/webrender_api/src/image.rs b/gfx/webrender_api/src/image.rs
index 48ad0c96c7fb..2acb20cd05d9 100644
--- a/gfx/webrender_api/src/image.rs
+++ b/gfx/webrender_api/src/image.rs
@@ -127,6 +127,8 @@ pub enum ColorDepth {
     Color10,
     /// 12 bits image
     Color12,
+    /// 16 bits image
+    Color16,
 }
 
 impl ColorDepth {
@@ -136,6 +138,7 @@ impl ColorDepth {
             ColorDepth::Color8 => 8,
             ColorDepth::Color10 => 10,
             ColorDepth::Color12 => 12,
+            ColorDepth::Color16 => 16,
         }
     }
     /// 10 and 12 bits images are encoded using 16 bits integer, we need to
@@ -145,6 +148,7 @@ impl ColorDepth {
             ColorDepth::Color8 => 1.0,
             ColorDepth::Color10 => 64.0,
             ColorDepth::Color12 => 16.0,
+            ColorDepth::Color16 => 1.0,
         }
     }
 }
diff --git a/gfx/webrender_api/src/lib.rs b/gfx/webrender_api/src/lib.rs
index 45bb28e867df..e22d8ae035f1 100644
--- a/gfx/webrender_api/src/lib.rs
+++ b/gfx/webrender_api/src/lib.rs
@@ -2,6 +2,15 @@
  * License, v. 2.0. If a copy of the MPL was not distributed with this
  * file, You can obtain one at http://mozilla.org/MPL/2.0/. */
 
+//! The `webrender_api` crate contains an assortment types and functions used
+//! by WebRender consumers as well as, in many cases, WebRender itself.
+//!
+//! This separation allows Servo to parallelize compilation across `webrender`
+//! and other crates that depend on `webrender_api`. So in practice, we put
+//! things in this crate when Servo needs to use them. Firefox depends on the
+//! `webrender` crate directly, and so this distinction is not really relevant
+//! there.
+
 #![cfg_attr(feature = "nightly", feature(nonzero))]
 #![cfg_attr(feature = "cargo-clippy", allow(float_cmp, too_many_arguments, unreadable_literal))]
 
diff --git a/gfx/webrender_bindings/revision.txt b/gfx/webrender_bindings/revision.txt
index 2ba274927477..06a5efd07d2a 100644
--- a/gfx/webrender_bindings/revision.txt
+++ b/gfx/webrender_bindings/revision.txt
@@ -1 +1 @@
-d7a6d081384ce0da9dd359b0cf4b9f758aab1b67
+9536249e3ed920a920346f6cc0a79473cad16099