Bug 1682195 - Use scissoring to restrict cs_clip_image rather than discard. r=gw

cs_clip_image renders the entire target-space sub-rect, and then uses discard against
the local-space bounds to ensure the primitive gets trimmed down to its actual footprint
within the larger target-space sub-rect. This can be fairly wasteful and slow.

This patch restructures things so that we essentially render with the actual primitive
vertices, and then we either clamp (for axis-aligned) or scissor (for non-axis-aligned)
to restrict the footprint to within the target-space sub-rect.

This then allows the use of a SWGL fast-path for the axis-aligned case that can vastly
speed up the common case.

Differential Revision: https://phabricator.services.mozilla.com/D111316
This commit is contained in:
Lee Salzman 2021-04-09 22:55:33 +00:00
Родитель 858230be78
Коммит b40978d057
10 изменённых файлов: 168 добавлений и 46 удалений

Просмотреть файл

@ -770,6 +770,22 @@ static ALWAYS_INLINE WideR8 blend_span(uint8_t* buf, WideR8 r, int len) {
len);
}
// Writes the packed R8 value `r` to `buf` through unaligned_store. No blending
// happens in this overload; it only stores.
static ALWAYS_INLINE void commit_span(uint8_t* buf, PackedR8 r) {
unaligned_store(buf, r);
}
// Length-limited counterpart of the packed R8 commit: forwards `buf`, `r` and
// `len` to partial_store_span.
// NOTE(review): presumably only the first `len` pixels of `r` are written —
// confirm against partial_store_span.
static ALWAYS_INLINE void commit_span(uint8_t* buf, PackedR8 r, int len) {
partial_store_span(buf, r, len);
}
// Packed R8 convenience overload of blend_span: unpacks `r`, blends the
// unpacked value against `buf` via the overload that accepts the unpacked
// type, and packs the blended result again before returning it.
static ALWAYS_INLINE PackedR8 blend_span(uint8_t* buf, PackedR8 r) {
  auto unpacked = unpack(r);
  auto blended = blend_span(buf, unpacked);
  return pack(blended);
}
// Length-limited packed R8 overload of blend_span: unpacks `r`, blends it
// against `buf` through the `len`-taking overload for the unpacked type, and
// returns the result in packed form.
static ALWAYS_INLINE PackedR8 blend_span(uint8_t* buf, PackedR8 r, int len) {
  auto unpacked = unpack(r);
  auto blended = blend_span(buf, unpacked, len);
  return pack(blended);
}
template <bool BLEND, typename P, typename R>
static ALWAYS_INLINE void commit_blend_span(P* buf, R r) {
if (BLEND) {

Просмотреть файл

@ -481,8 +481,9 @@ static int blendTextureNearestFast(S sampler, vec2 uv, int span,
// Calculate the row pointer within the buffer, clamping to within valid row
// bounds.
P* row =
&sampler->buf[clamp(clampCoord(i.y, sampler->height), minUV.y, maxUV.y) *
sampler->stride];
&((P*)sampler
->buf)[clamp(clampCoord(i.y, sampler->height), minUV.y, maxUV.y) *
sampler->stride];
// Find clamped X bounds within the row.
int minX = clamp(minUV.x, 0, sampler->width - 1);
int maxX = clamp(maxUV.x, minX, sampler->width - 1);
@ -518,7 +519,8 @@ static int blendTextureNearestFast(S sampler, vec2 uv, int span,
// If we still have samples left above the valid sample bounds, then we again
// need to fill this section with a constant clamped sample.
if (curX < endX) {
auto src = applyColor(unpack(bit_cast<packed_type>(U32(row[maxX]))), color);
auto src =
applyColor(unpack(bit_cast<packed_type>(V4<P>(row[maxX]))), color);
commit_solid_span<BLEND>(buf, src, endX - curX);
}
return span;

Просмотреть файл

@ -4,10 +4,9 @@
#include shared,clip_shared
varying vec4 vLocalPos;
varying vec2 vLocalPos;
varying vec2 vClipMaskImageUv;
flat varying vec4 vClipMaskUvRect;
flat varying vec4 vClipMaskUvInnerRect;
#ifdef WR_VERTEX_SHADER
@ -35,13 +34,48 @@ ClipMaskInstanceImage fetch_clip_item() {
return cmi;
}
// Per-vertex result returned by write_clip_image_vertex.
struct ClipImageVertexInfo {
// Vertex position in the primitive's local space, after it has been clamped
// to the local clip rect.
vec2 local_pos;
// local_pos transformed by the primitive transform (homogeneous coordinates).
vec4 world_pos;
};
// Emits one vertex of a clip-mask image tile.
//
// Unlike write_clip_tile_vertex, the tile rect is taken in the primitive's
// local space and transformed *forward* into the target space. Nothing here
// restricts the output to the target bounds; scissoring is relied upon to keep
// the primitive from drawing outside of them.
//
// The vertex is placed inside tile_rect according to aPosition, clamped to
// local_clip_rect, transformed by prim_transform, scaled by
// device_pixel_scale and offset from screen space into task space.
//
// clip_transform and sub_rect are part of the signature but are not read by
// this function.
//
// Returns the clamped local position together with its transformed position.
ClipImageVertexInfo write_clip_image_vertex(RectWithSize tile_rect,
                                            RectWithSize local_clip_rect,
                                            Transform prim_transform,
                                            Transform clip_transform,
                                            RectWithSize sub_rect,
                                            vec2 task_origin,
                                            vec2 screen_origin,
                                            float device_pixel_scale) {
    // Local-space position of this vertex, kept within the local clip rect.
    vec2 tile_pos = tile_rect.p0 + aPosition.xy * tile_rect.size;
    vec2 local_pos = clamp_rect(tile_pos, local_clip_rect);

    vec4 world_pos = prim_transform.m * vec4(local_pos, 0.0, 1.0);

    // The task offset is multiplied by w so that it survives the perspective
    // divide as a plain translation.
    vec2 task_offset = (task_origin - screen_origin) * world_pos.w;
    vec4 final_pos = vec4(
        world_pos.xy * device_pixel_scale + task_offset,
        0.0,
        world_pos.w
    );
    gl_Position = uTransform * final_pos;

    // Axis-aligned primitives get very large bounds instead of the real clip
    // rect; otherwise the local clip rect is passed as (p0, p1).
    vec4 transform_bounds;
    if (prim_transform.is_axis_aligned) {
        transform_bounds = vec4(vec2(-1.0e16), vec2(1.0e16));
    } else {
        transform_bounds = vec4(local_clip_rect.p0,
                                local_clip_rect.p0 + local_clip_rect.size);
    }
    init_transform_vs(transform_bounds);

    return ClipImageVertexInfo(local_pos, world_pos);
}
void main(void) {
ClipMaskInstanceImage cmi = fetch_clip_item();
Transform clip_transform = fetch_transform(cmi.base.clip_transform_id);
Transform prim_transform = fetch_transform(cmi.base.prim_transform_id);
ImageSource res = fetch_image_source_direct(cmi.resource_address);
ClipVertexInfo vi = write_clip_tile_vertex(
ClipImageVertexInfo vi = write_clip_image_vertex(
cmi.tile_rect,
cmi.local_rect,
prim_transform,
clip_transform,
@ -51,33 +85,33 @@ void main(void) {
cmi.base.device_pixel_scale
);
vLocalPos = vi.local_pos;
vClipMaskImageUv = (vi.local_pos.xy - cmi.tile_rect.p0 * vi.local_pos.w) / cmi.tile_rect.size;
vec2 uv = (vi.local_pos - cmi.tile_rect.p0) / cmi.tile_rect.size;
vec2 texture_size = vec2(TEX_SIZE(sColor0));
vClipMaskUvRect = vec4(res.uv_rect.p0, res.uv_rect.p1 - res.uv_rect.p0) / texture_size.xyxy;
vec4 uv_rect = vec4(res.uv_rect.p0, res.uv_rect.p1);
vClipMaskImageUv = mix(uv_rect.xy, uv_rect.zw, uv) / texture_size;
// applying a half-texel offset to the UV boundaries to prevent linear samples from the outside
vec4 inner_rect = vec4(res.uv_rect.p0, res.uv_rect.p1);
vClipMaskUvInnerRect = (inner_rect + vec4(0.5, 0.5, -0.5, -0.5)) / texture_size.xyxy;
vClipMaskUvInnerRect = (uv_rect + vec4(0.5, 0.5, -0.5, -0.5)) / texture_size.xyxy;
}
#endif
#ifdef WR_FRAGMENT_SHADER
void main(void) {
vec2 local_pos = vLocalPos.xy / vLocalPos.w;
float alpha = vLocalPos.w > 0.0 ? init_transform_fs(local_pos) : 0.0;
// TODO: Handle repeating masks?
vec2 clamped_mask_uv = clamp(vClipMaskImageUv, vec2(0.0, 0.0), vLocalPos.ww);
// Ensure we don't draw outside of our tile.
// FIXME(emilio): Can we do this earlier?
if (clamped_mask_uv != vClipMaskImageUv)
discard;
vec2 source_uv = clamp(
clamped_mask_uv / vLocalPos.w * vClipMaskUvRect.zw + vClipMaskUvRect.xy,
vClipMaskUvInnerRect.xy, vClipMaskUvInnerRect.zw);
float alpha = init_transform_fs(vLocalPos);
vec2 source_uv = clamp(vClipMaskImageUv, vClipMaskUvInnerRect.xy, vClipMaskUvInnerRect.zw);
float clip_alpha = texture(sColor0, source_uv).r; //careful: texture has type A8
oFragColor = vec4(alpha * clip_alpha, 1.0, 1.0, 1.0);
}
#ifdef SWGL_DRAW_SPAN
// SWGL span fast path for R8 targets. The span is committed as a linearly
// sampled texture span only when the transform bounds are not valid; when
// they are valid nothing is committed here.
void swgl_drawSpanR8() {
    if (!has_valid_transform_bounds()) {
        swgl_commitTextureLinearR8(sColor0, vClipMaskImageUv, vClipMaskUvInnerRect);
    }
}
#endif
#endif

Просмотреть файл

@ -17,10 +17,6 @@
uniform sampler2D sClipMask;
// Clamps `pt` component-wise into the rectangle spanning rect.p0 to
// rect.p0 + rect.size.
vec2 clamp_rect(vec2 pt, RectWithSize rect) {
    vec2 lower = rect.p0;
    vec2 upper = rect.p0 + rect.size;
    return clamp(pt, lower, upper);
}
#ifndef SWGL_CLIP_MASK
// TODO: convert back to RectWithEndPoint if driver issues are resolved, if ever.
flat varying vec4 vClipMaskUvBounds;

Просмотреть файл

@ -50,3 +50,7 @@ float signed_distance_rect(vec2 pos, vec2 p0, vec2 p1) {
return max(d.x, d.y);
}
// Clamps `pt` component-wise into the rectangle spanning rect.p0 to
// rect.p0 + rect.size.
vec2 clamp_rect(vec2 pt, RectWithSize rect) {
    vec2 lower = rect.p0;
    vec2 upper = rect.p0 + rect.size;
    return clamp(pt, lower, upper);
}

Просмотреть файл

@ -93,6 +93,11 @@ vec4 get_node_pos(vec2 pos, Transform transform) {
#ifdef WR_FRAGMENT_SHADER
// Reports whether the interpolated transform bounds are usable. Bounds that
// were set to a very large scale are taken as the signal that they are
// invalid, so the upper bound is compared against a large threshold.
bool has_valid_transform_bounds() {
    float upper_bound = vTransformBounds.w;
    return upper_bound < 1.0e15;
}
float init_transform_fs(vec2 local_pos) {
// Get signed distance from local rect bounds.
float d = signed_distance_rect(

Просмотреть файл

@ -3383,7 +3383,7 @@ pub struct ClipBatchList {
pub slow_rectangles: Vec<ClipMaskInstanceRect>,
pub fast_rectangles: Vec<ClipMaskInstanceRect>,
/// Image draws apply the image masking.
pub images: FastHashMap<TextureSource, Vec<ClipMaskInstanceImage>>,
pub images: FastHashMap<(TextureSource, Option<DeviceIntRect>), Vec<ClipMaskInstanceImage>>,
pub box_shadows: FastHashMap<TextureSource, Vec<ClipMaskInstanceBoxShadow>>,
}
@ -3586,11 +3586,26 @@ impl ClipBatcher {
spatial_tree,
);
let prim_transform_id = transforms.get_id(
root_spatial_node_index,
ROOT_SPATIAL_NODE_INDEX,
spatial_tree,
);
// For clip mask images, we need to map from the primitive's layout space to
// the target space, as the cs_clip_image shader needs to forward transform
// the local image bounds, rather than backwards transform the target bounds
// as is done in write_clip_tile_vertex.
let prim_transform_id = match clip_node.item.kind {
ClipItemKind::Image { .. } => {
transforms.get_id(
clip_instance.spatial_node_index,
root_spatial_node_index,
spatial_tree,
)
}
_ => {
transforms.get_id(
root_spatial_node_index,
ROOT_SPATIAL_NODE_INDEX,
spatial_tree,
)
}
};
let common = ClipMaskInstanceCommon {
sub_rect: DeviceRect::new(
@ -3612,7 +3627,14 @@ impl ClipBatcher {
tile: None,
};
let mut add_image = |request: ImageRequest, local_tile_rect: LayoutRect, sub_rect: DeviceRect| {
let map_local_to_world = SpaceMapper::new_with_target(
ROOT_SPATIAL_NODE_INDEX,
clip_instance.spatial_node_index,
WorldRect::max_rect(),
spatial_tree,
);
let mut add_image = |request: ImageRequest, tile_rect: LayoutRect, sub_rect: DeviceRect| {
let cache_item = match resource_cache.get_cached_image(request) {
Ok(item) => item,
Err(..) => {
@ -3622,9 +3644,33 @@ impl ClipBatcher {
}
};
// If the primitive transform is axis aligned, we can skip any need for scissoring
// by clipping the local clip rect with the backwards transformed target bounds.
// If it is not axis-aligned, then we pass the local clip rect through unmodified
// to the shader and also set up a scissor rect for the overall target bounds to
// ensure nothing is drawn outside the target.
let (local_rect, scissor_rect) =
if prim_transform_id.transform_kind() == TransformedRectKind::AxisAligned {
let world_rect =
sub_rect.translate(actual_rect.origin.to_vector()) / global_device_pixel_scale;
(map_local_to_world
.unmap(&world_rect)
.expect("bug: should always map as axis-aligned")
.intersection(&rect)
.unwrap_or_default(),
None)
} else {
(rect,
Some(common.sub_rect
.translate(task_origin.to_vector())
.round_out()
.to_i32()))
};
self.get_batch_list(is_first_clip)
.images
.entry(cache_item.texture_id)
.entry((cache_item.texture_id, scissor_rect))
.or_insert_with(Vec::new)
.push(ClipMaskInstanceImage {
common: ClipMaskInstanceCommon {
@ -3632,8 +3678,8 @@ impl ClipBatcher {
..common
},
resource_address: gpu_cache.get_address(&cache_item.uv_rect_handle),
tile_rect: local_tile_rect,
local_rect: rect,
tile_rect,
local_rect,
});
};
@ -3642,12 +3688,6 @@ impl ClipBatcher {
let clip_spatial_node = &spatial_tree.spatial_nodes[clip_instance.spatial_node_index.0 as usize];
let clip_is_axis_aligned = clip_spatial_node.coordinate_system_id == CoordinateSystemId::root();
let sub_rect_bounds = actual_rect.size.into();
let map_local_to_world = SpaceMapper::new_with_target(
ROOT_SPATIAL_NODE_INDEX,
clip_instance.spatial_node_index,
WorldRect::max_rect(),
spatial_tree,
);
for tile in tiles {
let tile_sub_rect = if clip_is_axis_aligned {

Просмотреть файл

@ -3749,6 +3749,7 @@ impl Renderer {
fn draw_clip_batch_list(
&mut self,
list: &ClipBatchList,
draw_target: &DrawTarget,
projection: &default::Transform3D<f32>,
stats: &mut RendererStats,
) {
@ -3803,8 +3804,27 @@ impl Renderer {
}
// draw image masks
for (mask_texture_id, items) in list.images.iter() {
let mut using_scissor = false;
for ((mask_texture_id, clip_rect), items) in list.images.iter() {
let _gm2 = self.gpu_profiler.start_marker("clip images");
// Some image masks may require scissoring to ensure they don't draw
// outside their task's target bounds. Axis-aligned primitives will
// be clamped inside the shader and should not require scissoring.
// TODO: We currently assume scissor state is off by default for
// alpha targets here, but in the future we may want to track the
// current scissor state so that this can be properly saved and
// restored here.
if let Some(clip_rect) = clip_rect {
if !using_scissor {
self.device.enable_scissor();
using_scissor = true;
}
let scissor_rect = draw_target.build_scissor_rect(Some(*clip_rect));
self.device.set_scissor_rect(scissor_rect);
} else if using_scissor {
self.device.disable_scissor();
using_scissor = false;
}
let textures = BatchTextures::composite_rgb(*mask_texture_id);
self.shaders.borrow_mut().cs_clip_image
.bind(&mut self.device, projection, None, &mut self.renderer_errors);
@ -3815,6 +3835,9 @@ impl Renderer {
stats,
);
}
if using_scissor {
self.device.disable_scissor();
}
}
fn draw_alpha_target(
@ -3958,6 +3981,7 @@ impl Renderer {
self.set_blend(false, FramebufferKind::Other);
self.draw_clip_batch_list(
&target.clip_batcher.primary_clips,
&draw_target,
projection,
stats,
);
@ -3968,6 +3992,7 @@ impl Renderer {
self.set_blend_mode_multiply(FramebufferKind::Other);
self.draw_clip_batch_list(
&target.clip_batcher.secondary_clips,
&draw_target,
projection,
stats,
);

Двоичные данные
gfx/wr/wrench/reftests/mask/mask-perspective.png

Двоичный файл не отображается.

До

Ширина:  |  Высота:  |  Размер: 317 B

После

Ширина:  |  Высота:  |  Размер: 279 B

Просмотреть файл

@ -12,6 +12,6 @@ platform(linux,mac) fuzzy(1,17500) == mask-atomicity-tiling.yaml mask-atomicity-
platform(linux,mac) == mask-perspective.yaml mask-perspective.png
== fuzzy(1,11) mask-perspective-tiling.yaml mask-perspective.yaml
platform(linux,mac) == checkerboard.yaml checkerboard.png
skip_on(android,device) == checkerboard.yaml checkerboard-tiling.yaml # Fails on a Pixel2
skip_on(android,device) fuzzy(2,1900) == checkerboard.yaml checkerboard-tiling.yaml # Fails on a Pixel2
== missing-mask.yaml missing-mask-ref.yaml
platform(linux) == scaled-filter-raster-root.yaml scaled-filter-raster-root.png