Support the SPV_EXT_fragment_shader_interlock extension.

This was straightforward to implement in GLSL. The `ShadingRateInterlockOrderedEXT` and `ShadingRateInterlockUnorderedEXT` modes aren't implemented yet, because we don't support `SPV_NV_shading_rate` or `SPV_EXT_fragment_invocation_density` yet. HLSL and MSL were more interesting. They don't support this directly, but they do support marking resources as "rasterizer ordered," which does roughly the same thing. So this implementation scans all accesses inside the critical section and marks all storage resources found therein as rasterizer ordered. They also don't support the fine-grained controls on pixel- vs. sample-level interlock and disabling ordering guarantees that GLSL and SPIR-V do, but that's OK. "Unordered" here merely means the order is undefined; that it just so happens to be the same as rasterizer order is immaterial. As for pixel- vs. sample-level interlock, Vulkan explicitly states: > With sample shading enabled, [the `PixelInterlockOrderedEXT` and > `PixelInterlockUnorderedEXT`] execution modes are treated like > `SampleInterlockOrderedEXT` or `SampleInterlockUnorderedEXT` > respectively. and: > If [the `SampleInterlockOrderedEXT` or `SampleInterlockUnorderedEXT`] > execution modes are used in single-sample mode they are treated like > `PixelInterlockOrderedEXT` or `PixelInterlockUnorderedEXT` > respectively. So this will DTRT for MoltenVK and gfx-rs, at least. MSL additionally supports multiple raster order groups; resources that are not accessed together can be placed in different ROGs to allow them to be synchronized separately. A more sophisticated analysis might be able to place resources optimally, but that's outside the scope of this change. For now, we assign all resources to group 0, which should do for our purposes. `glslang` doesn't support the `RasterizerOrdered` UAVs this implementation produces for HLSL, so the test case needs `fxc.exe`. 
It also insists on GLSL 4.50 for `GL_ARB_fragment_shader_interlock`, even though the spec says it needs either 4.20 or `GL_ARB_shader_image_load_store`; and it doesn't support the `GL_NV_fragment_shader_interlock` extension at all. So I haven't been able to test those code paths. Fixes #1002.
2019-08-04 00:07:20 -05:00 · 2019-08-04 00:07:20 -05:00 · 2eff420d9a
--- a/reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag
+++ b/reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag
@ -0,0 +1,24 @@
+RWByteAddressBuffer _9 : register(u6, space0);
+globallycoherent RasterizerOrderedByteAddressBuffer _42 : register(u3, space0);
+RasterizerOrderedByteAddressBuffer _52 : register(u4, space0);
+RWTexture2D<unorm float4> img4 : register(u5, space0);
+RasterizerOrderedTexture2D<unorm float4> img : register(u0, space0);
+RasterizerOrderedTexture2D<unorm float4> img3 : register(u2, space0);
+RasterizerOrderedTexture2D<uint> img2 : register(u1, space0);
+
+void frag_main()
+{
+    _9.Store(0, uint(0));
+    img4[int2(1, 1)] = float4(1.0f, 0.0f, 0.0f, 1.0f);
+    img[int2(0, 0)] = img3[int2(0, 0)];
+    uint _39;
+    InterlockedAdd(img2[int2(0, 0)], 1u, _39);
+    _42.Store(0, uint(int(_42.Load(0)) + 42));
+    uint _55;
+    _42.InterlockedAnd(4, _52.Load(0), _55);
+}
+
+void main()
+{
+    frag_main();
+}
--- a/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag
+++ b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag
@ -0,0 +1,43 @@
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+#include <metal_atomic>
+
+using namespace metal;
+
+struct Buffer3
+{
+    int baz;
+};
+
+struct Buffer
+{
+    int foo;
+    uint bar;
+};
+
+struct Buffer2
+{
+    uint quux;
+};
+
+struct spvDescriptorSetBuffer0
+{
+    device Buffer3* m_9 [[id(0)]];
+    texture2d<float, access::write> img4 [[id(1)]];
+    texture2d<float, access::write> img [[id(2), raster_order_group(0)]];
+    texture2d<float> img3 [[id(3), raster_order_group(0)]];
+    volatile device Buffer* m_34 [[id(4), raster_order_group(0)]];
+    device Buffer2* m_44 [[id(5), raster_order_group(0)]];
+};
+
+fragment void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]])
+{
+    (*spvDescriptorSet0.m_9).baz = 0;
+    spvDescriptorSet0.img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1)));
+    spvDescriptorSet0.img.write(spvDescriptorSet0.img3.read(uint2(int2(0))), uint2(int2(0)));
+    (*spvDescriptorSet0.m_34).foo += 42;
+    uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(*spvDescriptorSet0.m_34).bar, (*spvDescriptorSet0.m_44).quux, memory_order_relaxed);
+}
+
--- a/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag
+++ b/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag
@ -0,0 +1,33 @@
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+#include <metal_atomic>
+
+using namespace metal;
+
+struct Buffer3
+{
+    int baz;
+};
+
+struct Buffer
+{
+    int foo;
+    uint bar;
+};
+
+struct Buffer2
+{
+    uint quux;
+};
+
+fragment void main0(device Buffer3& _9 [[buffer(0)]], volatile device Buffer& _34 [[buffer(1), raster_order_group(0)]], device Buffer2& _44 [[buffer(2), raster_order_group(0)]], texture2d<float, access::write> img4 [[texture(0)]], texture2d<float, access::write> img [[texture(1), raster_order_group(0)]], texture2d<float> img3 [[texture(2), raster_order_group(0)]])
+{
+    _9.baz = 0;
+    img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1)));
+    img.write(img3.read(uint2(int2(0))), uint2(int2(0)));
+    _34.foo += 42;
+    uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&_34.bar, _44.quux, memory_order_relaxed);
+}
+
--- a/reference/opt/shaders/frag/pixel-interlock-ordered.frag
+++ b/reference/opt/shaders/frag/pixel-interlock-ordered.frag
@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+    int foo;
+    uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+    beginInvocationInterlockARB();
+    imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+    uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+    _30.foo += 42;
+    uint _41 = atomicAnd(_30.bar, 255u);
+    endInvocationInterlockARB();
+}
+
--- a/reference/opt/shaders/frag/pixel-interlock-unordered.frag
+++ b/reference/opt/shaders/frag/pixel-interlock-unordered.frag
@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_unordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+    int foo;
+    uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+    beginInvocationInterlockARB();
+    imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+    uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+    _30.foo += 42;
+    uint _41 = atomicAnd(_30.bar, 255u);
+    endInvocationInterlockARB();
+}
+
--- a/reference/opt/shaders/frag/sample-interlock-ordered.frag
+++ b/reference/opt/shaders/frag/sample-interlock-ordered.frag
@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(sample_interlock_ordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+    int foo;
+    uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+    beginInvocationInterlockARB();
+    imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+    uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+    _30.foo += 42;
+    uint _47 = atomicAnd(_30.bar, uint(gl_SampleMaskIn[0]));
+    endInvocationInterlockARB();
+}
+
--- a/reference/opt/shaders/frag/sample-interlock-unordered.frag
+++ b/reference/opt/shaders/frag/sample-interlock-unordered.frag
@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(sample_interlock_unordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+    int foo;
+    uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+    beginInvocationInterlockARB();
+    imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+    uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+    _30.foo += 42;
+    uint _41 = atomicAnd(_30.bar, 255u);
+    endInvocationInterlockARB();
+}
+
--- a/reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag
+++ b/reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag
@ -0,0 +1,24 @@
+RWByteAddressBuffer _9 : register(u6, space0);
+globallycoherent RasterizerOrderedByteAddressBuffer _42 : register(u3, space0);
+RasterizerOrderedByteAddressBuffer _52 : register(u4, space0);
+RWTexture2D<unorm float4> img4 : register(u5, space0);
+RasterizerOrderedTexture2D<unorm float4> img : register(u0, space0);
+RasterizerOrderedTexture2D<unorm float4> img3 : register(u2, space0);
+RasterizerOrderedTexture2D<uint> img2 : register(u1, space0);
+
+void frag_main()
+{
+    _9.Store(0, uint(0));
+    img4[int2(1, 1)] = float4(1.0f, 0.0f, 0.0f, 1.0f);
+    img[int2(0, 0)] = img3[int2(0, 0)];
+    uint _39;
+    InterlockedAdd(img2[int2(0, 0)], 1u, _39);
+    _42.Store(0, uint(int(_42.Load(0)) + 42));
+    uint _55;
+    _42.InterlockedAnd(4, _52.Load(0), _55);
+}
+
+void main()
+{
+    frag_main();
+}
--- a/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag
+++ b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag
@ -0,0 +1,43 @@
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+#include <metal_atomic>
+
+using namespace metal;
+
+struct Buffer3
+{
+    int baz;
+};
+
+struct Buffer
+{
+    int foo;
+    uint bar;
+};
+
+struct Buffer2
+{
+    uint quux;
+};
+
+struct spvDescriptorSetBuffer0
+{
+    device Buffer3* m_9 [[id(0)]];
+    texture2d<float, access::write> img4 [[id(1)]];
+    texture2d<float, access::write> img [[id(2), raster_order_group(0)]];
+    texture2d<float> img3 [[id(3), raster_order_group(0)]];
+    volatile device Buffer* m_34 [[id(4), raster_order_group(0)]];
+    device Buffer2* m_44 [[id(5), raster_order_group(0)]];
+};
+
+fragment void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]])
+{
+    (*spvDescriptorSet0.m_9).baz = 0;
+    spvDescriptorSet0.img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1)));
+    spvDescriptorSet0.img.write(spvDescriptorSet0.img3.read(uint2(int2(0))), uint2(int2(0)));
+    (*spvDescriptorSet0.m_34).foo += 42;
+    uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(*spvDescriptorSet0.m_34).bar, (*spvDescriptorSet0.m_44).quux, memory_order_relaxed);
+}
+
--- a/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag
+++ b/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag
@ -0,0 +1,33 @@
+#pragma clang diagnostic ignored "-Wunused-variable"
+
+#include <metal_stdlib>
+#include <simd/simd.h>
+#include <metal_atomic>
+
+using namespace metal;
+
+struct Buffer3
+{
+    int baz;
+};
+
+struct Buffer
+{
+    int foo;
+    uint bar;
+};
+
+struct Buffer2
+{
+    uint quux;
+};
+
+fragment void main0(device Buffer3& _9 [[buffer(0)]], volatile device Buffer& _34 [[buffer(1), raster_order_group(0)]], device Buffer2& _44 [[buffer(2), raster_order_group(0)]], texture2d<float, access::write> img4 [[texture(0)]], texture2d<float, access::write> img [[texture(1), raster_order_group(0)]], texture2d<float> img3 [[texture(2), raster_order_group(0)]])
+{
+    _9.baz = 0;
+    img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1)));
+    img.write(img3.read(uint2(int2(0))), uint2(int2(0)));
+    _34.foo += 42;
+    uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&_34.bar, _44.quux, memory_order_relaxed);
+}
+
--- a/reference/shaders/frag/pixel-interlock-ordered.frag
+++ b/reference/shaders/frag/pixel-interlock-ordered.frag
@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+    int foo;
+    uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+    beginInvocationInterlockARB();
+    imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+    uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+    _30.foo += 42;
+    uint _41 = atomicAnd(_30.bar, 255u);
+    endInvocationInterlockARB();
+}
+
--- a/reference/shaders/frag/pixel-interlock-unordered.frag
+++ b/reference/shaders/frag/pixel-interlock-unordered.frag
@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(pixel_interlock_unordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+    int foo;
+    uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+    beginInvocationInterlockARB();
+    imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+    uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+    _30.foo += 42;
+    uint _41 = atomicAnd(_30.bar, 255u);
+    endInvocationInterlockARB();
+}
+
--- a/reference/shaders/frag/sample-interlock-ordered.frag
+++ b/reference/shaders/frag/sample-interlock-ordered.frag
@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(sample_interlock_ordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+    int foo;
+    uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+    beginInvocationInterlockARB();
+    imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+    uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+    _30.foo += 42;
+    uint _47 = atomicAnd(_30.bar, uint(gl_SampleMaskIn[0]));
+    endInvocationInterlockARB();
+}
+
--- a/reference/shaders/frag/sample-interlock-unordered.frag
+++ b/reference/shaders/frag/sample-interlock-unordered.frag
@ -0,0 +1,23 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+layout(sample_interlock_unordered) in;
+
+layout(binding = 2, std430) coherent buffer Buffer
+{
+    int foo;
+    uint bar;
+} _30;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+
+void main()
+{
+    beginInvocationInterlockARB();
+    imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0));
+    uint _27 = imageAtomicAdd(img2, ivec2(0), 1u);
+    _30.foo += 42;
+    uint _41 = atomicAnd(_30.bar, 255u);
+    endInvocationInterlockARB();
+}
+
--- a/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag
+++ b/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag
@ -0,0 +1,36 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2, rgba8) uniform readonly image2D img3;
+layout(binding = 3) coherent buffer Buffer
+{
+	int foo;
+	uint bar;
+};
+layout(binding = 4) buffer Buffer2
+{
+	uint quux;
+};
+
+layout(binding = 5, rgba8) uniform writeonly image2D img4;
+layout(binding = 6) buffer Buffer3
+{
+	int baz;
+};
+
+void main()
+{
+	// Deliberately outside the critical section to test usage tracking.
+	baz = 0;
+	imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0));
+	beginInvocationInterlockARB();
+	imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0)));
+	imageAtomicAdd(img2, ivec2(0, 0), 1u);
+	foo += 42;
+	atomicAnd(bar, quux);
+	endInvocationInterlockARB();
+}
--- a/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag
+++ b/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag
@ -0,0 +1,36 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+//layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2, rgba8) uniform readonly image2D img3;
+layout(binding = 3) coherent buffer Buffer
+{
+	int foo;
+	uint bar;
+};
+layout(binding = 4) buffer Buffer2
+{
+	uint quux;
+};
+
+layout(binding = 5, rgba8) uniform writeonly image2D img4;
+layout(binding = 6) buffer Buffer3
+{
+	int baz;
+};
+
+void main()
+{
+	// Deliberately outside the critical section to test usage tracking.
+	baz = 0;
+	imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0));
+	beginInvocationInterlockARB();
+	imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0)));
+	//imageAtomicAdd(img2, ivec2(0, 0), 1u);
+	foo += 42;
+	atomicAnd(bar, quux);
+	endInvocationInterlockARB();
+}
--- a/shaders-msl/frag/pixel-interlock-ordered.msl2.frag
+++ b/shaders-msl/frag/pixel-interlock-ordered.msl2.frag
@ -0,0 +1,36 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+//layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2, rgba8) uniform readonly image2D img3;
+layout(binding = 3) coherent buffer Buffer
+{
+	int foo;
+	uint bar;
+};
+layout(binding = 4) buffer Buffer2
+{
+	uint quux;
+};
+
+layout(binding = 5, rgba8) uniform writeonly image2D img4;
+layout(binding = 6) buffer Buffer3
+{
+	int baz;
+};
+
+void main()
+{
+	// Deliberately outside the critical section to test usage tracking.
+	baz = 0;
+	imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0));
+	beginInvocationInterlockARB();
+	imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0)));
+	//imageAtomicAdd(img2, ivec2(0, 0), 1u);
+	foo += 42;
+	atomicAnd(bar, quux);
+	endInvocationInterlockARB();
+}
--- a/shaders/frag/pixel-interlock-ordered.frag
+++ b/shaders/frag/pixel-interlock-ordered.frag
@ -0,0 +1,22 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(pixel_interlock_ordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2) coherent buffer Buffer
+{
+	int foo;
+	uint bar;
+};
+
+void main()
+{
+	beginInvocationInterlockARB();
+	imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0));
+	imageAtomicAdd(img2, ivec2(0, 0), 1u);
+	foo += 42;
+	atomicAnd(bar, 0xff);
+	endInvocationInterlockARB();
+}
--- a/shaders/frag/pixel-interlock-unordered.frag
+++ b/shaders/frag/pixel-interlock-unordered.frag
@ -0,0 +1,22 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(pixel_interlock_unordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2) coherent buffer Buffer
+{
+	int foo;
+	uint bar;
+};
+
+void main()
+{
+	beginInvocationInterlockARB();
+	imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0));
+	imageAtomicAdd(img2, ivec2(0, 0), 1u);
+	foo += 42;
+	atomicAnd(bar, 0xff);
+	endInvocationInterlockARB();
+}
--- a/shaders/frag/sample-interlock-ordered.frag
+++ b/shaders/frag/sample-interlock-ordered.frag
@ -0,0 +1,22 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(sample_interlock_ordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2) coherent buffer Buffer
+{
+	int foo;
+	uint bar;
+};
+
+void main()
+{
+	beginInvocationInterlockARB();
+	imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0));
+	imageAtomicAdd(img2, ivec2(0, 0), 1u);
+	foo += 42;
+	atomicAnd(bar, gl_SampleMaskIn[0]);
+	endInvocationInterlockARB();
+}
--- a/shaders/frag/sample-interlock-unordered.frag
+++ b/shaders/frag/sample-interlock-unordered.frag
@ -0,0 +1,22 @@
+#version 450
+#extension GL_ARB_fragment_shader_interlock : require
+
+layout(sample_interlock_unordered) in;
+
+layout(binding = 0, rgba8) uniform writeonly image2D img;
+layout(binding = 1, r32ui) uniform uimage2D img2;
+layout(binding = 2) coherent buffer Buffer
+{
+	int foo;
+	uint bar;
+};
+
+void main()
+{
+	beginInvocationInterlockARB();
+	imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0));
+	imageAtomicAdd(img2, ivec2(0, 0), 1u);
+	foo += 42;
+	atomicAnd(bar, 0xff);
+	endInvocationInterlockARB();
+}
--- a/spirv_cross.cpp
+++ b/spirv_cross.cpp
@ -4249,6 +4249,221 @@ void Compiler::analyze_non_block_pointer_types()
 	sort(begin(physical_storage_non_block_pointer_types), end(physical_storage_non_block_pointer_types));
 }

+bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length)
+{
+	if (opcode == OpBeginInvocationInterlockEXT)
+	{
+		in_crit_sec = true;
+		return true;
+	}
+
+	if (opcode == OpEndInvocationInterlockEXT)
+	{
+		// End critical section--nothing more to do.
+		return false;
+	}
+
+	// We need to figure out where images and buffers are loaded from, so do only the bare bones compilation we need.
+	switch (opcode)
+	{
+	case OpLoad:
+	{
+		if (length < 3)
+			return false;
+
+		uint32_t ptr = args[2];
+		auto *var = compiler.maybe_get_backing_variable(ptr);
+
+		// We're only concerned with buffer and image memory here.
+		if (!var)
+			break;
+
+		switch (var->storage)
+		{
+		default:
+			break;
+
+		case StorageClassUniformConstant:
+		{
+			// Only track where the loaded value came from; it is not marked as interlocked here.
+			uint32_t result_type = args[0];
+			uint32_t id = args[1];
+			compiler.set<SPIRExpression>(id, "", result_type, true);
+			compiler.register_read(id, ptr, true);
+			break;
+		}
+
+		case StorageClassUniform:
+			// Must have BufferBlock; we only care about SSBOs.
+			if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
+				break;
+			// fallthrough
+		case StorageClassStorageBuffer:
+			if (!in_crit_sec)
+				break;
+
+			compiler.interlocked_resources.insert(var->self);
+			break;
+		}
+		break;
+	}
+
+	case OpInBoundsAccessChain:
+	case OpAccessChain:
+	case OpPtrAccessChain:
+	{
+		if (length < 3)
+			return false;
+
+		uint32_t result_type = args[0];
+
+		auto &type = compiler.get<SPIRType>(result_type);
+		if (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant ||
+		    type.storage == StorageClassStorageBuffer)
+		{
+			uint32_t id = args[1];
+			uint32_t ptr = args[2];
+			compiler.set<SPIRExpression>(id, "", result_type, true);
+			compiler.register_read(id, ptr, true);
+		}
+		break;
+	}
+
+	case OpImageTexelPointer:
+	{
+		if (length < 3)
+			return false;
+
+		uint32_t result_type = args[0];
+		uint32_t id = args[1];
+		uint32_t ptr = args[2];
+		auto &e = compiler.set<SPIRExpression>(id, "", result_type, true);
+		if (auto *var = compiler.maybe_get_backing_variable(ptr))
+			e.loaded_from = var->self;
+		break; // args[0] is the result type here, so this must not fall into the store cases below.
+	}
+
+	case OpStore:
+	case OpImageWrite:
+	case OpAtomicStore:
+	{
+		if (length < 1)
+			return false;
+
+		if (!in_crit_sec)
+			break;
+
+		uint32_t ptr = args[0]; // These ops carry the written pointer/image as their first operand.
+		auto *var = compiler.maybe_get_backing_variable(ptr);
+		if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
+		            var->storage == StorageClassStorageBuffer))
+			compiler.interlocked_resources.insert(var->self);
+
+		break;
+	}
+
+	case OpCopyMemory:
+	{
+		if (length < 2)
+			return false;
+
+		if (!in_crit_sec)
+			break;
+
+		uint32_t dst = args[0];
+		uint32_t src = args[1];
+		auto *dst_var = compiler.maybe_get_backing_variable(dst);
+		auto *src_var = compiler.maybe_get_backing_variable(src);
+		if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer))
+			compiler.interlocked_resources.insert(dst_var->self);
+		if (src_var)
+		{
+			if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer)
+				break;
+			if (src_var->storage == StorageClassUniform &&
+			    !compiler.has_decoration(compiler.get<SPIRType>(src_var->basetype).self, DecorationBufferBlock))
+				break;
+			compiler.interlocked_resources.insert(src_var->self);
+		}
+
+		break;
+	}
+
+	case OpImageRead:
+	case OpAtomicLoad:
+	{
+		if (length < 3)
+			return false;
+
+		if (!in_crit_sec)
+			break;
+
+		uint32_t ptr = args[2];
+		auto *var = compiler.maybe_get_backing_variable(ptr);
+
+		// We're only concerned with buffer and image memory here.
+		if (!var)
+			break;
+
+		switch (var->storage)
+		{
+		default:
+			break;
+
+		case StorageClassUniform:
+			// Must have BufferBlock; we only care about SSBOs.
+			if (!compiler.has_decoration(compiler.get<SPIRType>(var->basetype).self, DecorationBufferBlock))
+				break;
+			// fallthrough
+		case StorageClassUniformConstant:
+		case StorageClassStorageBuffer:
+			compiler.interlocked_resources.insert(var->self);
+			break;
+		}
+		break;
+	}
+
+	case OpAtomicExchange:
+	case OpAtomicCompareExchange:
+	case OpAtomicIIncrement:
+	case OpAtomicIDecrement:
+	case OpAtomicIAdd:
+	case OpAtomicISub:
+	case OpAtomicSMin:
+	case OpAtomicUMin:
+	case OpAtomicSMax:
+	case OpAtomicUMax:
+	case OpAtomicAnd:
+	case OpAtomicOr:
+	case OpAtomicXor:
+	{
+		if (length < 3)
+			return false;
+
+		if (!in_crit_sec)
+			break;
+
+		uint32_t ptr = args[2]; // Operands are (result type, result id, pointer, ...).
+		auto *var = compiler.maybe_get_backing_variable(ptr);
+		if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant ||
+		            var->storage == StorageClassStorageBuffer))
+			compiler.interlocked_resources.insert(var->self);
+
+		break;
+	}
+
+	default:
+		break;
+	}
+
+	return true;
+}
+
+void Compiler::analyze_interlocked_resource_usage()
+{
+	InterlockedResourceAccessHandler handler(*this); // Its handle() fills interlocked_resources on this compiler.
+	traverse_all_reachable_opcodes(get<SPIRFunction>(ir.default_entry_point), handler);
+}
+
 bool Compiler::type_is_array_of_pointers(const SPIRType &type) const
 {
 	if (!type.pointer)
--- a/spirv_cross.hpp
+++ b/spirv_cross.hpp
@ -945,6 +945,27 @@ protected:
 	                              bool single_function);
 	bool may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var);

+	// Finds all resources that are accessed (read or written) from inside the critical section, if present.
+	// The critical section is delimited by OpBeginInvocationInterlockEXT and
+	// OpEndInvocationInterlockEXT instructions. In MSL and HLSL, any resources accessed
+	// while inside the critical section must be placed in a raster order group.
+	struct InterlockedResourceAccessHandler : OpcodeHandler
+	{
+		InterlockedResourceAccessHandler(Compiler &compiler_)
+		    : compiler(compiler_)
+		{
+		}
+
+		bool handle(spv::Op op, const uint32_t *args, uint32_t length) override;
+
+		Compiler &compiler;
+		bool in_crit_sec = false; // Becomes true once OpBeginInvocationInterlockEXT has been visited.
+	};
+
+	void analyze_interlocked_resource_usage();
+	// The set of all resources accessed (read or written) while inside the critical section, if present.
+	std::unordered_set<uint32_t> interlocked_resources;
+
 	void make_constant_null(uint32_t id, uint32_t type);

 	std::unordered_map<uint32_t, std::string> declared_block_names;
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@ -605,6 +605,26 @@ void CompilerGLSL::emit_header()
 	if (execution.flags.get(ExecutionModePostDepthCoverage))
 		require_extension_internal("GL_ARB_post_depth_coverage");

+	// Needed for: layout({pixel,sample}_interlock_[un]ordered) in;
+	if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT) ||
+	    execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
+	    execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
+	    execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
+	{
+		if (options.es)
+		{
+			if (options.version < 310)
+				SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock.");
+			require_extension_internal("GL_NV_fragment_shader_interlock");
+		}
+		else
+		{
+			if (options.version < 420)
+				require_extension_internal("GL_ARB_shader_image_load_store");
+			require_extension_internal("GL_ARB_fragment_shader_interlock");
+		}
+	}
+
 	for (auto &ext : forced_extensions)
 	{
 		if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16")
@ -784,6 +804,15 @@ void CompilerGLSL::emit_header()
 		if (execution.flags.get(ExecutionModePostDepthCoverage))
 			inputs.push_back("post_depth_coverage");

+		if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT))
+			inputs.push_back("pixel_interlock_ordered");
+		else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT))
+			inputs.push_back("pixel_interlock_unordered");
+		else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT))
+			inputs.push_back("sample_interlock_ordered");
+		else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT))
+			inputs.push_back("sample_interlock_unordered");
+
 		if (!options.es && execution.flags.get(ExecutionModeDepthGreater))
 			statement("layout(depth_greater) out float gl_FragDepth;");
 		else if (!options.es && execution.flags.get(ExecutionModeDepthLess))
@ -10109,6 +10138,32 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		emit_op(ops[0], ops[1], "helperInvocationEXT()", false);
 		break;

+	case OpBeginInvocationInterlockEXT:
+		if (options.es)
+		{
+			require_extension_internal("GL_NV_fragment_shader_interlock");
+			statement("beginInvocationInterlockNV();");
+		}
+		else
+		{
+			require_extension_internal("GL_ARB_fragment_shader_interlock");
+			statement("beginInvocationInterlockARB();");
+		}
+		break;
+
+	case OpEndInvocationInterlockEXT:
+		if (options.es)
+		{
+			require_extension_internal("GL_NV_fragment_shader_interlock");
+			statement("endInvocationInterlockNV();");
+		}
+		else
+		{
+			require_extension_internal("GL_ARB_fragment_shader_interlock");
+			statement("endInvocationInterlockARB();");
+		}
+		break;
+
 	default:
 		statement("// unimplemented op ", instruction.op);
 		break;
--- a/spirv_hlsl.cpp
+++ b/spirv_hlsl.cpp
@ -203,7 +203,7 @@ static string image_format_to_type(ImageFormat fmt, SPIRType::BaseType basetype)
 	}
 }

-string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t)
+string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t id)
 {
 	auto &imagetype = get<SPIRType>(type.image.type);
 	const char *dim = nullptr;
@ -235,7 +235,12 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t)
 		if (type.image.sampled == 1)
 			return join("Buffer<", type_to_glsl(imagetype), components, ">");
 		else if (type.image.sampled == 2)
+		{
+			if (interlocked_resources.count(id))
+				return join("RasterizerOrderedBuffer<", image_format_to_type(type.image.format, imagetype.basetype),
+				            ">");
 			return join("RWBuffer<", image_format_to_type(type.image.format, imagetype.basetype), ">");
+		}
 		else
 			SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime.");
 	case DimSubpassData:
@ -248,6 +253,8 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t)
 	const char *arrayed = type.image.arrayed ? "Array" : "";
 	const char *ms = type.image.ms ? "MS" : "";
 	const char *rw = typed_load ? "RW" : "";
+	if (typed_load && interlocked_resources.count(id))
+		rw = "RasterizerOrdered";
 	return join(rw, "Texture", dim, ms, arrayed, "<",
 	            typed_load ? image_format_to_type(type.image.format, imagetype.basetype) :
 	                         join(type_to_glsl(imagetype), components),
@@ -1848,9 +1855,13 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var)
 		Bitset flags = ir.get_buffer_block_flags(var);
 		bool is_readonly = flags.get(DecorationNonWritable);
 		bool is_coherent = flags.get(DecorationCoherent);
+		bool is_interlocked = interlocked_resources.count(var.self) > 0;
+		const char *type_name = "ByteAddressBuffer ";
+		if (!is_readonly)
+			type_name = is_interlocked ? "RasterizerOrderedByteAddressBuffer " : "RWByteAddressBuffer ";
 		add_resource_name(var.self);
-		statement(is_coherent ? "globallycoherent " : "", is_readonly ? "ByteAddressBuffer " : "RWByteAddressBuffer ",
-		          to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";");
+		statement(is_coherent ? "globallycoherent " : "", type_name, to_name(var.self), type_to_array_glsl(type),
+		          to_resource_binding(var), ";");
 	}
 	else
 	{
@@ -4673,6 +4684,12 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 	case OpIsHelperInvocationEXT:
 		SPIRV_CROSS_THROW("helperInvocationEXT() is not supported in HLSL.");

+	case OpBeginInvocationInterlockEXT:
+	case OpEndInvocationInterlockEXT:
+		if (hlsl_options.shader_model < 51)
+			SPIRV_CROSS_THROW("Rasterizer order views require Shader Model 5.1.");
+		break; // Nothing to do in the body
+
 	default:
 		CompilerGLSL::emit_instruction(instruction);
 		break;
@@ -4850,6 +4867,12 @@ string CompilerHLSL::compile()
 	validate_shader_model();
 	update_active_builtins();
 	analyze_image_and_sampler_usage();
+	if (get_execution_model() == ExecutionModelFragment &&
+	    (get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) ||
+	     get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
+	     get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
+	     get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT)))
+		analyze_interlocked_resource_usage();

 	// Subpass input needs SV_Position.
 	if (need_subpass_input)
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@@ -852,6 +852,12 @@ string CompilerMSL::compile()
 	update_active_builtins();
 	analyze_image_and_sampler_usage();
 	analyze_sampled_image_usage();
+	if (get_execution_model() == ExecutionModelFragment &&
+	    (get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) ||
+	     get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) ||
+	     get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) ||
+	     get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT)))
+		analyze_interlocked_resource_usage();
 	preprocess_op_codes();
 	build_implicit_builtins();

@@ -5541,6 +5547,12 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
 		emit_op(ops[0], ops[1], "simd_is_helper_thread()", false);
 		break;

+	case OpBeginInvocationInterlockEXT:
+	case OpEndInvocationInterlockEXT:
+		if (!msl_options.supports_msl_version(2, 0))
+			SPIRV_CROSS_THROW("Raster order groups require MSL 2.0.");
+		break; // Nothing to do in the body
+
 	default:
 		CompilerGLSL::emit_instruction(instruction);
 		break;
@@ -7436,8 +7448,15 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in
 	bool is_builtin = is_member_builtin(type, index, &builtin);

 	if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary))
-		return join(" [[id(",
-		            get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")]]");
+	{
+		string quals = join(
+		    " [[id(", get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")");
+		if (interlocked_resources.count(
+		        get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID)))
+			quals += ", raster_order_group(0)";
+		quals += "]]";
+		return quals;
+	}

 	// Vertex function inputs
 	if (execution.model == ExecutionModelVertex && type.storage == StorageClassInput)
@@ -8239,7 +8258,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
 						ep_args += ", ";
 					ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + to_restrict(var_id) +
 					           r.name + "_" + convert_to_string(i);
-					ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")]]";
+					ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")";
+					if (interlocked_resources.count(var_id))
+						ep_args += ", raster_order_group(0)";
+					ep_args += "]]";
 				}
 			}
 			else
@@ -8248,7 +8270,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
 					ep_args += ", ";
 				ep_args +=
 				    get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(var_id) + r.name;
-				ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]";
+				ep_args += " [[buffer(" + convert_to_string(r.index) + ")";
+				if (interlocked_resources.count(var_id))
+					ep_args += ", raster_order_group(0)";
+				ep_args += "]]";
 			}
 			break;
 		}
@@ -8264,7 +8289,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
 			ep_args += image_type_glsl(type, var_id) + " " + r.name;
 			if (r.plane > 0)
 				ep_args += join(plane_name_suffix, r.plane);
-			ep_args += " [[texture(" + convert_to_string(r.index) + ")]]";
+			ep_args += " [[texture(" + convert_to_string(r.index) + ")";
+			if (interlocked_resources.count(var_id))
+				ep_args += ", raster_order_group(0)";
+			ep_args += "]]";
 			break;
 		default:
 			if (!ep_args.empty())
@@ -8274,7 +8302,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
 				           type_to_glsl(type, var_id) + "& " + r.name;
 			else
 				ep_args += type_to_glsl(type, var_id) + " " + r.name;
-			ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]";
+			ep_args += " [[buffer(" + convert_to_string(r.index) + ")";
+			if (interlocked_resources.count(var_id))
+				ep_args += ", raster_order_group(0)";
+			ep_args += "]]";
 			break;
 		}
 	}