Add some HLSL tests for atomics and image load store.

2017-10-20 15:43:45 +02:00 · 2017-10-20 15:43:45 +02:00 · 85eb972259
--- a/reference/shaders-hlsl/comp/atomic.comp
+++ b/reference/shaders-hlsl/comp/atomic.comp
@ -0,0 +1,90 @@
+RWByteAddressBuffer ssbo : register(u2);
+RWTexture2D<uint> uImage : register(u0);
+RWTexture2D<int> iImage : register(u1);
+
+groupshared int int_atomic;
+groupshared uint uint_atomic;
+groupshared int int_atomic_array[1];
+groupshared uint uint_atomic_array[1];
+
+void comp_main()
+{
+    uint _19;
+    InterlockedAdd(uImage[int2(1, 5)], 1u, _19);
+    uint _27;
+    InterlockedAdd(uImage[int2(1, 5)], 1u, _27);
+    int _28 = int(_27);
+    iImage[int2(1, 6)] = int4(_28, _28, _28, _28);
+    uint _32;
+    InterlockedOr(uImage[int2(1, 5)], 1u, _32);
+    uint _34;
+    InterlockedXor(uImage[int2(1, 5)], 1u, _34);
+    uint _36;
+    InterlockedAnd(uImage[int2(1, 5)], 1u, _36);
+    uint _38;
+    InterlockedMin(uImage[int2(1, 5)], 1u, _38);
+    uint _40;
+    InterlockedMax(uImage[int2(1, 5)], 1u, _40);
+    uint _44;
+    InterlockedCompareExchange(uImage[int2(1, 5)], 10u, 2u, _44);
+    int _47;
+    InterlockedAdd(iImage[int2(1, 6)], 1, _47);
+    int _49;
+    InterlockedOr(iImage[int2(1, 6)], 1, _49);
+    int _51;
+    InterlockedXor(iImage[int2(1, 6)], 1, _51);
+    int _53;
+    InterlockedAnd(iImage[int2(1, 6)], 1, _53);
+    int _55;
+    InterlockedMin(iImage[int2(1, 6)], 1, _55);
+    int _57;
+    InterlockedMax(iImage[int2(1, 6)], 1, _57);
+    int _61;
+    InterlockedCompareExchange(iImage[int2(1, 5)], 10, 2, _61);
+    uint _68;
+    ssbo.InterlockedAdd(0, 1u, _68);
+    uint _70;
+    ssbo.InterlockedOr(0, 1u, _70);
+    uint _72;
+    ssbo.InterlockedXor(0, 1u, _72);
+    uint _74;
+    ssbo.InterlockedAnd(0, 1u, _74);
+    uint _76;
+    ssbo.InterlockedMin(0, 1u, _76);
+    uint _78;
+    ssbo.InterlockedMax(0, 1u, _78);
+    uint _80;
+    ssbo.InterlockedExchange(0, 1u, _80);
+    uint _82;
+    ssbo.InterlockedCompareExchange(0, 10u, 2u, _82);
+    int _85;
+    ssbo.InterlockedAdd(4, 1, _85);
+    int _87;
+    ssbo.InterlockedOr(4, 1, _87);
+    int _89;
+    ssbo.InterlockedXor(4, 1, _89);
+    int _91;
+    ssbo.InterlockedAnd(4, 1, _91);
+    int _93;
+    ssbo.InterlockedMin(4, 1, _93);
+    int _95;
+    ssbo.InterlockedMax(4, 1, _95);
+    int _97;
+    ssbo.InterlockedExchange(4, 1, _97);
+    int _99;
+    ssbo.InterlockedCompareExchange(4, 10, 2, _99);
+    int _102;
+    InterlockedAdd(int_atomic, 10, _102);
+    uint _105;
+    InterlockedAdd(uint_atomic, 10u, _105);
+    int _110;
+    InterlockedAdd(int_atomic_array[0], 10, _110);
+    uint _115;
+    InterlockedAdd(uint_atomic_array[0], 10u, _115);
+}
+
+[numthreads(1, 1, 1)]
+void main()
+{
+    comp_main();
+}
--- a/reference/shaders-hlsl/comp/image.comp
+++ b/reference/shaders-hlsl/comp/image.comp
@ -0,0 +1,21 @@
+RWTexture2D<float> uImageIn : register(u0);
+RWTexture2D<float> uImageOut : register(u1);
+
+static uint3 gl_GlobalInvocationID;
+struct SPIRV_Cross_Input
+{
+    uint3 gl_GlobalInvocationID : SV_DispatchThreadID;
+};
+
+void comp_main()
+{
+    float4 v = uImageIn[int2(gl_GlobalInvocationID.xy)];
+    uImageOut[int2(gl_GlobalInvocationID.xy)] = v;
+}
+
+[numthreads(1, 1, 1)]
+void main(SPIRV_Cross_Input stage_input)
+{
+    gl_GlobalInvocationID = stage_input.gl_GlobalInvocationID;
+    comp_main();
+}
--- a/shaders-hlsl/comp/atomic.comp
+++ b/shaders-hlsl/comp/atomic.comp
@ -0,0 +1,66 @@
+#version 310 es
+#extension GL_OES_shader_image_atomic : require
+layout(local_size_x = 1) in;
+
+layout(r32ui, binding = 0) uniform highp uimage2D uImage;
+layout(r32i, binding = 1) uniform highp iimage2D iImage;
+layout(binding = 2, std430) buffer SSBO
+{
+    uint u32;
+    int i32;
+} ssbo;
+
+shared int int_atomic;
+shared uint uint_atomic;
+shared int int_atomic_array[1];
+shared uint uint_atomic_array[1];
+
+void main()
+{
+    imageAtomicAdd(uImage, ivec2(1, 5), 1u);
+
+    // Test that we do not invalidate OpImage variables which are loaded from UniformConstant
+    // address space.
+    imageStore(iImage, ivec2(1, 6), ivec4(imageAtomicAdd(uImage, ivec2(1, 5), 1u)));
+
+    imageAtomicOr(uImage, ivec2(1, 5), 1u);
+    imageAtomicXor(uImage, ivec2(1, 5), 1u);
+    imageAtomicAnd(uImage, ivec2(1, 5), 1u);
+    imageAtomicMin(uImage, ivec2(1, 5), 1u);
+    imageAtomicMax(uImage, ivec2(1, 5), 1u);
+    //imageAtomicExchange(uImage, ivec2(1, 5), 1u);
+    imageAtomicCompSwap(uImage, ivec2(1, 5), 10u, 2u);
+
+    imageAtomicAdd(iImage, ivec2(1, 6), 1);
+    imageAtomicOr(iImage, ivec2(1, 6), 1);
+    imageAtomicXor(iImage, ivec2(1, 6), 1);
+    imageAtomicAnd(iImage, ivec2(1, 6), 1);
+    imageAtomicMin(iImage, ivec2(1, 6), 1);
+    imageAtomicMax(iImage, ivec2(1, 6), 1);
+    //imageAtomicExchange(iImage, ivec2(1, 5), 1);
+    imageAtomicCompSwap(iImage, ivec2(1, 5), 10, 2);
+
+    atomicAdd(ssbo.u32, 1u);
+    atomicOr(ssbo.u32, 1u);
+    atomicXor(ssbo.u32, 1u);
+    atomicAnd(ssbo.u32, 1u);
+    atomicMin(ssbo.u32, 1u);
+    atomicMax(ssbo.u32, 1u);
+    atomicExchange(ssbo.u32, 1u);
+    atomicCompSwap(ssbo.u32, 10u, 2u);
+
+    atomicAdd(ssbo.i32, 1);
+    atomicOr(ssbo.i32, 1);
+    atomicXor(ssbo.i32, 1);
+    atomicAnd(ssbo.i32, 1);
+    atomicMin(ssbo.i32, 1);
+    atomicMax(ssbo.i32, 1);
+    atomicExchange(ssbo.i32, 1);
+    atomicCompSwap(ssbo.i32, 10, 2);
+
+	atomicAdd(int_atomic, 10);
+	atomicAdd(uint_atomic, 10u);
+	atomicAdd(int_atomic_array[0], 10);
+	atomicAdd(uint_atomic_array[0], 10u);
+}
+
--- a/shaders-hlsl/comp/image.comp
+++ b/shaders-hlsl/comp/image.comp
@ -0,0 +1,12 @@
+#version 450
+layout(local_size_x = 1) in;
+
+layout(r32f, binding = 0) uniform readonly mediump image2D uImageIn;
+layout(r32f, binding = 1) uniform writeonly mediump image2D uImageOut;
+
+void main()
+{
+    vec4 v = imageLoad(uImageIn, ivec2(gl_GlobalInvocationID.xy));
+    imageStore(uImageOut, ivec2(gl_GlobalInvocationID.xy), v);
+}
+
--- a/spirv_hlsl.cpp
+++ b/spirv_hlsl.cpp
@ -23,6 +23,78 @@ using namespace spv;
 using namespace spirv_cross;
 using namespace std;

+// Maps a SPIR-V image format to the HLSL element type used for typed RW resources
+// (RWTexture*/RWBuffer). Raises via SPIRV_CROSS_THROW for formats with no case below.
+static string image_format_to_type(ImageFormat fmt)
+{
+	switch (fmt)
+	{
+	case ImageFormatR8:
+	case ImageFormatR16:
+		return "unorm float";
+	case ImageFormatRg8:
+	case ImageFormatRg16:
+		return "unorm float2";
+	case ImageFormatRgba8:
+	case ImageFormatRgba16:
+	case ImageFormatRgb10A2:
+		return "unorm float4";
+
+	case ImageFormatR8Snorm:
+	case ImageFormatR16Snorm:
+		return "snorm float";
+	case ImageFormatRg8Snorm:
+	case ImageFormatRg16Snorm:
+		return "snorm float2";
+	case ImageFormatRgba8Snorm:
+	case ImageFormatRgba16Snorm:
+		return "snorm float4";
+
+	case ImageFormatR16f:
+	case ImageFormatR32f:
+		return "float";
+	case ImageFormatRg16f:
+	case ImageFormatRg32f:
+		return "float2";
+	case ImageFormatRgba16f:
+	case ImageFormatRgba32f:
+		return "float4";
+
+	case ImageFormatR11fG11fB10f:
+		return "float3";
+
+	case ImageFormatR8i:
+	case ImageFormatR16i:
+	case ImageFormatR32i:
+		return "int";
+	case ImageFormatRg8i:
+	case ImageFormatRg16i:
+	case ImageFormatRg32i:
+		return "int2";
+	case ImageFormatRgba8i:
+	case ImageFormatRgba16i:
+	case ImageFormatRgba32i:
+		return "int4";
+
+	case ImageFormatR8ui:
+	case ImageFormatR16ui:
+	case ImageFormatR32ui:
+		return "uint";
+	case ImageFormatRg8ui:
+	case ImageFormatRg16ui:
+	case ImageFormatRg32ui:
+		return "uint2";
+	case ImageFormatRgba8ui:
+	case ImageFormatRgba16ui:
+	case ImageFormatRgba32ui:
+	case ImageFormatRgb10a2ui:
+		return "uint4";
+
+	default:
+		SPIRV_CROSS_THROW("Unrecognized typed image format.");
+	}
+}
+
 // Returns true if an arithmetic operation does not change behavior depending on signedness.
 static bool opcode_is_sign_invariant(Op opcode)
 {
@ -48,21 +120,21 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type)
 {
 	auto &imagetype = get<SPIRType>(type.image.type);
 	const char *dim = nullptr;
-	const char *rw = "";
+	bool typed_load = false;
 	uint32_t components = 4;

 	switch (type.image.dim)
 	{
 	case Dim1D:
-		rw = type.image.sampled == 2 ? "RW" : "";
+		typed_load = type.image.sampled == 2;
 		dim = "1D";
 		break;
 	case Dim2D:
-		rw = type.image.sampled == 2 ? "RW" : "";
+		typed_load = type.image.sampled == 2;
 		dim = "2D";
 		break;
 	case Dim3D:
-		rw = type.image.sampled == 2 ? "RW" : "";
+		typed_load = type.image.sampled == 2;
 		dim = "3D";
 		break;
 	case DimCube:
@ -76,10 +148,7 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type)
 		if (type.image.sampled == 1)
 			return join("Buffer<", type_to_glsl(imagetype), components, ">");
 		else if (type.image.sampled == 2)
-		{
-			SPIRV_CROSS_THROW("RWBuffer is not implemented yet for HLSL.");
-			//return join("RWBuffer<", type_to_glsl(imagetype), components, ">");
-		}
+			return join("RWBuffer<", image_format_to_type(type.image.format), ">");
 		else
 			SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime.");
 	case DimSubpassData:
@ -90,7 +159,9 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type)
 	}
 	const char *arrayed = type.image.arrayed ? "Array" : "";
 	const char *ms = type.image.ms ? "MS" : "";
-	return join(rw, "Texture", dim, ms, arrayed, "<", type_to_glsl(imagetype), components, ">");
+	const char *rw = typed_load ? "RW" : "";
+	return join(rw, "Texture", dim, ms, arrayed, "<",
+	            typed_load ? image_format_to_type(type.image.format) : join(type_to_glsl(imagetype), components), ">");
 }

 string CompilerHLSL::image_type_hlsl_legacy(const SPIRType &type)
@ -2251,7 +2322,8 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction)
 void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
 {
 	const char *atomic_op = nullptr;
-	auto value_expr = to_expression(ops[5]);
+	auto value_expr = to_expression(ops[op == OpAtomicCompareExchange ? 6 : 5]);
+
 	switch (op)
 	{
 	case OpAtomicISub:
@ -2289,6 +2361,13 @@ void CompilerHLSL::emit_atomic(const uint32_t *ops, uint32_t length, spv::Op op)
 		atomic_op = "InterlockedExchange";
 		break;

+	case OpAtomicCompareExchange:
+		if (length < 8)
+			SPIRV_CROSS_THROW("Not enough data for opcode.");
+		atomic_op = "InterlockedCompareExchange";
+		value_expr = join(to_expression(ops[7]), ", ", value_expr);
+		break;
+
 	default:
 		SPIRV_CROSS_THROW("Unknown atomic opcode.");
 	}
@ -2689,8 +2768,6 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 	}

 	case OpAtomicCompareExchange:
-		break;
-
 	case OpAtomicExchange:
 	case OpAtomicISub:
 	case OpAtomicSMin: