From 48f3fa4adb1703799c8d1c7df7422946d05e9347 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 29 Nov 2017 11:33:44 +0100 Subject: [PATCH 1/5] Implement bitfield ops in HLSL. --- .../opt/shaders-hlsl/comp/bitfield.noopt.comp | 113 ++++++++++++++++++ .../shaders-hlsl/comp/bitfield.noopt.comp | 113 ++++++++++++++++++ shaders-hlsl/comp/bitfield.noopt.comp | 44 +++++++ spirv_hlsl.cpp | 97 +++++++++++++++ spirv_hlsl.hpp | 2 + 5 files changed, 369 insertions(+) create mode 100644 reference/opt/shaders-hlsl/comp/bitfield.noopt.comp create mode 100644 reference/shaders-hlsl/comp/bitfield.noopt.comp create mode 100644 shaders-hlsl/comp/bitfield.noopt.comp diff --git a/reference/opt/shaders-hlsl/comp/bitfield.noopt.comp b/reference/opt/shaders-hlsl/comp/bitfield.noopt.comp new file mode 100644 index 00000000..6839d956 --- /dev/null +++ b/reference/opt/shaders-hlsl/comp/bitfield.noopt.comp @@ -0,0 +1,113 @@ +uint SPIRV_Cross_bitfieldInsert(uint Base, uint Insert, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); + return (Base & ~Mask) | ((Insert << Offset) & Mask); +} + +uint2 SPIRV_Cross_bitfieldInsert(uint2 Base, uint2 Insert, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); + return (Base & ~Mask) | ((Insert << Offset) & Mask); +} + +uint3 SPIRV_Cross_bitfieldInsert(uint3 Base, uint3 Insert, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); + return (Base & ~Mask) | ((Insert << Offset) & Mask); +} + +uint4 SPIRV_Cross_bitfieldInsert(uint4 Base, uint4 Insert, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); + return (Base & ~Mask) | ((Insert << Offset) & Mask); +} + +uint SPIRV_Cross_bitfieldUExtract(uint Base, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1); + return (Base >> Offset) & Mask; +} + +uint2 SPIRV_Cross_bitfieldUExtract(uint2 Base, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1); + return (Base >> Offset) & Mask; +} + +uint3 SPIRV_Cross_bitfieldUExtract(uint3 Base, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1); + return (Base >> Offset) & Mask; +} + +uint4 SPIRV_Cross_bitfieldUExtract(uint4 Base, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1); + return (Base >> Offset) & Mask; +} + +int SPIRV_Cross_bitfieldSExtract(int Base, int Offset, int Count) +{ + int Mask = Count == 32 ? -1 : ((1 << Count) - 1); + int Masked = (Base >> Offset) & Mask; + int ExtendShift = (32 - Count) & 31; + return (Masked << ExtendShift) >> ExtendShift; +} + +int2 SPIRV_Cross_bitfieldSExtract(int2 Base, int Offset, int Count) +{ + int Mask = Count == 32 ? -1 : ((1 << Count) - 1); + int2 Masked = (Base >> Offset) & Mask; + int ExtendShift = (32 - Count) & 31; + return (Masked << ExtendShift) >> ExtendShift; +} + +int3 SPIRV_Cross_bitfieldSExtract(int3 Base, int Offset, int Count) +{ + int Mask = Count == 32 ? -1 : ((1 << Count) - 1); + int3 Masked = (Base >> Offset) & Mask; + int ExtendShift = (32 - Count) & 31; + return (Masked << ExtendShift) >> ExtendShift; +} + +int4 SPIRV_Cross_bitfieldSExtract(int4 Base, int Offset, int Count) +{ + int Mask = Count == 32 ? -1 : ((1 << Count) - 1); + int4 Masked = (Base >> Offset) & Mask; + int ExtendShift = (32 - Count) & 31; + return (Masked << ExtendShift) >> ExtendShift; +} + +void comp_main() +{ + int signed_value = 0; + uint unsigned_value = 0u; + int3 signed_values = int3(0, 0, 0); + uint3 unsigned_values = uint3(0u, 0u, 0u); + int s = SPIRV_Cross_bitfieldSExtract(signed_value, 5, 20); + uint u = SPIRV_Cross_bitfieldUExtract(unsigned_value, 6, 21); + s = int(SPIRV_Cross_bitfieldInsert(s, 40, 5, 4)); + u = SPIRV_Cross_bitfieldInsert(u, 60u, 5, 4); + u = reversebits(u); + s = reversebits(s); + int v0 = countbits(u); + int v1 = countbits(s); + int v2 = firstbithigh(u); + int v3 = firstbitlow(s); + int3 s_1 = SPIRV_Cross_bitfieldSExtract(signed_values, 5, 20); + uint3 u_1 = SPIRV_Cross_bitfieldUExtract(unsigned_values, 6, 21); + s_1 = int3(SPIRV_Cross_bitfieldInsert(s_1, int3(40, 40, 40), 5, 4)); + u_1 = SPIRV_Cross_bitfieldInsert(u_1, uint3(60u, 60u, 60u), 5, 4); + u_1 = reversebits(u_1); + s_1 = reversebits(s_1); + int3 v0_1 = countbits(u_1); + int3 v1_1 = countbits(s_1); + int3 v2_1 = firstbithigh(u_1); + int3 v3_1 = firstbitlow(s_1); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/reference/shaders-hlsl/comp/bitfield.noopt.comp b/reference/shaders-hlsl/comp/bitfield.noopt.comp new file mode 100644 index 00000000..6839d956 --- /dev/null +++ b/reference/shaders-hlsl/comp/bitfield.noopt.comp @@ -0,0 +1,113 @@ +uint SPIRV_Cross_bitfieldInsert(uint Base, uint Insert, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); + return (Base & ~Mask) | ((Insert << Offset) & Mask); +} + +uint2 SPIRV_Cross_bitfieldInsert(uint2 Base, uint2 Insert, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); + return (Base & ~Mask) | ((Insert << Offset) & Mask); +} + +uint3 SPIRV_Cross_bitfieldInsert(uint3 Base, uint3 Insert, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); + return (Base & ~Mask) | ((Insert << Offset) & Mask); +} + +uint4 SPIRV_Cross_bitfieldInsert(uint4 Base, uint4 Insert, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31)); + return (Base & ~Mask) | ((Insert << Offset) & Mask); +} + +uint SPIRV_Cross_bitfieldUExtract(uint Base, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1); + return (Base >> Offset) & Mask; +} + +uint2 SPIRV_Cross_bitfieldUExtract(uint2 Base, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1); + return (Base >> Offset) & Mask; +} + +uint3 SPIRV_Cross_bitfieldUExtract(uint3 Base, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1); + return (Base >> Offset) & Mask; +} + +uint4 SPIRV_Cross_bitfieldUExtract(uint4 Base, uint Offset, uint Count) +{ + uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1); + return (Base >> Offset) & Mask; +} + +int SPIRV_Cross_bitfieldSExtract(int Base, int Offset, int Count) +{ + int Mask = Count == 32 ? -1 : ((1 << Count) - 1); + int Masked = (Base >> Offset) & Mask; + int ExtendShift = (32 - Count) & 31; + return (Masked << ExtendShift) >> ExtendShift; +} + +int2 SPIRV_Cross_bitfieldSExtract(int2 Base, int Offset, int Count) +{ + int Mask = Count == 32 ? -1 : ((1 << Count) - 1); + int2 Masked = (Base >> Offset) & Mask; + int ExtendShift = (32 - Count) & 31; + return (Masked << ExtendShift) >> ExtendShift; +} + +int3 SPIRV_Cross_bitfieldSExtract(int3 Base, int Offset, int Count) +{ + int Mask = Count == 32 ? -1 : ((1 << Count) - 1); + int3 Masked = (Base >> Offset) & Mask; + int ExtendShift = (32 - Count) & 31; + return (Masked << ExtendShift) >> ExtendShift; +} + +int4 SPIRV_Cross_bitfieldSExtract(int4 Base, int Offset, int Count) +{ + int Mask = Count == 32 ? -1 : ((1 << Count) - 1); + int4 Masked = (Base >> Offset) & Mask; + int ExtendShift = (32 - Count) & 31; + return (Masked << ExtendShift) >> ExtendShift; +} + +void comp_main() +{ + int signed_value = 0; + uint unsigned_value = 0u; + int3 signed_values = int3(0, 0, 0); + uint3 unsigned_values = uint3(0u, 0u, 0u); + int s = SPIRV_Cross_bitfieldSExtract(signed_value, 5, 20); + uint u = SPIRV_Cross_bitfieldUExtract(unsigned_value, 6, 21); + s = int(SPIRV_Cross_bitfieldInsert(s, 40, 5, 4)); + u = SPIRV_Cross_bitfieldInsert(u, 60u, 5, 4); + u = reversebits(u); + s = reversebits(s); + int v0 = countbits(u); + int v1 = countbits(s); + int v2 = firstbithigh(u); + int v3 = firstbitlow(s); + int3 s_1 = SPIRV_Cross_bitfieldSExtract(signed_values, 5, 20); + uint3 u_1 = SPIRV_Cross_bitfieldUExtract(unsigned_values, 6, 21); + s_1 = int3(SPIRV_Cross_bitfieldInsert(s_1, int3(40, 40, 40), 5, 4)); + u_1 = SPIRV_Cross_bitfieldInsert(u_1, uint3(60u, 60u, 60u), 5, 4); + u_1 = reversebits(u_1); + s_1 = reversebits(s_1); + int3 v0_1 = countbits(u_1); + int3 v1_1 = countbits(s_1); + int3 v2_1 = firstbithigh(u_1); + int3 v3_1 = firstbitlow(s_1); +} + +[numthreads(1, 1, 1)] +void main() +{ + comp_main(); +} diff --git a/shaders-hlsl/comp/bitfield.noopt.comp b/shaders-hlsl/comp/bitfield.noopt.comp new file mode 100644 index 00000000..a2ef9aa0 --- /dev/null +++ b/shaders-hlsl/comp/bitfield.noopt.comp @@ -0,0 +1,44 @@ +#version 310 es + +void main() +{ + int signed_value = 0; + uint unsigned_value = 0u; + + ivec3 signed_values = ivec3(0); + uvec3 unsigned_values = uvec3(0u); + + { + int s = bitfieldExtract(signed_value, 5, 20); + uint u = bitfieldExtract(unsigned_value, 6, 21); + + s = bitfieldInsert(s, 40, 5, 4); + u = bitfieldInsert(u, 60u, 5, 4); + + u = bitfieldReverse(u); + s = bitfieldReverse(s); + + int v0 = bitCount(u); + int v1 = bitCount(s); + + int v2 = findMSB(u); + int v3 = findLSB(s); + } + + { + ivec3 s = bitfieldExtract(signed_values, 5, 20); + uvec3 u = bitfieldExtract(unsigned_values, 6, 21); + + s = bitfieldInsert(s, ivec3(40), 5, 4); + u = bitfieldInsert(u, uvec3(60u), 5, 4); + + u = bitfieldReverse(u); + s = bitfieldReverse(s); + + ivec3 v0 = bitCount(u); + ivec3 v1 = bitCount(s); + + ivec3 v2 = findMSB(u); + ivec3 v3 = findLSB(s); + } +} diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index 6777663d..3a296774 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -1271,6 +1271,48 @@ void CompilerHLSL::emit_resources() end_scope(); statement(""); } + + if (requires_bitfield_insert) + { + static const char *types[] = { "uint", "uint2", "uint3", "uint4" }; + for (auto &type : types) + { + statement(type, " SPIRV_Cross_bitfieldInsert(", type, " Base, ", type, " Insert, uint Offset, uint Count)"); + begin_scope(); + statement("uint Mask = Count == 32 ? 0xffffffff : (((1u << Count) - 1) << (Offset & 31));"); + statement("return (Base & ~Mask) | ((Insert << Offset) & Mask);"); + end_scope(); + statement(""); + } + } + + if (requires_bitfield_extract) + { + static const char *unsigned_types[] = { "uint", "uint2", "uint3", "uint4" }; + for (auto &type : unsigned_types) + { + statement(type, " SPIRV_Cross_bitfieldUExtract(", type, " Base, uint Offset, uint Count)"); + begin_scope(); + statement("uint Mask = Count == 32 ? 0xffffffff : ((1 << Count) - 1);"); + statement("return (Base >> Offset) & Mask;"); + end_scope(); + statement(""); + } + + // In this overload, we will have to do sign-extension, which we will emulate by shifting up and down. + static const char *signed_types[] = { "int", "int2", "int3", "int4" }; + for (auto &type : signed_types) + { + statement(type, " SPIRV_Cross_bitfieldSExtract(", type, " Base, int Offset, int Count)"); + begin_scope(); + statement("int Mask = Count == 32 ? -1 : ((1 << Count) - 1);"); + statement(type, " Masked = (Base >> Offset) & Mask;"); + statement("int ExtendShift = (32 - Count) & 31;"); + statement("return (Masked << ExtendShift) >> ExtendShift;"); + end_scope(); + statement(""); + } + } } string CompilerHLSL::layout_for_member(const SPIRType &type, uint32_t index) @@ -2391,6 +2433,14 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, case GLSLstd450UnpackDouble2x32: SPIRV_CROSS_THROW("packDouble2x32/unpackDouble2x32 not supported in HLSL."); + case GLSLstd450FindILsb: + emit_unary_func_op(result_type, id, args[0], "firstbitlow"); + break; + case GLSLstd450FindSMsb: + case GLSLstd450FindUMsb: + emit_unary_func_op(result_type, id, args[0], "firstbithigh"); + break; + default: CompilerGLSL::emit_glsl_op(result_type, id, eop, args, count); break; @@ -3297,6 +3347,53 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) break; } + case OpBitFieldInsert: + { + if (!requires_bitfield_insert) + { + requires_bitfield_insert = true; + force_recompile = true; + } + + auto expr = join("SPIRV_Cross_bitfieldInsert(", + to_expression(ops[2]), ", ", + to_expression(ops[3]), ", ", + to_expression(ops[4]), ", ", + to_expression(ops[5]), ")"); + + bool forward = should_forward(ops[2]) && should_forward(ops[3]) && + should_forward(ops[4]) && should_forward(ops[5]); + + auto &restype = get(ops[0]); + expr = bitcast_expression(restype, SPIRType::UInt, expr); + emit_op(ops[0], ops[1], expr, forward); + break; + } + + case OpBitFieldSExtract: + case OpBitFieldUExtract: + { + if (!requires_bitfield_extract) + { + requires_bitfield_extract = true; + force_recompile = true; + } + + if (opcode == OpBitFieldSExtract) + TFOP(SPIRV_Cross_bitfieldSExtract); + else + TFOP(SPIRV_Cross_bitfieldUExtract); + break; + } + + case OpBitCount: + UFOP(countbits); + break; + + case OpBitReverse: + UFOP(reversebits); + break; + default: CompilerGLSL::emit_instruction(instruction); break; diff --git a/spirv_hlsl.hpp b/spirv_hlsl.hpp index 90df68c8..bad8eb92 100644 --- a/spirv_hlsl.hpp +++ b/spirv_hlsl.hpp @@ -121,6 +121,8 @@ private: bool requires_snorm8_packing = false; bool requires_unorm16_packing = false; bool requires_snorm16_packing = false; + bool requires_bitfield_insert = false; + bool requires_bitfield_extract = false; uint64_t required_textureSizeVariants = 0; void require_texture_query_variant(const SPIRType &type); From c405274e38b7290ccfbaa1a94ee26c8c10ac9d06 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 29 Nov 2017 12:00:48 +0100 Subject: [PATCH 2/5] Run format_all.sh. --- spirv_hlsl.cpp | 11 ++++------- 1 file changed, 4 insertions(+), 7 deletions(-) diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index 3a296774..45520b8c 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -3355,14 +3355,11 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) force_recompile = true; } - auto expr = join("SPIRV_Cross_bitfieldInsert(", - to_expression(ops[2]), ", ", - to_expression(ops[3]), ", ", - to_expression(ops[4]), ", ", - to_expression(ops[5]), ")"); + auto expr = join("SPIRV_Cross_bitfieldInsert(", to_expression(ops[2]), ", ", to_expression(ops[3]), ", ", + to_expression(ops[4]), ", ", to_expression(ops[5]), ")"); - bool forward = should_forward(ops[2]) && should_forward(ops[3]) && - should_forward(ops[4]) && should_forward(ops[5]); + bool forward = + should_forward(ops[2]) && should_forward(ops[3]) && should_forward(ops[4]) && should_forward(ops[5]); auto &restype = get(ops[0]); expr = bitcast_expression(restype, SPIRType::UInt, expr); From c65248fdce5ac501e082ebf2f74ae32e819989df Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 29 Nov 2017 12:13:48 +0100 Subject: [PATCH 3/5] Support image load-store without format in HLSL. --- reference/opt/shaders-hlsl/comp/image.comp | 15 ++++- reference/shaders-hlsl/comp/image.comp | 6 ++ shaders-hlsl/comp/image.comp | 8 +++ spirv_hlsl.cpp | 68 ++++++++++++++++++++-- 4 files changed, 88 insertions(+), 9 deletions(-) diff --git a/reference/opt/shaders-hlsl/comp/image.comp b/reference/opt/shaders-hlsl/comp/image.comp index 31cf60f2..0401d8ee 100644 --- a/reference/opt/shaders-hlsl/comp/image.comp +++ b/reference/opt/shaders-hlsl/comp/image.comp @@ -22,6 +22,9 @@ RWTexture2D uImageInU4 : register(u4); RWTexture2D uImageOutU4 : register(u5); RWBuffer uImageInBuffer4 : register(u6); RWBuffer uImageOutBuffer4 : register(u7); +RWTexture2D uImageNoFmtF : register(u8); +RWTexture2D uImageNoFmtU : register(u9); +RWTexture2D uImageNoFmtI : register(u10); static uint3 gl_GlobalInvocationID; struct SPIRV_Cross_Input @@ -38,11 +41,17 @@ void comp_main() uImageOutF2[int2(gl_GlobalInvocationID.xy)] = uImageInF2[int2(gl_GlobalInvocationID.xy)].xyyy.xy; uImageOutI2[int2(gl_GlobalInvocationID.xy)] = uImageInI2[int2(gl_GlobalInvocationID.xy)].xyyy.xy; uImageOutU2[int2(gl_GlobalInvocationID.xy)] = uImageInU2[int2(gl_GlobalInvocationID.xy)].xyyy.xy; - uImageOutBuffer2[int(gl_GlobalInvocationID.x)] = uImageInBuffer2[int(gl_GlobalInvocationID.x)].xyyy.xy; + float4 _135 = uImageInBuffer2[int(gl_GlobalInvocationID.x)].xyyy; + uImageOutBuffer2[int(gl_GlobalInvocationID.x)] = _135.xy; uImageOutF4[int2(gl_GlobalInvocationID.xy)] = uImageInF4[int2(gl_GlobalInvocationID.xy)]; - uImageOutI4[int2(gl_GlobalInvocationID.xy)] = uImageInI4[int2(gl_GlobalInvocationID.xy)]; - uImageOutU4[int2(gl_GlobalInvocationID.xy)] = uImageInU4[int2(gl_GlobalInvocationID.xy)]; + int4 _165 = uImageInI4[int2(gl_GlobalInvocationID.xy)]; + uImageOutI4[int2(gl_GlobalInvocationID.xy)] = _165; + uint4 _180 = uImageInU4[int2(gl_GlobalInvocationID.xy)]; + uImageOutU4[int2(gl_GlobalInvocationID.xy)] = _180; uImageOutBuffer4[int(gl_GlobalInvocationID.x)] = uImageInBuffer4[int(gl_GlobalInvocationID.x)]; + uImageNoFmtF[int2(gl_GlobalInvocationID.xy)] = _135; + uImageNoFmtU[int2(gl_GlobalInvocationID.xy)] = _180; + uImageNoFmtI[int2(gl_GlobalInvocationID.xy)] = _165; } [numthreads(1, 1, 1)] diff --git a/reference/shaders-hlsl/comp/image.comp b/reference/shaders-hlsl/comp/image.comp index cb084e22..43b8df03 100644 --- a/reference/shaders-hlsl/comp/image.comp +++ b/reference/shaders-hlsl/comp/image.comp @@ -22,6 +22,9 @@ RWTexture2D uImageInU4 : register(u4); RWTexture2D uImageOutU4 : register(u5); RWBuffer uImageInBuffer4 : register(u6); RWBuffer uImageOutBuffer4 : register(u7); +RWTexture2D uImageNoFmtF : register(u8); +RWTexture2D uImageNoFmtU : register(u9); +RWTexture2D uImageNoFmtI : register(u10); static uint3 gl_GlobalInvocationID; struct SPIRV_Cross_Input @@ -55,6 +58,9 @@ void comp_main() uImageOutU4[int2(gl_GlobalInvocationID.xy)] = u4; float4 b4 = uImageInBuffer4[int(gl_GlobalInvocationID.x)]; uImageOutBuffer4[int(gl_GlobalInvocationID.x)] = b4; + uImageNoFmtF[int2(gl_GlobalInvocationID.xy)] = b2; + uImageNoFmtU[int2(gl_GlobalInvocationID.xy)] = u4; + uImageNoFmtI[int2(gl_GlobalInvocationID.xy)] = i4; } [numthreads(1, 1, 1)] diff --git a/shaders-hlsl/comp/image.comp b/shaders-hlsl/comp/image.comp index 218af74d..082b2979 100644 --- a/shaders-hlsl/comp/image.comp +++ b/shaders-hlsl/comp/image.comp @@ -28,6 +28,10 @@ layout(rgba32ui, binding = 5) uniform writeonly uimage2D uImageOutU4; layout(rgba32f, binding = 6) uniform readonly imageBuffer uImageInBuffer4; layout(rgba32f, binding = 7) uniform writeonly imageBuffer uImageOutBuffer4; +layout(binding = 8) uniform writeonly image2D uImageNoFmtF; +layout(binding = 9) uniform writeonly uimage2D uImageNoFmtU; +layout(binding = 10) uniform writeonly iimage2D uImageNoFmtI; + void main() { vec4 f = imageLoad(uImageInF, ivec2(gl_GlobalInvocationID.xy)); @@ -65,5 +69,9 @@ void main() vec4 b4 = imageLoad(uImageInBuffer4, int(gl_GlobalInvocationID.x)); imageStore(uImageOutBuffer4, int(gl_GlobalInvocationID.x), b4); + + imageStore(uImageNoFmtF, ivec2(gl_GlobalInvocationID.xy), b2); + imageStore(uImageNoFmtU, ivec2(gl_GlobalInvocationID.xy), u4); + imageStore(uImageNoFmtI, ivec2(gl_GlobalInvocationID.xy), i4); } diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index 45520b8c..99604620 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -74,77 +74,129 @@ static unsigned image_format_to_components(ImageFormat fmt) case ImageFormatRgb10a2ui: return 4; + case ImageFormatUnknown: + return 4; // Assume 4. + default: SPIRV_CROSS_THROW("Unrecognized typed image format."); } } -static string image_format_to_type(ImageFormat fmt) +static string image_format_to_type(ImageFormat fmt, SPIRType::BaseType basetype) { switch (fmt) { case ImageFormatR8: case ImageFormatR16: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); return "unorm float"; case ImageFormatRg8: case ImageFormatRg16: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); return "unorm float2"; case ImageFormatRgba8: case ImageFormatRgba16: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); return "unorm float4"; case ImageFormatRgb10A2: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); return "unorm float4"; case ImageFormatR8Snorm: case ImageFormatR16Snorm: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); return "snorm float"; case ImageFormatRg8Snorm: case ImageFormatRg16Snorm: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); return "snorm float2"; case ImageFormatRgba8Snorm: case ImageFormatRgba16Snorm: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); return "snorm float4"; case ImageFormatR16f: case ImageFormatR32f: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); return "float"; case ImageFormatRg16f: case ImageFormatRg32f: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); return "float2"; case ImageFormatRgba16f: case ImageFormatRgba32f: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); return "float4"; case ImageFormatR11fG11fB10f: + if (basetype != SPIRType::Float) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); return "float3"; case ImageFormatR8i: case ImageFormatR16i: case ImageFormatR32i: + if (basetype != SPIRType::Int) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); return "int"; case ImageFormatRg8i: case ImageFormatRg16i: case ImageFormatRg32i: + if (basetype != SPIRType::Int) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); return "int2"; case ImageFormatRgba8i: case ImageFormatRgba16i: case ImageFormatRgba32i: + if (basetype != SPIRType::Int) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); return "int4"; case ImageFormatR8ui: case ImageFormatR16ui: case ImageFormatR32ui: + if (basetype != SPIRType::UInt) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); return "uint"; case ImageFormatRg8ui: case ImageFormatRg16ui: case ImageFormatRg32ui: + if (basetype != SPIRType::UInt) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); return "uint2"; case ImageFormatRgba8ui: case ImageFormatRgba16ui: case ImageFormatRgba32ui: + if (basetype != SPIRType::UInt) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); return "uint4"; case ImageFormatRgb10a2ui: - return "int4"; + if (basetype != SPIRType::UInt) + SPIRV_CROSS_THROW("Mismatch in image type and base type of image."); + return "uint4"; + + case ImageFormatUnknown: + switch (basetype) + { + case SPIRType::Float: + return "float4"; + case SPIRType::Int: + return "int4"; + case SPIRType::UInt: + return "uint4"; + default: + SPIRV_CROSS_THROW("Unsupported base type for image."); + } default: SPIRV_CROSS_THROW("Unrecognized typed image format."); @@ -204,7 +256,7 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type) if (type.image.sampled == 1) return join("Buffer<", type_to_glsl(imagetype), components, ">"); else if (type.image.sampled == 2) - return join("RWBuffer<", image_format_to_type(type.image.format), ">"); + return join("RWBuffer<", image_format_to_type(type.image.format, imagetype.basetype), ">"); else SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime."); case DimSubpassData: @@ -217,7 +269,9 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type) const char *ms = type.image.ms ? "MS" : ""; const char *rw = typed_load ? "RW" : ""; return join(rw, "Texture", dim, ms, arrayed, "<", - typed_load ? image_format_to_type(type.image.format) : join(type_to_glsl(imagetype), components), ">"); + typed_load ? image_format_to_type(type.image.format, imagetype.basetype) : + join(type_to_glsl(imagetype), components), + ">"); } string CompilerHLSL::image_type_hlsl_legacy(const SPIRType &type) @@ -3242,9 +3296,11 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) if (var && var->forwardable) { - auto &e = emit_op(result_type, id, imgexpr, true); + bool forward = forced_temporaries.find(id) == end(forced_temporaries); + auto &e = emit_op(result_type, id, imgexpr, forward); e.loaded_from = var->self; - var->dependees.push_back(id); + if (forward) + var->dependees.push_back(id); } else emit_op(result_type, id, imgexpr, false); From dbfa68699013b65223dce6433c856867c9877683 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 29 Nov 2017 12:38:13 +0100 Subject: [PATCH 4/5] Emit SampleCmpLevelZero for lod/grad variants of SampleCmp. --- .../frag/sample-cmp-level-zero.frag | 38 ++++++++++++++++ .../frag/sample-cmp-level-zero.frag | 45 +++++++++++++++++++ shaders-hlsl/frag/sample-cmp-level-zero.frag | 24 ++++++++++ spirv_hlsl.cpp | 20 +++++++-- 4 files changed, 123 insertions(+), 4 deletions(-) create mode 100644 reference/opt/shaders-hlsl/frag/sample-cmp-level-zero.frag create mode 100644 reference/shaders-hlsl/frag/sample-cmp-level-zero.frag create mode 100644 shaders-hlsl/frag/sample-cmp-level-zero.frag diff --git a/reference/opt/shaders-hlsl/frag/sample-cmp-level-zero.frag b/reference/opt/shaders-hlsl/frag/sample-cmp-level-zero.frag new file mode 100644 index 00000000..526d96b1 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/sample-cmp-level-zero.frag @@ -0,0 +1,38 @@ +Texture2D uSampler2D : register(t0); +SamplerComparisonState _uSampler2D_sampler : register(s0); +Texture2DArray uSampler2DArray : register(t1); +SamplerComparisonState _uSampler2DArray_sampler : register(s1); +TextureCube uSamplerCube : register(t2); +SamplerComparisonState _uSamplerCube_sampler : register(s2); +TextureCubeArray uSamplerCubeArray : register(t3); +SamplerComparisonState _uSamplerCubeArray_sampler : register(s3); + +static float3 vUVRef; +static float4 vDirRef; +static float FragColor; + +struct SPIRV_Cross_Input +{ + float3 vUVRef : TEXCOORD0; + float4 vDirRef : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float FragColor : SV_Target0; +}; + +void frag_main() +{ + FragColor = (((((uSampler2D.SampleCmp(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1)) + uSampler2DArray.SampleCmp(_uSampler2DArray_sampler, vDirRef.xyz, vDirRef.w, int2(-1, -1))) + uSamplerCube.SampleCmp(_uSamplerCube_sampler, vDirRef.xyz, vDirRef.w)) + uSamplerCubeArray.SampleCmp(_uSamplerCubeArray_sampler, vDirRef, 0.5f)) + uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1))) + uSampler2DArray.SampleCmpLevelZero(_uSampler2DArray_sampler, vDirRef.xyz, vDirRef.w, int2(-1, -1))) + uSamplerCube.SampleCmpLevelZero(_uSamplerCube_sampler, vDirRef.xyz, vDirRef.w); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vUVRef = stage_input.vUVRef; + vDirRef = stage_input.vDirRef; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/reference/shaders-hlsl/frag/sample-cmp-level-zero.frag b/reference/shaders-hlsl/frag/sample-cmp-level-zero.frag new file mode 100644 index 00000000..5c0a3897 --- /dev/null +++ b/reference/shaders-hlsl/frag/sample-cmp-level-zero.frag @@ -0,0 +1,45 @@ +Texture2D uSampler2D : register(t0); +SamplerComparisonState _uSampler2D_sampler : register(s0); +Texture2DArray uSampler2DArray : register(t1); +SamplerComparisonState _uSampler2DArray_sampler : register(s1); +TextureCube uSamplerCube : register(t2); +SamplerComparisonState _uSamplerCube_sampler : register(s2); +TextureCubeArray uSamplerCubeArray : register(t3); +SamplerComparisonState _uSamplerCubeArray_sampler : register(s3); + +static float3 vUVRef; +static float4 vDirRef; +static float FragColor; + +struct SPIRV_Cross_Input +{ + float3 vUVRef : TEXCOORD0; + float4 vDirRef : TEXCOORD1; +}; + +struct SPIRV_Cross_Output +{ + float FragColor : SV_Target0; +}; + +void frag_main() +{ + float s0 = uSampler2D.SampleCmp(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1)); + float s1 = uSampler2DArray.SampleCmp(_uSampler2DArray_sampler, vDirRef.xyz, vDirRef.w, int2(-1, -1)); + float s2 = uSamplerCube.SampleCmp(_uSamplerCube_sampler, vDirRef.xyz, vDirRef.w); + float s3 = uSamplerCubeArray.SampleCmp(_uSamplerCubeArray_sampler, vDirRef, 0.5f); + float l0 = uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1)); + float l1 = uSampler2DArray.SampleCmpLevelZero(_uSampler2DArray_sampler, vDirRef.xyz, vDirRef.w, int2(-1, -1)); + float l2 = uSamplerCube.SampleCmpLevelZero(_uSamplerCube_sampler, vDirRef.xyz, vDirRef.w); + FragColor = (((((s0 + s1) + s2) + s3) + l0) + l1) + l2; +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + vUVRef = stage_input.vUVRef; + vDirRef = stage_input.vDirRef; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.FragColor = FragColor; + return stage_output; +} diff --git a/shaders-hlsl/frag/sample-cmp-level-zero.frag b/shaders-hlsl/frag/sample-cmp-level-zero.frag new file mode 100644 index 00000000..8025ba43 --- /dev/null +++ b/shaders-hlsl/frag/sample-cmp-level-zero.frag @@ -0,0 +1,24 @@ +#version 450 + +layout(location = 0) out float FragColor; +layout(binding = 0) uniform sampler2DShadow uSampler2D; +layout(binding = 1) uniform sampler2DArrayShadow uSampler2DArray; +layout(binding = 2) uniform samplerCubeShadow uSamplerCube; +layout(binding = 3) uniform samplerCubeArrayShadow uSamplerCubeArray; + +layout(location = 0) in vec3 vUVRef; +layout(location = 1) in vec4 vDirRef; + +void main() +{ + float s0 = textureOffset(uSampler2D, vUVRef, ivec2(-1)); + float s1 = textureOffset(uSampler2DArray, vDirRef, ivec2(-1)); + float s2 = texture(uSamplerCube, vDirRef); + float s3 = texture(uSamplerCubeArray, vDirRef, 0.5); + + float l0 = textureLodOffset(uSampler2D, vUVRef, 0.0, ivec2(-1)); + float l1 = textureGradOffset(uSampler2DArray, vDirRef, vec2(0.0), vec2(0.0), ivec2(-1)); + float l2 = textureGrad(uSamplerCube, vDirRef, vec3(0.0), vec3(0.0)); + + FragColor = s0 + s1 + s2 + s3 + l0 + l1 + l2; +} diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index 99604620..e13e21a4 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -1974,7 +1974,19 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) texop += img_expr; if (imgtype.image.depth) - texop += ".SampleCmp"; + { + if (gather) + { + SPIRV_CROSS_THROW("GatherCmp does not exist in HLSL."); + } + else if (lod || grad_x || grad_y) + { + // Assume we want a fixed level, and the only thing we can get in HLSL is SampleCmpLevelZero. + texop += ".SampleCmpLevelZero"; + } + else + texop += ".SampleCmp"; + } else if (gather) { uint32_t comp_num = get(comp).scalar(); @@ -2146,7 +2158,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) expr += to_expression(dref); } - if (grad_x || grad_y) + if (!dref && (grad_x || grad_y)) { forward = forward && should_forward(grad_x); forward = forward && should_forward(grad_y); @@ -2156,14 +2168,14 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) expr += to_expression(grad_y); } - if (lod && options.shader_model >= 40 && op != OpImageFetch) + if (!dref && lod && options.shader_model >= 40 && op != OpImageFetch) { forward = forward && should_forward(lod); expr += ", "; expr += to_expression(lod); } - if (bias && options.shader_model >= 40) + if (!dref && bias && options.shader_model >= 40) { forward = forward && should_forward(bias); expr += ", "; From 975cf868929b1d7f5cba3be64d1893b4cb2df8f2 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 29 Nov 2017 12:48:06 +0100 Subject: [PATCH 5/5] Test textureProjLodOffset with compare in HLSL as well. --- .../frag/sample-cmp-level-zero.frag | 21 ++++++++++++++++- .../frag/sample-cmp-level-zero.frag | 23 ++++++++++++++++++- shaders-hlsl/frag/sample-cmp-level-zero.frag | 5 +++- 3 files changed, 46 insertions(+), 3 deletions(-) diff --git a/reference/opt/shaders-hlsl/frag/sample-cmp-level-zero.frag b/reference/opt/shaders-hlsl/frag/sample-cmp-level-zero.frag index 526d96b1..6f5ae7e3 100644 --- a/reference/opt/shaders-hlsl/frag/sample-cmp-level-zero.frag +++ b/reference/opt/shaders-hlsl/frag/sample-cmp-level-zero.frag @@ -22,9 +22,28 @@ struct SPIRV_Cross_Output float FragColor : SV_Target0; }; +float SPIRV_Cross_projectTextureCoordinate(float2 coord) +{ + return coord.x / coord.y; +} + +float2 SPIRV_Cross_projectTextureCoordinate(float3 coord) +{ + return float2(coord.x, coord.y) / coord.z; +} + +float3 SPIRV_Cross_projectTextureCoordinate(float4 coord) +{ + return float3(coord.x, coord.y, coord.z) / coord.w; +} + void frag_main() { - FragColor = (((((uSampler2D.SampleCmp(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1)) + uSampler2DArray.SampleCmp(_uSampler2DArray_sampler, vDirRef.xyz, vDirRef.w, int2(-1, -1))) + uSamplerCube.SampleCmp(_uSamplerCube_sampler, vDirRef.xyz, vDirRef.w)) + uSamplerCubeArray.SampleCmp(_uSamplerCubeArray_sampler, vDirRef, 0.5f)) + uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1))) + uSampler2DArray.SampleCmpLevelZero(_uSampler2DArray_sampler, vDirRef.xyz, vDirRef.w, int2(-1, -1))) + uSamplerCube.SampleCmpLevelZero(_uSamplerCube_sampler, vDirRef.xyz, vDirRef.w); + float4 _80 = vDirRef; + _80.z = vDirRef.w; + float4 _87 = vDirRef; + _87.z = vDirRef.w; + FragColor = (((((((uSampler2D.SampleCmp(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1)) + uSampler2DArray.SampleCmp(_uSampler2DArray_sampler, vDirRef.xyz, vDirRef.w, int2(-1, -1))) + uSamplerCube.SampleCmp(_uSamplerCube_sampler, vDirRef.xyz, vDirRef.w)) + uSamplerCubeArray.SampleCmp(_uSamplerCubeArray_sampler, vDirRef, 0.5f)) + uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1))) + uSampler2DArray.SampleCmpLevelZero(_uSampler2DArray_sampler, vDirRef.xyz, vDirRef.w, int2(-1, -1))) + uSamplerCube.SampleCmpLevelZero(_uSamplerCube_sampler, vDirRef.xyz, vDirRef.w)) + uSampler2D.SampleCmp(_uSampler2D_sampler, SPIRV_Cross_projectTextureCoordinate(_80.xyz), vDirRef.z, int2(1, 1))) + uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, SPIRV_Cross_projectTextureCoordinate(_87.xyz), vDirRef.z, int2(1, 1)); } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/reference/shaders-hlsl/frag/sample-cmp-level-zero.frag b/reference/shaders-hlsl/frag/sample-cmp-level-zero.frag index 5c0a3897..b52b1df5 100644 --- a/reference/shaders-hlsl/frag/sample-cmp-level-zero.frag +++ b/reference/shaders-hlsl/frag/sample-cmp-level-zero.frag @@ -22,6 +22,21 @@ struct SPIRV_Cross_Output float FragColor : SV_Target0; }; +float SPIRV_Cross_projectTextureCoordinate(float2 coord) +{ + return coord.x / coord.y; +} + +float2 SPIRV_Cross_projectTextureCoordinate(float3 coord) +{ + return float2(coord.x, coord.y) / coord.z; +} + +float3 SPIRV_Cross_projectTextureCoordinate(float4 coord) +{ + return float3(coord.x, coord.y, coord.z) / coord.w; +} + void frag_main() { float s0 = uSampler2D.SampleCmp(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1)); @@ -31,7 +46,13 @@ void frag_main() float l0 = uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, vUVRef.xy, vUVRef.z, int2(-1, -1)); float l1 = uSampler2DArray.SampleCmpLevelZero(_uSampler2DArray_sampler, vDirRef.xyz, vDirRef.w, int2(-1, -1)); float l2 = uSamplerCube.SampleCmpLevelZero(_uSamplerCube_sampler, vDirRef.xyz, vDirRef.w); - FragColor = (((((s0 + s1) + s2) + s3) + l0) + l1) + l2; + float4 _80 = vDirRef; + _80.z = vDirRef.w; + float p0 = uSampler2D.SampleCmp(_uSampler2D_sampler, SPIRV_Cross_projectTextureCoordinate(_80.xyz), vDirRef.z, int2(1, 1)); + float4 _87 = vDirRef; + _87.z = vDirRef.w; + float p1 = uSampler2D.SampleCmpLevelZero(_uSampler2D_sampler, SPIRV_Cross_projectTextureCoordinate(_87.xyz), vDirRef.z, int2(1, 1)); + FragColor = (((((((s0 + s1) + s2) + s3) + l0) + l1) + l2) + p0) + p1; } SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) diff --git a/shaders-hlsl/frag/sample-cmp-level-zero.frag b/shaders-hlsl/frag/sample-cmp-level-zero.frag index 8025ba43..c40d742e 100644 --- a/shaders-hlsl/frag/sample-cmp-level-zero.frag +++ b/shaders-hlsl/frag/sample-cmp-level-zero.frag @@ -20,5 +20,8 @@ void main() float l1 = textureGradOffset(uSampler2DArray, vDirRef, vec2(0.0), vec2(0.0), ivec2(-1)); float l2 = textureGrad(uSamplerCube, vDirRef, vec3(0.0), vec3(0.0)); - FragColor = s0 + s1 + s2 + s3 + l0 + l1 + l2; + float p0 = textureProjOffset(uSampler2D, vDirRef, ivec2(+1)); + float p1 = textureProjLodOffset(uSampler2D, vDirRef, 0.0, ivec2(+1)); + + FragColor = s0 + s1 + s2 + s3 + l0 + l1 + l2 + p0 + p1; }