From 47d94ff8d94bfa2830714b08816432878347e3b8 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Wed, 7 Mar 2018 10:21:25 +0100 Subject: [PATCH] Add FP16 to HLSL. Cannot be used in buffer types, similar to mediump in GLSL. half is useless, because it's 32-bit in FXC. --- .../opt/shaders-hlsl/frag/fp16.desktop.frag | 51 +++++ reference/shaders-hlsl/frag/fp16.desktop.frag | 190 ++++++++++++++++++ shaders-hlsl/frag/fp16.desktop.frag | 156 ++++++++++++++ spirv_common.hpp | 6 + spirv_glsl.cpp | 55 +++-- spirv_hlsl.cpp | 56 ++++++ spirv_hlsl.hpp | 1 + spirv_msl.cpp | 5 - 8 files changed, 501 insertions(+), 19 deletions(-) create mode 100644 reference/opt/shaders-hlsl/frag/fp16.desktop.frag create mode 100644 reference/shaders-hlsl/frag/fp16.desktop.frag create mode 100644 shaders-hlsl/frag/fp16.desktop.frag diff --git a/reference/opt/shaders-hlsl/frag/fp16.desktop.frag b/reference/opt/shaders-hlsl/frag/fp16.desktop.frag new file mode 100644 index 0000000..401b969 --- /dev/null +++ b/reference/opt/shaders-hlsl/frag/fp16.desktop.frag @@ -0,0 +1,51 @@ +struct ResType +{ + min16float4 _m0; + int4 _m1; +}; + +static min16float4 v4; +static min16float3 v3; +static min16float v1; +static min16float2 v2; +static float o1; +static float2 o2; +static float3 o3; +static float4 o4; + +struct SPIRV_Cross_Input +{ + min16float v1 : TEXCOORD0; + min16float2 v2 : TEXCOORD1; + min16float3 v3 : TEXCOORD2; + min16float4 v4 : TEXCOORD3; +}; + +struct SPIRV_Cross_Output +{ + float o1 : SV_Target0; + float2 o2 : SV_Target1; + float3 o3 : SV_Target2; + float4 o4 : SV_Target3; +}; + +void frag_main() +{ + min16float4 _335; + min16float4 _399 = modf(v4, _335); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + v4 = stage_input.v4; + v3 = stage_input.v3; + v1 = stage_input.v1; + v2 = stage_input.v2; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.o1 = o1; + stage_output.o2 = o2; + stage_output.o3 = o3; + stage_output.o4 = o4; + return stage_output; +} diff 
--git a/reference/shaders-hlsl/frag/fp16.desktop.frag b/reference/shaders-hlsl/frag/fp16.desktop.frag new file mode 100644 index 0000000..bfdc8db --- /dev/null +++ b/reference/shaders-hlsl/frag/fp16.desktop.frag @@ -0,0 +1,190 @@ +struct ResType +{ + min16float4 _m0; + int4 _m1; +}; + +static min16float4 v4; +static min16float3 v3; +static min16float v1; +static min16float2 v2; +static float o1; +static float2 o2; +static float3 o3; +static float4 o4; + +struct SPIRV_Cross_Input +{ + min16float v1 : TEXCOORD0; + min16float2 v2 : TEXCOORD1; + min16float3 v3 : TEXCOORD2; + min16float4 v4 : TEXCOORD3; +}; + +struct SPIRV_Cross_Output +{ + float o1 : SV_Target0; + float2 o2 : SV_Target1; + float3 o3 : SV_Target2; + float4 o4 : SV_Target3; +}; + +float mod(float x, float y) +{ + return x - y * floor(x / y); +} + +float2 mod(float2 x, float2 y) +{ + return x - y * floor(x / y); +} + +float3 mod(float3 x, float3 y) +{ + return x - y * floor(x / y); +} + +float4 mod(float4 x, float4 y) +{ + return x - y * floor(x / y); +} + +uint SPIRV_Cross_packFloat2x16(min16float2 value) +{ + uint2 Packed = f32tof16(value); + return Packed.x | (Packed.y << 16); +} + +min16float2 SPIRV_Cross_unpackFloat2x16(uint value) +{ + return min16float2(f16tof32(uint2(value & 0xffff, value >> 16))); +} + +void test_constants() +{ + min16float a = min16float(1.0); + min16float b = min16float(1.5); + min16float c = min16float(-1.5); + min16float d = min16float(0.0 / 0.0); + min16float e = min16float(1.0 / 0.0); + min16float f = min16float(-1.0 / 0.0); + min16float g = min16float(1014.0); + min16float h = min16float(9.5367431640625e-07); +} + +min16float test_result() +{ + return min16float(1.0); +} + +void test_conversions() +{ + min16float one = test_result(); + int a = int(one); + uint b = uint(one); + bool c = one != min16float(0.0); + float d = float(one); + double e = double(one); + min16float a2 = min16float(a); + min16float b2 = min16float(b); + min16float c2 = min16float(c); + min16float d2 = 
min16float(d); + min16float e2 = min16float(e); +} + +void test_builtins() +{ + min16float4 res = radians(v4); + res = degrees(v4); + res = sin(v4); + res = cos(v4); + res = tan(v4); + res = asin(v4); + res = atan2(v4, v3.xyzz); + res = atan(v4); + res = sinh(v4); + res = cosh(v4); + res = tanh(v4); + res = pow(v4, v4); + res = exp(v4); + res = log(v4); + res = exp2(v4); + res = log2(v4); + res = sqrt(v4); + res = rsqrt(v4); + res = abs(v4); + res = sign(v4); + res = floor(v4); + res = trunc(v4); + res = round(v4); + res = ceil(v4); + res = frac(v4); + res = mod(v4, v4); + min16float4 tmp; + min16float4 _144 = modf(v4, tmp); + res = _144; + res = min(v4, v4); + res = max(v4, v4); + res = clamp(v4, v4, v4); + res = lerp(v4, v4, v4); + bool4 _164 = bool4(v4.x < v4.x, v4.y < v4.y, v4.z < v4.z, v4.w < v4.w); + res = min16float4(_164.x ? v4.x : v4.x, _164.y ? v4.y : v4.y, _164.z ? v4.z : v4.z, _164.w ? v4.w : v4.w); + res = step(v4, v4); + res = smoothstep(v4, v4, v4); + bool4 btmp = isnan(v4); + btmp = isinf(v4); + res = mad(v4, v4, v4); + ResType _188; + _188._m0 = frexp(v4, _188._m1); + int4 itmp = _188._m1; + res = _188._m0; + res = ldexp(res, itmp); + uint pack0 = SPIRV_Cross_packFloat2x16(v4.xy); + uint pack1 = SPIRV_Cross_packFloat2x16(v4.zw); + res = min16float4(SPIRV_Cross_unpackFloat2x16(pack0), SPIRV_Cross_unpackFloat2x16(pack1)); + min16float t0 = length(v4); + t0 = distance(v4, v4); + t0 = dot(v4, v4); + min16float3 res3 = cross(v3, v3); + res = normalize(v4); + res = faceforward(v4, v4, v4); + res = reflect(v4, v4); + res = refract(v4, v4, v1); + btmp = bool4(v4.x < v4.x, v4.y < v4.y, v4.z < v4.z, v4.w < v4.w); + btmp = bool4(v4.x <= v4.x, v4.y <= v4.y, v4.z <= v4.z, v4.w <= v4.w); + btmp = bool4(v4.x > v4.x, v4.y > v4.y, v4.z > v4.z, v4.w > v4.w); + btmp = bool4(v4.x >= v4.x, v4.y >= v4.y, v4.z >= v4.z, v4.w >= v4.w); + btmp = bool4(v4.x == v4.x, v4.y == v4.y, v4.z == v4.z, v4.w == v4.w); + btmp = bool4(v4.x != v4.x, v4.y != v4.y, v4.z != v4.z, v4.w != 
v4.w); + res = ddx(v4); + res = ddy(v4); + res = ddx_fine(v4); + res = ddy_fine(v4); + res = ddx_coarse(v4); + res = ddy_coarse(v4); + res = fwidth(v4); + res = fwidth(v4); + res = fwidth(v4); +} + +void frag_main() +{ + test_constants(); + test_conversions(); + test_builtins(); +} + +SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input) +{ + v4 = stage_input.v4; + v3 = stage_input.v3; + v1 = stage_input.v1; + v2 = stage_input.v2; + frag_main(); + SPIRV_Cross_Output stage_output; + stage_output.o1 = o1; + stage_output.o2 = o2; + stage_output.o3 = o3; + stage_output.o4 = o4; + return stage_output; +} diff --git a/shaders-hlsl/frag/fp16.desktop.frag b/shaders-hlsl/frag/fp16.desktop.frag new file mode 100644 index 0000000..d4bd81e --- /dev/null +++ b/shaders-hlsl/frag/fp16.desktop.frag @@ -0,0 +1,156 @@ +#version 450 +#extension GL_AMD_gpu_shader_half_float : require + +layout(location = 0) in float16_t v1; +layout(location = 1) in f16vec2 v2; +layout(location = 2) in f16vec3 v3; +layout(location = 3) in f16vec4 v4; + +layout(location = 0) out float o1; +layout(location = 1) out vec2 o2; +layout(location = 2) out vec3 o3; +layout(location = 3) out vec4 o4; + +#if 0 +// Doesn't work on glslang yet. +f16mat2 test_mat2(f16vec2 a, f16vec2 b, f16vec2 c, f16vec2 d) +{ + return f16mat2(a, b) * f16mat2(c, d); +} + +f16mat3 test_mat3(f16vec3 a, f16vec3 b, f16vec3 c, f16vec3 d, f16vec3 e, f16vec3 f) +{ + return f16mat3(a, b, c) * f16mat3(d, e, f); +} +#endif + +void test_constants() +{ + float16_t a = 1.0hf; + float16_t b = 1.5hf; + float16_t c = -1.5hf; // Negatives + float16_t d = (0.0hf / 0.0hf); // NaN + float16_t e = (1.0hf / 0.0hf); // +Inf + float16_t f = (-1.0hf / 0.0hf); // -Inf + float16_t g = 1014.0hf; // Large. 
+ float16_t h = 0.000001hf; // Denormal +} + +float16_t test_result() +{ + return 1.0hf; +} + +void test_conversions() +{ + float16_t one = test_result(); + int a = int(one); + uint b = uint(one); + bool c = bool(one); + float d = float(one); + double e = double(one); + float16_t a2 = float16_t(a); + float16_t b2 = float16_t(b); + float16_t c2 = float16_t(c); + float16_t d2 = float16_t(d); + float16_t e2 = float16_t(e); +} + +void test_builtins() +{ + f16vec4 res; + res = radians(v4); + res = degrees(v4); + res = sin(v4); + res = cos(v4); + res = tan(v4); + res = asin(v4); + res = atan(v4, v3.xyzz); + res = atan(v4); + res = sinh(v4); + res = cosh(v4); + res = tanh(v4); + //res = asinh(v4); + //res = acosh(v4); + //res = atanh(v4); + res = pow(v4, v4); + res = exp(v4); + res = log(v4); + res = exp2(v4); + res = log2(v4); + res = sqrt(v4); + res = inversesqrt(v4); + res = abs(v4); + res = sign(v4); + res = floor(v4); + res = trunc(v4); + res = round(v4); + //res = roundEven(v4); + res = ceil(v4); + res = fract(v4); + res = mod(v4, v4); + f16vec4 tmp; + res = modf(v4, tmp); + res = min(v4, v4); + res = max(v4, v4); + res = clamp(v4, v4, v4); + res = mix(v4, v4, v4); + res = mix(v4, v4, lessThan(v4, v4)); + res = step(v4, v4); + res = smoothstep(v4, v4, v4); + + bvec4 btmp = isnan(v4); + btmp = isinf(v4); + res = fma(v4, v4, v4); + + ivec4 itmp; + res = frexp(v4, itmp); + res = ldexp(res, itmp); + + uint pack0 = packFloat2x16(v4.xy); + uint pack1 = packFloat2x16(v4.zw); + res = f16vec4(unpackFloat2x16(pack0), unpackFloat2x16(pack1)); + + float16_t t0 = length(v4); + t0 = distance(v4, v4); + t0 = dot(v4, v4); + f16vec3 res3 = cross(v3, v3); + res = normalize(v4); + res = faceforward(v4, v4, v4); + res = reflect(v4, v4); + res = refract(v4, v4, v1); + + btmp = lessThan(v4, v4); + btmp = lessThanEqual(v4, v4); + btmp = greaterThan(v4, v4); + btmp = greaterThanEqual(v4, v4); + btmp = equal(v4, v4); + btmp = notEqual(v4, v4); + + res = dFdx(v4); + res = dFdy(v4); + res = 
dFdxFine(v4); + res = dFdyFine(v4); + res = dFdxCoarse(v4); + res = dFdyCoarse(v4); + res = fwidth(v4); + res = fwidthFine(v4); + res = fwidthCoarse(v4); + + //res = interpolateAtCentroid(v4); + //res = interpolateAtSample(v4, 0); + //res = interpolateAtOffset(v4, f16vec2(0.1hf)); +} + +void main() +{ + // Basic matrix tests. +#if 0 + f16mat2 m0 = test_mat2(v2, v2, v3.xy, v3.xy); + f16mat3 m1 = test_mat3(v3, v3, v3, v4.xyz, v4.xyz, v4.yzw); +#endif + + test_constants(); + test_conversions(); + test_builtins(); +} diff --git a/spirv_common.hpp b/spirv_common.hpp index 10c6d82..73fbae5 100644 --- a/spirv_common.hpp +++ b/spirv_common.hpp @@ -1116,6 +1116,12 @@ public: private: uint64_t h = 0xcbf29ce484222325ull; }; + +static inline bool type_is_floating_point(const SPIRType &type) +{ + return type.basetype == SPIRType::Half || type.basetype == SPIRType::Float || type.basetype == SPIRType::Double; +} + } #endif diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index c958e3e..8df9b05 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -26,11 +26,6 @@ using namespace spv; using namespace spirv_cross; using namespace std; -static bool type_is_floating_point(const SPIRType &type) -{ - return type.basetype == SPIRType::Half || type.basetype == SPIRType::Float || type.basetype == SPIRType::Double; -} - static bool packing_is_vec4_padded(BufferPackingStandard packing) { switch (packing) @@ -2590,18 +2585,50 @@ string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col, if (std::isnan(float_value) || std::isinf(float_value)) { - // There is no uintBitsToFloat for 16-bit, so have to rely on legacy fallback here. 
- if (float_value == numeric_limits<float>::infinity()) - res = join("(1.0", backend.half_literal_suffix, " / 0.0", backend.half_literal_suffix, ")"); - else if (float_value == -numeric_limits<float>::infinity()) - res = join("(-1.0", backend.half_literal_suffix, " / 0.0", backend.half_literal_suffix, ")"); - else if (std::isnan(float_value)) - res = join("(0.0", backend.half_literal_suffix, " / 0.0", backend.half_literal_suffix, ")"); + if (backend.half_literal_suffix) + { + // There is no uintBitsToFloat for 16-bit, so have to rely on legacy fallback here. + if (float_value == numeric_limits<float>::infinity()) + res = join("(1.0", backend.half_literal_suffix, " / 0.0", backend.half_literal_suffix, ")"); + else if (float_value == -numeric_limits<float>::infinity()) + res = join("(-1.0", backend.half_literal_suffix, " / 0.0", backend.half_literal_suffix, ")"); + else if (std::isnan(float_value)) + res = join("(0.0", backend.half_literal_suffix, " / 0.0", backend.half_literal_suffix, ")"); + else + SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); + } else - SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); + { + SPIRType type; + type.basetype = SPIRType::Half; + type.vecsize = 1; + type.columns = 1; + + if (float_value == numeric_limits<float>::infinity()) + res = join(type_to_glsl(type), "(1.0 / 0.0)"); + else if (float_value == -numeric_limits<float>::infinity()) + res = join(type_to_glsl(type), "(-1.0 / 0.0)"); + else if (std::isnan(float_value)) + res = join(type_to_glsl(type), "(0.0 / 0.0)"); + else + SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant."); + } } else - res = convert_to_string(float_value) + backend.half_literal_suffix; + { + if (backend.half_literal_suffix) + res = convert_to_string(float_value) + backend.half_literal_suffix; + else + { + // In HLSL (FXC), it's important to cast the literals to half precision right away. + // There is no literal for it. 
+ SPIRType type; + type.basetype = SPIRType::Half; + type.vecsize = 1; + type.columns = 1; + res = join(type_to_glsl(type), "(", convert_to_string(float_value), ")"); + } + } return res; } diff --git a/spirv_hlsl.cpp b/spirv_hlsl.cpp index a56865e..b4343a3 100644 --- a/spirv_hlsl.cpp +++ b/spirv_hlsl.cpp @@ -394,6 +394,8 @@ string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id) return backend.basic_uint_type; case SPIRType::AtomicCounter: return "atomic_uint"; + case SPIRType::Half: + return "min16float"; case SPIRType::Float: return "float"; case SPIRType::Double: @@ -416,6 +418,8 @@ string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id) return join("int", type.vecsize); case SPIRType::UInt: return join("uint", type.vecsize); + case SPIRType::Half: + return join("min16float", type.vecsize); case SPIRType::Float: return join("float", type.vecsize); case SPIRType::Double: @@ -438,6 +442,8 @@ string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id) return join("int", type.columns, "x", type.vecsize); case SPIRType::UInt: return join("uint", type.columns, "x", type.vecsize); + case SPIRType::Half: + return join("min16float", type.columns, "x", type.vecsize); case SPIRType::Float: return join("float", type.columns, "x", type.vecsize); case SPIRType::Double: @@ -1427,6 +1433,23 @@ void CompilerHLSL::emit_resources() statement(""); } + if (requires_explicit_fp16_packing) + { + // HLSL does not pack into a single word sadly :( + statement("uint SPIRV_Cross_packFloat2x16(min16float2 value)"); + begin_scope(); + statement("uint2 Packed = f32tof16(value);"); + statement("return Packed.x | (Packed.y << 16);"); + end_scope(); + statement(""); + + statement("min16float2 SPIRV_Cross_unpackFloat2x16(uint value)"); + begin_scope(); + statement("return min16float2(f16tof32(uint2(value & 0xffff, value >> 16)));"); + end_scope(); + statement(""); + } + // HLSL does not seem to have builtins for these operation, so roll them by hand ... 
if (requires_unorm8_packing) { @@ -2839,6 +2862,24 @@ string CompilerHLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i return "asdouble"; else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64) return "asdouble"; + else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1) + { + if (!requires_explicit_fp16_packing) + { + requires_explicit_fp16_packing = true; + force_recompile = true; + } + return "SPIRV_Cross_unpackFloat2x16"; + } + else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2) + { + if (!requires_explicit_fp16_packing) + { + requires_explicit_fp16_packing = true; + force_recompile = true; + } + return "SPIRV_Cross_packFloat2x16"; + } else return ""; } @@ -2857,6 +2898,14 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop, emit_unary_func_op(result_type, id, args[0], "frac"); break; + case GLSLstd450RoundEven: + SPIRV_CROSS_THROW("roundEven is not supported on HLSL."); + + case GLSLstd450Acosh: + case GLSLstd450Asinh: + case GLSLstd450Atanh: + SPIRV_CROSS_THROW("Inverse hyperbolics are not supported on HLSL."); + case GLSLstd450FMix: case GLSLstd450IMix: emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "lerp"); @@ -3574,6 +3623,12 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) UFOP(ddy_coarse); break; + case OpFwidth: + case OpFwidthCoarse: + case OpFwidthFine: + UFOP(fwidth); + break; + case OpLogicalNot: { auto result_type = ops[0]; @@ -4166,6 +4221,7 @@ string CompilerHLSL::compile() CompilerGLSL::options.vulkan_semantics = true; backend.float_literal_suffix = true; backend.double_literal_suffix = false; + backend.half_literal_suffix = nullptr; backend.long_long_literal_suffix = true; backend.uint32_t_literal_suffix = true; backend.basic_int_type = "int"; diff --git a/spirv_hlsl.hpp b/spirv_hlsl.hpp index 0de7240..a16c312 100644 --- 
a/spirv_hlsl.hpp +++ b/spirv_hlsl.hpp @@ -155,6 +155,7 @@ private: bool requires_op_fmod = false; bool requires_textureProj = false; bool requires_fp16_packing = false; + bool requires_explicit_fp16_packing = false; bool requires_unorm8_packing = false; bool requires_snorm8_packing = false; bool requires_unorm16_packing = false; diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 94df36e..3e00af0 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -27,11 +27,6 @@ using namespace std; static const uint32_t k_unknown_location = ~0u; -static bool type_is_floating_point(const SPIRType &type) -{ - return type.basetype == SPIRType::Half || type.basetype == SPIRType::Float || type.basetype == SPIRType::Double; -} - CompilerMSL::CompilerMSL(vector<uint32_t> spirv_, vector<MSLVertexAttr> *p_vtx_attrs, vector<MSLResourceBinding> *p_res_bindings) : CompilerGLSL(move(spirv_))