Add FP16 to HLSL.

FP16 types cannot be used in buffer types, similar to mediump in GLSL. HLSL's half type is useless here because FXC treats it as 32-bit, so min16float is emitted instead.
2018-03-07 10:21:25 +01:00 · 2018-03-07 10:21:25 +01:00 · 47d94ff8d9
--- a/reference/opt/shaders-hlsl/frag/fp16.desktop.frag
+++ b/reference/opt/shaders-hlsl/frag/fp16.desktop.frag
@ -0,0 +1,51 @@
+struct ResType
+{
+    min16float4 _m0;
+    int4 _m1;
+};
+
+static min16float4 v4;
+static min16float3 v3;
+static min16float v1;
+static min16float2 v2;
+static float o1;
+static float2 o2;
+static float3 o3;
+static float4 o4;
+
+struct SPIRV_Cross_Input
+{
+    min16float v1 : TEXCOORD0;
+    min16float2 v2 : TEXCOORD1;
+    min16float3 v3 : TEXCOORD2;
+    min16float4 v4 : TEXCOORD3;
+};
+
+struct SPIRV_Cross_Output
+{
+    float o1 : SV_Target0;
+    float2 o2 : SV_Target1;
+    float3 o3 : SV_Target2;
+    float4 o4 : SV_Target3;
+};
+
+void frag_main()
+{
+    min16float4 _335;
+    min16float4 _399 = modf(v4, _335);
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    v4 = stage_input.v4;
+    v3 = stage_input.v3;
+    v1 = stage_input.v1;
+    v2 = stage_input.v2;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.o1 = o1;
+    stage_output.o2 = o2;
+    stage_output.o3 = o3;
+    stage_output.o4 = o4;
+    return stage_output;
+}
--- a/reference/shaders-hlsl/frag/fp16.desktop.frag
+++ b/reference/shaders-hlsl/frag/fp16.desktop.frag
@ -0,0 +1,190 @@
+struct ResType
+{
+    min16float4 _m0;
+    int4 _m1;
+};
+
+static min16float4 v4;
+static min16float3 v3;
+static min16float v1;
+static min16float2 v2;
+static float o1;
+static float2 o2;
+static float3 o3;
+static float4 o4;
+
+struct SPIRV_Cross_Input
+{
+    min16float v1 : TEXCOORD0;
+    min16float2 v2 : TEXCOORD1;
+    min16float3 v3 : TEXCOORD2;
+    min16float4 v4 : TEXCOORD3;
+};
+
+struct SPIRV_Cross_Output
+{
+    float o1 : SV_Target0;
+    float2 o2 : SV_Target1;
+    float3 o3 : SV_Target2;
+    float4 o4 : SV_Target3;
+};
+
+float mod(float x, float y)
+{
+    return x - y * floor(x / y);
+}
+
+float2 mod(float2 x, float2 y)
+{
+    return x - y * floor(x / y);
+}
+
+float3 mod(float3 x, float3 y)
+{
+    return x - y * floor(x / y);
+}
+
+float4 mod(float4 x, float4 y)
+{
+    return x - y * floor(x / y);
+}
+
+uint SPIRV_Cross_packFloat2x16(min16float2 value)
+{
+    uint2 Packed = f32tof16(value);
+    return Packed.x | (Packed.y << 16);
+}
+
+min16float2 SPIRV_Cross_unpackFloat2x16(uint value)
+{
+    return min16float2(f16tof32(uint2(value & 0xffff, value >> 16)));
+}
+
+void test_constants()
+{
+    min16float a = min16float(1.0);
+    min16float b = min16float(1.5);
+    min16float c = min16float(-1.5);
+    min16float d = min16float(0.0 / 0.0);
+    min16float e = min16float(1.0 / 0.0);
+    min16float f = min16float(-1.0 / 0.0);
+    min16float g = min16float(1014.0);
+    min16float h = min16float(9.5367431640625e-07);
+}
+
+min16float test_result()
+{
+    return min16float(1.0);
+}
+
+void test_conversions()
+{
+    min16float one = test_result();
+    int a = int(one);
+    uint b = uint(one);
+    bool c = one != min16float(0.0);
+    float d = float(one);
+    double e = double(one);
+    min16float a2 = min16float(a);
+    min16float b2 = min16float(b);
+    min16float c2 = min16float(c);
+    min16float d2 = min16float(d);
+    min16float e2 = min16float(e);
+}
+
+void test_builtins()
+{
+    min16float4 res = radians(v4);
+    res = degrees(v4);
+    res = sin(v4);
+    res = cos(v4);
+    res = tan(v4);
+    res = asin(v4);
+    res = atan2(v4, v3.xyzz);
+    res = atan(v4);
+    res = sinh(v4);
+    res = cosh(v4);
+    res = tanh(v4);
+    res = pow(v4, v4);
+    res = exp(v4);
+    res = log(v4);
+    res = exp2(v4);
+    res = log2(v4);
+    res = sqrt(v4);
+    res = rsqrt(v4);
+    res = abs(v4);
+    res = sign(v4);
+    res = floor(v4);
+    res = trunc(v4);
+    res = round(v4);
+    res = ceil(v4);
+    res = frac(v4);
+    res = mod(v4, v4);
+    min16float4 tmp;
+    min16float4 _144 = modf(v4, tmp);
+    res = _144;
+    res = min(v4, v4);
+    res = max(v4, v4);
+    res = clamp(v4, v4, v4);
+    res = lerp(v4, v4, v4);
+    bool4 _164 = bool4(v4.x < v4.x, v4.y < v4.y, v4.z < v4.z, v4.w < v4.w);
+    res = min16float4(_164.x ? v4.x : v4.x, _164.y ? v4.y : v4.y, _164.z ? v4.z : v4.z, _164.w ? v4.w : v4.w);
+    res = step(v4, v4);
+    res = smoothstep(v4, v4, v4);
+    bool4 btmp = isnan(v4);
+    btmp = isinf(v4);
+    res = mad(v4, v4, v4);
+    ResType _188;
+    _188._m0 = frexp(v4, _188._m1);
+    int4 itmp = _188._m1;
+    res = _188._m0;
+    res = ldexp(res, itmp);
+    uint pack0 = SPIRV_Cross_packFloat2x16(v4.xy);
+    uint pack1 = SPIRV_Cross_packFloat2x16(v4.zw);
+    res = min16float4(SPIRV_Cross_unpackFloat2x16(pack0), SPIRV_Cross_unpackFloat2x16(pack1));
+    min16float t0 = length(v4);
+    t0 = distance(v4, v4);
+    t0 = dot(v4, v4);
+    min16float3 res3 = cross(v3, v3);
+    res = normalize(v4);
+    res = faceforward(v4, v4, v4);
+    res = reflect(v4, v4);
+    res = refract(v4, v4, v1);
+    btmp = bool4(v4.x < v4.x, v4.y < v4.y, v4.z < v4.z, v4.w < v4.w);
+    btmp = bool4(v4.x <= v4.x, v4.y <= v4.y, v4.z <= v4.z, v4.w <= v4.w);
+    btmp = bool4(v4.x > v4.x, v4.y > v4.y, v4.z > v4.z, v4.w > v4.w);
+    btmp = bool4(v4.x >= v4.x, v4.y >= v4.y, v4.z >= v4.z, v4.w >= v4.w);
+    btmp = bool4(v4.x == v4.x, v4.y == v4.y, v4.z == v4.z, v4.w == v4.w);
+    btmp = bool4(v4.x != v4.x, v4.y != v4.y, v4.z != v4.z, v4.w != v4.w);
+    res = ddx(v4);
+    res = ddy(v4);
+    res = ddx_fine(v4);
+    res = ddy_fine(v4);
+    res = ddx_coarse(v4);
+    res = ddy_coarse(v4);
+    res = fwidth(v4);
+    res = fwidth(v4);
+    res = fwidth(v4);
+}
+
+void frag_main()
+{
+    test_constants();
+    test_conversions();
+    test_builtins();
+}
+
+SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
+{
+    v4 = stage_input.v4;
+    v3 = stage_input.v3;
+    v1 = stage_input.v1;
+    v2 = stage_input.v2;
+    frag_main();
+    SPIRV_Cross_Output stage_output;
+    stage_output.o1 = o1;
+    stage_output.o2 = o2;
+    stage_output.o3 = o3;
+    stage_output.o4 = o4;
+    return stage_output;
+}
--- a/shaders-hlsl/frag/fp16.desktop.frag
+++ b/shaders-hlsl/frag/fp16.desktop.frag
@ -0,0 +1,156 @@
+#version 450
+#extension GL_AMD_gpu_shader_half_float : require
+
+layout(location = 0) in float16_t v1;
+layout(location = 1) in f16vec2 v2;
+layout(location = 2) in f16vec3 v3;
+layout(location = 3) in f16vec4 v4;
+
+layout(location = 0) out float o1;
+layout(location = 1) out vec2 o2;
+layout(location = 2) out vec3 o3;
+layout(location = 3) out vec4 o4;
+
+#if 0
+// Doesn't work on glslang yet.
+f16mat2 test_mat2(f16vec2 a, f16vec2 b, f16vec2 c, f16vec2 d)
+{
+	return f16mat2(a, b) * f16mat2(c, d);
+}
+
+f16mat3 test_mat3(f16vec3 a, f16vec3 b, f16vec3 c, f16vec3 d, f16vec3 e, f16vec3 f)
+{
+	return f16mat3(a, b, c) * f16mat3(d, e, f);
+}
+#endif
+
+void test_constants()
+{
+	float16_t a = 1.0hf;
+	float16_t b = 1.5hf;
+	float16_t c = -1.5hf; // Negatives
+	float16_t d = (0.0hf / 0.0hf); // NaN
+	float16_t e = (1.0hf / 0.0hf); // +Inf
+	float16_t f = (-1.0hf / 0.0hf); // -Inf
+	float16_t g = 1014.0hf; // Large.
+	float16_t h = 0.000001hf; // Denormal
+}
+
+float16_t test_result()
+{
+	return 1.0hf;
+}
+
+void test_conversions()
+{
+	float16_t one = test_result();
+	int a = int(one);
+	uint b = uint(one);
+	bool c = bool(one);
+	float d = float(one);
+	double e = double(one);
+	float16_t a2 = float16_t(a);
+	float16_t b2 = float16_t(b);
+	float16_t c2 = float16_t(c);
+	float16_t d2 = float16_t(d);
+	float16_t e2 = float16_t(e);
+}
+
+void test_builtins()
+{
+	f16vec4 res;
+	res = radians(v4);
+	res = degrees(v4);
+	res = sin(v4);
+	res = cos(v4);
+	res = tan(v4);
+	res = asin(v4);
+	res = atan(v4, v3.xyzz);
+	res = atan(v4);
+	res = sinh(v4);
+	res = cosh(v4);
+	res = tanh(v4);
+	//res = asinh(v4);
+	//res = acosh(v4);
+	//res = atanh(v4);
+	res = pow(v4, v4);
+	res = exp(v4);
+	res = log(v4);
+	res = exp2(v4);
+	res = log2(v4);
+	res = sqrt(v4);
+	res = inversesqrt(v4);
+	res = abs(v4);
+	res = sign(v4);
+	res = floor(v4);
+	res = trunc(v4);
+	res = round(v4);
+	//res = roundEven(v4);
+	res = ceil(v4);
+	res = fract(v4);
+	res = mod(v4, v4);
+	f16vec4 tmp;
+	res = modf(v4, tmp);
+	res = min(v4, v4);
+	res = max(v4, v4);
+	res = clamp(v4, v4, v4);
+	res = mix(v4, v4, v4);
+	res = mix(v4, v4, lessThan(v4, v4));
+	res = step(v4, v4);
+	res = smoothstep(v4, v4, v4);
+
+	bvec4 btmp = isnan(v4);
+	btmp = isinf(v4);
+	res = fma(v4, v4, v4);
+
+	ivec4 itmp;
+	res = frexp(v4, itmp);
+	res = ldexp(res, itmp);
+
+	uint pack0 = packFloat2x16(v4.xy);
+	uint pack1 = packFloat2x16(v4.zw);
+	res = f16vec4(unpackFloat2x16(pack0), unpackFloat2x16(pack1));
+
+	float16_t t0 = length(v4);
+	t0 = distance(v4, v4);
+	t0 = dot(v4, v4);
+	f16vec3 res3 = cross(v3, v3);
+	res = normalize(v4);
+	res = faceforward(v4, v4, v4);
+	res = reflect(v4, v4);
+	res = refract(v4, v4, v1);
+
+	btmp = lessThan(v4, v4);
+	btmp = lessThanEqual(v4, v4);
+	btmp = greaterThan(v4, v4);
+	btmp = greaterThanEqual(v4, v4);
+	btmp = equal(v4, v4);
+	btmp = notEqual(v4, v4);
+
+	res = dFdx(v4);
+	res = dFdy(v4);
+	res = dFdxFine(v4);
+	res = dFdyFine(v4);
+	res = dFdxCoarse(v4);
+	res = dFdyCoarse(v4);
+	res = fwidth(v4);
+	res = fwidthFine(v4);
+	res = fwidthCoarse(v4);
+
+	//res = interpolateAtCentroid(v4);
+	//res = interpolateAtSample(v4, 0);
+	//res = interpolateAtOffset(v4, f16vec2(0.1hf));
+}
+
+void main()
+{
+	// Basic matrix tests.
+#if 0
+	f16mat2 m0 = test_mat2(v2, v2, v3.xy, v3.xy);
+	f16mat3 m1 = test_mat3(v3, v3, v3, v4.xyz, v4.xyz, v4.yzw);
+#endif
+
+	test_constants();
+	test_conversions();
+	test_builtins();
+}
--- a/spirv_common.hpp
+++ b/spirv_common.hpp
@ -1116,6 +1116,12 @@ public:
 private:
 	uint64_t h = 0xcbf29ce484222325ull;
 };
+
+static inline bool type_is_floating_point(const SPIRType &type)
+{
+	return type.basetype == SPIRType::Half || type.basetype == SPIRType::Float || type.basetype == SPIRType::Double;
+}
+
 }

 #endif
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@ -26,11 +26,6 @@ using namespace spv;
 using namespace spirv_cross;
 using namespace std;

-static bool type_is_floating_point(const SPIRType &type)
-{
-	return type.basetype == SPIRType::Half || type.basetype == SPIRType::Float || type.basetype == SPIRType::Double;
-}
-
 static bool packing_is_vec4_padded(BufferPackingStandard packing)
 {
 	switch (packing)
@ -2590,18 +2585,50 @@ string CompilerGLSL::convert_half_to_string(const SPIRConstant &c, uint32_t col,

 	if (std::isnan(float_value) || std::isinf(float_value))
 	{
-		// There is no uintBitsToFloat for 16-bit, so have to rely on legacy fallback here.
-		if (float_value == numeric_limits<float>::infinity())
-			res = join("(1.0", backend.half_literal_suffix, " / 0.0", backend.half_literal_suffix, ")");
-		else if (float_value == -numeric_limits<float>::infinity())
-			res = join("(-1.0", backend.half_literal_suffix, " / 0.0", backend.half_literal_suffix, ")");
-		else if (std::isnan(float_value))
-			res = join("(0.0", backend.half_literal_suffix, " / 0.0", backend.half_literal_suffix, ")");
+		if (backend.half_literal_suffix)
+		{
+			// There is no uintBitsToFloat for 16-bit, so have to rely on legacy fallback here.
+			if (float_value == numeric_limits<float>::infinity())
+				res = join("(1.0", backend.half_literal_suffix, " / 0.0", backend.half_literal_suffix, ")");
+			else if (float_value == -numeric_limits<float>::infinity())
+				res = join("(-1.0", backend.half_literal_suffix, " / 0.0", backend.half_literal_suffix, ")");
+			else if (std::isnan(float_value))
+				res = join("(0.0", backend.half_literal_suffix, " / 0.0", backend.half_literal_suffix, ")");
+			else
+				SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
+		}
 		else
-			SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
+		{
+			SPIRType type;
+			type.basetype = SPIRType::Half;
+			type.vecsize = 1;
+			type.columns = 1;
+
+			if (float_value == numeric_limits<float>::infinity())
+				res = join(type_to_glsl(type), "(1.0 / 0.0)");
+			else if (float_value == -numeric_limits<float>::infinity())
+				res = join(type_to_glsl(type), "(-1.0 / 0.0)");
+			else if (std::isnan(float_value))
+				res = join(type_to_glsl(type), "(0.0 / 0.0)");
+			else
+				SPIRV_CROSS_THROW("Cannot represent non-finite floating point constant.");
+		}
 	}
 	else
-		res = convert_to_string(float_value) + backend.half_literal_suffix;
+	{
+		if (backend.half_literal_suffix)
+			res = convert_to_string(float_value) + backend.half_literal_suffix;
+		else
+		{
+			// In HLSL (FXC), it's important to cast the literals to half precision right away.
+			// There is no literal for it.
+			SPIRType type;
+			type.basetype = SPIRType::Half;
+			type.vecsize = 1;
+			type.columns = 1;
+			res = join(type_to_glsl(type), "(", convert_to_string(float_value), ")");
+		}
+	}

 	return res;
 }
--- a/spirv_hlsl.cpp
+++ b/spirv_hlsl.cpp
@ -394,6 +394,8 @@ string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id)
 			return backend.basic_uint_type;
 		case SPIRType::AtomicCounter:
 			return "atomic_uint";
+		case SPIRType::Half:
+			return "min16float";
 		case SPIRType::Float:
 			return "float";
 		case SPIRType::Double:
@ -416,6 +418,8 @@ string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id)
 			return join("int", type.vecsize);
 		case SPIRType::UInt:
 			return join("uint", type.vecsize);
+		case SPIRType::Half:
+			return join("min16float", type.vecsize);
 		case SPIRType::Float:
 			return join("float", type.vecsize);
 		case SPIRType::Double:
@ -438,6 +442,8 @@ string CompilerHLSL::type_to_glsl(const SPIRType &type, uint32_t id)
 			return join("int", type.columns, "x", type.vecsize);
 		case SPIRType::UInt:
 			return join("uint", type.columns, "x", type.vecsize);
+		case SPIRType::Half:
+			return join("min16float", type.columns, "x", type.vecsize);
 		case SPIRType::Float:
 			return join("float", type.columns, "x", type.vecsize);
 		case SPIRType::Double:
@ -1427,6 +1433,23 @@ void CompilerHLSL::emit_resources()
 		statement("");
 	}

+	if (requires_explicit_fp16_packing)
+	{
+		// HLSL does not pack into a single word sadly :(
+		statement("uint SPIRV_Cross_packFloat2x16(min16float2 value)");
+		begin_scope();
+		statement("uint2 Packed = f32tof16(value);");
+		statement("return Packed.x | (Packed.y << 16);");
+		end_scope();
+		statement("");
+
+		statement("min16float2 SPIRV_Cross_unpackFloat2x16(uint value)");
+		begin_scope();
+		statement("return min16float2(f16tof32(uint2(value & 0xffff, value >> 16)));");
+		end_scope();
+		statement("");
+	}
+
 	// HLSL does not seem to have builtins for these operation, so roll them by hand ...
 	if (requires_unorm8_packing)
 	{
@ -2839,6 +2862,24 @@ string CompilerHLSL::bitcast_glsl_op(const SPIRType &out_type, const SPIRType &i
 		return "asdouble";
 	else if (out_type.basetype == SPIRType::Double && in_type.basetype == SPIRType::UInt64)
 		return "asdouble";
+	else if (out_type.basetype == SPIRType::Half && in_type.basetype == SPIRType::UInt && in_type.vecsize == 1)
+	{
+		if (!requires_explicit_fp16_packing)
+		{
+			requires_explicit_fp16_packing = true;
+			force_recompile = true;
+		}
+		return "SPIRV_Cross_unpackFloat2x16";
+	}
+	else if (out_type.basetype == SPIRType::UInt && in_type.basetype == SPIRType::Half && in_type.vecsize == 2)
+	{
+		if (!requires_explicit_fp16_packing)
+		{
+			requires_explicit_fp16_packing = true;
+			force_recompile = true;
+		}
+		return "SPIRV_Cross_packFloat2x16";
+	}
 	else
 		return "";
 }
@ -2857,6 +2898,14 @@ void CompilerHLSL::emit_glsl_op(uint32_t result_type, uint32_t id, uint32_t eop,
 		emit_unary_func_op(result_type, id, args[0], "frac");
 		break;

+	case GLSLstd450RoundEven:
+		SPIRV_CROSS_THROW("roundEven is not supported on HLSL.");
+
+	case GLSLstd450Acosh:
+	case GLSLstd450Asinh:
+	case GLSLstd450Atanh:
+		SPIRV_CROSS_THROW("Inverse hyperbolics are not supported on HLSL.");
+
 	case GLSLstd450FMix:
 	case GLSLstd450IMix:
 		emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "lerp");
@ -3574,6 +3623,12 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
 		UFOP(ddy_coarse);
 		break;

+	case OpFwidth:
+	case OpFwidthCoarse:
+	case OpFwidthFine:
+		UFOP(fwidth);
+		break;
+
 	case OpLogicalNot:
 	{
 		auto result_type = ops[0];
@ -4166,6 +4221,7 @@ string CompilerHLSL::compile()
 	CompilerGLSL::options.vulkan_semantics = true;
 	backend.float_literal_suffix = true;
 	backend.double_literal_suffix = false;
+	backend.half_literal_suffix = nullptr;
 	backend.long_long_literal_suffix = true;
 	backend.uint32_t_literal_suffix = true;
 	backend.basic_int_type = "int";
--- a/spirv_hlsl.hpp
+++ b/spirv_hlsl.hpp
@ -155,6 +155,7 @@ private:
 	bool requires_op_fmod = false;
 	bool requires_textureProj = false;
 	bool requires_fp16_packing = false;
+	bool requires_explicit_fp16_packing = false;
 	bool requires_unorm8_packing = false;
 	bool requires_snorm8_packing = false;
 	bool requires_unorm16_packing = false;
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@ -27,11 +27,6 @@ using namespace std;

 static const uint32_t k_unknown_location = ~0u;

-static bool type_is_floating_point(const SPIRType &type)
-{
-	return type.basetype == SPIRType::Half || type.basetype == SPIRType::Float || type.basetype == SPIRType::Double;
-}
-
 CompilerMSL::CompilerMSL(vector<uint32_t> spirv_, vector<MSLVertexAttr> *p_vtx_attrs,
                         vector<MSLResourceBinding> *p_res_bindings)
    : CompilerGLSL(move(spirv_))