CompilerMSL: allow swizzle of packed_float3 vectors by unpacking to float3.

Pass packed indicator back through OpAccessChain and OpLoad. Unpack packed vector before applying swizzle. Add packed swizzle test.
2018-01-05 17:46:56 -05:00 · 2018-01-05 17:46:56 -05:00 · d8d2da9d8d
--- a/reference/opt/shaders-msl/flatten/swizzle.flatten.vert
+++ b/reference/opt/shaders-msl/flatten/swizzle.flatten.vert
@ -38,8 +38,8 @@ vertex main0_out main0(constant UBO& _22 [[buffer(0)]])
    out.gl_Position = float4(0.0);
    out.oA = _22.A;
    out.oB = float4(_22.B0, _22.B1);
-    out.oC = float4(_22.C0, _22.C1);
-    out.oD = float4(_22.D0, _22.D1);
+    out.oC = float4(_22.C0, _22.C1) + float4(_22.C1.xy, _22.C1.z, _22.C0);
+    out.oD = float4(_22.D0, _22.D1) + float4(float3(_22.D0).xy, float3(_22.D0).z, _22.D1);
    out.oE = float4(_22.E0, _22.E1, _22.E2, _22.E3);
    out.oF = float4(_22.F0, _22.F1, _22.F2);
    return out;
--- a/reference/shaders-msl/flatten/swizzle.flatten.vert
+++ b/reference/shaders-msl/flatten/swizzle.flatten.vert
@ -38,8 +38,8 @@ vertex main0_out main0(constant UBO& _22 [[buffer(0)]])
    out.gl_Position = float4(0.0);
    out.oA = _22.A;
    out.oB = float4(_22.B0, _22.B1);
-    out.oC = float4(_22.C0, _22.C1);
-    out.oD = float4(_22.D0, _22.D1);
+    out.oC = float4(_22.C0, _22.C1) + float4(_22.C1.xy, _22.C1.z, _22.C0);
+    out.oD = float4(_22.D0, _22.D1) + float4(float3(_22.D0).xy, float3(_22.D0).z, _22.D1);
    out.oE = float4(_22.E0, _22.E1, _22.E2, _22.E3);
    out.oF = float4(_22.F0, _22.F1, _22.F2);
    return out;
--- a/shaders-msl/flatten/swizzle.flatten.vert
+++ b/shaders-msl/flatten/swizzle.flatten.vert
@ -40,8 +40,8 @@ void main()

    oA = A;
    oB = vec4(B0, B1);
-    oC = vec4(C0, C1);
-    oD = vec4(D0, D1);
+    oC = vec4(C0, C1) + vec4(C1.xy, C1.z, C0);	// not packed
+    oD = vec4(D0, D1) + vec4(D0.xy, D0.z, D1);	// packed - must convert for swizzle
    oE = vec4(E0, E1, E2, E3);
    oF = vec4(F0, F1, F2);
 }
--- a/spirv_glsl.cpp
+++ b/spirv_glsl.cpp
@ -4225,7 +4225,8 @@ const char *CompilerGLSL::index_to_swizzle(uint32_t index)
 }

 string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count,
-                                           bool index_is_literal, bool chain_only, bool *need_transpose)
+                                           bool index_is_literal, bool chain_only, bool *need_transpose,
+                                           bool *result_is_packed)
 {
 	string expr;
 	if (!chain_only)
@ -4368,7 +4369,10 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice
 		else if (type->vecsize > 1)
 		{
 			if (vector_is_packed)
+			{
 				expr = unpack_expression_type(expr, *type);
+				vector_is_packed = false;
+			}

 			if (index_is_literal)
 			{
@ -4404,6 +4408,10 @@ string CompilerGLSL::access_chain_internal(uint32_t base, const uint32_t *indice

 	if (need_transpose)
 		*need_transpose = row_major_matrix_needs_conversion;
+
+	if (result_is_packed)
+		*result_is_packed = vector_is_packed;
+
 	return expr;
 }

@ -4413,7 +4421,7 @@ string CompilerGLSL::to_flattened_struct_member(const SPIRType &type, uint32_t i
 }

 string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
-                                  bool *out_need_transpose)
+                                  bool *out_need_transpose, bool *result_is_packed)
 {
 	if (flattened_buffer_blocks.count(base))
 	{
@ -4423,6 +4431,8 @@ string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32

 		if (out_need_transpose)
 			*out_need_transpose = target_type.columns > 1 && need_transpose;
+		if (result_is_packed)
+			*result_is_packed = false;

 		return flattened_access_chain(base, indices, count, target_type, 0, matrix_stride, need_transpose);
 	}
@ -4432,11 +4442,13 @@ string CompilerGLSL::access_chain(uint32_t base, const uint32_t *indices, uint32
 		auto &type = get<SPIRType>(get<SPIRVariable>(base).basetype);
 		if (out_need_transpose)
 			*out_need_transpose = false;
+		if (result_is_packed)
+			*result_is_packed = false;
 		return sanitize_underscores(join(to_name(type.self), "_", chain));
 	}
 	else
 	{
-		return access_chain_internal(base, indices, count, false, false, out_need_transpose);
+		return access_chain_internal(base, indices, count, false, false, out_need_transpose, result_is_packed);
 	}
 }

@ -5019,7 +5031,7 @@ string CompilerGLSL::build_composite_combiner(const uint32_t *elems, uint32_t le
 		{
 			// We'll likely end up with duplicated swizzles, e.g.
 			// foobar.xyz.xyz from patterns like
-			// OpVectorSwizzle
+			// OpVectorShuffle
 			// OpCompositeExtract x 3
 			// OpCompositeConstruct 3x + other scalar.
 			// Just modify op in-place.
@ -5179,6 +5191,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 		auto &e = emit_op(result_type, id, expr, forward, true);
 		e.need_transpose = need_transpose;
 		register_read(id, ptr, forward);
+
+		// Pass through whether the result is of a packed type.
+		if (has_decoration(ptr, DecorationCPacked))
+			set_decoration(id, DecorationCPacked);
+
 		break;
 	}

@ -5191,11 +5208,18 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)

 		// If the base is immutable, the access chain pointer must also be.
 		// If an expression is mutable and forwardable, we speculate that it is immutable.
-		bool need_transpose;
-		auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &need_transpose);
+		bool need_transpose, result_is_packed;
+		auto e = access_chain(ops[2], &ops[3], length - 3, get<SPIRType>(ops[0]), &need_transpose, &result_is_packed);
 		auto &expr = set<SPIRExpression>(ops[1], move(e), ops[0], should_forward(ops[2]));
 		expr.loaded_from = ops[2];
 		expr.need_transpose = need_transpose;
+
+		// Mark the result as being packed. Some platforms handle packed vectors differently than non-packed ones.
+		if (result_is_packed)
+			set_decoration(ops[1], DecorationCPacked);
+		else
+			unset_decoration(ops[1], DecorationCPacked);
+
 		break;
 	}

@ -5592,10 +5616,15 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
 			trivial_forward = !expression_is_forwarded(vec0);

 			// We only source from first vector, so can use swizzle.
+			// If the vector is packed, unpack it before applying a swizzle (needed for MSL)
 			expr += to_enclosed_expression(vec0);
+			if (has_decoration(vec0, DecorationCPacked))
+				expr = unpack_expression_type(expr, expression_type(vec0));
+
 			expr += ".";
 			for (uint32_t i = 0; i < length; i++)
 				expr += index_to_swizzle(elems[i]);
+
 			if (backend.swizzle_is_function && length > 1)
 				expr += "()";
 		}
--- a/spirv_glsl.hpp
+++ b/spirv_glsl.hpp
@ -378,9 +378,10 @@ protected:
 	SPIRExpression &emit_op(uint32_t result_type, uint32_t result_id, const std::string &rhs, bool forward_rhs,
 	                        bool suppress_usage_tracking = false);
 	std::string access_chain_internal(uint32_t base, const uint32_t *indices, uint32_t count, bool index_is_literal,
-	                                  bool chain_only = false, bool *need_transpose = nullptr);
+	                                  bool chain_only = false, bool *need_transpose = nullptr,
+	                                  bool *result_is_packed = nullptr);
 	std::string access_chain(uint32_t base, const uint32_t *indices, uint32_t count, const SPIRType &target_type,
-	                         bool *need_transpose = nullptr);
+	                         bool *need_transpose = nullptr, bool *result_is_packed = nullptr);

 	std::string flattened_access_chain(uint32_t base, const uint32_t *indices, uint32_t count,
 	                                   const SPIRType &target_type, uint32_t offset, uint32_t matrix_stride,