From 9436cd30367540b9b0f5179cc4c8033c50bdbb34 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 27 Aug 2019 13:16:16 +0200 Subject: [PATCH] MSL: Deal with array copies from and to threadgroup. --- .../frag/lut-promotion-initializer.asm.frag | 57 +++++- ...input-array-builtin-array.invalid.asm.tesc | 57 +++++- .../comp/composite-array-initialization.comp | 57 +++++- .../shaders-msl/comp/composite-construct.comp | 57 +++++- .../opt/shaders-msl/frag/lut-promotion.frag | 55 +++++- .../comp/array-copy-threadgroup-memory.comp | 79 +++++++++ .../vert/pass-array-by-value.vert | 59 ++++++- .../frag/lut-promotion-initializer.asm.frag | 57 +++++- .../frag/single-function-private-lut.asm.frag | 55 +++++- ...input-array-builtin-array.invalid.asm.tesc | 59 ++++++- .../comp/composite-array-initialization.comp | 57 +++++- .../shaders-msl/comp/composite-construct.comp | 57 +++++- .../comp/copy-array-of-arrays.comp | 163 +++++++++++++++--- .../shaders-msl/frag/constant-composites.frag | 55 +++++- reference/shaders-msl/frag/lut-promotion.frag | 57 +++++- reference/shaders-msl/vert/return-array.vert | 59 ++++++- .../comp/array-copy-threadgroup-memory.comp | 18 ++ spirv_cross.cpp | 9 + spirv_cross.hpp | 1 + spirv_glsl.cpp | 9 +- spirv_glsl.hpp | 3 +- spirv_msl.cpp | 83 ++++++--- spirv_msl.hpp | 3 +- 23 files changed, 998 insertions(+), 168 deletions(-) create mode 100644 reference/shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp create mode 100644 shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp diff --git a/reference/opt/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag b/reference/opt/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag index 610d447a..650887df 100644 --- a/reference/opt/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag +++ b/reference/opt/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag @@ -19,17 +19,58 @@ struct main0_in int index [[user(locn0)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) +template +void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) +template +void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } fragment main0_out main0(main0_in in [[stage_in]]) @@ -61,7 +102,7 @@ fragment main0_out main0(main0_in in [[stage_in]]) } int _37 = in.index & 3; out.FragColor += foobar[_37].z; - spvArrayCopyFromConstant1(baz, _90); + spvArrayCopyFromConstantToStack1(baz, _90); out.FragColor += baz[_37].z; return out; } diff --git a/reference/opt/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc b/reference/opt/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc index bbda7be5..342172d3 100644 --- a/reference/opt/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc +++ b/reference/opt/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc @@ -33,17 +33,58 @@ struct main0_in float4 gl_Position [[attribute(1)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) +template +void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) +template +void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_in_threadgroup]], uint gl_PrimitiveID [[threadgroup_position_in_grid]], device main0_out* spvOut [[buffer(28)]], constant uint* spvIndirectParams [[buffer(29)]], device MTLTriangleTessellationFactorsHalf* spvTessLevel [[buffer(26)]], threadgroup main0_in* gl_in [[threadgroup(0)]]) @@ -56,7 +97,7 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_ return; VertexOutput _223[3] = { VertexOutput{ gl_in[0].gl_Position, gl_in[0].VertexOutput_uv }, VertexOutput{ gl_in[1].gl_Position, gl_in[1].VertexOutput_uv }, VertexOutput{ gl_in[2].gl_Position, gl_in[2].VertexOutput_uv } }; VertexOutput param[3]; - spvArrayCopyFromStack1(param, _223); + spvArrayCopyFromStackToStack1(param, _223); gl_out[gl_InvocationID].gl_Position = param[gl_InvocationID].pos; gl_out[gl_InvocationID]._entryPointOutput.uv = param[gl_InvocationID].uv; threadgroup_barrier(mem_flags::mem_device); diff --git a/reference/opt/shaders-msl/comp/composite-array-initialization.comp b/reference/opt/shaders-msl/comp/composite-array-initialization.comp index 8dec8bdd..bc60fbc2 100644 --- a/reference/opt/shaders-msl/comp/composite-array-initialization.comp +++ b/reference/opt/shaders-msl/comp/composite-array-initialization.comp @@ -29,24 +29,65 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(2u, 1u, 1u); constant Data _25[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) +template +void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) +template +void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } kernel void main0(device SSBO& _53 [[buffer(0)]], uint3 gl_WorkGroupID [[threadgroup_position_in_grid]], uint3 gl_LocalInvocationID [[thread_position_in_threadgroup]]) { Data _31[2] = { Data{ X, 2.0 }, Data{ 3.0, 5.0 } }; Data data2[2]; - spvArrayCopyFromStack1(data2, _31); + spvArrayCopyFromStackToStack1(data2, _31); _53.outdata[gl_WorkGroupID.x].a = _25[gl_LocalInvocationID.x].a + data2[gl_LocalInvocationID.x].a; _53.outdata[gl_WorkGroupID.x].b = _25[gl_LocalInvocationID.x].b + data2[gl_LocalInvocationID.x].b; } diff --git a/reference/opt/shaders-msl/comp/composite-construct.comp b/reference/opt/shaders-msl/comp/composite-construct.comp index 6d44fc57..3c8e0956 100644 --- a/reference/opt/shaders-msl/comp/composite-construct.comp +++ b/reference/opt/shaders-msl/comp/composite-construct.comp @@ -15,24 +15,65 @@ struct SSBO1 float4 bs[1]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) +template +void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) +template +void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } kernel void main0(device SSBO0& _16 [[buffer(0)]], device SSBO1& _32 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) { float4 _37[2] = { _16.as[gl_GlobalInvocationID.x], _32.bs[gl_GlobalInvocationID.x] }; float4 values[2]; - spvArrayCopyFromStack1(values, _37); + spvArrayCopyFromStackToStack1(values, _37); _16.as[0] = values[gl_LocalInvocationIndex]; _32.bs[1] = float4(40.0); } diff --git a/reference/opt/shaders-msl/frag/lut-promotion.frag b/reference/opt/shaders-msl/frag/lut-promotion.frag index c9169b79..6c79a12e 100644 --- a/reference/opt/shaders-msl/frag/lut-promotion.frag +++ b/reference/opt/shaders-msl/frag/lut-promotion.frag @@ -19,17 +19,58 @@ struct main0_in int index [[user(locn0)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) +template +void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) +template +void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } fragment main0_out main0(main0_in in [[stage_in]]) diff --git a/reference/shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp b/reference/shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp new file mode 100644 index 00000000..62ee1fef --- /dev/null +++ b/reference/shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp @@ -0,0 +1,79 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(8u, 1u, 1u); + +template +void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +kernel void main0(uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) +{ + threadgroup float shared_group[8][8]; + threadgroup float shared_group_alt[8][8]; + float blob[8]; + for (int i = 0; i < 8; i++) + { + blob[i] = float(i); + } + spvArrayCopyFromStackToThreadGroup1(shared_group[gl_LocalInvocationIndex], blob); + threadgroup_barrier(mem_flags::mem_threadgroup); + float copied_blob[8]; + spvArrayCopyFromThreadGroupToStack1(copied_blob, shared_group[gl_LocalInvocationIndex ^ 1u]); + spvArrayCopyFromThreadGroupToThreadGroup1(shared_group_alt[gl_LocalInvocationIndex], shared_group[gl_LocalInvocationIndex]); +} + diff --git a/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert b/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert index ab563136..67d55bf2 100644 --- a/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert +++ b/reference/shaders-msl-no-opt/vert/pass-array-by-value.vert @@ -18,25 +18,66 @@ struct main0_in int Index2 [[attribute(1)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) +template +void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) +template +void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } float4 consume_constant_arrays2(thread const float4 (&positions)[4], thread const float4 (&positions2)[4], thread int& Index1, thread int& Index2) { float4 indexable[4]; - spvArrayCopyFromStack1(indexable, positions); + spvArrayCopyFromStackToStack1(indexable, positions); float4 indexable_1[4]; - spvArrayCopyFromStack1(indexable_1, positions2); + spvArrayCopyFromStackToStack1(indexable_1, positions2); return indexable[Index1] + indexable_1[Index2]; } diff --git a/reference/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag b/reference/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag index 48f3317d..cd87f5b0 100644 --- a/reference/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag +++ b/reference/shaders-msl/asm/frag/lut-promotion-initializer.asm.frag @@ -19,17 +19,58 @@ struct main0_in int index [[user(locn0)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) +template +void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) +template +void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } fragment main0_out main0(main0_in in [[stage_in]]) @@ -59,7 +100,7 @@ fragment main0_out main0(main0_in in [[stage_in]]) foobar[1].z = 20.0; } out.FragColor += foobar[in.index & 3].z; - spvArrayCopyFromConstant1(baz, _90); + spvArrayCopyFromConstantToStack1(baz, _90); out.FragColor += baz[in.index & 3].z; return out; } diff --git a/reference/shaders-msl/asm/frag/single-function-private-lut.asm.frag b/reference/shaders-msl/asm/frag/single-function-private-lut.asm.frag index 628d5c7c..15e0749e 100644 --- a/reference/shaders-msl/asm/frag/single-function-private-lut.asm.frag +++ b/reference/shaders-msl/asm/frag/single-function-private-lut.asm.frag @@ -24,17 +24,58 @@ Tx mod(Tx x, Ty y) return x - y * floor(x / y); } -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) +template +void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) +template +void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } fragment main0_out main0(float4 gl_FragCoord [[position]]) diff --git a/reference/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc b/reference/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc index 6a765117..9beb7c00 100644 --- a/reference/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc +++ b/reference/shaders-msl/asm/tesc/tess-fixed-input-array-builtin-array.invalid.asm.tesc @@ -45,17 +45,58 @@ struct main0_in float4 gl_Position [[attribute(1)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) +template +void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) +template +void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } HSOut _hs_main(thread const VertexOutput (&p)[3], thread const uint& i) @@ -93,7 +134,7 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_ p[2].uv = gl_in[2].VertexOutput_uv; uint i = gl_InvocationID; VertexOutput param[3]; - spvArrayCopyFromStack1(param, p); + spvArrayCopyFromStackToStack1(param, p); uint param_1 = i; HSOut flattenTemp = _hs_main(param, param_1); gl_out[gl_InvocationID].gl_Position = flattenTemp.pos; @@ -102,7 +143,7 @@ kernel void main0(main0_in in [[stage_in]], uint gl_InvocationID [[thread_index_ if (int(gl_InvocationID) == 0) { VertexOutput param_2[3]; - spvArrayCopyFromStack1(param_2, p); + spvArrayCopyFromStackToStack1(param_2, p); HSConstantOut _patchConstantResult = PatchHS(param_2); spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[0] = half(_patchConstantResult.EdgeTess[0]); spvTessLevel[gl_PrimitiveID].edgeTessellationFactor[1] = half(_patchConstantResult.EdgeTess[1]); diff --git a/reference/shaders-msl/comp/composite-array-initialization.comp b/reference/shaders-msl/comp/composite-array-initialization.comp index ac10e750..33ed8702 100644 --- a/reference/shaders-msl/comp/composite-array-initialization.comp +++ b/reference/shaders-msl/comp/composite-array-initialization.comp @@ -29,17 +29,58 @@ constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(2u, 1u, 1u); constant Data _25[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) +template +void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) +template +void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } Data combine(thread const Data& a, thread const Data& b) @@ -52,7 +93,7 @@ kernel void main0(device SSBO& _53 [[buffer(0)]], uint3 gl_WorkGroupID [[threadg Data data[2] = { Data{ 1.0, 2.0 }, Data{ 3.0, 4.0 } }; Data _31[2] = { Data{ X, 2.0 }, Data{ 3.0, 5.0 } }; Data data2[2]; - spvArrayCopyFromStack1(data2, _31); + spvArrayCopyFromStackToStack1(data2, _31); Data param = data[gl_LocalInvocationID.x]; Data param_1 = data2[gl_LocalInvocationID.x]; Data _73 = combine(param, param_1); diff --git a/reference/shaders-msl/comp/composite-construct.comp b/reference/shaders-msl/comp/composite-construct.comp index 4b5ea37e..bc1bf8a6 100644 --- a/reference/shaders-msl/comp/composite-construct.comp +++ b/reference/shaders-msl/comp/composite-construct.comp @@ -23,24 +23,65 @@ struct Composite constant float4 _43[2] = { float4(20.0), float4(40.0) }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) +template +void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) +template +void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } kernel void main0(device SSBO0& _16 [[buffer(0)]], device SSBO1& _32 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) { float4 _37[2] = { _16.as[gl_GlobalInvocationID.x], _32.bs[gl_GlobalInvocationID.x] }; float4 values[2]; - spvArrayCopyFromStack1(values, _37); + spvArrayCopyFromStackToStack1(values, _37); Composite c = Composite{ values[0], _43[1] }; _16.as[0] = values[gl_LocalInvocationIndex]; _32.bs[1] = c.b; diff --git a/reference/shaders-msl/comp/copy-array-of-arrays.comp b/reference/shaders-msl/comp/copy-array-of-arrays.comp index 0fa6c6bf..b5f6fced 100644 --- a/reference/shaders-msl/comp/copy-array-of-arrays.comp +++ b/reference/shaders-msl/comp/copy-array-of-arrays.comp @@ -17,59 +17,172 @@ constant float _19[2] = { 3.0, 4.0 }; constant float _20[2][2] = { { 1.0, 2.0 }, { 3.0, 4.0 } }; constant float _21[2][2][2] = { { { 1.0, 2.0 }, { 3.0, 4.0 } }, { { 1.0, 2.0 }, { 3.0, 4.0 } } }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) -{ - for (uint i = 0; i < N; dst[i] = src[i], i++); -} - -template -void spvArrayCopyFromStack2(thread T (&dst)[A][B], thread const T (&src)[A][B]) +template +void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) { for (uint i = 0; i < A; i++) { - spvArrayCopyFromStack1(dst[i], src[i]); + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; } } template -void spvArrayCopyFromConstant2(thread T (&dst)[A][B], constant T (&src)[A][B]) +void spvArrayCopyFromConstantToStack2(thread T (&dst)[A][B], constant T (&src)[A][B]) { for (uint i = 0; i < A; i++) { - spvArrayCopyFromConstant1(dst[i], src[i]); + spvArrayCopyFromConstantToStack1(dst[i], src[i]); + } +} + +template +void spvArrayCopyFromConstantToThreadGroup2(threadgroup T (&dst)[A][B], constant T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromConstantToThreadGroup1(dst[i], src[i]); + } +} + +template +void spvArrayCopyFromStackToStack2(thread T (&dst)[A][B], thread const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromStackToStack1(dst[i], src[i]); + } +} + +template +void spvArrayCopyFromStackToThreadGroup2(threadgroup T (&dst)[A][B], thread const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromStackToThreadGroup1(dst[i], src[i]); + } +} + +template +void spvArrayCopyFromThreadGroupToStack2(thread T (&dst)[A][B], threadgroup const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromThreadGroupToStack1(dst[i], src[i]); + } +} + +template +void spvArrayCopyFromThreadGroupToThreadGroup2(threadgroup T (&dst)[A][B], threadgroup const T (&src)[A][B]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromThreadGroupToThreadGroup1(dst[i], src[i]); } } template -void spvArrayCopyFromStack3(thread T (&dst)[A][B][C], thread const T (&src)[A][B][C]) +void spvArrayCopyFromConstantToStack3(thread T (&dst)[A][B][C], constant T (&src)[A][B][C]) { for (uint i = 0; i < A; i++) { - spvArrayCopyFromStack2(dst[i], src[i]); + spvArrayCopyFromConstantToStack2(dst[i], src[i]); } } template -void spvArrayCopyFromConstant3(thread T (&dst)[A][B][C], constant T (&src)[A][B][C]) +void spvArrayCopyFromConstantToThreadGroup3(threadgroup T (&dst)[A][B][C], constant T (&src)[A][B][C]) { for (uint i = 0; i < A; i++) { - spvArrayCopyFromConstant2(dst[i], src[i]); + spvArrayCopyFromConstantToThreadGroup2(dst[i], src[i]); + } +} + +template +void spvArrayCopyFromStackToStack3(thread T (&dst)[A][B][C], thread const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromStackToStack2(dst[i], src[i]); + } +} + +template +void spvArrayCopyFromStackToThreadGroup3(threadgroup T (&dst)[A][B][C], thread const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromStackToThreadGroup2(dst[i], src[i]); + } +} + +template +void spvArrayCopyFromThreadGroupToStack3(thread T (&dst)[A][B][C], threadgroup const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromThreadGroupToStack2(dst[i], src[i]); + } +} + +template +void spvArrayCopyFromThreadGroupToThreadGroup3(threadgroup T (&dst)[A][B][C], threadgroup const T (&src)[A][B][C]) +{ + for (uint i = 0; i < A; i++) + { + spvArrayCopyFromThreadGroupToThreadGroup2(dst[i], src[i]); } } kernel void main0(device BUF& o [[buffer(0)]]) { float c[2][2][2]; - spvArrayCopyFromConstant3(c, _21); + spvArrayCopyFromConstantToStack3(c, _21); o.a = int(c[1][1][1]); float _43[2] = { o.b, o.c }; float _48[2] = { o.b, o.b }; @@ -79,9 +192,9 @@ kernel void main0(device BUF& o [[buffer(0)]]) float _60[2][2] = { { _54[0], _54[1] }, { _59[0], _59[1] } }; float _61[2][2][2] = { { { _49[0][0], _49[0][1] }, { _49[1][0], _49[1][1] } }, { { _60[0][0], _60[0][1] }, { _60[1][0], _60[1][1] } } }; float d[2][2][2]; - spvArrayCopyFromStack3(d, _61); + spvArrayCopyFromStackToStack3(d, _61); float e[2][2][2]; - spvArrayCopyFromStack3(e, d); + spvArrayCopyFromStackToStack3(e, d); o.b = e[1][0][1]; } diff --git a/reference/shaders-msl/frag/constant-composites.frag b/reference/shaders-msl/frag/constant-composites.frag index 504beaa0..e5b3574a 100644 --- a/reference/shaders-msl/frag/constant-composites.frag +++ b/reference/shaders-msl/frag/constant-composites.frag @@ -24,17 +24,58 @@ struct main0_in int line [[user(locn0)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) +template +void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) +template +void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } fragment main0_out main0(main0_in in [[stage_in]]) diff --git a/reference/shaders-msl/frag/lut-promotion.frag b/reference/shaders-msl/frag/lut-promotion.frag index f7e51edb..d71214f2 100644 --- a/reference/shaders-msl/frag/lut-promotion.frag +++ b/reference/shaders-msl/frag/lut-promotion.frag @@ -19,17 +19,58 @@ struct main0_in int index [[user(locn0)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) +template +void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) +template +void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } fragment main0_out main0(main0_in in [[stage_in]]) @@ -59,7 +100,7 @@ fragment main0_out main0(main0_in in [[stage_in]]) } out.FragColor += foobar[in.index & 3].z; float4 baz[4] = { float4(0.0), float4(1.0), float4(8.0), float4(5.0) }; - spvArrayCopyFromConstant1(baz, _104); + spvArrayCopyFromConstantToStack1(baz, _104); out.FragColor += baz[in.index & 3].z; return out; } diff --git a/reference/shaders-msl/vert/return-array.vert b/reference/shaders-msl/vert/return-array.vert index cd06fdda..fa0b4c69 100644 --- a/reference/shaders-msl/vert/return-array.vert +++ b/reference/shaders-msl/vert/return-array.vert @@ -18,22 +18,63 @@ struct main0_in float4 vInput1 [[attribute(1)]]; }; -// Implementation of an array copy function to cover GLSL's ability to copy an array via assignment. -template -void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N]) +template +void spvArrayCopyFromConstantToStack1(thread T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } -template -void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N]) +template +void spvArrayCopyFromConstantToThreadGroup1(threadgroup T (&dst)[A], constant T (&src)[A]) { - for (uint i = 0; i < N; dst[i] = src[i], i++); + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToStack1(thread T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromStackToThreadGroup1(threadgroup T (&dst)[A], thread const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToStack1(thread T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } +} + +template +void spvArrayCopyFromThreadGroupToThreadGroup1(threadgroup T (&dst)[A], threadgroup const T (&src)[A]) +{ + for (uint i = 0; i < A; i++) + { + dst[i] = src[i]; + } } void test(thread float4 (&SPIRV_Cross_return_value)[2]) { - spvArrayCopyFromConstant1(SPIRV_Cross_return_value, _20); + spvArrayCopyFromConstantToStack1(SPIRV_Cross_return_value, _20); } void test2(thread float4 (&SPIRV_Cross_return_value)[2], thread float4& vInput0, thread float4& vInput1) @@ -41,7 +82,7 @@ void test2(thread float4 (&SPIRV_Cross_return_value)[2], thread float4& vInput0, float4 foobar[2]; foobar[0] = vInput0; foobar[1] = vInput1; - spvArrayCopyFromStack1(SPIRV_Cross_return_value, foobar); + spvArrayCopyFromStackToStack1(SPIRV_Cross_return_value, foobar); } vertex main0_out main0(main0_in in [[stage_in]]) diff --git a/shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp b/shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp new file mode 100644 index 00000000..081c3962 --- /dev/null +++ b/shaders-msl-no-opt/comp/array-copy-threadgroup-memory.comp @@ -0,0 +1,18 @@ +#version 450 +layout(local_size_x = 8) in; + +shared float shared_group[8][8]; +shared float shared_group_alt[8][8]; + +void main() +{ + float blob[8]; + for (int i = 0; i < 8; i++) + blob[i] = float(i); + shared_group[gl_LocalInvocationIndex] = blob; + + barrier(); + + float copied_blob[8] = shared_group[gl_LocalInvocationIndex ^ 1u]; + shared_group_alt[gl_LocalInvocationIndex] = shared_group[gl_LocalInvocationIndex]; +} diff --git a/spirv_cross.cpp b/spirv_cross.cpp index a04a9469..c8a6e5c8 100644 --- a/spirv_cross.cpp +++ b/spirv_cross.cpp @@ -265,6 +265,15 @@ SPIRVariable *Compiler::maybe_get_backing_variable(uint32_t chain) return var; } +StorageClass Compiler::get_backing_variable_storage(uint32_t ptr) +{ + auto *var = maybe_get_backing_variable(ptr); + if (var) + return var->storage; + else + return expression_type(ptr).storage; +} + void Compiler::register_read(uint32_t expr, uint32_t chain, bool forwarded) { auto &e = get(expr); diff --git a/spirv_cross.hpp b/spirv_cross.hpp index 9701d4c2..73de7a5d 100644 --- a/spirv_cross.hpp +++ b/spirv_cross.hpp @@ -605,6 +605,7 @@ protected: bool expression_is_lvalue(uint32_t id) const; bool variable_storage_is_aliased(const SPIRVariable &var); SPIRVariable *maybe_get_backing_variable(uint32_t chain); + spv::StorageClass get_backing_variable_storage(uint32_t ptr); void register_read(uint32_t expr, uint32_t chain, bool forwarded); void register_write(uint32_t chain); diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index c390218d..941edb7f 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -7888,7 +7888,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // it is an array, and our backend does not support arrays as value types. // Emit the temporary, and copy it explicitly. e = &emit_uninitialized_temporary_expression(result_type, id); - emit_array_copy(to_expression(id), ptr); + emit_array_copy(to_expression(id), ptr, StorageClassFunction, get_backing_variable_storage(ptr)); } else e = &emit_op(result_type, id, expr, forward, !usage_tracking); @@ -12232,7 +12232,10 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) // If we cannot return arrays, we will have a special out argument we can write to instead. // The backend is responsible for setting this up, and redirection the return values as appropriate. if (ir.ids[block.return_value].get_type() != TypeUndef) - emit_array_copy("SPIRV_Cross_return_value", block.return_value); + { + emit_array_copy("SPIRV_Cross_return_value", block.return_value, + StorageClassFunction, get_backing_variable_storage(block.return_value)); + } if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) || block.loop_dominator != SPIRBlock::NoDominator) @@ -12427,7 +12430,7 @@ uint32_t CompilerGLSL::mask_relevant_memory_semantics(uint32_t semantics) MemorySemanticsCrossWorkgroupMemoryMask | MemorySemanticsSubgroupMemoryMask); } -void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id) +void CompilerGLSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageClass, StorageClass) { statement(lhs, " = ", to_expression(rhs_id), ";"); } diff --git a/spirv_glsl.hpp b/spirv_glsl.hpp index a43cae54..7d40f551 100644 --- a/spirv_glsl.hpp +++ b/spirv_glsl.hpp @@ -546,7 +546,8 @@ protected: std::string layout_for_variable(const SPIRVariable &variable); std::string to_combined_image_sampler(uint32_t image_id, uint32_t samp_id); virtual bool skip_argument(uint32_t id) const; - virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id); + virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id, + spv::StorageClass lhs_storage, spv::StorageClass rhs_storage); virtual void emit_block_hints(const SPIRBlock &block); virtual std::string to_initializer_expression(const SPIRVariable &var); diff --git a/spirv_msl.cpp b/spirv_msl.cpp index f3b2ded7..8d365dc9 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -3185,40 +3185,41 @@ void CompilerMSL::emit_custom_functions() break; case SPVFuncImplArrayCopy: - statement("// Implementation of an array copy function to cover GLSL's ability to copy an array via " - "assignment."); - statement("template"); - statement("void spvArrayCopyFromStack1(thread T (&dst)[N], thread const T (&src)[N])"); - begin_scope(); - statement("for (uint i = 0; i < N; dst[i] = src[i], i++);"); - end_scope(); - statement(""); - - statement("template"); - statement("void spvArrayCopyFromConstant1(thread T (&dst)[N], constant T (&src)[N])"); - begin_scope(); - statement("for (uint i = 0; i < N; dst[i] = src[i], i++);"); - end_scope(); - statement(""); - break; - case SPVFuncImplArrayOfArrayCopy2Dim: case SPVFuncImplArrayOfArrayCopy3Dim: case SPVFuncImplArrayOfArrayCopy4Dim: case SPVFuncImplArrayOfArrayCopy5Dim: case SPVFuncImplArrayOfArrayCopy6Dim: { + // Unfortunately we cannot template on the address space, so combinatorial explosion it is. static const char *function_name_tags[] = { - "FromStack", - "FromConstant", + "FromConstantToStack", + "FromConstantToThreadGroup", + "FromStackToStack", + "FromStackToThreadGroup", + "FromThreadGroupToStack", + "FromThreadGroupToThreadGroup", }; static const char *src_address_space[] = { - "thread const", "constant", + "constant", + "thread const", + "thread const", + "threadgroup const", + "threadgroup const", }; - for (uint32_t variant = 0; variant < 2; variant++) + static const char *dst_address_space[] = { + "thread", + "threadgroup", + "thread", + "threadgroup", + "thread", + "threadgroup", + }; + + for (uint32_t variant = 0; variant < 6; variant++) { uint32_t dimensions = spv_func - SPVFuncImplArrayCopyMultidimBase; string tmp = "templateself); - emit_array_copy(to_expression(id_lhs), id_rhs); + emit_array_copy(to_expression(id_lhs), id_rhs, get_backing_variable_storage(id_lhs), get_backing_variable_storage(id_rhs)); register_write(id_lhs); return true; diff --git a/spirv_msl.hpp b/spirv_msl.hpp index f0858c9d..1ca9c782 100644 --- a/spirv_msl.hpp +++ b/spirv_msl.hpp @@ -555,7 +555,8 @@ protected: void add_pragma_line(const std::string &line); void add_typedef_line(const std::string &line); void emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uint32_t id_mem_sem); - void emit_array_copy(const std::string &lhs, uint32_t rhs_id) override; + void emit_array_copy(const std::string &lhs, uint32_t rhs_id, + spv::StorageClass lhs_storage, spv::StorageClass rhs_storage) override; void build_implicit_builtins(); uint32_t build_constant_uint_array_pointer(); void emit_entry_point_declarations() override;