MSL: Handle coherent, volatile, and restrict.
This maps them to their MSL equivalents. I've mapped `Coherent` to `volatile`, since MSL doesn't have anything weaker than `volatile` but stronger than nothing.

As part of this, I had to remove the implicit `volatile` added for atomic operation casts. If the buffer is already `coherent` or `volatile`, we would emit a second, redundant `volatile`. I think dropping it is OK even when the buffer *doesn't* have `coherent`: `T *` is implicitly convertible to `volatile T *`, but not vice versa. It seems to compile OK at any rate. (Note that the non-`volatile` overloads of the atomic functions documented in the spec aren't actually present in the MSL 2.2 stdlib headers.)

`restrict` is tricky, because in MSL, as in C++, it needs to go *after* the asterisk or ampersand of the pointer type it's modifying.

Another issue: in the `Simple`, `GLSL450`, and `Vulkan` memory models, `Restrict` is the default (i.e. it does not need to be specified), while MSL likely follows the `OpenCL` model, where `Aliased` is the default. We probably need to implicitly set either `Restrict` or `Aliased` depending on the module's declared memory model.
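As a rough sketch of the resulting mapping (the GLSL declarations here are hypothetical inputs; the MSL signatures match the updated reference output below):

    // GLSL: layout(binding = 0) coherent buffer SSBO { vec4 value; } ssbo;
    // MSL entry point argument: Coherent maps to volatile.
    kernel void main0(volatile device SSBO& ssbo [[buffer(0)]])

    // GLSL: layout(binding = 0) restrict buffer SSBO { vec4 value; } ssbo;
    // MSL: restrict is placed after the ampersand, as the language requires.
    kernel void main0(device SSBO& restrict ssbo [[buffer(0)]])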
Parent: 9f3bebe3d0
Commit: 058f1a0933
@@ -20,7 +20,7 @@ uint2 spvTexelBufferCoord(uint tc)
 
 kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint _29 = atomic_fetch_sub_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
+    uint _29 = atomic_fetch_sub_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
     u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(as_type<int>(as_type<float>(_29))));
 }
@@ -20,7 +20,7 @@ uint2 spvTexelBufferCoord(uint tc)
 
 kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
+    uint _29 = atomic_fetch_add_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
     u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(as_type<int>(as_type<float>(_29))));
 }
@@ -15,7 +15,7 @@ struct _4
     int4 _m1;
 };
 
-kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
+kernel void main0(device _3& restrict _5 [[buffer(0)]], device _4& restrict _6 [[buffer(1)]])
 {
-    _6._m0 = _5._m1 + uint4(_5._m0);
+    _6._m0 = uint4(_5._m0) + _5._m1;
@@ -15,7 +15,7 @@ struct _7
     int4 _m1;
 };
 
-kernel void main0(device _6& _8 [[buffer(0)]], device _7& _9 [[buffer(1)]])
+kernel void main0(device _6& restrict _8 [[buffer(0)]], device _7& restrict _9 [[buffer(1)]])
 {
-    _9._m0 = _8._m1 + uint4(_8._m0);
+    _9._m0 = uint4(_8._m0) + _8._m1;
@@ -16,55 +16,55 @@ kernel void main0(device SSBO& ssbo [[buffer(0)]])
 {
     threadgroup uint shared_u32;
     threadgroup int shared_i32;
-    uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _16 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _18 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _20 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _22 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _24 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _26 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _28 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
     uint _32;
     do
     {
         _32 = 10u;
-    } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u);
-    int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _48 = atomic_exchange_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u);
+    int _36 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _38 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _40 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _42 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _44 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _46 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _48 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
     int _52;
     do
     {
         _52 = 10;
-    } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10);
+    } while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10);
     shared_u32 = 10u;
     shared_i32 = 10;
-    uint _57 = atomic_fetch_add_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _58 = atomic_fetch_or_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _59 = atomic_fetch_xor_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _60 = atomic_fetch_and_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _61 = atomic_fetch_min_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _62 = atomic_fetch_max_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _63 = atomic_exchange_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _57 = atomic_fetch_add_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _58 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _59 = atomic_fetch_xor_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _60 = atomic_fetch_and_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _61 = atomic_fetch_min_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _62 = atomic_fetch_max_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _63 = atomic_exchange_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
     uint _64;
     do
     {
         _64 = 10u;
-    } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u);
-    int _65 = atomic_fetch_add_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _66 = atomic_fetch_or_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _67 = atomic_fetch_xor_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _68 = atomic_fetch_and_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _69 = atomic_fetch_min_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _70 = atomic_fetch_max_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _71 = atomic_exchange_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u);
+    int _65 = atomic_fetch_add_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _66 = atomic_fetch_or_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _67 = atomic_fetch_xor_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _68 = atomic_fetch_and_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _69 = atomic_fetch_min_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _70 = atomic_fetch_max_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _71 = atomic_exchange_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
     int _72;
     do
     {
         _72 = 10;
-    } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10);
+    } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10);
 }
@@ -26,7 +26,7 @@ kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buff
     float4 _29 = _23.in_data[gl_GlobalInvocationID.x];
     if (dot(_29, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875)
     {
-        uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_48.counter, 1u, memory_order_relaxed);
+        uint _52 = atomic_fetch_add_explicit((device atomic_uint*)&_48.counter, 1u, memory_order_relaxed);
         _45.out_data[_52] = _29;
     }
 }
@@ -8,7 +8,7 @@ struct SSBO
     float4 value;
 };
 
-kernel void main0(device SSBO& _10 [[buffer(0)]])
+kernel void main0(volatile device SSBO& _10 [[buffer(0)]])
 {
     _10.value = float4(20.0);
 }
@@ -8,7 +8,7 @@ struct SSBO
     int4 value;
 };
 
-kernel void main0(device SSBO& _10 [[buffer(0)]], texture2d<int> uImage [[texture(0)]])
+kernel void main0(volatile device SSBO& _10 [[buffer(0)]], texture2d<int> uImage [[texture(0)]])
 {
     _10.value = uImage.read(uint2(int2(10)));
 }
@@ -28,7 +28,7 @@ kernel void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buff
     float _28 = _22.in_data[gl_GlobalInvocationID.x];
     if (_28 > 12.0)
     {
-        uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_41.count, 1u, memory_order_relaxed);
+        uint _45 = atomic_fetch_add_explicit((device atomic_uint*)&_41.count, 1u, memory_order_relaxed);
         _38.out_data[_45] = _28;
     }
 }
@@ -21,7 +21,7 @@ struct main0_in
     float4 m_17 [[attribute(0)]];
 };
 
-vertex void main0(main0_in in [[stage_in]], device _23& _25 [[buffer(0)]])
+vertex void main0(main0_in in [[stage_in]], volatile device _23& _25 [[buffer(0)]])
 {
     main0_out out = {};
     out.gl_Position = in.m_17;
@@ -14,7 +14,7 @@ struct SSBO
 
 kernel void main0(device SSBO& _5 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint _24 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_5.count, 1u, memory_order_relaxed);
+    uint _24 = atomic_fetch_add_explicit((device atomic_uint*)&_5.count, 1u, memory_order_relaxed);
     if (_24 < 1024u)
     {
         _5.data[_24] = gl_GlobalInvocationID.x;
@@ -20,7 +20,7 @@ uint2 spvTexelBufferCoord(uint tc)
 
 kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint _29 = atomic_fetch_sub_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
+    uint _29 = atomic_fetch_sub_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
     float4 r0;
     r0.x = as_type<float>(_29);
     u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(((uint(as_type<int>(r0.x)) * 1u) + (uint(0) >> 2u))));
@@ -20,7 +20,7 @@ uint2 spvTexelBufferCoord(uint tc)
 
 kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
+    uint _29 = atomic_fetch_add_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
     float4 r0;
     r0.x = as_type<float>(_29);
     u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(((uint(as_type<int>(r0.x)) * 1u) + (uint(0) >> 2u))));
@@ -15,7 +15,7 @@ struct _4
     int4 _m1;
 };
 
-kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
+kernel void main0(device _3& restrict _5 [[buffer(0)]], device _4& restrict _6 [[buffer(1)]])
 {
-    _6._m0 = _5._m1 + uint4(_5._m0);
+    _6._m0 = uint4(_5._m0) + _5._m1;
@@ -15,7 +15,7 @@ struct _7
     int4 _m1;
 };
 
-kernel void main0(device _6& _8 [[buffer(0)]], device _7& _9 [[buffer(1)]])
+kernel void main0(device _6& restrict _8 [[buffer(0)]], device _7& restrict _9 [[buffer(1)]])
 {
-    _9._m0 = _8._m1 + uint4(_8._m0);
+    _9._m0 = uint4(_8._m0) + _8._m1;
@@ -16,55 +16,55 @@ kernel void main0(device SSBO& ssbo [[buffer(0)]])
 {
     threadgroup uint shared_u32;
     threadgroup int shared_i32;
-    uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _16 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _18 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _20 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _22 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _24 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _26 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _28 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
     uint _32;
     do
     {
         _32 = 10u;
-    } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u);
-    int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _48 = atomic_exchange_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u);
+    int _36 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _38 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _40 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _42 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _44 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _46 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _48 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
     int _52;
     do
     {
         _52 = 10;
-    } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10);
+    } while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10);
     shared_u32 = 10u;
     shared_i32 = 10;
-    uint _57 = atomic_fetch_add_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _58 = atomic_fetch_or_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _59 = atomic_fetch_xor_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _60 = atomic_fetch_and_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _61 = atomic_fetch_min_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _62 = atomic_fetch_max_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _63 = atomic_exchange_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _57 = atomic_fetch_add_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _58 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _59 = atomic_fetch_xor_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _60 = atomic_fetch_and_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _61 = atomic_fetch_min_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _62 = atomic_fetch_max_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _63 = atomic_exchange_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
     uint _64;
     do
     {
         _64 = 10u;
-    } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u);
-    int _65 = atomic_fetch_add_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _66 = atomic_fetch_or_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _67 = atomic_fetch_xor_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _68 = atomic_fetch_and_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _69 = atomic_fetch_min_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _70 = atomic_fetch_max_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _71 = atomic_exchange_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u);
+    int _65 = atomic_fetch_add_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _66 = atomic_fetch_or_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _67 = atomic_fetch_xor_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _68 = atomic_fetch_and_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _69 = atomic_fetch_min_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _70 = atomic_fetch_max_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _71 = atomic_exchange_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
     int _72;
     do
     {
         _72 = 10;
-    } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10);
+    } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10);
 }
@@ -27,7 +27,7 @@ kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buff
     float4 idata = _23.in_data[ident];
     if (dot(idata, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875)
     {
-        uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_48.counter, 1u, memory_order_relaxed);
+        uint _52 = atomic_fetch_add_explicit((device atomic_uint*)&_48.counter, 1u, memory_order_relaxed);
         _45.out_data[_52] = idata;
     }
 }
@@ -8,7 +8,7 @@ struct SSBO
     float4 value;
 };
 
-kernel void main0(device SSBO& _10 [[buffer(0)]])
+kernel void main0(volatile device SSBO& _10 [[buffer(0)]])
 {
     _10.value = float4(20.0);
 }
@@ -8,7 +8,7 @@ struct SSBO
     int4 value;
 };
 
-kernel void main0(device SSBO& _10 [[buffer(0)]], texture2d<int> uImage [[texture(0)]])
+kernel void main0(volatile device SSBO& _10 [[buffer(0)]], texture2d<int> uImage [[texture(0)]])
 {
     _10.value = uImage.read(uint2(int2(10)));
 }
@@ -29,7 +29,7 @@ kernel void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buff
     float idata = _22.in_data[ident];
     if (idata > 12.0)
     {
-        uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_41.count, 1u, memory_order_relaxed);
+        uint _45 = atomic_fetch_add_explicit((device atomic_uint*)&_41.count, 1u, memory_order_relaxed);
         _38.out_data[_45] = idata;
     }
 }
@@ -21,7 +21,7 @@ struct main0_in
     float4 m_17 [[attribute(0)]];
 };
 
-vertex void main0(main0_in in [[stage_in]], device _23& _25 [[buffer(0)]])
+vertex void main0(main0_in in [[stage_in]], volatile device _23& _25 [[buffer(0)]])
 {
     main0_out out = {};
     out.gl_Position = in.m_17;
spirv_msl.cpp (137 changed lines):
@@ -728,7 +728,7 @@ void CompilerMSL::emit_entry_point_declarations()
         const auto &var = get<SPIRVariable>(array_id);
         const auto &type = get_variable_data_type(var);
         string name = to_name(array_id);
-        statement(get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + name + "[] =");
+        statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(array_id), name, "[] =");
         begin_scope();
         for (uint32_t i = 0; i < type.array[0]; ++i)
             statement(name + "_" + convert_to_string(i) + ",");
@@ -4594,7 +4594,7 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
     string exp = string(op) + "(";
 
     auto &type = get_pointee_type(expression_type(obj));
-    exp += "(volatile ";
+    exp += "(";
     auto *var = maybe_get_backing_variable(obj);
     if (!var)
         SPIRV_CROSS_THROW("No backing variable for atomic operation.");
@@ -6190,11 +6190,19 @@ string CompilerMSL::func_type_decl(SPIRType &type)
 string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
 {
     const auto &type = get<SPIRType>(argument.basetype);
+    Bitset flags;
+    if (type.basetype == SPIRType::Struct &&
+        (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)))
+        flags = ir.get_buffer_block_flags(argument);
+    else
+        flags = get_decoration_bitset(argument.self);
+    const char *addr_space = nullptr;
 
     switch (type.storage)
     {
     case StorageClassWorkgroup:
-        return "threadgroup";
+        addr_space = "threadgroup";
+        break;
 
     case StorageClassStorageBuffer:
     {
@@ -6202,9 +6210,10 @@ string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
         // we should not assume any constness here. Only for global SSBOs.
         bool readonly = false;
         if (has_decoration(type.self, DecorationBlock))
-            readonly = ir.get_buffer_block_flags(argument).get(DecorationNonWritable);
+            readonly = flags.get(DecorationNonWritable);
 
-        return readonly ? "const device" : "device";
+        addr_space = readonly ? "const device" : "device";
+        break;
     }
 
     case StorageClassUniform:
@@ -6215,54 +6224,61 @@ string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
         bool ssbo = has_decoration(type.self, DecorationBufferBlock);
         if (ssbo)
         {
-            bool readonly = ir.get_buffer_block_flags(argument).get(DecorationNonWritable);
-            return readonly ? "const device" : "device";
+            bool readonly = flags.get(DecorationNonWritable);
+            addr_space = readonly ? "const device" : "device";
         }
         else
-            return "constant";
+            addr_space = "constant";
+        break;
     }
     break;
 
     case StorageClassFunction:
     case StorageClassGeneric:
         // No address space for plain values.
-        return type.pointer ? "thread" : "";
+        addr_space = type.pointer ? "thread" : "";
+        break;
 
     case StorageClassInput:
         if (get_execution_model() == ExecutionModelTessellationControl && argument.basevariable == stage_in_ptr_var_id)
-            return "threadgroup";
+            addr_space = "threadgroup";
         break;
 
     case StorageClassOutput:
         if (capture_output_to_buffer)
-            return "device";
+            addr_space = "device";
         break;
 
     default:
         break;
     }
 
-    return "thread";
+    if (!addr_space)
+        addr_space = "thread";
+
+    return join(flags.get(DecorationVolatile) || flags.get(DecorationCoherent) ? "volatile " : "", addr_space);
 }
 
 string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id)
 {
+    // This can be called for variable pointer contexts as well, so be very careful about which method we choose.
+    Bitset flags;
+    if (ir.ids[id].get_type() == TypeVariable && type.basetype == SPIRType::Struct &&
+        (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)))
+        flags = get_buffer_block_flags(id);
+    else
+        flags = get_decoration_bitset(id);
+
+    const char *addr_space = nullptr;
     switch (type.storage)
    {
     case StorageClassWorkgroup:
-        return "threadgroup";
+        addr_space = "threadgroup";
+        break;
 
     case StorageClassStorageBuffer:
-    {
-        // This can be called for variable pointer contexts as well, so be very careful about which method we choose.
-        Bitset flags;
-        if (ir.ids[id].get_type() == TypeVariable && has_decoration(type.self, DecorationBlock))
-            flags = get_buffer_block_flags(id);
-        else
-            flags = get_decoration_bitset(id);
-
-        return flags.get(DecorationNonWritable) ? "const device" : "device";
-    }
+        addr_space = flags.get(DecorationNonWritable) ? "const device" : "device";
+        break;
 
     case StorageClassUniform:
     case StorageClassUniformConstant:
@@ -6271,37 +6287,53 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id)
     {
         bool ssbo = has_decoration(type.self, DecorationBufferBlock);
         if (ssbo)
-        {
-            // This can be called for variable pointer contexts as well, so be very careful about which method we choose.
-            Bitset flags;
-            if (ir.ids[id].get_type() == TypeVariable && has_decoration(type.self, DecorationBlock))
-                flags = get_buffer_block_flags(id);
-            else
-                flags = get_decoration_bitset(id);
-
-            return flags.get(DecorationNonWritable) ? "const device" : "device";
-        }
+            addr_space = flags.get(DecorationNonWritable) ? "const device" : "device";
         else
-            return "constant";
+            addr_space = "constant";
     }
     else
-        return "constant";
+        addr_space = "constant";
+    break;
 
     case StorageClassFunction:
     case StorageClassGeneric:
         // No address space for plain values.
-        return type.pointer ? "thread" : "";
+        addr_space = type.pointer ? "thread" : "";
+        break;
 
     case StorageClassOutput:
         if (capture_output_to_buffer)
-            return "device";
+            addr_space = "device";
         break;
 
     default:
         break;
     }
 
-    return "thread";
+    if (!addr_space)
+        addr_space = "thread";
+
+    return join(flags.get(DecorationVolatile) || flags.get(DecorationCoherent) ? "volatile " : "", addr_space);
 }
 
+const char *CompilerMSL::to_restrict(uint32_t id, bool space)
+{
+    // This can be called for variable pointer contexts as well, so be very careful about which method we choose.
+    Bitset flags;
+    if (ir.ids[id].get_type() == TypeVariable)
+    {
+        uint32_t type_id = expression_type_id(id);
+        auto &type = expression_type(id);
+        if (type.basetype == SPIRType::Struct &&
+            (has_decoration(type_id, DecorationBlock) || has_decoration(type_id, DecorationBufferBlock)))
+            flags = get_buffer_block_flags(id);
+        else
+            flags = get_decoration_bitset(id);
+    }
+    else
+        flags = get_decoration_bitset(id);
+
+    return flags.get(DecorationRestrict) ? (space ? "restrict " : "restrict") : "";
+}
+
 string CompilerMSL::entry_point_arg_stage_in()
@@ -6469,7 +6501,7 @@ string CompilerMSL::entry_point_args_argument_buffer(bool append_comma)
 
         claimed_bindings.set(buffer_binding);
 
-        ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_name(id);
+        ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(id) + to_name(id);
         ep_args += " [[buffer(" + convert_to_string(buffer_binding) + ")]]";
 
         next_metal_resource_index_buffer = max(next_metal_resource_index_buffer, buffer_binding + 1);
@@ -6605,8 +6637,8 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
                 {
                     if (!ep_args.empty())
                         ep_args += ", ";
-                    ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + r.name + "_" +
-                               convert_to_string(i);
+                    ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + to_restrict(var_id) +
+                               r.name + "_" + convert_to_string(i);
                     ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")]]";
                 }
             }
@@ -6614,7 +6646,8 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
             {
                 if (!ep_args.empty())
                     ep_args += ", ";
-                ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + r.name;
+                ep_args +=
+                    get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(var_id) + r.name;
                 ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]";
             }
             break;
@@ -7087,6 +7120,12 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
         // non-constant arrays, but we can create thread const from constant.
         decl = string("thread const ") + decl;
         decl += " (&";
+        const char *restrict_kw = to_restrict(name_id);
+        if (*restrict_kw)
+        {
+            decl += " ";
+            decl += restrict_kw;
+        }
         decl += to_expression(name_id);
         decl += ")";
         decl += type_to_array_glsl(type);
@@ -7125,6 +7164,12 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
         }
 
         decl += " (&";
+        const char *restrict_kw = to_restrict(name_id);
+        if (*restrict_kw)
+        {
+            decl += " ";
+            decl += restrict_kw;
+        }
         decl += to_expression(name_id);
         decl += ")";
         decl += type_to_array_glsl(type);
@@ -7142,6 +7187,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
         }
         decl += "&";
         decl += " ";
+        decl += to_restrict(name_id);
         decl += to_expression(name_id);
     }
     else
@@ -7520,6 +7566,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
     // Pointer?
     if (type.pointer)
     {
+        const char *restrict_kw;
         type_name = join(get_type_address_space(type, id), " ", type_to_glsl(get<SPIRType>(type.parent_type), id));
         switch (type.basetype)
         {
@@ -7531,6 +7578,12 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
         default:
             // Anything else can be a raw pointer.
             type_name += "*";
+            restrict_kw = to_restrict(id);
+            if (*restrict_kw)
+            {
+                type_name += " ";
+                type_name += restrict_kw;
+            }
             break;
         }
         return type_name;
spirv_msl.hpp:

@@ -516,6 +516,7 @@ protected:
     MSLStructMemberKey get_struct_member_key(uint32_t type_id, uint32_t index);
     std::string get_argument_address_space(const SPIRVariable &argument);
     std::string get_type_address_space(const SPIRType &type, uint32_t id);
+    const char *to_restrict(uint32_t id, bool space = true);
     SPIRType &get_stage_in_struct_type();
     SPIRType &get_stage_out_struct_type();
     SPIRType &get_patch_stage_in_struct_type();