MSL: Handle coherent, volatile, and restrict.

This maps the `Coherent`, `Volatile`, and `Restrict` decorations to
their MSL equivalents. I've mapped `Coherent` to `volatile`, since MSL
doesn't have anything weaker than `volatile` but stronger than nothing.
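
To make the mapping concrete, here's a small sketch (the GLSL input is
hypothetical, but the MSL output matches the updated reference shaders
below): a buffer declared `coherent` now picks up `volatile` on its
address space.

// GLSL (hypothetical input):
//     layout(std430, binding = 0) coherent buffer SSBO { vec4 value; };
//
// Generated MSL -- Coherent becomes volatile on the device address space:
struct SSBO
{
    float4 value;
};

kernel void main0(volatile device SSBO& _10 [[buffer(0)]])
{
    _10.value = float4(20.0);
}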

As part of this, I had to remove the implicit `volatile` added for
atomic operation casts. If the buffer is already `coherent` or
`volatile`, then we would add a second `volatile`, which would be
redundant. I think this is OK even when the buffer *doesn't* have
`coherent`: `T *` is implicitly convertible to `volatile T *`, but not
vice versa. It seems to compile OK at any rate. (Note that the
non-`volatile` overloads of the atomic functions documented in the spec
aren't present in the MSL 2.2 stdlib headers.)
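
As a sketch of why this is safe (buffer and kernel names here are
hypothetical, not from the test suite):

#include <metal_stdlib>
using namespace metal;

struct SSBO
{
    uint u32;
};

// Plain buffer: the cast yields device atomic_uint*, which converts
// implicitly to the volatile device atomic_uint* the stdlib atomics take.
kernel void plain(device SSBO& ssbo [[buffer(0)]])
{
    atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
}

// Coherent buffer: the emitted address space already carries volatile,
// so spelling volatile a second time in the cast would be redundant.
kernel void coherent_buf(volatile device SSBO& ssbo [[buffer(0)]])
{
    atomic_fetch_add_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
}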

`restrict` is tricky, because in MSL, as in C++, it needs to go *after*
the asterisk or ampersand for the pointer type it's modifying.
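
Concretely (hypothetical names, just to show the placement):

// Wrong: 'restrict device float4* p' qualifies the wrong thing and won't compile.
// Right: restrict goes after the * or & it modifies:
kernel void copy_one(const device float4* restrict src [[buffer(0)]],
                     device float4* restrict dst [[buffer(1)]])
{
    // With both pointers restrict-qualified, the compiler may assume
    // they never alias.
    *dst = *src;
}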

Another issue is that, in the `Simple`, `GLSL450`, and `Vulkan` memory
models, `Restrict` is the default (i.e., it does not need to be
specified), while MSL likely follows the `OpenCL` model, where `Aliased`
is the default. We probably need to implicitly set either `Restrict` or
`Aliased` depending on the module's declared memory model.
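
Something like this (a sketch only; the enum values are from spirv.hpp,
but the helper itself is hypothetical and not part of this commit):

// Whether the module's memory model makes Restrict the implicit default.
bool restrict_is_default(spv::MemoryModel model)
{
    switch (model)
    {
    case spv::MemoryModelSimple:
    case spv::MemoryModelGLSL450:
    case spv::MemoryModelVulkanKHR:
        return true; // Restrict is implied unless Aliased is present.
    default:
        return false; // OpenCL-style: Aliased unless Restrict is present.
    }
}
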
Chip Davis 2019-07-10 11:17:40 -05:00
Parent 9f3bebe3d0
Commit 058f1a0933
23 changed files with 179 additions and 125 deletions

View file

@@ -20,7 +20,7 @@ uint2 spvTexelBufferCoord(uint tc)
 kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint _29 = atomic_fetch_sub_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
+    uint _29 = atomic_fetch_sub_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
     u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(as_type<int>(as_type<float>(_29))));
 }

View file

@@ -20,7 +20,7 @@ uint2 spvTexelBufferCoord(uint tc)
 kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
+    uint _29 = atomic_fetch_add_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
     u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(as_type<int>(as_type<float>(_29))));
 }

View file

@@ -15,7 +15,7 @@ struct _4
     int4 _m1;
 };
-kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
+kernel void main0(device _3& restrict _5 [[buffer(0)]], device _4& restrict _6 [[buffer(1)]])
 {
-    _6._m0 = _5._m1 + uint4(_5._m0);
+    _6._m0 = uint4(_5._m0) + _5._m1;

View file

@@ -15,7 +15,7 @@ struct _7
     int4 _m1;
 };
-kernel void main0(device _6& _8 [[buffer(0)]], device _7& _9 [[buffer(1)]])
+kernel void main0(device _6& restrict _8 [[buffer(0)]], device _7& restrict _9 [[buffer(1)]])
 {
-    _9._m0 = _8._m1 + uint4(_8._m0);
+    _9._m0 = uint4(_8._m0) + _8._m1;

View file

@@ -16,55 +16,55 @@ kernel void main0(device SSBO& ssbo [[buffer(0)]])
 {
     threadgroup uint shared_u32;
     threadgroup int shared_i32;
-    uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _16 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _18 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _20 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _22 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _24 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _26 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _28 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
     uint _32;
     do
     {
         _32 = 10u;
-    } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u);
-    int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _48 = atomic_exchange_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u);
+    int _36 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _38 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _40 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _42 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _44 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _46 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _48 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
     int _52;
     do
     {
         _52 = 10;
-    } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10);
+    } while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10);
     shared_u32 = 10u;
     shared_i32 = 10;
-    uint _57 = atomic_fetch_add_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _58 = atomic_fetch_or_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _59 = atomic_fetch_xor_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _60 = atomic_fetch_and_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _61 = atomic_fetch_min_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _62 = atomic_fetch_max_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _63 = atomic_exchange_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _57 = atomic_fetch_add_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _58 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _59 = atomic_fetch_xor_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _60 = atomic_fetch_and_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _61 = atomic_fetch_min_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _62 = atomic_fetch_max_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _63 = atomic_exchange_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
     uint _64;
     do
     {
         _64 = 10u;
-    } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u);
-    int _65 = atomic_fetch_add_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _66 = atomic_fetch_or_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _67 = atomic_fetch_xor_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _68 = atomic_fetch_and_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _69 = atomic_fetch_min_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _70 = atomic_fetch_max_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _71 = atomic_exchange_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u);
+    int _65 = atomic_fetch_add_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _66 = atomic_fetch_or_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _67 = atomic_fetch_xor_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _68 = atomic_fetch_and_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _69 = atomic_fetch_min_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _70 = atomic_fetch_max_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _71 = atomic_exchange_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
     int _72;
     do
     {
         _72 = 10;
-    } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10);
+    } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10);
 }

View file

@@ -26,7 +26,7 @@ kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buff
     float4 _29 = _23.in_data[gl_GlobalInvocationID.x];
     if (dot(_29, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875)
     {
-        uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_48.counter, 1u, memory_order_relaxed);
+        uint _52 = atomic_fetch_add_explicit((device atomic_uint*)&_48.counter, 1u, memory_order_relaxed);
         _45.out_data[_52] = _29;
     }
 }

View file

@@ -8,7 +8,7 @@ struct SSBO
     float4 value;
 };
-kernel void main0(device SSBO& _10 [[buffer(0)]])
+kernel void main0(volatile device SSBO& _10 [[buffer(0)]])
 {
     _10.value = float4(20.0);
 }

View file

@@ -8,7 +8,7 @@ struct SSBO
     int4 value;
 };
-kernel void main0(device SSBO& _10 [[buffer(0)]], texture2d<int> uImage [[texture(0)]])
+kernel void main0(volatile device SSBO& _10 [[buffer(0)]], texture2d<int> uImage [[texture(0)]])
 {
     _10.value = uImage.read(uint2(int2(10)));
 }

View file

@@ -28,7 +28,7 @@ kernel void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buff
     float _28 = _22.in_data[gl_GlobalInvocationID.x];
     if (_28 > 12.0)
     {
-        uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_41.count, 1u, memory_order_relaxed);
+        uint _45 = atomic_fetch_add_explicit((device atomic_uint*)&_41.count, 1u, memory_order_relaxed);
         _38.out_data[_45] = _28;
     }
 }

View file

@@ -21,7 +21,7 @@ struct main0_in
     float4 m_17 [[attribute(0)]];
 };
-vertex void main0(main0_in in [[stage_in]], device _23& _25 [[buffer(0)]])
+vertex void main0(main0_in in [[stage_in]], volatile device _23& _25 [[buffer(0)]])
 {
     main0_out out = {};
     out.gl_Position = in.m_17;

View file

@@ -14,7 +14,7 @@ struct SSBO
 kernel void main0(device SSBO& _5 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint _24 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_5.count, 1u, memory_order_relaxed);
+    uint _24 = atomic_fetch_add_explicit((device atomic_uint*)&_5.count, 1u, memory_order_relaxed);
     if (_24 < 1024u)
     {
         _5.data[_24] = gl_GlobalInvocationID.x;

View file

@@ -20,7 +20,7 @@ uint2 spvTexelBufferCoord(uint tc)
 kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint _29 = atomic_fetch_sub_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
+    uint _29 = atomic_fetch_sub_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
     float4 r0;
     r0.x = as_type<float>(_29);
     u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(((uint(as_type<int>(r0.x)) * 1u) + (uint(0) >> 2u))));

View file

@@ -20,7 +20,7 @@ uint2 spvTexelBufferCoord(uint tc)
 kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
+    uint _29 = atomic_fetch_add_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
     float4 r0;
     r0.x = as_type<float>(_29);
     u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(((uint(as_type<int>(r0.x)) * 1u) + (uint(0) >> 2u))));

View file

@@ -15,7 +15,7 @@ struct _4
     int4 _m1;
 };
-kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
+kernel void main0(device _3& restrict _5 [[buffer(0)]], device _4& restrict _6 [[buffer(1)]])
 {
-    _6._m0 = _5._m1 + uint4(_5._m0);
+    _6._m0 = uint4(_5._m0) + _5._m1;

View file

@@ -15,7 +15,7 @@ struct _7
     int4 _m1;
 };
-kernel void main0(device _6& _8 [[buffer(0)]], device _7& _9 [[buffer(1)]])
+kernel void main0(device _6& restrict _8 [[buffer(0)]], device _7& restrict _9 [[buffer(1)]])
 {
-    _9._m0 = _8._m1 + uint4(_8._m0);
+    _9._m0 = uint4(_8._m0) + _8._m1;

View file

@@ -16,55 +16,55 @@ kernel void main0(device SSBO& ssbo [[buffer(0)]])
 {
     threadgroup uint shared_u32;
     threadgroup int shared_i32;
-    uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _16 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _18 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _20 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _22 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _24 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _26 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _28 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
     uint _32;
     do
     {
         _32 = 10u;
-    } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u);
-    int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _48 = atomic_exchange_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u);
+    int _36 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _38 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _40 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _42 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _44 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _46 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _48 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
     int _52;
     do
     {
         _52 = 10;
-    } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10);
+    } while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10);
     shared_u32 = 10u;
     shared_i32 = 10;
-    uint _57 = atomic_fetch_add_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _58 = atomic_fetch_or_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _59 = atomic_fetch_xor_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _60 = atomic_fetch_and_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _61 = atomic_fetch_min_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _62 = atomic_fetch_max_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _63 = atomic_exchange_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _57 = atomic_fetch_add_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _58 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _59 = atomic_fetch_xor_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _60 = atomic_fetch_and_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _61 = atomic_fetch_min_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _62 = atomic_fetch_max_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _63 = atomic_exchange_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
     uint _64;
     do
     {
         _64 = 10u;
-    } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u);
-    int _65 = atomic_fetch_add_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _66 = atomic_fetch_or_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _67 = atomic_fetch_xor_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _68 = atomic_fetch_and_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _69 = atomic_fetch_min_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _70 = atomic_fetch_max_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _71 = atomic_exchange_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u);
+    int _65 = atomic_fetch_add_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _66 = atomic_fetch_or_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _67 = atomic_fetch_xor_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _68 = atomic_fetch_and_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _69 = atomic_fetch_min_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _70 = atomic_fetch_max_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _71 = atomic_exchange_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
     int _72;
     do
     {
         _72 = 10;
-    } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10);
+    } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10);
 }

View file

@@ -27,7 +27,7 @@ kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buff
     float4 idata = _23.in_data[ident];
     if (dot(idata, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875)
     {
-        uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_48.counter, 1u, memory_order_relaxed);
+        uint _52 = atomic_fetch_add_explicit((device atomic_uint*)&_48.counter, 1u, memory_order_relaxed);
         _45.out_data[_52] = idata;
     }
 }

View file

@@ -8,7 +8,7 @@ struct SSBO
     float4 value;
 };
-kernel void main0(device SSBO& _10 [[buffer(0)]])
+kernel void main0(volatile device SSBO& _10 [[buffer(0)]])
 {
     _10.value = float4(20.0);
 }

View file

@@ -8,7 +8,7 @@ struct SSBO
     int4 value;
 };
-kernel void main0(device SSBO& _10 [[buffer(0)]], texture2d<int> uImage [[texture(0)]])
+kernel void main0(volatile device SSBO& _10 [[buffer(0)]], texture2d<int> uImage [[texture(0)]])
 {
     _10.value = uImage.read(uint2(int2(10)));
 }

View file

@@ -29,7 +29,7 @@ kernel void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buff
     float idata = _22.in_data[ident];
     if (idata > 12.0)
     {
-        uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_41.count, 1u, memory_order_relaxed);
+        uint _45 = atomic_fetch_add_explicit((device atomic_uint*)&_41.count, 1u, memory_order_relaxed);
         _38.out_data[_45] = idata;
     }
 }

View file

@@ -21,7 +21,7 @@ struct main0_in
     float4 m_17 [[attribute(0)]];
 };
-vertex void main0(main0_in in [[stage_in]], device _23& _25 [[buffer(0)]])
+vertex void main0(main0_in in [[stage_in]], volatile device _23& _25 [[buffer(0)]])
 {
     main0_out out = {};
     out.gl_Position = in.m_17;

View file

@@ -728,7 +728,7 @@ void CompilerMSL::emit_entry_point_declarations()
     const auto &var = get<SPIRVariable>(array_id);
     const auto &type = get_variable_data_type(var);
     string name = to_name(array_id);
-    statement(get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + name + "[] =");
+    statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(array_id), name, "[] =");
     begin_scope();
     for (uint32_t i = 0; i < type.array[0]; ++i)
         statement(name + "_" + convert_to_string(i) + ",");
@@ -4594,7 +4594,7 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
     string exp = string(op) + "(";
     auto &type = get_pointee_type(expression_type(obj));
-    exp += "(volatile ";
+    exp += "(";
     auto *var = maybe_get_backing_variable(obj);
     if (!var)
         SPIRV_CROSS_THROW("No backing variable for atomic operation.");
@@ -6190,11 +6190,19 @@ string CompilerMSL::func_type_decl(SPIRType &type)
 string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
 {
     const auto &type = get<SPIRType>(argument.basetype);
+
+    Bitset flags;
+    if (type.basetype == SPIRType::Struct &&
+        (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)))
+        flags = ir.get_buffer_block_flags(argument);
+    else
+        flags = get_decoration_bitset(argument.self);
+
+    const char *addr_space = nullptr;
     switch (type.storage)
     {
     case StorageClassWorkgroup:
-        return "threadgroup";
+        addr_space = "threadgroup";
+        break;
 
     case StorageClassStorageBuffer:
     {
@@ -6202,9 +6210,10 @@ string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
         // we should not assume any constness here. Only for global SSBOs.
         bool readonly = false;
         if (has_decoration(type.self, DecorationBlock))
-            readonly = ir.get_buffer_block_flags(argument).get(DecorationNonWritable);
+            readonly = flags.get(DecorationNonWritable);
 
-        return readonly ? "const device" : "device";
+        addr_space = readonly ? "const device" : "device";
+        break;
     }
 
     case StorageClassUniform:
@@ -6215,54 +6224,61 @@ string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
             bool ssbo = has_decoration(type.self, DecorationBufferBlock);
             if (ssbo)
             {
-                bool readonly = ir.get_buffer_block_flags(argument).get(DecorationNonWritable);
-                return readonly ? "const device" : "device";
+                bool readonly = flags.get(DecorationNonWritable);
+                addr_space = readonly ? "const device" : "device";
             }
             else
-                return "constant";
+                addr_space = "constant";
+            break;
         }
-        break;
 
     case StorageClassFunction:
    case StorageClassGeneric:
         // No address space for plain values.
-        return type.pointer ? "thread" : "";
+        addr_space = type.pointer ? "thread" : "";
+        break;
 
     case StorageClassInput:
         if (get_execution_model() == ExecutionModelTessellationControl && argument.basevariable == stage_in_ptr_var_id)
-            return "threadgroup";
+            addr_space = "threadgroup";
         break;
 
     case StorageClassOutput:
         if (capture_output_to_buffer)
-            return "device";
+            addr_space = "device";
         break;
 
     default:
         break;
     }
 
-    return "thread";
+    if (!addr_space)
+        addr_space = "thread";
+
+    return join(flags.get(DecorationVolatile) || flags.get(DecorationCoherent) ? "volatile " : "", addr_space);
 }
 string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id)
 {
+    // This can be called for variable pointer contexts as well, so be very careful about which method we choose.
+    Bitset flags;
+    if (ir.ids[id].get_type() == TypeVariable && type.basetype == SPIRType::Struct &&
+        (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)))
+        flags = get_buffer_block_flags(id);
+    else
+        flags = get_decoration_bitset(id);
+
+    const char *addr_space = nullptr;
     switch (type.storage)
     {
     case StorageClassWorkgroup:
-        return "threadgroup";
+        addr_space = "threadgroup";
+        break;
 
     case StorageClassStorageBuffer:
-    {
-        // This can be called for variable pointer contexts as well, so be very careful about which method we choose.
-        Bitset flags;
-        if (ir.ids[id].get_type() == TypeVariable && has_decoration(type.self, DecorationBlock))
-            flags = get_buffer_block_flags(id);
-        else
-            flags = get_decoration_bitset(id);
-
-        return flags.get(DecorationNonWritable) ? "const device" : "device";
-    }
+        addr_space = flags.get(DecorationNonWritable) ? "const device" : "device";
+        break;
 
     case StorageClassUniform:
     case StorageClassUniformConstant:
@@ -6271,37 +6287,53 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id)
         {
             bool ssbo = has_decoration(type.self, DecorationBufferBlock);
             if (ssbo)
-            {
-                // This can be called for variable pointer contexts as well, so be very careful about which method we choose.
-                Bitset flags;
-                if (ir.ids[id].get_type() == TypeVariable && has_decoration(type.self, DecorationBlock))
-                    flags = get_buffer_block_flags(id);
-                else
-                    flags = get_decoration_bitset(id);
-
-                return flags.get(DecorationNonWritable) ? "const device" : "device";
-            }
+                addr_space = flags.get(DecorationNonWritable) ? "const device" : "device";
             else
-                return "constant";
+                addr_space = "constant";
         }
         else
-            return "constant";
+            addr_space = "constant";
         break;
 
     case StorageClassFunction:
     case StorageClassGeneric:
         // No address space for plain values.
-        return type.pointer ? "thread" : "";
+        addr_space = type.pointer ? "thread" : "";
+        break;
 
     case StorageClassOutput:
         if (capture_output_to_buffer)
-            return "device";
+            addr_space = "device";
         break;
 
     default:
         break;
     }
 
-    return "thread";
+    if (!addr_space)
+        addr_space = "thread";
+
+    return join(flags.get(DecorationVolatile) || flags.get(DecorationCoherent) ? "volatile " : "", addr_space);
 }
 
+const char *CompilerMSL::to_restrict(uint32_t id, bool space)
+{
+    // This can be called for variable pointer contexts as well, so be very careful about which method we choose.
+    Bitset flags;
+    if (ir.ids[id].get_type() == TypeVariable)
+    {
+        uint32_t type_id = expression_type_id(id);
+        auto &type = expression_type(id);
+        if (type.basetype == SPIRType::Struct &&
+            (has_decoration(type_id, DecorationBlock) || has_decoration(type_id, DecorationBufferBlock)))
+            flags = get_buffer_block_flags(id);
+        else
+            flags = get_decoration_bitset(id);
+    }
+    else
+        flags = get_decoration_bitset(id);
+
+    return flags.get(DecorationRestrict) ? (space ? "restrict " : "restrict") : "";
+}
+
 string CompilerMSL::entry_point_arg_stage_in()
@@ -6469,7 +6501,7 @@ string CompilerMSL::entry_point_args_argument_buffer(bool append_comma)
             claimed_bindings.set(buffer_binding);
-            ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_name(id);
+            ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(id) + to_name(id);
             ep_args += " [[buffer(" + convert_to_string(buffer_binding) + ")]]";
             next_metal_resource_index_buffer = max(next_metal_resource_index_buffer, buffer_binding + 1);
@@ -6605,8 +6637,8 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
             {
                 if (!ep_args.empty())
                     ep_args += ", ";
-                ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + r.name + "_" +
-                           convert_to_string(i);
+                ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + to_restrict(var_id) +
+                           r.name + "_" + convert_to_string(i);
                 ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")]]";
             }
         }
@@ -6614,7 +6646,8 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
         {
             if (!ep_args.empty())
                 ep_args += ", ";
-            ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + r.name;
+            ep_args +=
+                get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(var_id) + r.name;
             ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]";
         }
         break;
@@ -7087,6 +7120,12 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
         // non-constant arrays, but we can create thread const from constant.
         decl = string("thread const ") + decl;
         decl += " (&";
+        const char *restrict_kw = to_restrict(name_id);
+        if (*restrict_kw)
+        {
+            decl += " ";
+            decl += restrict_kw;
+        }
         decl += to_expression(name_id);
         decl += ")";
         decl += type_to_array_glsl(type);
@@ -7125,6 +7164,12 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
         }
         decl += " (&";
+        const char *restrict_kw = to_restrict(name_id);
+        if (*restrict_kw)
+        {
+            decl += " ";
+            decl += restrict_kw;
+        }
         decl += to_expression(name_id);
         decl += ")";
         decl += type_to_array_glsl(type);
@@ -7142,6 +7187,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
         }
         decl += "&";
         decl += " ";
+        decl += to_restrict(name_id);
         decl += to_expression(name_id);
     }
     else
@@ -7520,6 +7566,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
     // Pointer?
     if (type.pointer)
     {
+        const char *restrict_kw;
         type_name = join(get_type_address_space(type, id), " ", type_to_glsl(get<SPIRType>(type.parent_type), id));
         switch (type.basetype)
         {
@@ -7531,6 +7578,12 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
         default:
             // Anything else can be a raw pointer.
             type_name += "*";
+            restrict_kw = to_restrict(id);
+            if (*restrict_kw)
+            {
+                type_name += " ";
+                type_name += restrict_kw;
+            }
             break;
         }
         return type_name;

View file

@@ -516,6 +516,7 @@ protected:
     MSLStructMemberKey get_struct_member_key(uint32_t type_id, uint32_t index);
     std::string get_argument_address_space(const SPIRVariable &argument);
     std::string get_type_address_space(const SPIRType &type, uint32_t id);
+    const char *to_restrict(uint32_t id, bool space = true);
     SPIRType &get_stage_in_struct_type();
    SPIRType &get_stage_out_struct_type();
     SPIRType &get_patch_stage_in_struct_type();