diff --git a/reference/opt/shaders-msl/asm/comp/atomic-decrement.asm.comp b/reference/opt/shaders-msl/asm/comp/atomic-decrement.asm.comp
index feb7dbbe..04d92a34 100644
--- a/reference/opt/shaders-msl/asm/comp/atomic-decrement.asm.comp
+++ b/reference/opt/shaders-msl/asm/comp/atomic-decrement.asm.comp
@@ -20,7 +20,7 @@ uint2 spvTexelBufferCoord(uint tc)
 }
 
 kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint _29 = atomic_fetch_sub_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
+    uint _29 = atomic_fetch_sub_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
     u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(as_type<uint>(as_type<float>(_29))));
 }
diff --git a/reference/opt/shaders-msl/asm/comp/atomic-increment.asm.comp b/reference/opt/shaders-msl/asm/comp/atomic-increment.asm.comp
index 22409301..b0fd1d4f 100644
--- a/reference/opt/shaders-msl/asm/comp/atomic-increment.asm.comp
+++ b/reference/opt/shaders-msl/asm/comp/atomic-increment.asm.comp
@@ -20,7 +20,7 @@ uint2 spvTexelBufferCoord(uint tc)
 }
 
 kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
+    uint _29 = atomic_fetch_add_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
     u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(as_type<uint>(as_type<float>(_29))));
 }
diff --git a/reference/opt/shaders-msl/asm/comp/bitcast_iadd.asm.comp b/reference/opt/shaders-msl/asm/comp/bitcast_iadd.asm.comp
index 47ce85f8..ad61d750 100644
--- a/reference/opt/shaders-msl/asm/comp/bitcast_iadd.asm.comp
+++ b/reference/opt/shaders-msl/asm/comp/bitcast_iadd.asm.comp
@@ -15,7 +15,7 @@ struct _4
     int4 _m1;
 };
 
-kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
+kernel void main0(device _3& restrict _5 [[buffer(0)]], device _4& restrict _6 [[buffer(1)]])
 {
     _6._m0 = _5._m1 + uint4(_5._m0);
     _6._m0 = uint4(_5._m0) + _5._m1;
diff --git a/reference/opt/shaders-msl/asm/comp/multiple-entry.asm.comp b/reference/opt/shaders-msl/asm/comp/multiple-entry.asm.comp
index 76527332..25ccf623 100644
--- a/reference/opt/shaders-msl/asm/comp/multiple-entry.asm.comp
+++ b/reference/opt/shaders-msl/asm/comp/multiple-entry.asm.comp
@@ -15,7 +15,7 @@ struct _7
     int4 _m1;
 };
 
-kernel void main0(device _6& _8 [[buffer(0)]], device _7& _9 [[buffer(1)]])
+kernel void main0(device _6& restrict _8 [[buffer(0)]], device _7& restrict _9 [[buffer(1)]])
 {
     _9._m0 = _8._m1 + uint4(_8._m0);
     _9._m0 = uint4(_8._m0) + _8._m1;
diff --git a/reference/opt/shaders-msl/comp/atomic.comp b/reference/opt/shaders-msl/comp/atomic.comp
index 43e6a8f0..04721502 100644
--- a/reference/opt/shaders-msl/comp/atomic.comp
+++ b/reference/opt/shaders-msl/comp/atomic.comp
@@ -16,55 +16,55 @@ kernel void main0(device SSBO& ssbo [[buffer(0)]])
 {
     threadgroup uint shared_u32;
     threadgroup int shared_i32;
-    uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _16 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _18 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _20 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _22 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _24 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _26 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _28 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
     uint _32;
     do
     {
         _32 = 10u;
-    } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u);
-    int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _48 = atomic_exchange_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u);
+    int _36 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _38 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _40 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _42 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _44 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _46 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _48 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
     int _52;
     do
     {
         _52 = 10;
-    } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10);
+    } while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10);
     shared_u32 = 10u;
     shared_i32 = 10;
-    uint _57 = atomic_fetch_add_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _58 = atomic_fetch_or_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _59 = atomic_fetch_xor_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _60 = atomic_fetch_and_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _61 = atomic_fetch_min_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _62 = atomic_fetch_max_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _63 = atomic_exchange_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _57 = atomic_fetch_add_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _58 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _59 = atomic_fetch_xor_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _60 = atomic_fetch_and_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _61 = atomic_fetch_min_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _62 = atomic_fetch_max_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _63 = atomic_exchange_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
     uint _64;
     do
     {
         _64 = 10u;
-    } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u);
-    int _65 = atomic_fetch_add_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _66 = atomic_fetch_or_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _67 = atomic_fetch_xor_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _68 = atomic_fetch_and_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _69 = atomic_fetch_min_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _70 = atomic_fetch_max_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _71 = atomic_exchange_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u);
+    int _65 = atomic_fetch_add_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _66 = atomic_fetch_or_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _67 = atomic_fetch_xor_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _68 = atomic_fetch_and_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _69 = atomic_fetch_min_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _70 = atomic_fetch_max_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _71 = atomic_exchange_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
     int _72;
     do
     {
         _72 = 10;
-    } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10);
+    } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10);
 }
diff --git a/reference/opt/shaders-msl/comp/basic.comp b/reference/opt/shaders-msl/comp/basic.comp
index 22ec7419..e37d4fc5 100644
--- a/reference/opt/shaders-msl/comp/basic.comp
+++ b/reference/opt/shaders-msl/comp/basic.comp
@@ -26,7 +26,7 @@ kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buff
     float4 _29 = _23.in_data[gl_GlobalInvocationID.x];
     if (dot(_29, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875)
     {
-        uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_48.counter, 1u, memory_order_relaxed);
+        uint _52 = atomic_fetch_add_explicit((device atomic_uint*)&_48.counter, 1u, memory_order_relaxed);
         _45.out_data[_52] = _29;
     }
 }
diff --git a/reference/opt/shaders-msl/comp/coherent-block.comp b/reference/opt/shaders-msl/comp/coherent-block.comp
index bec9b218..580b9e3e 100644
--- a/reference/opt/shaders-msl/comp/coherent-block.comp
+++ b/reference/opt/shaders-msl/comp/coherent-block.comp
@@ -8,7 +8,7 @@ struct SSBO
     float4 value;
 };
 
-kernel void main0(device SSBO& _10 [[buffer(0)]])
+kernel void main0(volatile device SSBO& _10 [[buffer(0)]])
 {
     _10.value = float4(20.0);
 }
diff --git a/reference/opt/shaders-msl/comp/coherent-image.comp b/reference/opt/shaders-msl/comp/coherent-image.comp
index 0fe044fb..c6af46b5 100644
--- a/reference/opt/shaders-msl/comp/coherent-image.comp
+++ b/reference/opt/shaders-msl/comp/coherent-image.comp
@@ -8,7 +8,7 @@ struct SSBO
     int4 value;
 };
 
-kernel void main0(device SSBO& _10 [[buffer(0)]], texture2d<int> uImage [[texture(0)]])
+kernel void main0(volatile device SSBO& _10 [[buffer(0)]], texture2d<int> uImage [[texture(0)]])
 {
     _10.value = uImage.read(uint2(int2(10)));
 }
diff --git a/reference/opt/shaders-msl/comp/culling.comp b/reference/opt/shaders-msl/comp/culling.comp
index 95ffff83..55735475 100644
--- a/reference/opt/shaders-msl/comp/culling.comp
+++ b/reference/opt/shaders-msl/comp/culling.comp
@@ -28,7 +28,7 @@ kernel void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buff
     float _28 = _22.in_data[gl_GlobalInvocationID.x];
     if (_28 > 12.0)
     {
-        uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_41.count, 1u, memory_order_relaxed);
+        uint _45 = atomic_fetch_add_explicit((device atomic_uint*)&_41.count, 1u, memory_order_relaxed);
         _38.out_data[_45] = _28;
     }
 }
diff --git a/reference/opt/shaders-msl/vert/no_stage_out.write_buff_atomic.vert b/reference/opt/shaders-msl/vert/no_stage_out.write_buff_atomic.vert
index ca4d6a5b..e2f38878 100644
--- a/reference/opt/shaders-msl/vert/no_stage_out.write_buff_atomic.vert
+++ b/reference/opt/shaders-msl/vert/no_stage_out.write_buff_atomic.vert
@@ -21,7 +21,7 @@ struct main0_in
     float4 m_17 [[attribute(0)]];
 };
 
-vertex void main0(main0_in in [[stage_in]], device _23& _25 [[buffer(0)]])
+vertex void main0(main0_in in [[stage_in]], volatile device _23& _25 [[buffer(0)]])
 {
     main0_out out = {};
     out.gl_Position = in.m_17;
diff --git a/reference/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp b/reference/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp
index 8b669428..4624ef0b 100644
--- a/reference/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp
+++ b/reference/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp
@@ -14,7 +14,7 @@ struct SSBO
 
 kernel void main0(device SSBO& _5 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint _24 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_5.count, 1u, memory_order_relaxed);
+    uint _24 = atomic_fetch_add_explicit((device atomic_uint*)&_5.count, 1u, memory_order_relaxed);
     if (_24 < 1024u)
     {
         _5.data[_24] = gl_GlobalInvocationID.x;
diff --git a/reference/shaders-msl/asm/comp/atomic-decrement.asm.comp b/reference/shaders-msl/asm/comp/atomic-decrement.asm.comp
index 95841a78..b06c0e9c 100644
--- a/reference/shaders-msl/asm/comp/atomic-decrement.asm.comp
+++ b/reference/shaders-msl/asm/comp/atomic-decrement.asm.comp
@@ -20,7 +20,7 @@ uint2 spvTexelBufferCoord(uint tc)
 
 kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint _29 = atomic_fetch_sub_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
+    uint _29 = atomic_fetch_sub_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
     float4 r0;
     r0.x = as_type<float>(_29);
     u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(((uint(as_type<uint>(r0.x)) * 1u) + (uint(0) >> 2u))));
diff --git a/reference/shaders-msl/asm/comp/atomic-increment.asm.comp b/reference/shaders-msl/asm/comp/atomic-increment.asm.comp
index cd78fa2c..71af7bee 100644
--- a/reference/shaders-msl/asm/comp/atomic-increment.asm.comp
+++ b/reference/shaders-msl/asm/comp/atomic-increment.asm.comp
@@ -20,7 +20,7 @@ uint2 spvTexelBufferCoord(uint tc)
 
 kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
+    uint _29 = atomic_fetch_add_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
     float4 r0;
     r0.x = as_type<float>(_29);
     u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(((uint(as_type<uint>(r0.x)) * 1u) + (uint(0) >> 2u))));
diff --git a/reference/shaders-msl/asm/comp/bitcast_iadd.asm.comp b/reference/shaders-msl/asm/comp/bitcast_iadd.asm.comp
index 47ce85f8..ad61d750 100644
--- a/reference/shaders-msl/asm/comp/bitcast_iadd.asm.comp
+++ b/reference/shaders-msl/asm/comp/bitcast_iadd.asm.comp
@@ -15,7 +15,7 @@ struct _4
     int4 _m1;
 };
 
-kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
+kernel void main0(device _3& restrict _5 [[buffer(0)]], device _4& restrict _6 [[buffer(1)]])
 {
     _6._m0 = _5._m1 + uint4(_5._m0);
     _6._m0 = uint4(_5._m0) + _5._m1;
diff --git a/reference/shaders-msl/asm/comp/multiple-entry.asm.comp b/reference/shaders-msl/asm/comp/multiple-entry.asm.comp
index 76527332..25ccf623 100644
--- a/reference/shaders-msl/asm/comp/multiple-entry.asm.comp
+++ b/reference/shaders-msl/asm/comp/multiple-entry.asm.comp
@@ -15,7 +15,7 @@ struct _7
     int4 _m1;
 };
 
-kernel void main0(device _6& _8 [[buffer(0)]], device _7& _9 [[buffer(1)]])
+kernel void main0(device _6& restrict _8 [[buffer(0)]], device _7& restrict _9 [[buffer(1)]])
 {
     _9._m0 = _8._m1 + uint4(_8._m0);
     _9._m0 = uint4(_8._m0) + _8._m1;
diff --git a/reference/shaders-msl/comp/atomic.comp b/reference/shaders-msl/comp/atomic.comp
index 43e6a8f0..04721502 100644
--- a/reference/shaders-msl/comp/atomic.comp
+++ b/reference/shaders-msl/comp/atomic.comp
@@ -16,55 +16,55 @@ kernel void main0(device SSBO& ssbo [[buffer(0)]])
 {
     threadgroup uint shared_u32;
     threadgroup int shared_i32;
-    uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _16 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _18 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _20 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _22 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _24 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _26 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _28 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
     uint _32;
     do
     {
         _32 = 10u;
-    } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u);
-    int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _48 = atomic_exchange_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u);
+    int _36 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _38 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _40 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _42 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _44 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _46 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _48 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
     int _52;
     do
     {
         _52 = 10;
-    } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10);
+    } while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10);
     shared_u32 = 10u;
     shared_i32 = 10;
-    uint _57 = atomic_fetch_add_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _58 = atomic_fetch_or_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _59 = atomic_fetch_xor_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _60 = atomic_fetch_and_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _61 = atomic_fetch_min_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _62 = atomic_fetch_max_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _63 = atomic_exchange_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _57 = atomic_fetch_add_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _58 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _59 = atomic_fetch_xor_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _60 = atomic_fetch_and_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _61 = atomic_fetch_min_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _62 = atomic_fetch_max_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _63 = atomic_exchange_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
     uint _64;
     do
     {
         _64 = 10u;
-    } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u);
-    int _65 = atomic_fetch_add_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _66 = atomic_fetch_or_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _67 = atomic_fetch_xor_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _68 = atomic_fetch_and_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _69 = atomic_fetch_min_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _70 = atomic_fetch_max_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _71 = atomic_exchange_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u);
+    int _65 = atomic_fetch_add_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _66 = atomic_fetch_or_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _67 = atomic_fetch_xor_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _68 = atomic_fetch_and_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _69 = atomic_fetch_min_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _70 = atomic_fetch_max_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _71 = atomic_exchange_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
     int _72;
     do
     {
         _72 = 10;
-    } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10);
+    } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10);
 }
diff --git a/reference/shaders-msl/comp/basic.comp b/reference/shaders-msl/comp/basic.comp
index 6410894b..36b419b7 100644
--- a/reference/shaders-msl/comp/basic.comp
+++ b/reference/shaders-msl/comp/basic.comp
@@ -27,7 +27,7 @@ kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buff
     float4 idata = _23.in_data[ident];
     if (dot(idata, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875)
     {
-        uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_48.counter, 1u, memory_order_relaxed);
+        uint _52 = atomic_fetch_add_explicit((device atomic_uint*)&_48.counter, 1u, memory_order_relaxed);
         _45.out_data[_52] = idata;
     }
 }
diff --git a/reference/shaders-msl/comp/coherent-block.comp b/reference/shaders-msl/comp/coherent-block.comp
index bec9b218..580b9e3e 100644
--- a/reference/shaders-msl/comp/coherent-block.comp
+++ b/reference/shaders-msl/comp/coherent-block.comp
@@ -8,7 +8,7 @@ struct SSBO
     float4 value;
 };
 
-kernel void main0(device SSBO& _10 [[buffer(0)]])
+kernel void main0(volatile device SSBO& _10 [[buffer(0)]])
 {
     _10.value = float4(20.0);
 }
diff --git a/reference/shaders-msl/comp/coherent-image.comp b/reference/shaders-msl/comp/coherent-image.comp
index 0fe044fb..c6af46b5 100644
--- a/reference/shaders-msl/comp/coherent-image.comp
+++ b/reference/shaders-msl/comp/coherent-image.comp
@@ -8,7 +8,7 @@ struct SSBO
     int4 value;
 };
 
-kernel void main0(device SSBO& _10 [[buffer(0)]], texture2d<int> uImage [[texture(0)]])
+kernel void main0(volatile device SSBO& _10 [[buffer(0)]], texture2d<int> uImage [[texture(0)]])
 {
     _10.value = uImage.read(uint2(int2(10)));
 }
diff --git a/reference/shaders-msl/comp/culling.comp b/reference/shaders-msl/comp/culling.comp
index 32acf599..13578363 100644
--- a/reference/shaders-msl/comp/culling.comp
+++ b/reference/shaders-msl/comp/culling.comp
@@ -29,7 +29,7 @@ kernel void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buff
     float idata = _22.in_data[ident];
     if (idata > 12.0)
     {
-        uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_41.count, 1u, memory_order_relaxed);
+        uint _45 = atomic_fetch_add_explicit((device atomic_uint*)&_41.count, 1u, memory_order_relaxed);
         _38.out_data[_45] = idata;
     }
 }
diff --git a/reference/shaders-msl/vert/no_stage_out.write_buff_atomic.vert b/reference/shaders-msl/vert/no_stage_out.write_buff_atomic.vert
index 9fe99e29..cb4a2195 100644
--- a/reference/shaders-msl/vert/no_stage_out.write_buff_atomic.vert
+++ b/reference/shaders-msl/vert/no_stage_out.write_buff_atomic.vert
@@ -21,7 +21,7 @@ struct main0_in
     float4 m_17 [[attribute(0)]];
 };
 
-vertex void main0(main0_in in [[stage_in]], device _23& _25 [[buffer(0)]])
+vertex void main0(main0_in in [[stage_in]], volatile device _23& _25 [[buffer(0)]])
 {
     main0_out out = {};
     out.gl_Position = in.m_17;
diff --git a/spirv_msl.cpp b/spirv_msl.cpp
index eaee10a0..f53e4f83 100644
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@@ -728,7 +728,7 @@ void CompilerMSL::emit_entry_point_declarations()
 		const auto &var = get<SPIRVariable>(array_id);
 		const auto &type = get_variable_data_type(var);
 		string name = to_name(array_id);
-		statement(get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + name + "[] =");
+		statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(array_id), name, "[] =");
 		begin_scope();
 		for (uint32_t i = 0; i < type.array[0]; ++i)
 			statement(name + "_" + convert_to_string(i) + ",");
@@ -4594,7 +4594,7 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
 	string exp = string(op) + "(";
 
 	auto &type = get_pointee_type(expression_type(obj));
-	exp += "(volatile ";
+	exp += "(";
 	auto *var = maybe_get_backing_variable(obj);
 	if (!var)
 		SPIRV_CROSS_THROW("No backing variable for atomic operation.");
@@ -6190,11 +6190,19 @@ string CompilerMSL::func_type_decl(SPIRType &type)
 string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
 {
 	const auto &type = get<SPIRType>(argument.basetype);
 
+	Bitset flags;
+	if (type.basetype == SPIRType::Struct && (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)))
+		flags = ir.get_buffer_block_flags(argument);
+	else
+		flags = get_decoration_bitset(argument.self);
+
+	const char *addr_space = nullptr;
 	switch (type.storage)
 	{
 	case StorageClassWorkgroup:
-		return "threadgroup";
+		addr_space = "threadgroup";
+		break;
 
 	case StorageClassStorageBuffer:
 	{
@@ -6202,9 +6210,10 @@ string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
 		// we should not assume any constness here. Only for global SSBOs.
 		bool readonly = false;
 		if (has_decoration(type.self, DecorationBlock))
-			readonly = ir.get_buffer_block_flags(argument).get(DecorationNonWritable);
+			readonly = flags.get(DecorationNonWritable);
 
-		return readonly ? "const device" : "device";
+		addr_space = readonly ? "const device" : "device";
+		break;
 	}
 
 	case StorageClassUniform:
@@ -6215,54 +6224,61 @@ string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
 		bool ssbo = has_decoration(type.self, DecorationBufferBlock);
 		if (ssbo)
 		{
-			bool readonly = ir.get_buffer_block_flags(argument).get(DecorationNonWritable);
-			return readonly ? "const device" : "device";
+			bool readonly = flags.get(DecorationNonWritable);
+			addr_space = readonly ? "const device" : "device";
 		}
 		else
-			return "constant";
+			addr_space = "constant";
+		break;
 	}
 	break;
 
 	case StorageClassFunction:
 	case StorageClassGeneric:
 		// No address space for plain values.
-		return type.pointer ? "thread" : "";
+		addr_space = type.pointer ? "thread" : "";
+		break;
 
 	case StorageClassInput:
 		if (get_execution_model() == ExecutionModelTessellationControl && argument.basevariable == stage_in_ptr_var_id)
-			return "threadgroup";
+			addr_space = "threadgroup";
 		break;
 
 	case StorageClassOutput:
 		if (capture_output_to_buffer)
-			return "device";
+			addr_space = "device";
 		break;
 
 	default:
 		break;
 	}
 
-	return "thread";
+	if (!addr_space)
+		addr_space = "thread";
+
+	return join(flags.get(DecorationVolatile) || flags.get(DecorationCoherent) ? "volatile " : "", addr_space);
 }
 
 string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id)
 {
+	// This can be called for variable pointer contexts as well, so be very careful about which method we choose.
+	Bitset flags;
+	if (ir.ids[id].get_type() == TypeVariable && type.basetype == SPIRType::Struct &&
+	    (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)))
+		flags = get_buffer_block_flags(id);
+	else
+		flags = get_decoration_bitset(id);
+
+	const char *addr_space = nullptr;
 	switch (type.storage)
 	{
 	case StorageClassWorkgroup:
-		return "threadgroup";
+		addr_space = "threadgroup";
+		break;
 
 	case StorageClassStorageBuffer:
-	{
-		// This can be called for variable pointer contexts as well, so be very careful about which method we choose.
-		Bitset flags;
-		if (ir.ids[id].get_type() == TypeVariable && has_decoration(type.self, DecorationBlock))
-			flags = get_buffer_block_flags(id);
-		else
-			flags = get_decoration_bitset(id);
-
-		return flags.get(DecorationNonWritable) ? "const device" : "device";
-	}
+		addr_space = flags.get(DecorationNonWritable) ? "const device" : "device";
+		break;
 
 	case StorageClassUniform:
 	case StorageClassUniformConstant:
@@ -6271,37 +6287,53 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id)
 	{
 		bool ssbo = has_decoration(type.self, DecorationBufferBlock);
 		if (ssbo)
-		{
-			// This can be called for variable pointer contexts as well, so be very careful about which method we choose.
-			Bitset flags;
-			if (ir.ids[id].get_type() == TypeVariable && has_decoration(type.self, DecorationBlock))
-				flags = get_buffer_block_flags(id);
-			else
-				flags = get_decoration_bitset(id);
-
-			return flags.get(DecorationNonWritable) ? "const device" : "device";
-		}
+			addr_space = flags.get(DecorationNonWritable) ? "const device" : "device";
 		else
-			return "constant";
+			addr_space = "constant";
 	}
 	else
-		return "constant";
+		addr_space = "constant";
+	break;
 
 	case StorageClassFunction:
 	case StorageClassGeneric:
 		// No address space for plain values.
-		return type.pointer ? "thread" : "";
+		addr_space = type.pointer ? "thread" : "";
+		break;
 
 	case StorageClassOutput:
 		if (capture_output_to_buffer)
-			return "device";
+			addr_space = "device";
 		break;
 
 	default:
 		break;
 	}
 
-	return "thread";
+	if (!addr_space)
+		addr_space = "thread";
+
+	return join(flags.get(DecorationVolatile) || flags.get(DecorationCoherent) ? "volatile " : "", addr_space);
+}
+
+const char *CompilerMSL::to_restrict(uint32_t id, bool space)
+{
+	// This can be called for variable pointer contexts as well, so be very careful about which method we choose.
+	Bitset flags;
+	if (ir.ids[id].get_type() == TypeVariable)
+	{
+		uint32_t type_id = expression_type_id(id);
+		auto &type = expression_type(id);
+		if (type.basetype == SPIRType::Struct &&
+		    (has_decoration(type_id, DecorationBlock) || has_decoration(type_id, DecorationBufferBlock)))
+			flags = get_buffer_block_flags(id);
+		else
+			flags = get_decoration_bitset(id);
+	}
+	else
+		flags = get_decoration_bitset(id);
+
+	return flags.get(DecorationRestrict) ? (space ? "restrict " : "restrict") : "";
 }
 
 string CompilerMSL::entry_point_arg_stage_in()
@@ -6469,7 +6501,7 @@ string CompilerMSL::entry_point_args_argument_buffer(bool append_comma)
 
 		claimed_bindings.set(buffer_binding);
 
-		ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_name(id);
+		ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(id) + to_name(id);
 		ep_args += " [[buffer(" + convert_to_string(buffer_binding) + ")]]";
 
 		next_metal_resource_index_buffer = max(next_metal_resource_index_buffer, buffer_binding + 1);
@@ -6605,8 +6637,8 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
 				{
 					if (!ep_args.empty())
 						ep_args += ", ";
-					ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + r.name + "_" +
-					           convert_to_string(i);
+					ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + to_restrict(var_id) +
+					           r.name + "_" + convert_to_string(i);
 					ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")]]";
 				}
 			}
@@ -6614,7 +6646,8 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
 			{
 				if (!ep_args.empty())
 					ep_args += ", ";
-				ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + r.name;
+				ep_args +=
+				    get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(var_id) + r.name;
 				ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]";
 			}
 			break;
@@ -7087,6 +7120,12 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
 			// non-constant arrays, but we can create thread const from constant.
 			decl = string("thread const ") + decl;
 			decl += " (&";
+			const char *restrict_kw = to_restrict(name_id);
+			if (*restrict_kw)
+			{
+				decl += " ";
+				decl += restrict_kw;
+			}
 			decl += to_expression(name_id);
 			decl += ")";
 			decl += type_to_array_glsl(type);
@@ -7125,6 +7164,12 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
 		}
 
 		decl += " (&";
+		const char *restrict_kw = to_restrict(name_id);
+		if (*restrict_kw)
+		{
+			decl += " ";
+			decl += restrict_kw;
+		}
 		decl += to_expression(name_id);
 		decl += ")";
 		decl += type_to_array_glsl(type);
@@ -7142,6 +7187,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
 		}
 		decl += "&";
 		decl += " ";
+		decl += to_restrict(name_id);
 		decl += to_expression(name_id);
 	}
 	else
@@ -7520,6 +7566,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
 	// Pointer?
 	if (type.pointer)
 	{
+		const char *restrict_kw;
 		type_name = join(get_type_address_space(type, id), " ", type_to_glsl(get<SPIRType>(type.parent_type), id));
 		switch (type.basetype)
 		{
@@ -7531,6 +7578,12 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
 		default:
 			// Anything else can be a raw pointer.
 			type_name += "*";
+			restrict_kw = to_restrict(id);
+			if (*restrict_kw)
+			{
+				type_name += " ";
+				type_name += restrict_kw;
+			}
 			break;
 		}
 		return type_name;
diff --git a/spirv_msl.hpp b/spirv_msl.hpp
index 9f2bab49..6b0cb8e1 100644
--- a/spirv_msl.hpp
+++ b/spirv_msl.hpp
@@ -516,6 +516,7 @@ protected:
 	MSLStructMemberKey get_struct_member_key(uint32_t type_id, uint32_t index);
 	std::string get_argument_address_space(const SPIRVariable &argument);
 	std::string get_type_address_space(const SPIRType &type, uint32_t id);
+	const char *to_restrict(uint32_t id, bool space = true);
 	SPIRType &get_stage_in_struct_type();
 	SPIRType &get_stage_out_struct_type();
 	SPIRType &get_patch_stage_in_struct_type();