diff --git a/reference/opt/shaders-msl/asm/comp/atomic-decrement.asm.comp b/reference/opt/shaders-msl/asm/comp/atomic-decrement.asm.comp
index feb7dbbe..04d92a34 100644
--- a/reference/opt/shaders-msl/asm/comp/atomic-decrement.asm.comp
+++ b/reference/opt/shaders-msl/asm/comp/atomic-decrement.asm.comp
@@ -20,7 +20,7 @@ uint2 spvTexelBufferCoord(uint tc)
 }
 
 kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint _29 = atomic_fetch_sub_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
+    uint _29 = atomic_fetch_sub_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
     u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(as_type<uint>(as_type<float>(_29))));
 }
diff --git a/reference/opt/shaders-msl/asm/comp/atomic-increment.asm.comp b/reference/opt/shaders-msl/asm/comp/atomic-increment.asm.comp
index 22409301..b0fd1d4f 100644
--- a/reference/opt/shaders-msl/asm/comp/atomic-increment.asm.comp
+++ b/reference/opt/shaders-msl/asm/comp/atomic-increment.asm.comp
@@ -20,7 +20,7 @@ uint2 spvTexelBufferCoord(uint tc)
 }
 
 kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
+    uint _29 = atomic_fetch_add_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
     u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(as_type<uint>(as_type<float>(_29))));
 }
diff --git a/reference/opt/shaders-msl/asm/comp/bitcast_iadd.asm.comp b/reference/opt/shaders-msl/asm/comp/bitcast_iadd.asm.comp
index 47ce85f8..ad61d750 100644
--- a/reference/opt/shaders-msl/asm/comp/bitcast_iadd.asm.comp
+++ b/reference/opt/shaders-msl/asm/comp/bitcast_iadd.asm.comp
@@ -15,7 +15,7 @@ struct _4
     int4 _m1;
 };
 
-kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
+kernel void main0(device _3& restrict _5 [[buffer(0)]], device _4& restrict _6 [[buffer(1)]])
 {
     _6._m0 = _5._m1 + uint4(_5._m0);
     _6._m0 = uint4(_5._m0) + _5._m1;
diff --git a/reference/opt/shaders-msl/asm/comp/multiple-entry.asm.comp b/reference/opt/shaders-msl/asm/comp/multiple-entry.asm.comp
index 76527332..25ccf623 100644
--- a/reference/opt/shaders-msl/asm/comp/multiple-entry.asm.comp
+++ b/reference/opt/shaders-msl/asm/comp/multiple-entry.asm.comp
@@ -15,7 +15,7 @@ struct _7
     int4 _m1;
 };
 
-kernel void main0(device _6& _8 [[buffer(0)]], device _7& _9 [[buffer(1)]])
+kernel void main0(device _6& restrict _8 [[buffer(0)]], device _7& restrict _9 [[buffer(1)]])
 {
     _9._m0 = _8._m1 + uint4(_8._m0);
     _9._m0 = uint4(_8._m0) + _8._m1;
diff --git a/reference/opt/shaders-msl/comp/atomic.comp b/reference/opt/shaders-msl/comp/atomic.comp
index 43e6a8f0..04721502 100644
--- a/reference/opt/shaders-msl/comp/atomic.comp
+++ b/reference/opt/shaders-msl/comp/atomic.comp
@@ -16,55 +16,55 @@ kernel void main0(device SSBO& ssbo [[buffer(0)]])
 {
     threadgroup uint shared_u32;
     threadgroup int shared_i32;
-    uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _16 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _18 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _20 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _22 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _24 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _26 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _28 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
     uint _32;
     do
     {
         _32 = 10u;
-    } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u);
-    int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _48 = atomic_exchange_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u);
+    int _36 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _38 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _40 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _42 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _44 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _46 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _48 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
     int _52;
     do
     {
         _52 = 10;
-    } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10);
+    } while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10);
     shared_u32 = 10u;
     shared_i32 = 10;
-    uint _57 = atomic_fetch_add_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _58 = atomic_fetch_or_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _59 = atomic_fetch_xor_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _60 = atomic_fetch_and_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _61 = atomic_fetch_min_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _62 = atomic_fetch_max_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _63 = atomic_exchange_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _57 = atomic_fetch_add_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _58 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _59 = atomic_fetch_xor_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _60 = atomic_fetch_and_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _61 = atomic_fetch_min_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _62 = atomic_fetch_max_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _63 = atomic_exchange_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
     uint _64;
     do
     {
         _64 = 10u;
-    } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u);
-    int _65 = atomic_fetch_add_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _66 = atomic_fetch_or_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _67 = atomic_fetch_xor_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _68 = atomic_fetch_and_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _69 = atomic_fetch_min_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _70 = atomic_fetch_max_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _71 = atomic_exchange_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u);
+    int _65 = atomic_fetch_add_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _66 = atomic_fetch_or_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _67 = atomic_fetch_xor_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _68 = atomic_fetch_and_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _69 = atomic_fetch_min_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _70 = atomic_fetch_max_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _71 = atomic_exchange_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
     int _72;
     do
     {
         _72 = 10;
-    } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10);
+    } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10);
 }
diff --git a/reference/opt/shaders-msl/comp/basic.comp b/reference/opt/shaders-msl/comp/basic.comp
index 22ec7419..e37d4fc5 100644
--- a/reference/opt/shaders-msl/comp/basic.comp
+++ b/reference/opt/shaders-msl/comp/basic.comp
@@ -26,7 +26,7 @@ kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buff
     float4 _29 = _23.in_data[gl_GlobalInvocationID.x];
     if (dot(_29, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875)
     {
-        uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_48.counter, 1u, memory_order_relaxed);
+        uint _52 = atomic_fetch_add_explicit((device atomic_uint*)&_48.counter, 1u, memory_order_relaxed);
         _45.out_data[_52] = _29;
     }
 }
diff --git a/reference/opt/shaders-msl/comp/coherent-block.comp b/reference/opt/shaders-msl/comp/coherent-block.comp
index bec9b218..580b9e3e 100644
--- a/reference/opt/shaders-msl/comp/coherent-block.comp
+++ b/reference/opt/shaders-msl/comp/coherent-block.comp
@@ -8,7 +8,7 @@ struct SSBO
     float4 value;
 };
 
-kernel void main0(device SSBO& _10 [[buffer(0)]])
+kernel void main0(volatile device SSBO& _10 [[buffer(0)]])
 {
     _10.value = float4(20.0);
 }
diff --git a/reference/opt/shaders-msl/comp/coherent-image.comp b/reference/opt/shaders-msl/comp/coherent-image.comp
index 0fe044fb..c6af46b5 100644
--- a/reference/opt/shaders-msl/comp/coherent-image.comp
+++ b/reference/opt/shaders-msl/comp/coherent-image.comp
@@ -8,7 +8,7 @@ struct SSBO
     int4 value;
 };
 
-kernel void main0(device SSBO& _10 [[buffer(0)]], texture2d<int> uImage [[texture(0)]])
+kernel void main0(volatile device SSBO& _10 [[buffer(0)]], texture2d<int> uImage [[texture(0)]])
 {
     _10.value = uImage.read(uint2(int2(10)));
 }
diff --git a/reference/opt/shaders-msl/comp/culling.comp b/reference/opt/shaders-msl/comp/culling.comp
index 95ffff83..55735475 100644
--- a/reference/opt/shaders-msl/comp/culling.comp
+++ b/reference/opt/shaders-msl/comp/culling.comp
@@ -28,7 +28,7 @@ kernel void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buff
     float _28 = _22.in_data[gl_GlobalInvocationID.x];
     if (_28 > 12.0)
     {
-        uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_41.count, 1u, memory_order_relaxed);
+        uint _45 = atomic_fetch_add_explicit((device atomic_uint*)&_41.count, 1u, memory_order_relaxed);
         _38.out_data[_45] = _28;
     }
 }
diff --git a/reference/opt/shaders-msl/vert/no_stage_out.write_buff_atomic.vert b/reference/opt/shaders-msl/vert/no_stage_out.write_buff_atomic.vert
index ca4d6a5b..e2f38878 100644
--- a/reference/opt/shaders-msl/vert/no_stage_out.write_buff_atomic.vert
+++ b/reference/opt/shaders-msl/vert/no_stage_out.write_buff_atomic.vert
@@ -21,7 +21,7 @@ struct main0_in
     float4 m_17 [[attribute(0)]];
 };
 
-vertex void main0(main0_in in [[stage_in]], device _23& _25 [[buffer(0)]])
+vertex void main0(main0_in in [[stage_in]], volatile device _23& _25 [[buffer(0)]])
 {
     main0_out out = {};
     out.gl_Position = in.m_17;
diff --git a/reference/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp b/reference/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp
index 8b669428..4624ef0b 100644
--- a/reference/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp
+++ b/reference/shaders-msl-no-opt/asm/comp/atomic-result-temporary.asm.comp
@@ -14,7 +14,7 @@ struct SSBO
 
 kernel void main0(device SSBO& _5 [[buffer(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint _24 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_5.count, 1u, memory_order_relaxed);
+    uint _24 = atomic_fetch_add_explicit((device atomic_uint*)&_5.count, 1u, memory_order_relaxed);
     if (_24 < 1024u)
     {
         _5.data[_24] = gl_GlobalInvocationID.x;
diff --git a/reference/shaders-msl/asm/comp/atomic-decrement.asm.comp b/reference/shaders-msl/asm/comp/atomic-decrement.asm.comp
index 95841a78..b06c0e9c 100644
--- a/reference/shaders-msl/asm/comp/atomic-decrement.asm.comp
+++ b/reference/shaders-msl/asm/comp/atomic-decrement.asm.comp
@@ -20,7 +20,7 @@ uint2 spvTexelBufferCoord(uint tc)
 
 kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint _29 = atomic_fetch_sub_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
+    uint _29 = atomic_fetch_sub_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
     float4 r0;
     r0.x = as_type<float>(_29);
     u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(((uint(as_type<uint>(r0.x)) * 1u) + (uint(0) >> 2u))));
diff --git a/reference/shaders-msl/asm/comp/atomic-increment.asm.comp b/reference/shaders-msl/asm/comp/atomic-increment.asm.comp
index cd78fa2c..71af7bee 100644
--- a/reference/shaders-msl/asm/comp/atomic-increment.asm.comp
+++ b/reference/shaders-msl/asm/comp/atomic-increment.asm.comp
@@ -20,7 +20,7 @@ uint2 spvTexelBufferCoord(uint tc)
 
 kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
 {
-    uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
+    uint _29 = atomic_fetch_add_explicit((device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
     float4 r0;
     r0.x = as_type<float>(_29);
     u0.write(uint4(uint(int(gl_GlobalInvocationID.x))), spvTexelBufferCoord(((uint(as_type<uint>(r0.x)) * 1u) + (uint(0) >> 2u))));
diff --git a/reference/shaders-msl/asm/comp/bitcast_iadd.asm.comp b/reference/shaders-msl/asm/comp/bitcast_iadd.asm.comp
index 47ce85f8..ad61d750 100644
--- a/reference/shaders-msl/asm/comp/bitcast_iadd.asm.comp
+++ b/reference/shaders-msl/asm/comp/bitcast_iadd.asm.comp
@@ -15,7 +15,7 @@ struct _4
     int4 _m1;
 };
 
-kernel void main0(device _3& _5 [[buffer(0)]], device _4& _6 [[buffer(1)]])
+kernel void main0(device _3& restrict _5 [[buffer(0)]], device _4& restrict _6 [[buffer(1)]])
 {
     _6._m0 = _5._m1 + uint4(_5._m0);
     _6._m0 = uint4(_5._m0) + _5._m1;
diff --git a/reference/shaders-msl/asm/comp/multiple-entry.asm.comp b/reference/shaders-msl/asm/comp/multiple-entry.asm.comp
index 76527332..25ccf623 100644
--- a/reference/shaders-msl/asm/comp/multiple-entry.asm.comp
+++ b/reference/shaders-msl/asm/comp/multiple-entry.asm.comp
@@ -15,7 +15,7 @@ struct _7
     int4 _m1;
 };
 
-kernel void main0(device _6& _8 [[buffer(0)]], device _7& _9 [[buffer(1)]])
+kernel void main0(device _6& restrict _8 [[buffer(0)]], device _7& restrict _9 [[buffer(1)]])
 {
     _9._m0 = _8._m1 + uint4(_8._m0);
     _9._m0 = uint4(_8._m0) + _8._m1;
diff --git a/reference/shaders-msl/comp/atomic.comp b/reference/shaders-msl/comp/atomic.comp
index 43e6a8f0..04721502 100644
--- a/reference/shaders-msl/comp/atomic.comp
+++ b/reference/shaders-msl/comp/atomic.comp
@@ -16,55 +16,55 @@ kernel void main0(device SSBO& ssbo [[buffer(0)]])
 {
     threadgroup uint shared_u32;
     threadgroup int shared_i32;
-    uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
-    uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _16 = atomic_fetch_add_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _18 = atomic_fetch_or_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _20 = atomic_fetch_xor_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _22 = atomic_fetch_and_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _24 = atomic_fetch_min_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _26 = atomic_fetch_max_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
+    uint _28 = atomic_exchange_explicit((device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
     uint _32;
     do
     {
         _32 = 10u;
-    } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u);
-    int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
-    int _48 = atomic_exchange_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    } while (!atomic_compare_exchange_weak_explicit((device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed) && _32 == 10u);
+    int _36 = atomic_fetch_add_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _38 = atomic_fetch_or_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _40 = atomic_fetch_xor_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _42 = atomic_fetch_and_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _44 = atomic_fetch_min_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _46 = atomic_fetch_max_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
+    int _48 = atomic_exchange_explicit((device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
     int _52;
     do
     {
         _52 = 10;
-    } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10);
+    } while (!atomic_compare_exchange_weak_explicit((device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed) && _52 == 10);
     shared_u32 = 10u;
     shared_i32 = 10;
-    uint _57 = atomic_fetch_add_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _58 = atomic_fetch_or_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _59 = atomic_fetch_xor_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _60 = atomic_fetch_and_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _61 = atomic_fetch_min_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _62 = atomic_fetch_max_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
-    uint _63 = atomic_exchange_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _57 = atomic_fetch_add_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _58 = atomic_fetch_or_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _59 = atomic_fetch_xor_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _60 = atomic_fetch_and_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _61 = atomic_fetch_min_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _62 = atomic_fetch_max_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
+    uint _63 = atomic_exchange_explicit((threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
     uint _64;
     do
     {
         _64 = 10u;
-    } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u);
-    int _65 = atomic_fetch_add_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _66 = atomic_fetch_or_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _67 = atomic_fetch_xor_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _68 = atomic_fetch_and_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _69 = atomic_fetch_min_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _70 = atomic_fetch_max_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
-    int _71 = atomic_exchange_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed) && _64 == 10u);
+    int _65 = atomic_fetch_add_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _66 = atomic_fetch_or_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _67 = atomic_fetch_xor_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _68 = atomic_fetch_and_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _69 = atomic_fetch_min_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _70 = atomic_fetch_max_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
+    int _71 = atomic_exchange_explicit((threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
     int _72;
     do
     {
         _72 = 10;
-    } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10);
+    } while (!atomic_compare_exchange_weak_explicit((threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed) && _72 == 10);
 }
diff --git a/reference/shaders-msl/comp/basic.comp b/reference/shaders-msl/comp/basic.comp
index 6410894b..36b419b7 100644
--- a/reference/shaders-msl/comp/basic.comp
+++ b/reference/shaders-msl/comp/basic.comp
@@ -27,7 +27,7 @@ kernel void main0(const device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buff
     float4 idata = _23.in_data[ident];
     if (dot(idata, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875)
     {
-        uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_48.counter, 1u, memory_order_relaxed);
+        uint _52 = atomic_fetch_add_explicit((device atomic_uint*)&_48.counter, 1u, memory_order_relaxed);
         _45.out_data[_52] = idata;
     }
 }
diff --git a/reference/shaders-msl/comp/coherent-block.comp b/reference/shaders-msl/comp/coherent-block.comp
index bec9b218..580b9e3e 100644
--- a/reference/shaders-msl/comp/coherent-block.comp
+++ b/reference/shaders-msl/comp/coherent-block.comp
@@ -8,7 +8,7 @@ struct SSBO
     float4 value;
 };
 
-kernel void main0(device SSBO& _10 [[buffer(0)]])
+kernel void main0(volatile device SSBO& _10 [[buffer(0)]])
 {
     _10.value = float4(20.0);
 }
diff --git a/reference/shaders-msl/comp/coherent-image.comp b/reference/shaders-msl/comp/coherent-image.comp
index 0fe044fb..c6af46b5 100644
--- a/reference/shaders-msl/comp/coherent-image.comp
+++ b/reference/shaders-msl/comp/coherent-image.comp
@@ -8,7 +8,7 @@ struct SSBO
     int4 value;
 };
 
-kernel void main0(device SSBO& _10 [[buffer(0)]], texture2d<int> uImage [[texture(0)]])
+kernel void main0(volatile device SSBO& _10 [[buffer(0)]], texture2d<int> uImage [[texture(0)]])
 {
     _10.value = uImage.read(uint2(int2(10)));
 }
diff --git a/reference/shaders-msl/comp/culling.comp b/reference/shaders-msl/comp/culling.comp
index 32acf599..13578363 100644
--- a/reference/shaders-msl/comp/culling.comp
+++ b/reference/shaders-msl/comp/culling.comp
@@ -29,7 +29,7 @@ kernel void main0(const device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buff
     float idata = _22.in_data[ident];
     if (idata > 12.0)
     {
-        uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_41.count, 1u, memory_order_relaxed);
+        uint _45 = atomic_fetch_add_explicit((device atomic_uint*)&_41.count, 1u, memory_order_relaxed);
         _38.out_data[_45] = idata;
     }
 }
diff --git a/reference/shaders-msl/vert/no_stage_out.write_buff_atomic.vert b/reference/shaders-msl/vert/no_stage_out.write_buff_atomic.vert
index 9fe99e29..cb4a2195 100644
--- a/reference/shaders-msl/vert/no_stage_out.write_buff_atomic.vert
+++ b/reference/shaders-msl/vert/no_stage_out.write_buff_atomic.vert
@@ -21,7 +21,7 @@ struct main0_in
     float4 m_17 [[attribute(0)]];
 };
 
-vertex void main0(main0_in in [[stage_in]], device _23& _25 [[buffer(0)]])
+vertex void main0(main0_in in [[stage_in]], volatile device _23& _25 [[buffer(0)]])
 {
     main0_out out = {};
     out.gl_Position = in.m_17;
diff --git a/spirv_msl.cpp b/spirv_msl.cpp
index eaee10a0..f53e4f83 100644
--- a/spirv_msl.cpp
+++ b/spirv_msl.cpp
@@ -728,7 +728,7 @@ void CompilerMSL::emit_entry_point_declarations()
 		const auto &var = get<SPIRVariable>(array_id);
 		const auto &type = get_variable_data_type(var);
 		string name = to_name(array_id);
-		statement(get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + name + "[] =");
+		statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(array_id), name, "[] =");
 		begin_scope();
 		for (uint32_t i = 0; i < type.array[0]; ++i)
 			statement(name + "_" + convert_to_string(i) + ",");
@@ -4594,7 +4594,7 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
 	string exp = string(op) + "(";
 
 	auto &type = get_pointee_type(expression_type(obj));
-	exp += "(volatile ";
+	exp += "(";
 	auto *var = maybe_get_backing_variable(obj);
 	if (!var)
 		SPIRV_CROSS_THROW("No backing variable for atomic operation.");
@@ -6190,11 +6190,19 @@ string CompilerMSL::func_type_decl(SPIRType &type)
 string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
 {
 	const auto &type = get<SPIRType>(argument.basetype);
 
+	Bitset flags;
+	if (type.basetype == SPIRType::Struct && (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)))
+		flags = ir.get_buffer_block_flags(argument);
+	else
+		flags = get_decoration_bitset(argument.self);
+
+	const char *addr_space = nullptr;
 	switch (type.storage)
 	{
 	case StorageClassWorkgroup:
-		return "threadgroup";
+		addr_space = "threadgroup";
+		break;
 
 	case StorageClassStorageBuffer:
 	{
@@ -6202,9 +6210,10 @@ string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
 		// we should not assume any constness here. Only for global SSBOs.
 		bool readonly = false;
 		if (has_decoration(type.self, DecorationBlock))
-			readonly = ir.get_buffer_block_flags(argument).get(DecorationNonWritable);
+			readonly = flags.get(DecorationNonWritable);
 
-		return readonly ? "const device" : "device";
+		addr_space = readonly ? "const device" : "device";
+		break;
 	}
 
 	case StorageClassUniform:
@@ -6215,54 +6224,61 @@ string CompilerMSL::get_argument_address_space(const SPIRVariable &argument)
 		bool ssbo = has_decoration(type.self, DecorationBufferBlock);
 		if (ssbo)
 		{
-			bool readonly = ir.get_buffer_block_flags(argument).get(DecorationNonWritable);
-			return readonly ? "const device" : "device";
+			bool readonly = flags.get(DecorationNonWritable);
+			addr_space = readonly ? "const device" : "device";
 		}
 		else
-			return "constant";
+			addr_space = "constant";
+		break;
 	}
 	break;
 
 	case StorageClassFunction:
 	case StorageClassGeneric:
 		// No address space for plain values.
-		return type.pointer ? "thread" : "";
+		addr_space = type.pointer ? "thread" : "";
+		break;
 
 	case StorageClassInput:
 		if (get_execution_model() == ExecutionModelTessellationControl && argument.basevariable == stage_in_ptr_var_id)
-			return "threadgroup";
+			addr_space = "threadgroup";
 		break;
 
 	case StorageClassOutput:
 		if (capture_output_to_buffer)
-			return "device";
+			addr_space = "device";
 		break;
 
 	default:
 		break;
 	}
 
-	return "thread";
+	if (!addr_space)
+		addr_space = "thread";
+
+	return join(flags.get(DecorationVolatile) || flags.get(DecorationCoherent) ? "volatile " : "", addr_space);
 }
 
 string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id)
 {
+	// This can be called for variable pointer contexts as well, so be very careful about which method we choose.
+	Bitset flags;
+	if (ir.ids[id].get_type() == TypeVariable && type.basetype == SPIRType::Struct &&
+	    (has_decoration(type.self, DecorationBlock) || has_decoration(type.self, DecorationBufferBlock)))
+		flags = get_buffer_block_flags(id);
+	else
+		flags = get_decoration_bitset(id);
+
+	const char *addr_space = nullptr;
 	switch (type.storage)
 	{
 	case StorageClassWorkgroup:
-		return "threadgroup";
+		addr_space = "threadgroup";
+		break;
 
 	case StorageClassStorageBuffer:
-	{
-		// This can be called for variable pointer contexts as well, so be very careful about which method we choose.
-		Bitset flags;
-		if (ir.ids[id].get_type() == TypeVariable && has_decoration(type.self, DecorationBlock))
-			flags = get_buffer_block_flags(id);
-		else
-			flags = get_decoration_bitset(id);
-
-		return flags.get(DecorationNonWritable) ? "const device" : "device";
-	}
+		addr_space = flags.get(DecorationNonWritable) ? "const device" : "device";
+		break;
 
 	case StorageClassUniform:
 	case StorageClassUniformConstant:
@@ -6271,37 +6287,53 @@ string CompilerMSL::get_type_address_space(const SPIRType &type, uint32_t id)
 	{
 		bool ssbo = has_decoration(type.self, DecorationBufferBlock);
 		if (ssbo)
-		{
-			// This can be called for variable pointer contexts as well, so be very careful about which method we choose.
-			Bitset flags;
-			if (ir.ids[id].get_type() == TypeVariable && has_decoration(type.self, DecorationBlock))
-				flags = get_buffer_block_flags(id);
-			else
-				flags = get_decoration_bitset(id);
-
-			return flags.get(DecorationNonWritable) ? "const device" : "device";
-		}
+			addr_space = flags.get(DecorationNonWritable) ? "const device" : "device";
 		else
-			return "constant";
+			addr_space = "constant";
 	}
 	else
-		return "constant";
+		addr_space = "constant";
+	break;
 
 	case StorageClassFunction:
 	case StorageClassGeneric:
 		// No address space for plain values.
-		return type.pointer ? "thread" : "";
+		addr_space = type.pointer ? "thread" : "";
+		break;
 
 	case StorageClassOutput:
 		if (capture_output_to_buffer)
-			return "device";
+			addr_space = "device";
 		break;
 
 	default:
 		break;
 	}
 
-	return "thread";
+	if (!addr_space)
+		addr_space = "thread";
+
+	return join(flags.get(DecorationVolatile) || flags.get(DecorationCoherent) ? "volatile " : "", addr_space);
+}
+
+const char *CompilerMSL::to_restrict(uint32_t id, bool space)
+{
+	// This can be called for variable pointer contexts as well, so be very careful about which method we choose.
+	Bitset flags;
+	if (ir.ids[id].get_type() == TypeVariable)
+	{
+		uint32_t type_id = expression_type_id(id);
+		auto &type = expression_type(id);
+		if (type.basetype == SPIRType::Struct &&
+		    (has_decoration(type_id, DecorationBlock) || has_decoration(type_id, DecorationBufferBlock)))
+			flags = get_buffer_block_flags(id);
+		else
+			flags = get_decoration_bitset(id);
+	}
+	else
+		flags = get_decoration_bitset(id);
+
+	return flags.get(DecorationRestrict) ? (space ? "restrict " : "restrict") : "";
 }
 
 string CompilerMSL::entry_point_arg_stage_in()
@@ -6469,7 +6501,7 @@ string CompilerMSL::entry_point_args_argument_buffer(bool append_comma)
 
 		claimed_bindings.set(buffer_binding);
 
-		ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_name(id);
+		ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(id) + to_name(id);
 		ep_args += " [[buffer(" + convert_to_string(buffer_binding) + ")]]";
 
 		next_metal_resource_index_buffer = max(next_metal_resource_index_buffer, buffer_binding + 1);
@@ -6605,8 +6637,8 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
 				{
 					if (!ep_args.empty())
 						ep_args += ", ";
-					ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + r.name + "_" +
-					           convert_to_string(i);
+					ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + to_restrict(var_id) +
+					           r.name + "_" + convert_to_string(i);
 					ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")]]";
 				}
 			}
@@ -6614,7 +6646,8 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args)
 			{
 				if (!ep_args.empty())
 					ep_args += ", ";
-				ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + r.name;
+				ep_args +=
+				    get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(var_id) + r.name;
 				ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]";
 			}
 			break;
@@ -7087,6 +7120,12 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
 			// non-constant arrays, but we can create thread const from constant.
 			decl = string("thread const ") + decl;
 			decl += " (&";
+			const char *restrict_kw = to_restrict(name_id);
+			if (*restrict_kw)
+			{
+				decl += " ";
+				decl += restrict_kw;
+			}
 			decl += to_expression(name_id);
 			decl += ")";
 			decl += type_to_array_glsl(type);
@@ -7125,6 +7164,12 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
 		}
 
 		decl += " (&";
+		const char *restrict_kw = to_restrict(name_id);
+		if (*restrict_kw)
+		{
+			decl += " ";
+			decl += restrict_kw;
+		}
 		decl += to_expression(name_id);
 		decl += ")";
 		decl += type_to_array_glsl(type);
@@ -7142,6 +7187,7 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg)
 		}
 		decl += "&";
 		decl += " ";
+		decl += to_restrict(name_id);
 		decl += to_expression(name_id);
 	}
 	else
@@ -7520,6 +7566,7 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
 	// Pointer?
 	if (type.pointer)
 	{
+		const char *restrict_kw;
 		type_name = join(get_type_address_space(type, id), " ", type_to_glsl(get<SPIRType>(type.parent_type), id));
 		switch (type.basetype)
 		{
@@ -7531,6 +7578,12 @@ string CompilerMSL::type_to_glsl(const SPIRType &type, uint32_t id)
 		default:
 			// Anything else can be a raw pointer.
 			type_name += "*";
+			restrict_kw = to_restrict(id);
+			if (*restrict_kw)
+			{
+				type_name += " ";
+				type_name += restrict_kw;
+			}
 			break;
 		}
 		return type_name;
diff --git a/spirv_msl.hpp b/spirv_msl.hpp
index 9f2bab49..6b0cb8e1 100644
--- a/spirv_msl.hpp
+++ b/spirv_msl.hpp
@@ -516,6 +516,7 @@ protected:
 	MSLStructMemberKey get_struct_member_key(uint32_t type_id, uint32_t index);
 	std::string get_argument_address_space(const SPIRVariable &argument);
 	std::string get_type_address_space(const SPIRType &type, uint32_t id);
+	const char *to_restrict(uint32_t id, bool space = true);
 	SPIRType &get_stage_in_struct_type();
 	SPIRType &get_stage_out_struct_type();
 	SPIRType &get_patch_stage_in_struct_type();