From 26b887ec99cb0b54439cebeafe7d145eacb66bb5 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Tue, 15 May 2018 16:03:20 +0200 Subject: [PATCH] Fix atomic_compare_exchange_weak_explicit. Need to emit a CAS loop. Fix shared memory declaration. Declare atomic ops with correct memory scope. --- reference/opt/shaders-msl/comp/atomic.comp | 70 +++++++++++++----- reference/opt/shaders-msl/comp/basic.comp | 2 +- reference/opt/shaders-msl/comp/culling.comp | 2 +- reference/shaders-msl/comp/atomic.comp | 70 +++++++++++++----- reference/shaders-msl/comp/basic.comp | 2 +- reference/shaders-msl/comp/culling.comp | 2 +- reference/shaders-msl/comp/shared.comp | 2 +- shaders-msl/comp/atomic.comp | 23 ++++++ spirv_glsl.cpp | 11 ++- spirv_msl.cpp | 81 ++++++++++++--------- 10 files changed, 188 insertions(+), 77 deletions(-) diff --git a/reference/opt/shaders-msl/comp/atomic.comp b/reference/opt/shaders-msl/comp/atomic.comp index 0318b26..f77922a 100644 --- a/reference/opt/shaders-msl/comp/atomic.comp +++ b/reference/opt/shaders-msl/comp/atomic.comp @@ -14,23 +14,57 @@ struct SSBO kernel void main0(device SSBO& ssbo [[buffer(2)]]) { - uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed); - uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed); - uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed); - uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed); - uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed); - uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed); - uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed); - uint _30 = 10u; - uint _32 = atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&(ssbo.u32), &(_30), 2u, memory_order_relaxed, memory_order_relaxed); - int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed); - int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed); - int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed); - int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed); - int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed); - int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed); - int _48 = atomic_exchange_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed); - int _50 = 10; - int _52 = atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&(ssbo.i32), &(_50), 2, memory_order_relaxed, memory_order_relaxed); + threadgroup uint shared_u32; + threadgroup int shared_i32; + uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _32; + do + { + _32 = 10u; + } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed)); + int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _48 = atomic_exchange_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _52; + do + { + _52 = 10; + } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed)); + shared_u32 = 10u; + shared_i32 = 10; + uint _57 = atomic_fetch_add_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _58 = atomic_fetch_or_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _59 = atomic_fetch_xor_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _60 = atomic_fetch_and_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _61 = atomic_fetch_min_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _62 = atomic_fetch_max_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _63 = atomic_exchange_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _64; + do + { + _64 = 10u; + } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed)); + int _65 = atomic_fetch_add_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _66 = atomic_fetch_or_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _67 = atomic_fetch_xor_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _68 = atomic_fetch_and_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _69 = atomic_fetch_min_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _70 = atomic_fetch_max_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _71 = atomic_exchange_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _72; + do + { + _72 = 10; + } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed)); } diff --git a/reference/opt/shaders-msl/comp/basic.comp b/reference/opt/shaders-msl/comp/basic.comp index c41f7c0..374d03c 100644 --- a/reference/opt/shaders-msl/comp/basic.comp +++ b/reference/opt/shaders-msl/comp/basic.comp @@ -26,7 +26,7 @@ kernel void main0(device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buffer(1)] float4 _29 = _23.in_data[gl_GlobalInvocationID.x]; if (dot(_29, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875) { - uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(_48.counter), 1u, memory_order_relaxed); + uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_48.counter, 1u, memory_order_relaxed); _45.out_data[_52] = _29; } } diff --git a/reference/opt/shaders-msl/comp/culling.comp b/reference/opt/shaders-msl/comp/culling.comp index b20480b..2d36fb3 100644 --- a/reference/opt/shaders-msl/comp/culling.comp +++ b/reference/opt/shaders-msl/comp/culling.comp @@ -28,7 +28,7 @@ kernel void main0(device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buffer(1)] float _28 = _22.in_data[gl_GlobalInvocationID.x]; if (_28 > 12.0) { - uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(_41.count), 1u, memory_order_relaxed); + uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_41.count, 1u, memory_order_relaxed); _38.out_data[_45] = _28; } } diff --git a/reference/shaders-msl/comp/atomic.comp b/reference/shaders-msl/comp/atomic.comp index 0318b26..f77922a 100644 --- a/reference/shaders-msl/comp/atomic.comp +++ b/reference/shaders-msl/comp/atomic.comp @@ -14,23 +14,57 @@ struct SSBO kernel void main0(device SSBO& ssbo [[buffer(2)]]) { - uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed); - uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed); - uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed); - uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed); - uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed); - uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed); - uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed); - uint _30 = 10u; - uint _32 = atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&(ssbo.u32), &(_30), 2u, memory_order_relaxed, memory_order_relaxed); - int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed); - int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed); - int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed); - int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed); - int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed); - int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed); - int _48 = atomic_exchange_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed); - int _50 = 10; - int _52 = atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&(ssbo.i32), &(_50), 2, memory_order_relaxed, memory_order_relaxed); + threadgroup uint shared_u32; + threadgroup int shared_i32; + uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed); + uint _32; + do + { + _32 = 10u; + } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed)); + int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _48 = atomic_exchange_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed); + int _52; + do + { + _52 = 10; + } while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed)); + shared_u32 = 10u; + shared_i32 = 10; + uint _57 = atomic_fetch_add_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _58 = atomic_fetch_or_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _59 = atomic_fetch_xor_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _60 = atomic_fetch_and_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _61 = atomic_fetch_min_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _62 = atomic_fetch_max_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _63 = atomic_exchange_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed); + uint _64; + do + { + _64 = 10u; + } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed)); + int _65 = atomic_fetch_add_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _66 = atomic_fetch_or_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _67 = atomic_fetch_xor_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _68 = atomic_fetch_and_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _69 = atomic_fetch_min_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _70 = atomic_fetch_max_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _71 = atomic_exchange_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed); + int _72; + do + { + _72 = 10; + } while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed)); } diff --git a/reference/shaders-msl/comp/basic.comp b/reference/shaders-msl/comp/basic.comp index 732b1cb..cc14772 100644 --- a/reference/shaders-msl/comp/basic.comp +++ b/reference/shaders-msl/comp/basic.comp @@ -27,7 +27,7 @@ kernel void main0(device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buffer(1)] float4 idata = _23.in_data[ident]; if (dot(idata, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875) { - uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(_48.counter), 1u, memory_order_relaxed); + uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_48.counter, 1u, memory_order_relaxed); _45.out_data[_52] = idata; } } diff --git a/reference/shaders-msl/comp/culling.comp b/reference/shaders-msl/comp/culling.comp index ef84f1d..10fb915 100644 --- a/reference/shaders-msl/comp/culling.comp +++ b/reference/shaders-msl/comp/culling.comp @@ -29,7 +29,7 @@ kernel void main0(device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buffer(1)] float idata = _22.in_data[ident]; if (idata > 12.0) { - uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(_41.count), 1u, memory_order_relaxed); + uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_41.count, 1u, memory_order_relaxed); _38.out_data[_45] = idata; } } diff --git a/reference/shaders-msl/comp/shared.comp b/reference/shaders-msl/comp/shared.comp index 5aeaa4f..c493b07 100644 --- a/reference/shaders-msl/comp/shared.comp +++ b/reference/shaders-msl/comp/shared.comp @@ -17,9 +17,9 @@ struct SSBO2 kernel void main0(device SSBO& _22 [[buffer(0)]], device SSBO2& _44 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]]) { + threadgroup float sShared[4]; uint ident = gl_GlobalInvocationID.x; float idata = _22.in_data[ident]; - threadgroup float sShared[4]; sShared[gl_LocalInvocationIndex] = idata; threadgroup_barrier(mem_flags::mem_threadgroup); _44.out_data[ident] = sShared[(4u - gl_LocalInvocationIndex) - 1u]; diff --git a/shaders-msl/comp/atomic.comp b/shaders-msl/comp/atomic.comp index 417284d..e25c4f6 100644 --- a/shaders-msl/comp/atomic.comp +++ b/shaders-msl/comp/atomic.comp @@ -10,6 +10,9 @@ layout(binding = 2, std430) buffer SSBO int i32; } ssbo; +shared uint shared_u32; +shared int shared_i32; + void main() { atomicAdd(ssbo.u32, 1u); @@ -29,5 +32,25 @@ void main() atomicMax(ssbo.i32, 1); atomicExchange(ssbo.i32, 1); atomicCompSwap(ssbo.i32, 10, 2); + + shared_u32 = 10u; + shared_i32 = 10; + atomicAdd(shared_u32, 1u); + atomicOr(shared_u32, 1u); + atomicXor(shared_u32, 1u); + atomicAnd(shared_u32, 1u); + atomicMin(shared_u32, 1u); + atomicMax(shared_u32, 1u); + atomicExchange(shared_u32, 1u); + atomicCompSwap(shared_u32, 10u, 2u); + + atomicAdd(shared_i32, 1); + atomicOr(shared_i32, 1); + atomicXor(shared_i32, 1); + atomicAnd(shared_i32, 1); + atomicMin(shared_i32, 1); + atomicMax(shared_i32, 1); + atomicExchange(shared_i32, 1); + atomicCompSwap(shared_i32, 10, 2); } diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 8a48869..90f41e1 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -8853,7 +8853,16 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags) for (auto &v : func.local_variables) { auto &var = get(v); - if (expression_is_lvalue(v)) + if (var.storage == StorageClassWorkgroup) + { + // Special variable type which cannot have initializer, + // need to be declared as standalone variables. + // Comes from MSL which can push global variables as local variables in main function. + add_local_variable_name(var.self); + statement(variable_decl(var), ";"); + var.deferred_declaration = false; + } + else if (expression_is_lvalue(v)) { add_local_variable_name(var.self); diff --git a/spirv_msl.cpp b/spirv_msl.cpp index 66f6169..0e44e32 100644 --- a/spirv_msl.cpp +++ b/spirv_msl.cpp @@ -389,7 +389,6 @@ void CompilerMSL::localize_global_variables() auto &var = get(v_id); if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup) { - var.storage = StorageClassFunction; entry_func.add_local_variable(v_id); iter = global_variables.erase(iter); } @@ -1734,7 +1733,6 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) } case OpAtomicCompareExchange: - case OpAtomicCompareExchangeWeak: { uint32_t result_type = ops[0]; uint32_t id = ops[1]; @@ -1748,6 +1746,9 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) break; } + case OpAtomicCompareExchangeWeak: + SPIRV_CROSS_THROW("OpAtomicCompareExchangeWeak is only supported in kernel profile."); + case OpAtomicLoad: { uint32_t result_type = ops[0]; @@ -2207,52 +2208,62 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, { forced_temporaries.insert(result_id); - bool fwd_obj = should_forward(obj); - bool fwd_op1 = op1 ? should_forward(op1) : true; - bool fwd_op2 = op2 ? should_forward(op2) : true; - - bool forward = fwd_obj && fwd_op1 && fwd_op2; - string exp = string(op) + "("; auto &type = expression_type(obj); exp += "(volatile "; - exp += "device"; + auto *var = maybe_get_backing_variable(obj); + if (!var) + SPIRV_CROSS_THROW("No backing variable for atomic operation."); + exp += get_argument_address_space(*var); exp += " atomic_"; exp += type_to_glsl(type); exp += "*)"; - exp += "&("; - exp += to_expression(obj); - exp += ")"; + exp += "&"; + exp += to_enclosed_expression(obj); - if (op1) + bool is_atomic_compare_exchange_strong = op1_is_pointer && op1; + + if (is_atomic_compare_exchange_strong) { - if (op1_is_pointer) - { - statement(declare_temporary(expression_type(op2).self, op1), to_expression(op1), ";"); - exp += ", &(" + to_name(op1) + ")"; - } - else - exp += ", " + to_expression(op1); + assert(strcmp(op, "atomic_compare_exchange_weak_explicit") == 0); + assert(op2); + assert(has_mem_order_2); + exp += ", &"; + exp += to_name(result_id); + exp += ", "; + exp += to_expression(op2); + exp += ", "; + exp += get_memory_order(mem_order_1); + exp += ", "; + exp += get_memory_order(mem_order_2); + exp += ")"; + + // MSL only supports the weak atomic compare exchange, + // so emit a CAS loop here. + statement(variable_decl(type, to_name(result_id)), ";"); + statement("do"); + begin_scope(); + statement(to_name(result_id), " = ", to_expression(op1), ";"); + end_scope_decl(join("while (!", exp, ")")); + set(result_id, to_name(result_id), result_type, true); } + else + { + assert(strcmp(op, "atomic_compare_exchange_weak_explicit") != 0); + if (op1) + exp += ", " + to_expression(op1); + if (op2) + exp += ", " + to_expression(op2); - if (op2) - exp += ", " + to_expression(op2); + exp += string(", ") + get_memory_order(mem_order_1); + if (has_mem_order_2) + exp += string(", ") + get_memory_order(mem_order_2); - exp += string(", ") + get_memory_order(mem_order_1); - - if (has_mem_order_2) - exp += string(", ") + get_memory_order(mem_order_2); - - exp += ")"; - emit_op(result_type, result_id, exp, forward); - - inherit_expression_dependencies(result_id, obj); - if (op1) - inherit_expression_dependencies(result_id, op1); - if (op2) - inherit_expression_dependencies(result_id, op2); + exp += ")"; + emit_op(result_type, result_id, exp, false); + } flush_all_atomic_capable_variables(); }