Fix atomic_compare_exchange_weak_explicit.

Need to emit a CAS loop.
Fix shared memory declaration.
Declare atomic ops with correct memory scope.
This commit is contained in:
Hans-Kristian Arntzen 2018-05-15 16:03:20 +02:00
Родитель fb7181bff1
Коммит 26b887ec99
10 изменённых файлов: 188 добавлений и 77 удалений

Просмотреть файл

@ -14,23 +14,57 @@ struct SSBO
kernel void main0(device SSBO& ssbo [[buffer(2)]])
{
uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
uint _30 = 10u;
uint _32 = atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&(ssbo.u32), &(_30), 2u, memory_order_relaxed, memory_order_relaxed);
int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
int _48 = atomic_exchange_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
int _50 = 10;
int _52 = atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&(ssbo.i32), &(_50), 2, memory_order_relaxed, memory_order_relaxed);
threadgroup uint shared_u32;
threadgroup int shared_i32;
uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _32;
do
{
_32 = 10u;
} while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed));
int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _48 = atomic_exchange_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _52;
do
{
_52 = 10;
} while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed));
shared_u32 = 10u;
shared_i32 = 10;
uint _57 = atomic_fetch_add_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
uint _58 = atomic_fetch_or_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
uint _59 = atomic_fetch_xor_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
uint _60 = atomic_fetch_and_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
uint _61 = atomic_fetch_min_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
uint _62 = atomic_fetch_max_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
uint _63 = atomic_exchange_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
uint _64;
do
{
_64 = 10u;
} while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed));
int _65 = atomic_fetch_add_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
int _66 = atomic_fetch_or_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
int _67 = atomic_fetch_xor_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
int _68 = atomic_fetch_and_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
int _69 = atomic_fetch_min_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
int _70 = atomic_fetch_max_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
int _71 = atomic_exchange_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
int _72;
do
{
_72 = 10;
} while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed));
}

Просмотреть файл

@ -26,7 +26,7 @@ kernel void main0(device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buffer(1)]
float4 _29 = _23.in_data[gl_GlobalInvocationID.x];
if (dot(_29, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875)
{
uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(_48.counter), 1u, memory_order_relaxed);
uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_48.counter, 1u, memory_order_relaxed);
_45.out_data[_52] = _29;
}
}

Просмотреть файл

@ -28,7 +28,7 @@ kernel void main0(device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buffer(1)]
float _28 = _22.in_data[gl_GlobalInvocationID.x];
if (_28 > 12.0)
{
uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(_41.count), 1u, memory_order_relaxed);
uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_41.count, 1u, memory_order_relaxed);
_38.out_data[_45] = _28;
}
}

Просмотреть файл

@ -14,23 +14,57 @@ struct SSBO
kernel void main0(device SSBO& ssbo [[buffer(2)]])
{
uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&(ssbo.u32), 1u, memory_order_relaxed);
uint _30 = 10u;
uint _32 = atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&(ssbo.u32), &(_30), 2u, memory_order_relaxed, memory_order_relaxed);
int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
int _48 = atomic_exchange_explicit((volatile device atomic_int*)&(ssbo.i32), 1, memory_order_relaxed);
int _50 = 10;
int _52 = atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&(ssbo.i32), &(_50), 2, memory_order_relaxed, memory_order_relaxed);
threadgroup uint shared_u32;
threadgroup int shared_i32;
uint _16 = atomic_fetch_add_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _18 = atomic_fetch_or_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _20 = atomic_fetch_xor_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _22 = atomic_fetch_and_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _24 = atomic_fetch_min_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _26 = atomic_fetch_max_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _28 = atomic_exchange_explicit((volatile device atomic_uint*)&ssbo.u32, 1u, memory_order_relaxed);
uint _32;
do
{
_32 = 10u;
} while (!atomic_compare_exchange_weak_explicit((volatile device atomic_uint*)&ssbo.u32, &_32, 2u, memory_order_relaxed, memory_order_relaxed));
int _36 = atomic_fetch_add_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _38 = atomic_fetch_or_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _40 = atomic_fetch_xor_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _42 = atomic_fetch_and_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _44 = atomic_fetch_min_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _46 = atomic_fetch_max_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _48 = atomic_exchange_explicit((volatile device atomic_int*)&ssbo.i32, 1, memory_order_relaxed);
int _52;
do
{
_52 = 10;
} while (!atomic_compare_exchange_weak_explicit((volatile device atomic_int*)&ssbo.i32, &_52, 2, memory_order_relaxed, memory_order_relaxed));
shared_u32 = 10u;
shared_i32 = 10;
uint _57 = atomic_fetch_add_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
uint _58 = atomic_fetch_or_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
uint _59 = atomic_fetch_xor_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
uint _60 = atomic_fetch_and_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
uint _61 = atomic_fetch_min_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
uint _62 = atomic_fetch_max_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
uint _63 = atomic_exchange_explicit((volatile threadgroup atomic_uint*)&shared_u32, 1u, memory_order_relaxed);
uint _64;
do
{
_64 = 10u;
} while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_uint*)&shared_u32, &_64, 2u, memory_order_relaxed, memory_order_relaxed));
int _65 = atomic_fetch_add_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
int _66 = atomic_fetch_or_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
int _67 = atomic_fetch_xor_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
int _68 = atomic_fetch_and_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
int _69 = atomic_fetch_min_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
int _70 = atomic_fetch_max_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
int _71 = atomic_exchange_explicit((volatile threadgroup atomic_int*)&shared_i32, 1, memory_order_relaxed);
int _72;
do
{
_72 = 10;
} while (!atomic_compare_exchange_weak_explicit((volatile threadgroup atomic_int*)&shared_i32, &_72, 2, memory_order_relaxed, memory_order_relaxed));
}

Просмотреть файл

@ -27,7 +27,7 @@ kernel void main0(device SSBO& _23 [[buffer(0)]], device SSBO2& _45 [[buffer(1)]
float4 idata = _23.in_data[ident];
if (dot(idata, float4(1.0, 5.0, 6.0, 2.0)) > 8.19999980926513671875)
{
uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(_48.counter), 1u, memory_order_relaxed);
uint _52 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_48.counter, 1u, memory_order_relaxed);
_45.out_data[_52] = idata;
}
}

Просмотреть файл

@ -29,7 +29,7 @@ kernel void main0(device SSBO& _22 [[buffer(0)]], device SSBO2& _38 [[buffer(1)]
float idata = _22.in_data[ident];
if (idata > 12.0)
{
uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&(_41.count), 1u, memory_order_relaxed);
uint _45 = atomic_fetch_add_explicit((volatile device atomic_uint*)&_41.count, 1u, memory_order_relaxed);
_38.out_data[_45] = idata;
}
}

Просмотреть файл

@ -17,9 +17,9 @@ struct SSBO2
kernel void main0(device SSBO& _22 [[buffer(0)]], device SSBO2& _44 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]], uint gl_LocalInvocationIndex [[thread_index_in_threadgroup]])
{
threadgroup float sShared[4];
uint ident = gl_GlobalInvocationID.x;
float idata = _22.in_data[ident];
threadgroup float sShared[4];
sShared[gl_LocalInvocationIndex] = idata;
threadgroup_barrier(mem_flags::mem_threadgroup);
_44.out_data[ident] = sShared[(4u - gl_LocalInvocationIndex) - 1u];

Просмотреть файл

@ -10,6 +10,9 @@ layout(binding = 2, std430) buffer SSBO
int i32;
} ssbo;
shared uint shared_u32;
shared int shared_i32;
void main()
{
atomicAdd(ssbo.u32, 1u);
@ -29,5 +32,25 @@ void main()
atomicMax(ssbo.i32, 1);
atomicExchange(ssbo.i32, 1);
atomicCompSwap(ssbo.i32, 10, 2);
shared_u32 = 10u;
shared_i32 = 10;
atomicAdd(shared_u32, 1u);
atomicOr(shared_u32, 1u);
atomicXor(shared_u32, 1u);
atomicAnd(shared_u32, 1u);
atomicMin(shared_u32, 1u);
atomicMax(shared_u32, 1u);
atomicExchange(shared_u32, 1u);
atomicCompSwap(shared_u32, 10u, 2u);
atomicAdd(shared_i32, 1);
atomicOr(shared_i32, 1);
atomicXor(shared_i32, 1);
atomicAnd(shared_i32, 1);
atomicMin(shared_i32, 1);
atomicMax(shared_i32, 1);
atomicExchange(shared_i32, 1);
atomicCompSwap(shared_i32, 10, 2);
}

Просмотреть файл

@ -8853,7 +8853,16 @@ void CompilerGLSL::emit_function(SPIRFunction &func, const Bitset &return_flags)
for (auto &v : func.local_variables)
{
auto &var = get<SPIRVariable>(v);
if (expression_is_lvalue(v))
if (var.storage == StorageClassWorkgroup)
{
// Special variable type which cannot have initializer,
// need to be declared as standalone variables.
// Comes from MSL which can push global variables as local variables in main function.
add_local_variable_name(var.self);
statement(variable_decl(var), ";");
var.deferred_declaration = false;
}
else if (expression_is_lvalue(v))
{
add_local_variable_name(var.self);

Просмотреть файл

@ -389,7 +389,6 @@ void CompilerMSL::localize_global_variables()
auto &var = get<SPIRVariable>(v_id);
if (var.storage == StorageClassPrivate || var.storage == StorageClassWorkgroup)
{
var.storage = StorageClassFunction;
entry_func.add_local_variable(v_id);
iter = global_variables.erase(iter);
}
@ -1734,7 +1733,6 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
}
case OpAtomicCompareExchange:
case OpAtomicCompareExchangeWeak:
{
uint32_t result_type = ops[0];
uint32_t id = ops[1];
@ -1748,6 +1746,9 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
break;
}
case OpAtomicCompareExchangeWeak:
SPIRV_CROSS_THROW("OpAtomicCompareExchangeWeak is only supported in kernel profile.");
case OpAtomicLoad:
{
uint32_t result_type = ops[0];
@ -2207,52 +2208,62 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
{
forced_temporaries.insert(result_id);
bool fwd_obj = should_forward(obj);
bool fwd_op1 = op1 ? should_forward(op1) : true;
bool fwd_op2 = op2 ? should_forward(op2) : true;
bool forward = fwd_obj && fwd_op1 && fwd_op2;
string exp = string(op) + "(";
auto &type = expression_type(obj);
exp += "(volatile ";
exp += "device";
auto *var = maybe_get_backing_variable(obj);
if (!var)
SPIRV_CROSS_THROW("No backing variable for atomic operation.");
exp += get_argument_address_space(*var);
exp += " atomic_";
exp += type_to_glsl(type);
exp += "*)";
exp += "&(";
exp += to_expression(obj);
exp += ")";
exp += "&";
exp += to_enclosed_expression(obj);
if (op1)
bool is_atomic_compare_exchange_strong = op1_is_pointer && op1;
if (is_atomic_compare_exchange_strong)
{
if (op1_is_pointer)
{
statement(declare_temporary(expression_type(op2).self, op1), to_expression(op1), ";");
exp += ", &(" + to_name(op1) + ")";
}
else
exp += ", " + to_expression(op1);
assert(strcmp(op, "atomic_compare_exchange_weak_explicit") == 0);
assert(op2);
assert(has_mem_order_2);
exp += ", &";
exp += to_name(result_id);
exp += ", ";
exp += to_expression(op2);
exp += ", ";
exp += get_memory_order(mem_order_1);
exp += ", ";
exp += get_memory_order(mem_order_2);
exp += ")";
// MSL only supports the weak atomic compare exchange,
// so emit a CAS loop here.
statement(variable_decl(type, to_name(result_id)), ";");
statement("do");
begin_scope();
statement(to_name(result_id), " = ", to_expression(op1), ";");
end_scope_decl(join("while (!", exp, ")"));
set<SPIRExpression>(result_id, to_name(result_id), result_type, true);
}
else
{
assert(strcmp(op, "atomic_compare_exchange_weak_explicit") != 0);
if (op1)
exp += ", " + to_expression(op1);
if (op2)
exp += ", " + to_expression(op2);
if (op2)
exp += ", " + to_expression(op2);
exp += string(", ") + get_memory_order(mem_order_1);
if (has_mem_order_2)
exp += string(", ") + get_memory_order(mem_order_2);
exp += string(", ") + get_memory_order(mem_order_1);
if (has_mem_order_2)
exp += string(", ") + get_memory_order(mem_order_2);
exp += ")";
emit_op(result_type, result_id, exp, forward);
inherit_expression_dependencies(result_id, obj);
if (op1)
inherit_expression_dependencies(result_id, op1);
if (op2)
inherit_expression_dependencies(result_id, op2);
exp += ")";
emit_op(result_type, result_id, exp, false);
}
flush_all_atomic_capable_variables();
}