MSL: Fix OpAtomicIIncrement and OpAtomicIDecrement.
We were passing a constant '1' to `emit_atomic_func_op()`--which caused us to refer to SPIR-V value `%1`, which is almost certainly not what we want! What we really want is to add/subtract the literal constant '1' to/from the memory location.
This commit is contained in:
Родитель
403011e973
Коммит
41eb5c43b5
|
@ -0,0 +1,26 @@
|
|||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
#include <metal_atomic>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct u0_counters
|
||||
{
|
||||
uint c;
|
||||
};
|
||||
|
||||
// Returns 2D texture coords corresponding to 1D texel buffer coords
|
||||
uint2 spvTexelBufferCoord(uint tc)
|
||||
{
|
||||
return uint2(tc % 4096, tc / 4096);
|
||||
}
|
||||
|
||||
kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
uint _29 = atomic_fetch_sub_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
|
||||
u0.write(uint4(uint(int3(gl_GlobalInvocationID).x)), spvTexelBufferCoord((uint(as_type<int>(as_type<float>(_29))) + 0u)));
|
||||
}
|
||||
|
|
@ -0,0 +1,26 @@
|
|||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
#include <metal_atomic>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct u0_counters
|
||||
{
|
||||
uint c;
|
||||
};
|
||||
|
||||
// Returns 2D texture coords corresponding to 1D texel buffer coords
|
||||
uint2 spvTexelBufferCoord(uint tc)
|
||||
{
|
||||
return uint2(tc % 4096, tc / 4096);
|
||||
}
|
||||
|
||||
kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
|
||||
u0.write(uint4(uint(int3(gl_GlobalInvocationID).x)), spvTexelBufferCoord((uint(as_type<int>(as_type<float>(_29))) + 0u)));
|
||||
}
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
#include <metal_atomic>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct u0_counters
|
||||
{
|
||||
uint c;
|
||||
};
|
||||
|
||||
// Returns 2D texture coords corresponding to 1D texel buffer coords
|
||||
uint2 spvTexelBufferCoord(uint tc)
|
||||
{
|
||||
return uint2(tc % 4096, tc / 4096);
|
||||
}
|
||||
|
||||
kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
uint _29 = atomic_fetch_sub_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
|
||||
float4 r0;
|
||||
r0.x = as_type<float>(_29);
|
||||
u0.write(uint4(uint(int3(gl_GlobalInvocationID).x)), spvTexelBufferCoord(((uint(as_type<int>(r0.x)) * 1u) + (uint(0) >> 2u))));
|
||||
}
|
||||
|
|
@ -0,0 +1,28 @@
|
|||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
#pragma clang diagnostic ignored "-Wunused-variable"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
#include <metal_atomic>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct u0_counters
|
||||
{
|
||||
uint c;
|
||||
};
|
||||
|
||||
// Returns 2D texture coords corresponding to 1D texel buffer coords
|
||||
uint2 spvTexelBufferCoord(uint tc)
|
||||
{
|
||||
return uint2(tc % 4096, tc / 4096);
|
||||
}
|
||||
|
||||
kernel void main0(device u0_counters& u0_counter [[buffer(0)]], texture2d<uint, access::write> u0 [[texture(0)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
uint _29 = atomic_fetch_add_explicit((volatile device atomic_uint*)&u0_counter.c, 1, memory_order_relaxed);
|
||||
float4 r0;
|
||||
r0.x = as_type<float>(_29);
|
||||
u0.write(uint4(uint(int3(gl_GlobalInvocationID).x)), spvTexelBufferCoord(((uint(as_type<int>(r0.x)) * 1u) + (uint(0) >> 2u))));
|
||||
}
|
||||
|
|
@ -0,0 +1,71 @@
|
|||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Wine VKD3D Shader Compiler; 0
|
||||
; Bound: 43
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpCapability SampledBuffer
|
||||
OpCapability ImageBuffer
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %3 "main" %15
|
||||
OpExecutionMode %3 LocalSize 4 1 1
|
||||
OpName %3 "main"
|
||||
OpName %8 "u0"
|
||||
OpName %9 "u0_counters"
|
||||
OpMemberName %9 0 "c"
|
||||
OpName %11 "u0_counter"
|
||||
OpName %15 "vThreadID"
|
||||
OpName %19 "r0"
|
||||
OpDecorate %8 DescriptorSet 0
|
||||
OpDecorate %8 Binding 0
|
||||
OpMemberDecorate %9 0 Offset 0
|
||||
OpDecorate %9 BufferBlock
|
||||
OpDecorate %11 DescriptorSet 1
|
||||
OpDecorate %11 Binding 0
|
||||
OpDecorate %15 BuiltIn GlobalInvocationId
|
||||
%1 = OpTypeVoid
|
||||
%2 = OpTypeFunction %1
|
||||
%5 = OpTypeInt 32 0
|
||||
%6 = OpTypeImage %5 Buffer 0 0 0 2 R32ui
|
||||
%7 = OpTypePointer UniformConstant %6
|
||||
%8 = OpVariable %7 UniformConstant
|
||||
%9 = OpTypeStruct %5
|
||||
%10 = OpTypePointer Uniform %9
|
||||
%11 = OpVariable %10 Uniform
|
||||
%12 = OpTypeInt 32 1
|
||||
%13 = OpTypeVector %12 3
|
||||
%14 = OpTypePointer Input %13
|
||||
%15 = OpVariable %14 Input
|
||||
%16 = OpTypeFloat 32
|
||||
%17 = OpTypeVector %16 4
|
||||
%18 = OpTypePointer Function %17
|
||||
%20 = OpTypePointer Uniform %5
|
||||
%21 = OpConstant %5 0
|
||||
%23 = OpConstant %5 1
|
||||
%26 = OpTypePointer Function %16
|
||||
%33 = OpConstant %12 0
|
||||
%34 = OpConstant %5 2
|
||||
%37 = OpTypePointer Input %12
|
||||
%41 = OpTypeVector %5 4
|
||||
%3 = OpFunction %1 None %2
|
||||
%4 = OpLabel
|
||||
%19 = OpVariable %18 Function
|
||||
%22 = OpAccessChain %20 %11 %21
|
||||
%24 = OpAtomicIDecrement %5 %22 %23 %21
|
||||
%25 = OpBitcast %16 %24
|
||||
%27 = OpInBoundsAccessChain %26 %19 %21
|
||||
OpStore %27 %25
|
||||
%28 = OpLoad %6 %8
|
||||
%29 = OpInBoundsAccessChain %26 %19 %21
|
||||
%30 = OpLoad %16 %29
|
||||
%31 = OpBitcast %12 %30
|
||||
%32 = OpIMul %5 %31 %23
|
||||
%35 = OpShiftRightLogical %5 %33 %34
|
||||
%36 = OpIAdd %5 %32 %35
|
||||
%38 = OpInBoundsAccessChain %37 %15 %21
|
||||
%39 = OpLoad %12 %38
|
||||
%40 = OpBitcast %5 %39
|
||||
%42 = OpCompositeConstruct %41 %40 %40 %40 %40
|
||||
OpImageWrite %28 %36 %42
|
||||
OpReturn
|
||||
OpFunctionEnd
|
|
@ -0,0 +1,71 @@
|
|||
; SPIR-V
|
||||
; Version: 1.0
|
||||
; Generator: Wine VKD3D Shader Compiler; 0
|
||||
; Bound: 43
|
||||
; Schema: 0
|
||||
OpCapability Shader
|
||||
OpCapability SampledBuffer
|
||||
OpCapability ImageBuffer
|
||||
OpMemoryModel Logical GLSL450
|
||||
OpEntryPoint GLCompute %3 "main" %15
|
||||
OpExecutionMode %3 LocalSize 4 1 1
|
||||
OpName %3 "main"
|
||||
OpName %8 "u0"
|
||||
OpName %9 "u0_counters"
|
||||
OpMemberName %9 0 "c"
|
||||
OpName %11 "u0_counter"
|
||||
OpName %15 "vThreadID"
|
||||
OpName %19 "r0"
|
||||
OpDecorate %8 DescriptorSet 0
|
||||
OpDecorate %8 Binding 0
|
||||
OpMemberDecorate %9 0 Offset 0
|
||||
OpDecorate %9 BufferBlock
|
||||
OpDecorate %11 DescriptorSet 1
|
||||
OpDecorate %11 Binding 0
|
||||
OpDecorate %15 BuiltIn GlobalInvocationId
|
||||
%1 = OpTypeVoid
|
||||
%2 = OpTypeFunction %1
|
||||
%5 = OpTypeInt 32 0
|
||||
%6 = OpTypeImage %5 Buffer 0 0 0 2 R32ui
|
||||
%7 = OpTypePointer UniformConstant %6
|
||||
%8 = OpVariable %7 UniformConstant
|
||||
%9 = OpTypeStruct %5
|
||||
%10 = OpTypePointer Uniform %9
|
||||
%11 = OpVariable %10 Uniform
|
||||
%12 = OpTypeInt 32 1
|
||||
%13 = OpTypeVector %12 3
|
||||
%14 = OpTypePointer Input %13
|
||||
%15 = OpVariable %14 Input
|
||||
%16 = OpTypeFloat 32
|
||||
%17 = OpTypeVector %16 4
|
||||
%18 = OpTypePointer Function %17
|
||||
%20 = OpTypePointer Uniform %5
|
||||
%21 = OpConstant %5 0
|
||||
%23 = OpConstant %5 1
|
||||
%26 = OpTypePointer Function %16
|
||||
%33 = OpConstant %12 0
|
||||
%34 = OpConstant %5 2
|
||||
%37 = OpTypePointer Input %12
|
||||
%41 = OpTypeVector %5 4
|
||||
%3 = OpFunction %1 None %2
|
||||
%4 = OpLabel
|
||||
%19 = OpVariable %18 Function
|
||||
%22 = OpAccessChain %20 %11 %21
|
||||
%24 = OpAtomicIIncrement %5 %22 %23 %21
|
||||
%25 = OpBitcast %16 %24
|
||||
%27 = OpInBoundsAccessChain %26 %19 %21
|
||||
OpStore %27 %25
|
||||
%28 = OpLoad %6 %8
|
||||
%29 = OpInBoundsAccessChain %26 %19 %21
|
||||
%30 = OpLoad %16 %29
|
||||
%31 = OpBitcast %12 %30
|
||||
%32 = OpIMul %5 %31 %23
|
||||
%35 = OpShiftRightLogical %5 %33 %34
|
||||
%36 = OpIAdd %5 %32 %35
|
||||
%38 = OpInBoundsAccessChain %37 %15 %21
|
||||
%39 = OpLoad %12 %38
|
||||
%40 = OpBitcast %5 %39
|
||||
%42 = OpCompositeConstruct %41 %40 %40 %40 %40
|
||||
OpImageWrite %28 %36 %42
|
||||
OpReturn
|
||||
OpFunctionEnd
|
|
@ -1784,7 +1784,7 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
|
|||
uint32_t val = ops[6];
|
||||
uint32_t comp = ops[7];
|
||||
emit_atomic_func_op(result_type, id, "atomic_compare_exchange_weak_explicit", mem_sem_pass, mem_sem_fail, true,
|
||||
ptr, comp, true, val);
|
||||
ptr, comp, true, false, val);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -1812,19 +1812,20 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
|
|||
break;
|
||||
}
|
||||
|
||||
#define MSL_AFMO_IMPL(op, valsrc) \
|
||||
do \
|
||||
{ \
|
||||
uint32_t result_type = ops[0]; \
|
||||
uint32_t id = ops[1]; \
|
||||
uint32_t ptr = ops[2]; \
|
||||
uint32_t mem_sem = ops[4]; \
|
||||
uint32_t val = valsrc; \
|
||||
emit_atomic_func_op(result_type, id, "atomic_fetch_" #op "_explicit", mem_sem, mem_sem, false, ptr, val); \
|
||||
#define MSL_AFMO_IMPL(op, valsrc, valconst) \
|
||||
do \
|
||||
{ \
|
||||
uint32_t result_type = ops[0]; \
|
||||
uint32_t id = ops[1]; \
|
||||
uint32_t ptr = ops[2]; \
|
||||
uint32_t mem_sem = ops[4]; \
|
||||
uint32_t val = valsrc; \
|
||||
emit_atomic_func_op(result_type, id, "atomic_fetch_" #op "_explicit", mem_sem, mem_sem, false, ptr, val, \
|
||||
false, valconst); \
|
||||
} while (false)
|
||||
|
||||
#define MSL_AFMO(op) MSL_AFMO_IMPL(op, ops[5])
|
||||
#define MSL_AFMIO(op) MSL_AFMO_IMPL(op, 1)
|
||||
#define MSL_AFMO(op) MSL_AFMO_IMPL(op, ops[5], false)
|
||||
#define MSL_AFMIO(op) MSL_AFMO_IMPL(op, 1, true)
|
||||
|
||||
case OpAtomicIIncrement:
|
||||
MSL_AFMIO(add);
|
||||
|
@ -2260,7 +2261,7 @@ bool CompilerMSL::maybe_emit_array_assignment(uint32_t id_lhs, uint32_t id_rhs)
|
|||
// Emits one of the atomic functions. In MSL, the atomic functions operate on pointers
|
||||
void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, uint32_t mem_order_1,
|
||||
uint32_t mem_order_2, bool has_mem_order_2, uint32_t obj, uint32_t op1,
|
||||
bool op1_is_pointer, uint32_t op2)
|
||||
bool op1_is_pointer, bool op1_is_constant, uint32_t op2)
|
||||
{
|
||||
forced_temporaries.insert(result_id);
|
||||
|
||||
|
@ -2309,7 +2310,12 @@ void CompilerMSL::emit_atomic_func_op(uint32_t result_type, uint32_t result_id,
|
|||
{
|
||||
assert(strcmp(op, "atomic_compare_exchange_weak_explicit") != 0);
|
||||
if (op1)
|
||||
exp += ", " + to_expression(op1);
|
||||
{
|
||||
if (op1_is_constant)
|
||||
exp += join(", ", op1);
|
||||
else
|
||||
exp += ", " + to_expression(op1);
|
||||
}
|
||||
if (op2)
|
||||
exp += ", " + to_expression(op2);
|
||||
|
||||
|
|
|
@ -356,7 +356,7 @@ protected:
|
|||
std::string get_argument_address_space(const SPIRVariable &argument);
|
||||
void emit_atomic_func_op(uint32_t result_type, uint32_t result_id, const char *op, uint32_t mem_order_1,
|
||||
uint32_t mem_order_2, bool has_mem_order_2, uint32_t op0, uint32_t op1 = 0,
|
||||
bool op1_is_pointer = false, uint32_t op2 = 0);
|
||||
bool op1_is_pointer = false, bool op1_is_constant = false, uint32_t op2 = 0);
|
||||
const char *get_memory_order(uint32_t spv_mem_sem);
|
||||
void add_pragma_line(const std::string &line);
|
||||
void add_typedef_line(const std::string &line);
|
||||
|
|
Загрузка…
Ссылка в новой задаче