Merge pull request #1055 from cdavis5e/msl21-frag-subgroup-builtins
MSL: Support SubgroupLocalInvocationId and SubgroupSize in all stages.
This commit is contained in:
Коммит
3af18e741f
|
@ -66,7 +66,7 @@ inline bool spvSubgroupAllEqual(bool value)
|
|||
|
||||
kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]])
|
||||
{
|
||||
uint4 gl_SubgroupEqMask = 27 > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
|
||||
uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
|
||||
uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0));
|
||||
uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0));
|
||||
uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
|
||||
|
|
|
@ -0,0 +1,89 @@
|
|||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float FragColor [[color(0)]];
|
||||
};
|
||||
|
||||
inline uint4 spvSubgroupBallot(bool value)
|
||||
{
|
||||
simd_vote vote = simd_ballot(value);
|
||||
// simd_ballot() returns a 64-bit integer-like object, but
|
||||
// SPIR-V callers expect a uint4. We must convert.
|
||||
// FIXME: This won't include higher bits if Apple ever supports
|
||||
// 128 lanes in an SIMD-group.
|
||||
return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0);
|
||||
}
|
||||
|
||||
inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit)
|
||||
{
|
||||
return !!extract_bits(ballot[bit / 32], bit % 32, 1);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotFindLSB(uint4 ballot)
|
||||
{
|
||||
return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotFindMSB(uint4 ballot)
|
||||
{
|
||||
return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotBitCount(uint4 ballot)
|
||||
{
|
||||
return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
|
||||
{
|
||||
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
|
||||
return spvSubgroupBallotBitCount(ballot & mask);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
|
||||
{
|
||||
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
|
||||
return spvSubgroupBallotBitCount(ballot & mask);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline bool spvSubgroupAllEqual(T value)
|
||||
{
|
||||
return simd_all(value == simd_broadcast_first(value));
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupAllEqual(bool value)
|
||||
{
|
||||
return simd_all(value) || !simd_any(value);
|
||||
}
|
||||
|
||||
fragment main0_out main0()
|
||||
{
|
||||
main0_out out = {};
|
||||
uint gl_SubgroupSize = simd_sum(1);
|
||||
uint gl_SubgroupInvocationID = simd_prefix_exclusive_sum(1);
|
||||
uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
|
||||
uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0));
|
||||
uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0));
|
||||
uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
|
||||
uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
|
||||
out.FragColor = float(gl_SubgroupSize);
|
||||
out.FragColor = float(gl_SubgroupInvocationID);
|
||||
out.FragColor = float4(gl_SubgroupEqMask).x;
|
||||
out.FragColor = float4(gl_SubgroupGeMask).x;
|
||||
out.FragColor = float4(gl_SubgroupGtMask).x;
|
||||
out.FragColor = float4(gl_SubgroupLeMask).x;
|
||||
out.FragColor = float4(gl_SubgroupLtMask).x;
|
||||
uint4 _63 = spvSubgroupBallot(true);
|
||||
float4 _147 = simd_prefix_inclusive_product(simd_product(float4(20.0)));
|
||||
int4 _149 = simd_prefix_inclusive_product(simd_product(int4(20)));
|
||||
return out;
|
||||
}
|
||||
|
|
@ -66,7 +66,7 @@ inline bool spvSubgroupAllEqual(bool value)
|
|||
|
||||
kernel void main0(device SSBO& _9 [[buffer(0)]], uint gl_NumSubgroups [[simdgroups_per_threadgroup]], uint gl_SubgroupID [[simdgroup_index_in_threadgroup]], uint gl_SubgroupSize [[thread_execution_width]], uint gl_SubgroupInvocationID [[thread_index_in_simdgroup]])
|
||||
{
|
||||
uint4 gl_SubgroupEqMask = 27 > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
|
||||
uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
|
||||
uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0));
|
||||
uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0));
|
||||
uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
|
||||
|
|
|
@ -0,0 +1,143 @@
|
|||
#pragma clang diagnostic ignored "-Wmissing-prototypes"
|
||||
|
||||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float FragColor [[color(0)]];
|
||||
};
|
||||
|
||||
inline uint4 spvSubgroupBallot(bool value)
|
||||
{
|
||||
simd_vote vote = simd_ballot(value);
|
||||
// simd_ballot() returns a 64-bit integer-like object, but
|
||||
// SPIR-V callers expect a uint4. We must convert.
|
||||
// FIXME: This won't include higher bits if Apple ever supports
|
||||
// 128 lanes in an SIMD-group.
|
||||
return uint4((uint)((simd_vote::vote_t)vote & 0xFFFFFFFF), (uint)(((simd_vote::vote_t)vote >> 32) & 0xFFFFFFFF), 0, 0);
|
||||
}
|
||||
|
||||
inline bool spvSubgroupBallotBitExtract(uint4 ballot, uint bit)
|
||||
{
|
||||
return !!extract_bits(ballot[bit / 32], bit % 32, 1);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotFindLSB(uint4 ballot)
|
||||
{
|
||||
return select(ctz(ballot.x), select(32 + ctz(ballot.y), select(64 + ctz(ballot.z), select(96 + ctz(ballot.w), uint(-1), ballot.w == 0), ballot.z == 0), ballot.y == 0), ballot.x == 0);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotFindMSB(uint4 ballot)
|
||||
{
|
||||
return select(128 - (clz(ballot.w) + 1), select(96 - (clz(ballot.z) + 1), select(64 - (clz(ballot.y) + 1), select(32 - (clz(ballot.x) + 1), uint(-1), ballot.x == 0), ballot.y == 0), ballot.z == 0), ballot.w == 0);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotBitCount(uint4 ballot)
|
||||
{
|
||||
return popcount(ballot.x) + popcount(ballot.y) + popcount(ballot.z) + popcount(ballot.w);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotInclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
|
||||
{
|
||||
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
|
||||
return spvSubgroupBallotBitCount(ballot & mask);
|
||||
}
|
||||
|
||||
inline uint spvSubgroupBallotExclusiveBitCount(uint4 ballot, uint gl_SubgroupInvocationID)
|
||||
{
|
||||
uint4 mask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
|
||||
return spvSubgroupBallotBitCount(ballot & mask);
|
||||
}
|
||||
|
||||
template<typename T>
|
||||
inline bool spvSubgroupAllEqual(T value)
|
||||
{
|
||||
return simd_all(value == simd_broadcast_first(value));
|
||||
}
|
||||
|
||||
template<>
|
||||
inline bool spvSubgroupAllEqual(bool value)
|
||||
{
|
||||
return simd_all(value) || !simd_any(value);
|
||||
}
|
||||
|
||||
fragment main0_out main0()
|
||||
{
|
||||
main0_out out = {};
|
||||
uint gl_SubgroupSize = simd_sum(1);
|
||||
uint gl_SubgroupInvocationID = simd_prefix_exclusive_sum(1);
|
||||
uint4 gl_SubgroupEqMask = gl_SubgroupInvocationID > 32 ? uint4(0, (1 << (gl_SubgroupInvocationID - 32)), uint2(0)) : uint4(1 << gl_SubgroupInvocationID, uint3(0));
|
||||
uint4 gl_SubgroupGeMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID, 32u), 0)), uint2(0));
|
||||
uint4 gl_SubgroupGtMask = uint4(extract_bits(0xFFFFFFFF, min(gl_SubgroupInvocationID + 1, 32u), (uint)max(min((int)gl_SubgroupSize, 32) - (int)gl_SubgroupInvocationID - 1, 0)), extract_bits(0xFFFFFFFF, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0), (uint)max((int)gl_SubgroupSize - (int)max(gl_SubgroupInvocationID + 1, 32u), 0)), uint2(0));
|
||||
uint4 gl_SubgroupLeMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID + 1, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID + 1 - 32, 0)), uint2(0));
|
||||
uint4 gl_SubgroupLtMask = uint4(extract_bits(0xFFFFFFFF, 0, min(gl_SubgroupInvocationID, 32u)), extract_bits(0xFFFFFFFF, 0, (uint)max((int)gl_SubgroupInvocationID - 32, 0)), uint2(0));
|
||||
out.FragColor = float(gl_SubgroupSize);
|
||||
out.FragColor = float(gl_SubgroupInvocationID);
|
||||
bool elected = simd_is_first();
|
||||
out.FragColor = float4(gl_SubgroupEqMask).x;
|
||||
out.FragColor = float4(gl_SubgroupGeMask).x;
|
||||
out.FragColor = float4(gl_SubgroupGtMask).x;
|
||||
out.FragColor = float4(gl_SubgroupLeMask).x;
|
||||
out.FragColor = float4(gl_SubgroupLtMask).x;
|
||||
float4 broadcasted = simd_broadcast(float4(10.0), 8u);
|
||||
float3 first = simd_broadcast_first(float3(20.0));
|
||||
uint4 ballot_value = spvSubgroupBallot(true);
|
||||
bool inverse_ballot_value = spvSubgroupBallotBitExtract(ballot_value, gl_SubgroupInvocationID);
|
||||
bool bit_extracted = spvSubgroupBallotBitExtract(uint4(10u), 8u);
|
||||
uint bit_count = spvSubgroupBallotBitCount(ballot_value);
|
||||
uint inclusive_bit_count = spvSubgroupBallotInclusiveBitCount(ballot_value, gl_SubgroupInvocationID);
|
||||
uint exclusive_bit_count = spvSubgroupBallotExclusiveBitCount(ballot_value, gl_SubgroupInvocationID);
|
||||
uint lsb = spvSubgroupBallotFindLSB(ballot_value);
|
||||
uint msb = spvSubgroupBallotFindMSB(ballot_value);
|
||||
uint shuffled = simd_shuffle(10u, 8u);
|
||||
uint shuffled_xor = simd_shuffle_xor(30u, 8u);
|
||||
uint shuffled_up = simd_shuffle_up(20u, 4u);
|
||||
uint shuffled_down = simd_shuffle_down(20u, 4u);
|
||||
bool has_all = simd_all(true);
|
||||
bool has_any = simd_any(true);
|
||||
bool has_equal = spvSubgroupAllEqual(0);
|
||||
has_equal = spvSubgroupAllEqual(true);
|
||||
float4 added = simd_sum(float4(20.0));
|
||||
int4 iadded = simd_sum(int4(20));
|
||||
float4 multiplied = simd_product(float4(20.0));
|
||||
int4 imultiplied = simd_product(int4(20));
|
||||
float4 lo = simd_min(float4(20.0));
|
||||
float4 hi = simd_max(float4(20.0));
|
||||
int4 slo = simd_min(int4(20));
|
||||
int4 shi = simd_max(int4(20));
|
||||
uint4 ulo = simd_min(uint4(20u));
|
||||
uint4 uhi = simd_max(uint4(20u));
|
||||
uint4 anded = simd_and(ballot_value);
|
||||
uint4 ored = simd_or(ballot_value);
|
||||
uint4 xored = simd_xor(ballot_value);
|
||||
added = simd_prefix_inclusive_sum(added);
|
||||
iadded = simd_prefix_inclusive_sum(iadded);
|
||||
multiplied = simd_prefix_inclusive_product(multiplied);
|
||||
imultiplied = simd_prefix_inclusive_product(imultiplied);
|
||||
added = simd_prefix_exclusive_sum(multiplied);
|
||||
multiplied = simd_prefix_exclusive_product(multiplied);
|
||||
iadded = simd_prefix_exclusive_sum(imultiplied);
|
||||
imultiplied = simd_prefix_exclusive_product(imultiplied);
|
||||
added = quad_sum(added);
|
||||
multiplied = quad_product(multiplied);
|
||||
iadded = quad_sum(iadded);
|
||||
imultiplied = quad_product(imultiplied);
|
||||
lo = quad_min(lo);
|
||||
hi = quad_max(hi);
|
||||
ulo = quad_min(ulo);
|
||||
uhi = quad_max(uhi);
|
||||
slo = quad_min(slo);
|
||||
shi = quad_max(shi);
|
||||
anded = quad_and(anded);
|
||||
ored = quad_or(ored);
|
||||
xored = quad_xor(xored);
|
||||
float4 swap_horiz = quad_shuffle_xor(float4(20.0), 1u);
|
||||
float4 swap_vertical = quad_shuffle_xor(float4(20.0), 2u);
|
||||
float4 swap_diagonal = quad_shuffle_xor(float4(20.0), 3u);
|
||||
float4 quad_broadcast0 = quad_broadcast(float4(20.0), 3u);
|
||||
return out;
|
||||
}
|
||||
|
|
@ -0,0 +1,119 @@
|
|||
#version 450
|
||||
#extension GL_KHR_shader_subgroup_basic : require
|
||||
#extension GL_KHR_shader_subgroup_ballot : require
|
||||
#extension GL_KHR_shader_subgroup_vote : require
|
||||
#extension GL_KHR_shader_subgroup_shuffle : require
|
||||
#extension GL_KHR_shader_subgroup_shuffle_relative : require
|
||||
#extension GL_KHR_shader_subgroup_arithmetic : require
|
||||
#extension GL_KHR_shader_subgroup_clustered : require
|
||||
#extension GL_KHR_shader_subgroup_quad : require
|
||||
|
||||
layout(location = 0) out float FragColor;
|
||||
|
||||
void main()
|
||||
{
|
||||
// basic
|
||||
FragColor = float(gl_SubgroupSize);
|
||||
FragColor = float(gl_SubgroupInvocationID);
|
||||
subgroupBarrier();
|
||||
subgroupMemoryBarrier();
|
||||
subgroupMemoryBarrierBuffer();
|
||||
subgroupMemoryBarrierImage();
|
||||
bool elected = subgroupElect();
|
||||
|
||||
// ballot
|
||||
FragColor = float(gl_SubgroupEqMask);
|
||||
FragColor = float(gl_SubgroupGeMask);
|
||||
FragColor = float(gl_SubgroupGtMask);
|
||||
FragColor = float(gl_SubgroupLeMask);
|
||||
FragColor = float(gl_SubgroupLtMask);
|
||||
vec4 broadcasted = subgroupBroadcast(vec4(10.0), 8u);
|
||||
vec3 first = subgroupBroadcastFirst(vec3(20.0));
|
||||
uvec4 ballot_value = subgroupBallot(true);
|
||||
bool inverse_ballot_value = subgroupInverseBallot(ballot_value);
|
||||
bool bit_extracted = subgroupBallotBitExtract(uvec4(10u), 8u);
|
||||
uint bit_count = subgroupBallotBitCount(ballot_value);
|
||||
uint inclusive_bit_count = subgroupBallotInclusiveBitCount(ballot_value);
|
||||
uint exclusive_bit_count = subgroupBallotExclusiveBitCount(ballot_value);
|
||||
uint lsb = subgroupBallotFindLSB(ballot_value);
|
||||
uint msb = subgroupBallotFindMSB(ballot_value);
|
||||
|
||||
// shuffle
|
||||
uint shuffled = subgroupShuffle(10u, 8u);
|
||||
uint shuffled_xor = subgroupShuffleXor(30u, 8u);
|
||||
|
||||
// shuffle relative
|
||||
uint shuffled_up = subgroupShuffleUp(20u, 4u);
|
||||
uint shuffled_down = subgroupShuffleDown(20u, 4u);
|
||||
|
||||
// vote
|
||||
bool has_all = subgroupAll(true);
|
||||
bool has_any = subgroupAny(true);
|
||||
bool has_equal = subgroupAllEqual(0);
|
||||
has_equal = subgroupAllEqual(true);
|
||||
|
||||
// arithmetic
|
||||
vec4 added = subgroupAdd(vec4(20.0));
|
||||
ivec4 iadded = subgroupAdd(ivec4(20));
|
||||
vec4 multiplied = subgroupMul(vec4(20.0));
|
||||
ivec4 imultiplied = subgroupMul(ivec4(20));
|
||||
vec4 lo = subgroupMin(vec4(20.0));
|
||||
vec4 hi = subgroupMax(vec4(20.0));
|
||||
ivec4 slo = subgroupMin(ivec4(20));
|
||||
ivec4 shi = subgroupMax(ivec4(20));
|
||||
uvec4 ulo = subgroupMin(uvec4(20));
|
||||
uvec4 uhi = subgroupMax(uvec4(20));
|
||||
uvec4 anded = subgroupAnd(ballot_value);
|
||||
uvec4 ored = subgroupOr(ballot_value);
|
||||
uvec4 xored = subgroupXor(ballot_value);
|
||||
|
||||
added = subgroupInclusiveAdd(added);
|
||||
iadded = subgroupInclusiveAdd(iadded);
|
||||
multiplied = subgroupInclusiveMul(multiplied);
|
||||
imultiplied = subgroupInclusiveMul(imultiplied);
|
||||
//lo = subgroupInclusiveMin(lo); // FIXME: Unsupported by Metal
|
||||
//hi = subgroupInclusiveMax(hi);
|
||||
//slo = subgroupInclusiveMin(slo);
|
||||
//shi = subgroupInclusiveMax(shi);
|
||||
//ulo = subgroupInclusiveMin(ulo);
|
||||
//uhi = subgroupInclusiveMax(uhi);
|
||||
//anded = subgroupInclusiveAnd(anded);
|
||||
//ored = subgroupInclusiveOr(ored);
|
||||
//xored = subgroupInclusiveXor(ored);
|
||||
//added = subgroupExclusiveAdd(lo);
|
||||
|
||||
added = subgroupExclusiveAdd(multiplied);
|
||||
multiplied = subgroupExclusiveMul(multiplied);
|
||||
iadded = subgroupExclusiveAdd(imultiplied);
|
||||
imultiplied = subgroupExclusiveMul(imultiplied);
|
||||
//lo = subgroupExclusiveMin(lo); // FIXME: Unsupported by Metal
|
||||
//hi = subgroupExclusiveMax(hi);
|
||||
//ulo = subgroupExclusiveMin(ulo);
|
||||
//uhi = subgroupExclusiveMax(uhi);
|
||||
//slo = subgroupExclusiveMin(slo);
|
||||
//shi = subgroupExclusiveMax(shi);
|
||||
//anded = subgroupExclusiveAnd(anded);
|
||||
//ored = subgroupExclusiveOr(ored);
|
||||
//xored = subgroupExclusiveXor(ored);
|
||||
|
||||
// clustered
|
||||
added = subgroupClusteredAdd(added, 4u);
|
||||
multiplied = subgroupClusteredMul(multiplied, 4u);
|
||||
iadded = subgroupClusteredAdd(iadded, 4u);
|
||||
imultiplied = subgroupClusteredMul(imultiplied, 4u);
|
||||
lo = subgroupClusteredMin(lo, 4u);
|
||||
hi = subgroupClusteredMax(hi, 4u);
|
||||
ulo = subgroupClusteredMin(ulo, 4u);
|
||||
uhi = subgroupClusteredMax(uhi, 4u);
|
||||
slo = subgroupClusteredMin(slo, 4u);
|
||||
shi = subgroupClusteredMax(shi, 4u);
|
||||
anded = subgroupClusteredAnd(anded, 4u);
|
||||
ored = subgroupClusteredOr(ored, 4u);
|
||||
xored = subgroupClusteredXor(xored, 4u);
|
||||
|
||||
// quad
|
||||
vec4 swap_horiz = subgroupQuadSwapHorizontal(vec4(20.0));
|
||||
vec4 swap_vertical = subgroupQuadSwapVertical(vec4(20.0));
|
||||
vec4 swap_diagonal = subgroupQuadSwapDiagonal(vec4(20.0));
|
||||
vec4 quad_broadcast = subgroupQuadBroadcast(vec4(20.0), 3u);
|
||||
}
|
|
@ -6183,7 +6183,10 @@ void CompilerMSL::entry_point_args_builtin(string &ep_args)
|
|||
bi_type != BuiltInSubgroupGeMask && bi_type != BuiltInSubgroupGtMask &&
|
||||
bi_type != BuiltInSubgroupLeMask && bi_type != BuiltInSubgroupLtMask &&
|
||||
((get_execution_model() == ExecutionModelFragment && msl_options.multiview) ||
|
||||
bi_type != BuiltInViewIndex))
|
||||
bi_type != BuiltInViewIndex) &&
|
||||
(get_execution_model() == ExecutionModelGLCompute ||
|
||||
(get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2)) ||
|
||||
(bi_type != BuiltInSubgroupLocalInvocationId && bi_type != BuiltInSubgroupSize)))
|
||||
{
|
||||
if (!ep_args.empty())
|
||||
ep_args += ", ";
|
||||
|
@ -6590,6 +6593,50 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
|
|||
entry_func.fixup_hooks_in.push_back([=]() { statement(tc, ".y = 1.0 - ", tc, ".y;"); });
|
||||
}
|
||||
break;
|
||||
case BuiltInSubgroupLocalInvocationId:
|
||||
// This is natively supported in compute shaders.
|
||||
if (get_execution_model() == ExecutionModelGLCompute)
|
||||
break;
|
||||
|
||||
// This is natively supported in fragment shaders in MSL 2.2.
|
||||
if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2))
|
||||
break;
|
||||
|
||||
if (msl_options.is_ios())
|
||||
SPIRV_CROSS_THROW(
|
||||
"SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.2 on iOS.");
|
||||
|
||||
if (!msl_options.supports_msl_version(2, 1))
|
||||
SPIRV_CROSS_THROW(
|
||||
"SubgroupLocalInvocationId cannot be used outside of compute shaders before MSL 2.1.");
|
||||
|
||||
// Shaders other than compute shaders don't support the SIMD-group
|
||||
// builtins directly, but we can emulate them using the SIMD-group
|
||||
// functions. This might break if some of the subgroup terminated
|
||||
// before reaching the entry point.
|
||||
entry_func.fixup_hooks_in.push_back([=]() {
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id),
|
||||
" = simd_prefix_exclusive_sum(1);");
|
||||
});
|
||||
break;
|
||||
case BuiltInSubgroupSize:
|
||||
// This is natively supported in compute shaders.
|
||||
if (get_execution_model() == ExecutionModelGLCompute)
|
||||
break;
|
||||
|
||||
// This is natively supported in fragment shaders in MSL 2.2.
|
||||
if (get_execution_model() == ExecutionModelFragment && msl_options.supports_msl_version(2, 2))
|
||||
break;
|
||||
|
||||
if (msl_options.is_ios())
|
||||
SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders on iOS.");
|
||||
|
||||
if (!msl_options.supports_msl_version(2, 1))
|
||||
SPIRV_CROSS_THROW("SubgroupSize cannot be used outside of compute shaders before Metal 2.1.");
|
||||
|
||||
entry_func.fixup_hooks_in.push_back(
|
||||
[=]() { statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = simd_sum(1);"); });
|
||||
break;
|
||||
case BuiltInSubgroupEqMask:
|
||||
if (msl_options.is_ios())
|
||||
SPIRV_CROSS_THROW("Subgroup ballot functionality is unavailable on iOS.");
|
||||
|
@ -6597,7 +6644,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
|
|||
SPIRV_CROSS_THROW("Subgroup ballot functionality requires Metal 2.1.");
|
||||
entry_func.fixup_hooks_in.push_back([=]() {
|
||||
statement(builtin_type_decl(bi_type), " ", to_expression(var_id), " = ",
|
||||
builtin_subgroup_invocation_id_id, " > 32 ? uint4(0, (1 << (",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " > 32 ? uint4(0, (1 << (",
|
||||
to_expression(builtin_subgroup_invocation_id_id), " - 32)), uint2(0)) : uint4(1 << ",
|
||||
to_expression(builtin_subgroup_invocation_id_id), ", uint3(0));");
|
||||
});
|
||||
|
|
Загрузка…
Ссылка в новой задаче