MSL: Support inline uniform blocks in argument buffers.
Here, the inline uniform block is explicit: we instantiate the buffer block itself in the argument buffer, instead of a pointer to the buffer. I just hope this will work with the `MTLArgumentDescriptor` API... Note that Metal recursively assigns individual members of embedded structs IDs. This means for automatic assignment that we have to calculate the binding stride for a given buffer block. For MoltenVK, we'll simply increment the ID by the size of the inline uniform block. Then the later IDs will never conflict with the inline uniform block. We can get away with this because Metal doesn't require that IDs be contiguous, only monotonically increasing.
This commit is contained in:
Родитель
f9376058ce
Коммит
fedbc35315
|
@ -315,7 +315,7 @@ if (SPIRV_CROSS_STATIC)
|
|||
endif()
|
||||
|
||||
set(spirv-cross-abi-major 0)
|
||||
set(spirv-cross-abi-minor 23)
|
||||
set(spirv-cross-abi-minor 24)
|
||||
set(spirv-cross-abi-patch 0)
|
||||
|
||||
if (SPIRV_CROSS_SHARED)
|
||||
|
|
11
main.cpp
11
main.cpp
|
@ -529,6 +529,7 @@ struct CLIArguments
|
|||
SmallVector<uint32_t> msl_discrete_descriptor_sets;
|
||||
SmallVector<uint32_t> msl_device_argument_buffers;
|
||||
SmallVector<pair<uint32_t, uint32_t>> msl_dynamic_buffers;
|
||||
SmallVector<pair<uint32_t, uint32_t>> msl_inline_uniform_blocks;
|
||||
SmallVector<PLSArg> pls_in;
|
||||
SmallVector<PLSArg> pls_out;
|
||||
SmallVector<Remap> remaps;
|
||||
|
@ -612,6 +613,7 @@ static void print_help()
|
|||
"\t[--msl-view-index-from-device-index]\n"
|
||||
"\t[--msl-dispatch-base]\n"
|
||||
"\t[--msl-dynamic-buffer <set index> <binding>]\n"
|
||||
"\t[--msl-inline-uniform-block <set index> <binding>]\n"
|
||||
"\t[--msl-decoration-binding]\n"
|
||||
"\t[--msl-force-active-argument-buffer-resources]\n"
|
||||
"\t[--hlsl]\n"
|
||||
|
@ -812,6 +814,8 @@ static string compile_iteration(const CLIArguments &args, std::vector<uint32_t>
|
|||
uint32_t i = 0;
|
||||
for (auto &v : args.msl_dynamic_buffers)
|
||||
msl_comp->add_dynamic_buffer(v.first, v.second, i++);
|
||||
for (auto &v : args.msl_inline_uniform_blocks)
|
||||
msl_comp->add_inline_uniform_block(v.first, v.second);
|
||||
}
|
||||
else if (args.hlsl)
|
||||
compiler.reset(new CompilerHLSL(move(spirv_parser.get_parsed_ir())));
|
||||
|
@ -1153,6 +1157,13 @@ static int main_inner(int argc, char *argv[])
|
|||
cbs.add("--msl-decoration-binding", [&args](CLIParser &) { args.msl_decoration_binding = true; });
|
||||
cbs.add("--msl-force-active-argument-buffer-resources",
|
||||
[&args](CLIParser &) { args.msl_force_active_argument_buffer_resources = true; });
|
||||
cbs.add("--msl-inline-uniform-block", [&args](CLIParser &parser) {
|
||||
args.msl_argument_buffers = true;
|
||||
// Make sure next_uint() is called in-order.
|
||||
uint32_t desc_set = parser.next_uint();
|
||||
uint32_t binding = parser.next_uint();
|
||||
args.msl_inline_uniform_blocks.push_back(make_pair(desc_set, binding));
|
||||
});
|
||||
cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); });
|
||||
cbs.add("--rename-entry-point", [&args](CLIParser &parser) {
|
||||
auto old_name = parser.next_string();
|
||||
|
|
|
@ -0,0 +1,53 @@
|
|||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
typedef packed_float4 packed_float4x4[4];
|
||||
|
||||
struct Baz
|
||||
{
|
||||
int f;
|
||||
int g;
|
||||
};
|
||||
|
||||
struct X
|
||||
{
|
||||
int x;
|
||||
int y;
|
||||
float z;
|
||||
};
|
||||
|
||||
struct Foo
|
||||
{
|
||||
int a;
|
||||
int b;
|
||||
packed_float4x4 c;
|
||||
X x[2];
|
||||
};
|
||||
|
||||
struct Bar
|
||||
{
|
||||
int d;
|
||||
int e;
|
||||
};
|
||||
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(3u, 3u, 2u);
|
||||
|
||||
struct spvDescriptorSetBuffer0
|
||||
{
|
||||
Foo m_32 [[id(0)]];
|
||||
constant Bar* m_38 [[id(12)]];
|
||||
};
|
||||
|
||||
struct spvDescriptorSetBuffer1
|
||||
{
|
||||
device Baz* baz [[id(0)]][3];
|
||||
};
|
||||
|
||||
kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
spvDescriptorSet1.baz[gl_GlobalInvocationID.x]->f = spvDescriptorSet0.m_32.a + (*spvDescriptorSet0.m_38).d;
|
||||
spvDescriptorSet1.baz[gl_GlobalInvocationID.x]->g = spvDescriptorSet0.m_32.b * (*spvDescriptorSet0.m_38).e;
|
||||
}
|
||||
|
|
@ -0,0 +1,54 @@
|
|||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
typedef packed_float4 packed_float4x4[4];
|
||||
|
||||
struct Baz
|
||||
{
|
||||
int f;
|
||||
int g;
|
||||
};
|
||||
|
||||
struct X
|
||||
{
|
||||
int x;
|
||||
int y;
|
||||
float z;
|
||||
};
|
||||
|
||||
struct Foo
|
||||
{
|
||||
int a;
|
||||
int b;
|
||||
packed_float4x4 c;
|
||||
X x[2];
|
||||
};
|
||||
|
||||
struct Bar
|
||||
{
|
||||
int d;
|
||||
int e;
|
||||
};
|
||||
|
||||
constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(3u, 3u, 2u);
|
||||
|
||||
struct spvDescriptorSetBuffer0
|
||||
{
|
||||
Foo m_32 [[id(0)]];
|
||||
constant Bar* m_38 [[id(12)]];
|
||||
};
|
||||
|
||||
struct spvDescriptorSetBuffer1
|
||||
{
|
||||
device Baz* baz [[id(0)]][3];
|
||||
};
|
||||
|
||||
kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]])
|
||||
{
|
||||
uint3 coords = gl_GlobalInvocationID;
|
||||
spvDescriptorSet1.baz[coords.x]->f = spvDescriptorSet0.m_32.a + (*spvDescriptorSet0.m_38).d;
|
||||
spvDescriptorSet1.baz[coords.x]->g = spvDescriptorSet0.m_32.b * (*spvDescriptorSet0.m_38).e;
|
||||
}
|
||||
|
|
@ -0,0 +1,37 @@
|
|||
#version 450
|
||||
#extension GL_EXT_scalar_block_layout : require
|
||||
layout(local_size_x = 3, local_size_y = 3, local_size_z = 2) in;
|
||||
|
||||
struct X
|
||||
{
|
||||
int x;
|
||||
int y;
|
||||
float z;
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 0, scalar) uniform Foo
|
||||
{
|
||||
int a;
|
||||
int b;
|
||||
mat4 c;
|
||||
X x[2];
|
||||
};
|
||||
|
||||
layout(set = 0, binding = 1) uniform Bar
|
||||
{
|
||||
int d;
|
||||
int e;
|
||||
};
|
||||
|
||||
layout(set = 1, binding = 2) buffer Baz
|
||||
{
|
||||
int f;
|
||||
int g;
|
||||
} baz[3];
|
||||
|
||||
void main()
|
||||
{
|
||||
uvec3 coords = gl_GlobalInvocationID;
|
||||
baz[coords.x].f = a + d;
|
||||
baz[coords.x].g = b * e;
|
||||
}
|
|
@ -1029,6 +1029,26 @@ spvc_result spvc_compiler_msl_add_dynamic_buffer(spvc_compiler compiler, unsigne
|
|||
#endif
|
||||
}
|
||||
|
||||
spvc_result spvc_compiler_msl_add_inline_uniform_block(spvc_compiler compiler, unsigned desc_set, unsigned binding)
|
||||
{
|
||||
#if SPIRV_CROSS_C_API_MSL
|
||||
if (compiler->backend != SPVC_BACKEND_MSL)
|
||||
{
|
||||
compiler->context->report_error("MSL function used on a non-MSL backend.");
|
||||
return SPVC_ERROR_INVALID_ARGUMENT;
|
||||
}
|
||||
|
||||
auto &msl = *static_cast<CompilerMSL *>(compiler->compiler.get());
|
||||
msl.add_inline_uniform_block(desc_set, binding);
|
||||
return SPVC_SUCCESS;
|
||||
#else
|
||||
(void)binding;
|
||||
(void)desc_set;
|
||||
compiler->context->report_error("MSL function used on a non-MSL backend.");
|
||||
return SPVC_ERROR_INVALID_ARGUMENT;
|
||||
#endif
|
||||
}
|
||||
|
||||
spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, unsigned desc_set)
|
||||
{
|
||||
#if SPIRV_CROSS_C_API_MSL
|
||||
|
|
|
@ -33,7 +33,7 @@ extern "C" {
|
|||
/* Bumped if ABI or API breaks backwards compatibility. */
|
||||
#define SPVC_C_API_VERSION_MAJOR 0
|
||||
/* Bumped if APIs or enumerations are added in a backwards compatible way. */
|
||||
#define SPVC_C_API_VERSION_MINOR 23
|
||||
#define SPVC_C_API_VERSION_MINOR 24
|
||||
/* Bumped if internal implementation details change. */
|
||||
#define SPVC_C_API_VERSION_PATCH 0
|
||||
|
||||
|
@ -691,6 +691,8 @@ SPVC_PUBLIC_API unsigned spvc_compiler_msl_get_automatic_resource_binding_second
|
|||
|
||||
SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_dynamic_buffer(spvc_compiler compiler, unsigned desc_set, unsigned binding, unsigned index);
|
||||
|
||||
SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_inline_uniform_block(spvc_compiler compiler, unsigned desc_set, unsigned binding);
|
||||
|
||||
/*
|
||||
* Reflect resources.
|
||||
* Maps almost 1:1 to C++ API.
|
||||
|
|
|
@ -68,6 +68,12 @@ void CompilerMSL::add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32
|
|||
buffers_requiring_dynamic_offset[pair] = { index, 0 };
|
||||
}
|
||||
|
||||
void CompilerMSL::add_inline_uniform_block(uint32_t desc_set, uint32_t binding)
|
||||
{
|
||||
SetBindingPair pair = { desc_set, binding };
|
||||
inline_uniform_blocks.insert(pair);
|
||||
}
|
||||
|
||||
void CompilerMSL::add_discrete_descriptor_set(uint32_t desc_set)
|
||||
{
|
||||
if (desc_set < kMaxArgumentBuffers)
|
||||
|
@ -9850,6 +9856,31 @@ void CompilerMSL::fix_up_shader_inputs_outputs()
|
|||
});
|
||||
}
|
||||
|
||||
// Returns the number of IDs that Metal would assign to an inline uniform block in an argument buffer.
|
||||
uint32_t CompilerMSL::get_inline_uniform_block_binding_stride(SPIRType &type)
|
||||
{
|
||||
// We need this information now.
|
||||
mark_scalar_layout_structs(type);
|
||||
unordered_set<uint32_t> aligned_structs;
|
||||
align_struct(type, aligned_structs);
|
||||
uint32_t binding_stride = 0;
|
||||
for (uint32_t i = 0; i < uint32_t(type.member_types.size()); i++)
|
||||
{
|
||||
uint32_t member_type_id = type.member_types[i];
|
||||
uint32_t member_binding_stride = 1;
|
||||
auto &member_type = get<SPIRType>(member_type_id);
|
||||
if (member_type.basetype == SPIRType::Struct)
|
||||
member_binding_stride = get_inline_uniform_block_binding_stride(member_type);
|
||||
else if (member_is_packed_physical_type(type, i) && is_matrix(member_type))
|
||||
// Packed matrices are represented as arrays, so they get multiple IDs.
|
||||
member_binding_stride = member_type.columns;
|
||||
for (uint32_t j = 0; j < uint32_t(member_type.array.size()); j++)
|
||||
member_binding_stride *= to_array_size_literal(member_type, j);
|
||||
binding_stride += member_binding_stride;
|
||||
}
|
||||
return binding_stride;
|
||||
}
|
||||
|
||||
// Returns the Metal index of the resource of the specified type as used by the specified variable.
|
||||
uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane)
|
||||
{
|
||||
|
@ -9912,7 +9943,14 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base
|
|||
// If we did not explicitly remap, allocate bindings on demand.
|
||||
// We cannot reliably use Binding decorations since SPIR-V and MSL's binding models are very different.
|
||||
|
||||
bool allocate_argument_buffer_ids = false;
|
||||
|
||||
if (var.storage != StorageClassPushConstant)
|
||||
allocate_argument_buffer_ids = descriptor_set_is_argument_buffer(var_desc_set);
|
||||
|
||||
uint32_t binding_stride = 1;
|
||||
if (inline_uniform_blocks.count(SetBindingPair{ var_desc_set, var_binding }))
|
||||
binding_stride = get_inline_uniform_block_binding_stride(get_variable_data_type(var));
|
||||
auto &type = get<SPIRType>(var.basetype);
|
||||
for (uint32_t i = 0; i < uint32_t(type.array.size()); i++)
|
||||
binding_stride *= to_array_size_literal(type, i);
|
||||
|
@ -9922,20 +9960,11 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base
|
|||
// If a binding has not been specified, revert to incrementing resource indices.
|
||||
uint32_t resource_index;
|
||||
|
||||
bool allocate_argument_buffer_ids = false;
|
||||
uint32_t desc_set = 0;
|
||||
|
||||
if (var.storage != StorageClassPushConstant)
|
||||
{
|
||||
desc_set = get_decoration(var.self, DecorationDescriptorSet);
|
||||
allocate_argument_buffer_ids = descriptor_set_is_argument_buffer(desc_set);
|
||||
}
|
||||
|
||||
if (allocate_argument_buffer_ids)
|
||||
{
|
||||
// Allocate from a flat ID binding space.
|
||||
resource_index = next_metal_resource_ids[desc_set];
|
||||
next_metal_resource_ids[desc_set] += binding_stride;
|
||||
resource_index = next_metal_resource_ids[var_desc_set];
|
||||
next_metal_resource_ids[var_desc_set] += binding_stride;
|
||||
}
|
||||
else
|
||||
{
|
||||
|
@ -12676,6 +12705,12 @@ void CompilerMSL::analyze_argument_buffers()
|
|||
buffer_type.member_types.push_back(var.basetype);
|
||||
buffers_requiring_dynamic_offset[pair].second = var.self;
|
||||
}
|
||||
else if (inline_uniform_blocks.count(pair))
|
||||
{
|
||||
// Put the buffer block itself into the argument buffer.
|
||||
buffer_type.member_types.push_back(get_variable_data_type_id(var));
|
||||
set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name));
|
||||
}
|
||||
else
|
||||
{
|
||||
// Resources will be declared as pointers not references, so automatically dereference as appropriate.
|
||||
|
|
|
@ -432,6 +432,13 @@ public:
|
|||
// an offset taken from the dynamic offset buffer.
|
||||
void add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index);
|
||||
|
||||
// desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource
|
||||
// in this shader. This function marks that resource as an inline uniform block
|
||||
// (VK_DESCRIPTOR_TYPE_INLINE_UNIFORM_BLOCK_EXT). This function only has any effect if argument buffers
|
||||
// are enabled. If so, the buffer block will be directly embedded into the argument
|
||||
// buffer, instead of being referenced indirectly via pointer.
|
||||
void add_inline_uniform_block(uint32_t desc_set, uint32_t binding);
|
||||
|
||||
// When using MSL argument buffers, we can force "classic" MSL 1.0 binding schemes for certain descriptor sets.
|
||||
// This corresponds to VK_KHR_push_descriptor in Vulkan.
|
||||
void add_discrete_descriptor_set(uint32_t desc_set);
|
||||
|
@ -854,6 +861,9 @@ protected:
|
|||
// Must be ordered since array is in a specific order.
|
||||
std::map<SetBindingPair, std::pair<uint32_t, uint32_t>> buffers_requiring_dynamic_offset;
|
||||
|
||||
std::unordered_set<SetBindingPair, InternalHasher> inline_uniform_blocks;
|
||||
uint32_t get_inline_uniform_block_binding_stride(SPIRType &type);
|
||||
|
||||
uint32_t argument_buffer_ids[kMaxArgumentBuffers];
|
||||
uint32_t argument_buffer_discrete_mask = 0;
|
||||
uint32_t argument_buffer_device_storage_mask = 0;
|
||||
|
|
|
@ -244,6 +244,11 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths):
|
|||
msl_args.append('--msl-dynamic-buffer')
|
||||
msl_args.append('1')
|
||||
msl_args.append('2')
|
||||
if '.inline-block.' in shader:
|
||||
# Arbitrary for testing purposes.
|
||||
msl_args.append('--msl-inline-uniform-block')
|
||||
msl_args.append('0')
|
||||
msl_args.append('0')
|
||||
if '.device-argument-buffer.' in shader:
|
||||
msl_args.append('--msl-device-argument-buffer')
|
||||
msl_args.append('0')
|
||||
|
|
Загрузка…
Ссылка в новой задаче