MSL: Support scalar block layout.

Relaxed block layout relaxed the restrictions on vector alignment,
allowing them to be aligned on scalar boundaries. Scalar block layout
relaxes this further, allowing *any* member to be aligned on a scalar
boundary. The requirement that a vector not improperly straddle a
16-byte boundary is also relaxed.

I've also added a test showing that `std430` layout works with UBOs.

I'm troubled by the dual meaning of the `Packed` extended decoration. In
some instances (struct, `float[]`, and `vec2[]` members), it actually
means the exact opposite, that the member needs extra padding. This is
especially problematic for `vec2[]`, because now we need to distinguish
the two cases by checking the array stride. I wonder if this should
actually be split into two decorations.
This commit is contained in:
Chip Davis 2019-07-09 20:28:02 -05:00
Родитель 8aa6731925
Коммит e5fa7edfd6
8 изменённых файлов: 580 добавлений и 27 удалений

Просмотреть файл

@ -0,0 +1,156 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
typedef float3x2 packed_float2x3;
struct S0
{
packed_float2 a[1];
float b;
};
struct S1
{
packed_float3 a;
float b;
};
struct S2
{
packed_float3 a[1];
float b;
};
struct S3
{
packed_float2 a;
float b;
};
struct S4
{
float2 c;
};
struct Content
{
S0 m0s[1];
S1 m1s[1];
S2 m2s[1];
S0 m0;
S1 m1;
S2 m2;
S3 m3;
float m4;
S4 m3s[8];
};
struct SSBO1
{
Content content;
Content content1[2];
Content content2;
float2x2 m0;
float2x2 m1;
packed_float2x3 m2[4];
float3x2 m3;
float2x2 m4;
float2x2 m5[9];
packed_float2x3 m6[4][2];
float3x2 m7;
float array[1];
};
struct S0_1
{
float4 a[1];
float b;
};
struct S1_1
{
packed_float3 a;
float b;
};
struct S2_1
{
float3 a[1];
float b;
};
struct S3_1
{
float2 a;
float b;
};
struct S4_1
{
float2 c;
};
struct Content_1
{
S0_1 m0s[1];
S1_1 m1s[1];
S2_1 m2s[1];
S0_1 m0;
S1_1 m1;
S2_1 m2;
S3_1 m3;
float m4;
char _m8_pad[12];
/* FIXME: A padded struct is needed here. If you see this message, file a bug! */ S4_1 m3s[8];
};
struct SSBO0
{
Content_1 content;
Content_1 content1[2];
Content_1 content2;
float2x2 m0;
char _m4_pad[16];
float2x2 m1;
char _m5_pad[16];
float2x3 m2[4];
float3x2 m3;
char _m7_pad[24];
float2x2 m4;
char _m8_pad[16];
float2x2 m5[9];
float2x3 m6[4][2];
float3x2 m7;
float4 array[1];
};
kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]])
{
ssbo_430.content.m0s[0].a[0] = ssbo_140.content.m0s[0].a[0].xy;
ssbo_430.content.m0s[0].b = ssbo_140.content.m0s[0].b;
ssbo_430.content.m1s[0].a = float3(ssbo_140.content.m1s[0].a);
ssbo_430.content.m1s[0].b = ssbo_140.content.m1s[0].b;
ssbo_430.content.m2s[0].a[0] = ssbo_140.content.m2s[0].a[0];
ssbo_430.content.m2s[0].b = ssbo_140.content.m2s[0].b;
ssbo_430.content.m0.a[0] = ssbo_140.content.m0.a[0].xy;
ssbo_430.content.m0.b = ssbo_140.content.m0.b;
ssbo_430.content.m1.a = float3(ssbo_140.content.m1.a);
ssbo_430.content.m1.b = ssbo_140.content.m1.b;
ssbo_430.content.m2.a[0] = ssbo_140.content.m2.a[0];
ssbo_430.content.m2.b = ssbo_140.content.m2.b;
ssbo_430.content.m3.a = ssbo_140.content.m3.a;
ssbo_430.content.m3.b = ssbo_140.content.m3.b;
ssbo_430.content.m4 = ssbo_140.content.m4;
ssbo_430.content.m3s[0].c = ssbo_140.content.m3s[0].c;
ssbo_430.content.m3s[1].c = ssbo_140.content.m3s[1].c;
ssbo_430.content.m3s[2].c = ssbo_140.content.m3s[2].c;
ssbo_430.content.m3s[3].c = ssbo_140.content.m3s[3].c;
ssbo_430.content.m3s[4].c = ssbo_140.content.m3s[4].c;
ssbo_430.content.m3s[5].c = ssbo_140.content.m3s[5].c;
ssbo_430.content.m3s[6].c = ssbo_140.content.m3s[6].c;
ssbo_430.content.m3s[7].c = ssbo_140.content.m3s[7].c;
ssbo_430.content.m1.a = float2x3(ssbo_430.m2[1]) * float2(ssbo_430.content.m0.a[0]);
}

Просмотреть файл

@ -0,0 +1,36 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct UBO
{
float a[1];
float2 b[2];
};
struct UBOEnhancedLayout
{
float c[1];
float2 d[2];
char _m2_pad[9976];
float e;
};
struct main0_out
{
float FragColor [[color(0)]];
};
struct main0_in
{
int vIndex [[user(locn0)]];
};
fragment main0_out main0(main0_in in [[stage_in]], constant UBO& _17 [[buffer(0)]], constant UBOEnhancedLayout& _30 [[buffer(1)]])
{
main0_out out = {};
out.FragColor = (_17.a[in.vIndex] + _30.c[in.vIndex]) + _30.e;
return out;
}

Просмотреть файл

@ -0,0 +1,156 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
typedef float3x2 packed_float2x3;
struct S0
{
packed_float2 a[1];
float b;
};
struct S1
{
packed_float3 a;
float b;
};
struct S2
{
packed_float3 a[1];
float b;
};
struct S3
{
packed_float2 a;
float b;
};
struct S4
{
float2 c;
};
struct Content
{
S0 m0s[1];
S1 m1s[1];
S2 m2s[1];
S0 m0;
S1 m1;
S2 m2;
S3 m3;
float m4;
S4 m3s[8];
};
struct SSBO1
{
Content content;
Content content1[2];
Content content2;
float2x2 m0;
float2x2 m1;
packed_float2x3 m2[4];
float3x2 m3;
float2x2 m4;
float2x2 m5[9];
packed_float2x3 m6[4][2];
float3x2 m7;
float array[1];
};
struct S0_1
{
float4 a[1];
float b;
};
struct S1_1
{
packed_float3 a;
float b;
};
struct S2_1
{
float3 a[1];
float b;
};
struct S3_1
{
float2 a;
float b;
};
struct S4_1
{
float2 c;
};
struct Content_1
{
S0_1 m0s[1];
S1_1 m1s[1];
S2_1 m2s[1];
S0_1 m0;
S1_1 m1;
S2_1 m2;
S3_1 m3;
float m4;
char _m8_pad[12];
/* FIXME: A padded struct is needed here. If you see this message, file a bug! */ S4_1 m3s[8];
};
struct SSBO0
{
Content_1 content;
Content_1 content1[2];
Content_1 content2;
float2x2 m0;
char _m4_pad[16];
float2x2 m1;
char _m5_pad[16];
float2x3 m2[4];
float3x2 m3;
char _m7_pad[24];
float2x2 m4;
char _m8_pad[16];
float2x2 m5[9];
float2x3 m6[4][2];
float3x2 m7;
float4 array[1];
};
kernel void main0(device SSBO1& ssbo_430 [[buffer(0)]], device SSBO0& ssbo_140 [[buffer(1)]])
{
ssbo_430.content.m0s[0].a[0] = ssbo_140.content.m0s[0].a[0].xy;
ssbo_430.content.m0s[0].b = ssbo_140.content.m0s[0].b;
ssbo_430.content.m1s[0].a = float3(ssbo_140.content.m1s[0].a);
ssbo_430.content.m1s[0].b = ssbo_140.content.m1s[0].b;
ssbo_430.content.m2s[0].a[0] = ssbo_140.content.m2s[0].a[0];
ssbo_430.content.m2s[0].b = ssbo_140.content.m2s[0].b;
ssbo_430.content.m0.a[0] = ssbo_140.content.m0.a[0].xy;
ssbo_430.content.m0.b = ssbo_140.content.m0.b;
ssbo_430.content.m1.a = float3(ssbo_140.content.m1.a);
ssbo_430.content.m1.b = ssbo_140.content.m1.b;
ssbo_430.content.m2.a[0] = ssbo_140.content.m2.a[0];
ssbo_430.content.m2.b = ssbo_140.content.m2.b;
ssbo_430.content.m3.a = ssbo_140.content.m3.a;
ssbo_430.content.m3.b = ssbo_140.content.m3.b;
ssbo_430.content.m4 = ssbo_140.content.m4;
ssbo_430.content.m3s[0].c = ssbo_140.content.m3s[0].c;
ssbo_430.content.m3s[1].c = ssbo_140.content.m3s[1].c;
ssbo_430.content.m3s[2].c = ssbo_140.content.m3s[2].c;
ssbo_430.content.m3s[3].c = ssbo_140.content.m3s[3].c;
ssbo_430.content.m3s[4].c = ssbo_140.content.m3s[4].c;
ssbo_430.content.m3s[5].c = ssbo_140.content.m3s[5].c;
ssbo_430.content.m3s[6].c = ssbo_140.content.m3s[6].c;
ssbo_430.content.m3s[7].c = ssbo_140.content.m3s[7].c;
ssbo_430.content.m1.a = float2x3(ssbo_430.m2[1]) * float2(ssbo_430.content.m0.a[0]);
}

Просмотреть файл

@ -0,0 +1,36 @@
#include <metal_stdlib>
#include <simd/simd.h>
using namespace metal;
struct UBO
{
float a[1];
float2 b[2];
};
struct UBOEnhancedLayout
{
float c[1];
float2 d[2];
char _m2_pad[9976];
float e;
};
struct main0_out
{
float FragColor [[color(0)]];
};
struct main0_in
{
int vIndex [[user(locn0)]];
};
fragment main0_out main0(main0_in in [[stage_in]], constant UBO& _17 [[buffer(0)]], constant UBOEnhancedLayout& _30 [[buffer(1)]])
{
main0_out out = {};
out.FragColor = (_17.a[in.vIndex] + _30.c[in.vIndex]) + _30.e;
return out;
}

Просмотреть файл

@ -0,0 +1,89 @@
#version 310 es
#extension GL_EXT_scalar_block_layout : require
layout(local_size_x = 1) in;
struct S0
{
vec2 a[1];
float b;
};
struct S1
{
vec3 a;
float b;
};
struct S2
{
vec3 a[1];
float b;
};
struct S3
{
vec2 a;
float b;
};
struct S4
{
vec2 c;
};
struct Content
{
S0 m0s[1];
S1 m1s[1];
S2 m2s[1];
S0 m0;
S1 m1;
S2 m2;
S3 m3;
float m4;
S4 m3s[8];
};
layout(binding = 1, scalar) restrict buffer SSBO1
{
Content content;
Content content1[2];
Content content2;
layout(column_major) mat2 m0;
layout(column_major) mat2 m1;
layout(column_major) mat2x3 m2[4];
layout(column_major) mat3x2 m3;
layout(row_major) mat2 m4;
layout(row_major) mat2 m5[9];
layout(row_major) mat2x3 m6[4][2];
layout(row_major) mat3x2 m7;
float array[];
} ssbo_430;
layout(binding = 0, std140) restrict buffer SSBO0
{
Content content;
Content content1[2];
Content content2;
layout(column_major) mat2 m0;
layout(column_major) mat2 m1;
layout(column_major) mat2x3 m2[4];
layout(column_major) mat3x2 m3;
layout(row_major) mat2 m4;
layout(row_major) mat2 m5[9];
layout(row_major) mat2x3 m6[4][2];
layout(row_major) mat3x2 m7;
float array[];
} ssbo_140;
void main()
{
ssbo_430.content = ssbo_140.content;
ssbo_430.content.m1.a = ssbo_430.m2[1] * ssbo_430.content.m0.a[0]; // test packed matrix access
}

Просмотреть файл

@ -0,0 +1,23 @@
#version 450
#extension GL_EXT_scalar_block_layout : require
layout(std430, binding = 0) uniform UBO
{
float a[1];
vec2 b[2];
};
layout(std430, binding = 1) uniform UBOEnhancedLayout
{
float c[1];
vec2 d[2];
layout(offset = 10000) float e;
};
layout(location = 0) flat in int vIndex;
layout(location = 0) out float FragColor;
void main()
{
FragColor = a[vIndex] + c[vIndex] + e;
}

Просмотреть файл

@ -2447,7 +2447,7 @@ void CompilerMSL::align_struct(SPIRType &ib_type)
// Returns whether the specified struct member supports a packable type
// variation that is smaller than the unpacked variation of that type.
bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index, uint32_t base_offset)
{
// We've already marked it as packable
if (has_extended_member_decoration(ib_type.self, index, SPIRVCrossDecorationPacked))
@ -2470,16 +2470,66 @@ bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
return true;
}
// Check for array of struct, where the SPIR-V declares an array stride which is larger than the struct itself.
// This can happen for struct A { float a }; A a[]; in std140 layout.
// TODO: Emit a padded struct which can be used for this purpose.
if (is_array(mbr_type) && mbr_type.basetype == SPIRType::Struct)
uint32_t mbr_offset_curr = base_offset + get_member_decoration(ib_type.self, index, DecorationOffset);
if (mbr_type.basetype == SPIRType::Struct)
{
// If this is a struct type, check if any of its members need packing.
for (uint32_t i = 0; i < mbr_type.member_types.size(); i++)
{
if (is_member_packable(mbr_type, i, mbr_offset_curr))
{
set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPacked);
set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPackedType,
mbr_type.member_types[i]);
}
}
size_t declared_struct_size = get_declared_struct_size(mbr_type);
size_t alignment = get_declared_struct_member_alignment(ib_type, index);
declared_struct_size = (declared_struct_size + alignment - 1) & ~(alignment - 1);
if (type_struct_member_array_stride(ib_type, index) > declared_struct_size)
return true;
// Check for array of struct, where the SPIR-V declares an array stride which is larger than the struct itself.
// This can happen for struct A { float a }; A a[]; in std140 layout.
// TODO: Emit a padded struct which can be used for this purpose.
if (is_array(mbr_type))
{
size_t array_stride = type_struct_member_array_stride(ib_type, index);
if (array_stride > declared_struct_size)
return true;
if (array_stride < declared_struct_size)
{
// If the stride is *less* (i.e. more tightly packed), then
// we need to pack the members of the struct itself.
for (uint32_t i = 0; i < mbr_type.member_types.size(); i++)
{
if (is_member_packable(mbr_type, i, mbr_offset_curr + array_stride))
{
set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPacked);
set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPackedType,
mbr_type.member_types[i]);
}
}
}
}
else
{
// Pack if there is not enough space between this member and next.
if (index < ib_type.member_types.size() - 1)
{
uint32_t mbr_offset_next =
base_offset + get_member_decoration(ib_type.self, index + 1, DecorationOffset);
if (declared_struct_size > mbr_offset_next - mbr_offset_curr)
{
for (uint32_t i = 0; i < mbr_type.member_types.size(); i++)
{
if (is_member_packable(mbr_type, i, mbr_offset_next))
{
set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPacked);
set_extended_member_decoration(mbr_type.self, i, SPIRVCrossDecorationPackedType,
mbr_type.member_types[i]);
}
}
}
}
}
}
// TODO: Another sanity check for matrices. We currently do not support std140 matrices which need to be padded out per column.
@ -2490,9 +2540,10 @@ bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
if (mbr_type.vecsize == 1 || (is_matrix(mbr_type) && mbr_type.vecsize != 3))
return false;
// Only row-major matrices need to be packed.
if (is_matrix(mbr_type) && !has_member_decoration(ib_type.self, index, DecorationRowMajor))
return false;
// Pack if the member's offset doesn't conform to the type's usual
// alignment. For example, a float3 at offset 4.
if (mbr_offset_curr % get_declared_struct_member_alignment(ib_type, index))
return true;
if (is_array(mbr_type))
{
@ -2509,16 +2560,11 @@ bool CompilerMSL::is_member_packable(SPIRType &ib_type, uint32_t index)
}
else
{
uint32_t mbr_offset_curr = get_member_decoration(ib_type.self, index, DecorationOffset);
// For vectors, pack if the member's offset doesn't conform to the
// type's usual alignment. For example, a float3 at offset 4.
if (!is_matrix(mbr_type) && (mbr_offset_curr % unpacked_mbr_size))
return true;
// Pack if there is not enough space between this member and next.
// If last member, only pack if it's a row-major matrix.
if (index < ib_type.member_types.size() - 1)
{
uint32_t mbr_offset_next = get_member_decoration(ib_type.self, index + 1, DecorationOffset);
uint32_t mbr_offset_next = base_offset + get_member_decoration(ib_type.self, index + 1, DecorationOffset);
return unpacked_mbr_size > mbr_offset_next - mbr_offset_curr;
}
else
@ -2546,16 +2592,21 @@ void CompilerMSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_exp
{
// Special handling when storing to a float[] or float2[] in std140 layout.
auto &type = get<SPIRType>(get_extended_decoration(lhs_expression, SPIRVCrossDecorationPackedType));
uint32_t type_id = get_extended_decoration(lhs_expression, SPIRVCrossDecorationPackedType);
auto &type = get<SPIRType>(type_id);
string lhs = to_dereferenced_expression(lhs_expression);
string rhs = to_pointer_expression(rhs_expression);
uint32_t stride = get_decoration(type_id, DecorationArrayStride);
// Unpack the expression so we can store to it with a float or float2.
// It's still an l-value, so it's fine. Most other unpacking of expressions turn them into r-values instead.
if (is_scalar(type) && is_array(type))
lhs = enclose_expression(lhs) + ".x";
else if (is_vector(type) && type.vecsize == 2 && is_array(type))
lhs = enclose_expression(lhs) + ".xy";
if (is_array(type) && stride == 4 * type.width / 8)
{
// Unpack the expression so we can store to it with a float or float2.
// It's still an l-value, so it's fine. Most other unpacking of expressions turn them into r-values instead.
if (is_scalar(type))
lhs = enclose_expression(lhs) + ".x";
else if (is_vector(type) && type.vecsize == 2)
lhs = enclose_expression(lhs) + ".xy";
}
if (!optimize_read_modify_write(expression_type(rhs_expression), lhs, rhs))
statement(lhs, " = ", rhs, ";");
@ -2568,13 +2619,18 @@ void CompilerMSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_exp
string CompilerMSL::unpack_expression_type(string expr_str, const SPIRType &type, uint32_t packed_type_id)
{
const SPIRType *packed_type = nullptr;
uint32_t stride = 0;
if (packed_type_id)
{
packed_type = &get<SPIRType>(packed_type_id);
stride = get_decoration(packed_type_id, DecorationArrayStride);
}
// float[] and float2[] cases are really just padding, so directly swizzle from the backing float4 instead.
if (packed_type && is_array(*packed_type) && is_scalar(*packed_type))
if (packed_type && is_array(*packed_type) && is_scalar(*packed_type) && stride == 4 * packed_type->width / 8)
return enclose_expression(expr_str) + ".x";
else if (packed_type && is_array(*packed_type) && is_vector(*packed_type) && packed_type->vecsize == 2)
else if (packed_type && is_array(*packed_type) && is_vector(*packed_type) && packed_type->vecsize == 2 &&
stride == 4 * packed_type->width / 8)
return enclose_expression(expr_str) + ".xy";
else
return join(type_to_glsl(type), "(", expr_str, ")");
@ -5653,7 +5709,8 @@ string CompilerMSL::to_struct_member(const SPIRType &type, uint32_t member_type_
td_line += ";";
add_typedef_line(td_line);
}
else if (is_array(membertype) && membertype.vecsize <= 2 && membertype.basetype != SPIRType::Struct)
else if (is_array(membertype) && membertype.vecsize <= 2 && membertype.basetype != SPIRType::Struct &&
type_struct_member_array_stride(type, index) == 4 * membertype.width / 8)
{
// A "packed" float array, but we pad here instead to 4-vector.
override_type = membertype;

Просмотреть файл

@ -511,7 +511,7 @@ protected:
size_t get_declared_struct_member_alignment(const SPIRType &struct_type, uint32_t index) const;
std::string to_component_argument(uint32_t id);
void align_struct(SPIRType &ib_type);
bool is_member_packable(SPIRType &ib_type, uint32_t index);
bool is_member_packable(SPIRType &ib_type, uint32_t index, uint32_t base_offset = 0);
MSLStructMemberKey get_struct_member_key(uint32_t type_id, uint32_t index);
std::string get_argument_address_space(const SPIRVariable &argument);
std::string get_type_address_space(const SPIRType &type, uint32_t id);