diff --git a/3rdparty/spirv-cross/CMakeLists.txt b/3rdparty/spirv-cross/CMakeLists.txt index 5cfa74117..104bdd7e2 100644 --- a/3rdparty/spirv-cross/CMakeLists.txt +++ b/3rdparty/spirv-cross/CMakeLists.txt @@ -287,7 +287,7 @@ if (SPIRV_CROSS_STATIC) endif() set(spirv-cross-abi-major 0) -set(spirv-cross-abi-minor 16) +set(spirv-cross-abi-minor 18) set(spirv-cross-abi-patch 0) if (SPIRV_CROSS_SHARED) @@ -461,6 +461,14 @@ if (SPIRV_CROSS_CLI) target_link_libraries(spirv-cross-msl-resource-binding-test spirv-cross-c) set_target_properties(spirv-cross-msl-resource-binding-test PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}") + add_executable(spirv-cross-msl-ycbcr-conversion-test tests-other/msl_ycbcr_conversion_test.cpp) + target_link_libraries(spirv-cross-msl-ycbcr-conversion-test spirv-cross-c) + set_target_properties(spirv-cross-msl-ycbcr-conversion-test PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}") + + add_executable(spirv-cross-typed-id-test tests-other/typed_id_test.cpp) + target_link_libraries(spirv-cross-typed-id-test spirv-cross-core) + set_target_properties(spirv-cross-typed-id-test PROPERTIES LINK_FLAGS "${spirv-cross-link-flags}") + if (CMAKE_COMPILER_IS_GNUCXX OR (${CMAKE_CXX_COMPILER_ID} MATCHES "Clang")) target_compile_options(spirv-cross-c-api-test PRIVATE -std=c89 -Wall -Wextra) endif() @@ -475,6 +483,12 @@ if (SPIRV_CROSS_CLI) COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/msl_constexpr_test.spv) add_test(NAME spirv-cross-msl-resource-binding-test COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/msl_resource_binding.spv) + add_test(NAME spirv-cross-msl-ycbcr-conversion-test + COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/msl_ycbcr_conversion_test.spv) + add_test(NAME spirv-cross-msl-ycbcr-conversion-test-2 + COMMAND $ ${CMAKE_CURRENT_SOURCE_DIR}/tests-other/msl_ycbcr_conversion_test_2.spv) + add_test(NAME spirv-cross-typed-id-test + COMMAND $) add_test(NAME spirv-cross-test COMMAND ${PYTHON_EXECUTABLE} 
${CMAKE_CURRENT_SOURCE_DIR}/test_shaders.py --parallel ${spirv-cross-externals} diff --git a/3rdparty/spirv-cross/main.cpp b/3rdparty/spirv-cross/main.cpp index 3d97247e1..b05e81264 100644 --- a/3rdparty/spirv-cross/main.cpp +++ b/3rdparty/spirv-cross/main.cpp @@ -246,7 +246,7 @@ static void print_resources(const Compiler &compiler, const char *tag, const Sma compiler.get_decoration_bitset(type.self).get(DecorationBufferBlock); bool is_sized_block = is_block && (compiler.get_storage_class(res.id) == StorageClassUniform || compiler.get_storage_class(res.id) == StorageClassUniformConstant); - uint32_t fallback_id = !is_push_constant && is_block ? res.base_type_id : res.id; + ID fallback_id = !is_push_constant && is_block ? ID(res.base_type_id) : ID(res.id); uint32_t block_size = 0; uint32_t runtime_array_stride = 0; @@ -268,7 +268,7 @@ static void print_resources(const Compiler &compiler, const char *tag, const Sma for (auto arr : type.array) array = join("[", arr ? convert_to_string(arr) : "", "]") + array; - fprintf(stderr, " ID %03u : %s%s", res.id, + fprintf(stderr, " ID %03u : %s%s", uint32_t(res.id), !res.name.empty() ? 
res.name.c_str() : compiler.get_fallback_name(fallback_id).c_str(), array.c_str()); if (mask.get(DecorationLocation)) @@ -442,7 +442,7 @@ static void print_spec_constants(const Compiler &compiler) fprintf(stderr, "Specialization constants\n"); fprintf(stderr, "==================\n\n"); for (auto &c : spec_constants) - fprintf(stderr, "ID: %u, Spec ID: %u\n", c.id, c.constant_id); + fprintf(stderr, "ID: %u, Spec ID: %u\n", uint32_t(c.id), c.constant_id); fprintf(stderr, "==================\n\n"); } @@ -522,6 +522,7 @@ struct CLIArguments bool vulkan_glsl_disable_ext_samplerless_texture_functions = false; bool emit_line_directives = false; SmallVector msl_discrete_descriptor_sets; + SmallVector> msl_dynamic_buffers; SmallVector pls_in; SmallVector pls_out; SmallVector remaps; @@ -600,6 +601,7 @@ static void print_help() "\t[--msl-multiview]\n" "\t[--msl-view-index-from-device-index]\n" "\t[--msl-dispatch-base]\n" + "\t[--msl-dynamic-buffer ]\n" "\t[--hlsl]\n" "\t[--reflect]\n" "\t[--shader-model]\n" @@ -764,6 +766,9 @@ static string compile_iteration(const CLIArguments &args, std::vector msl_comp->set_msl_options(msl_opts); for (auto &v : args.msl_discrete_descriptor_sets) msl_comp->add_discrete_descriptor_set(v); + uint32_t i = 0; + for (auto &v : args.msl_dynamic_buffers) + msl_comp->add_dynamic_buffer(v.first, v.second, i++); } else if (args.hlsl) compiler.reset(new CompilerHLSL(move(spirv_parser.get_parsed_ir()))); @@ -1086,6 +1091,13 @@ static int main_inner(int argc, char *argv[]) cbs.add("--msl-view-index-from-device-index", [&args](CLIParser &) { args.msl_view_index_from_device_index = true; }); cbs.add("--msl-dispatch-base", [&args](CLIParser &) { args.msl_dispatch_base = true; }); + cbs.add("--msl-dynamic-buffer", [&args](CLIParser &parser) { + args.msl_argument_buffers = true; + // Make sure next_uint() is called in-order. 
+ uint32_t desc_set = parser.next_uint(); + uint32_t binding = parser.next_uint(); + args.msl_dynamic_buffers.push_back(make_pair(desc_set, binding)); + }); cbs.add("--extension", [&args](CLIParser &parser) { args.extensions.push_back(parser.next_string()); }); cbs.add("--rename-entry-point", [&args](CLIParser &parser) { auto old_name = parser.next_string(); diff --git a/3rdparty/spirv-cross/reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag b/3rdparty/spirv-cross/reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag new file mode 100644 index 000000000..8923f96a7 --- /dev/null +++ b/3rdparty/spirv-cross/reference/opt/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag @@ -0,0 +1,24 @@ +RWByteAddressBuffer _9 : register(u6, space0); +globallycoherent RasterizerOrderedByteAddressBuffer _42 : register(u3, space0); +RasterizerOrderedByteAddressBuffer _52 : register(u4, space0); +RWTexture2D img4 : register(u5, space0); +RasterizerOrderedTexture2D img : register(u0, space0); +RasterizerOrderedTexture2D img3 : register(u2, space0); +RasterizerOrderedTexture2D img2 : register(u1, space0); + +void frag_main() +{ + _9.Store(0, uint(0)); + img4[int2(1, 1)] = float4(1.0f, 0.0f, 0.0f, 1.0f); + img[int2(0, 0)] = img3[int2(0, 0)]; + uint _39; + InterlockedAdd(img2[int2(0, 0)], 1u, _39); + _42.Store(0, uint(int(_42.Load(0)) + 42)); + uint _55; + _42.InterlockedAnd(4, _52.Load(0), _55); +} + +void main() +{ + frag_main(); +} diff --git a/3rdparty/spirv-cross/reference/opt/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp b/3rdparty/spirv-cross/reference/opt/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp new file mode 100644 index 000000000..25a0233ae --- /dev/null +++ b/3rdparty/spirv-cross/reference/opt/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp @@ -0,0 +1,11 @@ +#include +#include + +using namespace metal; + +kernel void main0(texture2d uImage 
[[texture(0)]], texture2d uImageRead [[texture(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + int2 _17 = int2(gl_GlobalInvocationID.xy); + uImage.write(uImageRead.read(uint2(_17)), uint2(_17)); +} + diff --git a/3rdparty/spirv-cross/reference/opt/shaders-msl/comp/basic.dynamic-buffer.msl2.comp b/3rdparty/spirv-cross/reference/opt/shaders-msl/comp/basic.dynamic-buffer.msl2.comp new file mode 100644 index 000000000..b872f726d --- /dev/null +++ b/3rdparty/spirv-cross/reference/opt/shaders-msl/comp/basic.dynamic-buffer.msl2.comp @@ -0,0 +1,89 @@ +#include +#include + +using namespace metal; + +struct Baz +{ + int e; + int f; +}; + +struct Foo +{ + int a; + int b; +}; + +struct Bar +{ + int c; + int d; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(3u, 3u, 2u); + +struct spvDescriptorSetBuffer0 +{ + constant Foo* m_34 [[id(0)]]; + constant Bar* m_40 [[id(1)]]; +}; + +struct spvDescriptorSetBuffer1 +{ + device Baz* baz [[id(0)]][3][3][2]; +}; + +kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], constant uint* spvDynamicOffsets [[buffer(23)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + constant auto& _34 = *(constant Foo* )((constant char* )spvDescriptorSet0.m_34 + spvDynamicOffsets[0]); + device Baz* baz[3][3][2] = + { + { + { + (device Baz* )((device char* )spvDescriptorSet1.baz[0][0][0] + spvDynamicOffsets[1]), + (device Baz* )((device char* )spvDescriptorSet1.baz[0][0][1] + spvDynamicOffsets[2]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[0][1][0] + spvDynamicOffsets[3]), + (device Baz* )((device char* )spvDescriptorSet1.baz[0][1][1] + spvDynamicOffsets[4]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[0][2][0] + spvDynamicOffsets[5]), + (device Baz* )((device char* )spvDescriptorSet1.baz[0][2][1] + spvDynamicOffsets[6]), + }, + }, + { + { + (device Baz* )((device char* 
)spvDescriptorSet1.baz[1][0][0] + spvDynamicOffsets[7]), + (device Baz* )((device char* )spvDescriptorSet1.baz[1][0][1] + spvDynamicOffsets[8]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[1][1][0] + spvDynamicOffsets[9]), + (device Baz* )((device char* )spvDescriptorSet1.baz[1][1][1] + spvDynamicOffsets[10]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[1][2][0] + spvDynamicOffsets[11]), + (device Baz* )((device char* )spvDescriptorSet1.baz[1][2][1] + spvDynamicOffsets[12]), + }, + }, + { + { + (device Baz* )((device char* )spvDescriptorSet1.baz[2][0][0] + spvDynamicOffsets[13]), + (device Baz* )((device char* )spvDescriptorSet1.baz[2][0][1] + spvDynamicOffsets[14]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[2][1][0] + spvDynamicOffsets[15]), + (device Baz* )((device char* )spvDescriptorSet1.baz[2][1][1] + spvDynamicOffsets[16]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[2][2][0] + spvDynamicOffsets[17]), + (device Baz* )((device char* )spvDescriptorSet1.baz[2][2][1] + spvDynamicOffsets[18]), + }, + }, + }; + + baz[gl_GlobalInvocationID.x][gl_GlobalInvocationID.y][gl_GlobalInvocationID.z]->e = _34.a + (*spvDescriptorSet0.m_40).c; + baz[gl_GlobalInvocationID.x][gl_GlobalInvocationID.y][gl_GlobalInvocationID.z]->f = _34.b * (*spvDescriptorSet0.m_40).d; +} + diff --git a/3rdparty/spirv-cross/reference/opt/shaders-msl/comp/force-recompile-hooks.swizzle.comp b/3rdparty/spirv-cross/reference/opt/shaders-msl/comp/force-recompile-hooks.swizzle.comp index 7cb8913da..fbf4c4f7f 100644 --- a/3rdparty/spirv-cross/reference/opt/shaders-msl/comp/force-recompile-hooks.swizzle.comp +++ b/3rdparty/spirv-cross/reference/opt/shaders-msl/comp/force-recompile-hooks.swizzle.comp @@ -5,17 +5,6 @@ using namespace metal; -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { 
typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -28,6 +17,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -65,66 +65,6 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -// Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) -{ - if (sw) - { - switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF)) - { - case spvSwizzle::none: - break; - case spvSwizzle::zero: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - case spvSwizzle::red: - return t.gather(s, spvForward(params)..., component::x); - case spvSwizzle::green: - return t.gather(s, spvForward(params)..., component::y); - case spvSwizzle::blue: - return t.gather(s, spvForward(params)..., component::z); - case spvSwizzle::alpha: - return t.gather(s, spvForward(params)..., component::w); - } - } - switch (c) - { - case component::x: - return t.gather(s, spvForward(params)..., component::x); - case component::y: - return t.gather(s, spvForward(params)..., component::y); - case component::z: - return t.gather(s, spvForward(params)..., component::z); - case component::w: - return t.gather(s, spvForward(params)..., component::w); - } -} - -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... 
params, uint sw) -{ - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} - kernel void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture2d foo [[texture(0)]], texture2d bar [[texture(1)]], sampler fooSmplr [[sampler(0)]]) { constant uint& fooSwzl = spvSwizzleConstants[0]; diff --git a/3rdparty/spirv-cross/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag b/3rdparty/spirv-cross/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag index 2160e0be6..bf9195334 100644 --- a/3rdparty/spirv-cross/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag +++ b/3rdparty/spirv-cross/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag @@ -22,17 +22,6 @@ struct main0_in float2 vUV [[user(locn0)]]; }; -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -45,6 +34,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -82,66 +82,6 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -// Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... 
params, component c, uint sw) METAL_CONST_ARG(c) -{ - if (sw) - { - switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF)) - { - case spvSwizzle::none: - break; - case spvSwizzle::zero: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - case spvSwizzle::red: - return t.gather(s, spvForward(params)..., component::x); - case spvSwizzle::green: - return t.gather(s, spvForward(params)..., component::y); - case spvSwizzle::blue: - return t.gather(s, spvForward(params)..., component::z); - case spvSwizzle::alpha: - return t.gather(s, spvForward(params)..., component::w); - } - } - switch (c) - { - case component::x: - return t.gather(s, spvForward(params)..., component::x); - case component::y: - return t.gather(s, spvForward(params)..., component::y); - case component::z: - return t.gather(s, spvForward(params)..., component::z); - case component::w: - return t.gather(s, spvForward(params)..., component::w); - } -} - -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... 
params, uint sw) -{ - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} - fragment main0_out main0(main0_in in [[stage_in]], constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant uint* spvSwizzleConstants [[buffer(30)]], texture2d uSampler1 [[texture(0)]], sampler uSampler1Smplr [[sampler(0)]]) { main0_out out = {}; diff --git a/3rdparty/spirv-cross/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag b/3rdparty/spirv-cross/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag index 337abb99d..2f9aef138 100644 --- a/3rdparty/spirv-cross/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag +++ b/3rdparty/spirv-cross/reference/opt/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag @@ -15,17 +15,6 @@ struct main0_in float2 vUV [[user(locn0)]]; }; -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -38,6 +27,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -75,66 +75,6 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -// Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... 
params, component c, uint sw) METAL_CONST_ARG(c) -{ - if (sw) - { - switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF)) - { - case spvSwizzle::none: - break; - case spvSwizzle::zero: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - case spvSwizzle::red: - return t.gather(s, spvForward(params)..., component::x); - case spvSwizzle::green: - return t.gather(s, spvForward(params)..., component::y); - case spvSwizzle::blue: - return t.gather(s, spvForward(params)..., component::z); - case spvSwizzle::alpha: - return t.gather(s, spvForward(params)..., component::w); - } - } - switch (c) - { - case component::x: - return t.gather(s, spvForward(params)..., component::x); - case component::y: - return t.gather(s, spvForward(params)..., component::y); - case component::z: - return t.gather(s, spvForward(params)..., component::z); - case component::w: - return t.gather(s, spvForward(params)..., component::w); - } -} - -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... 
params, uint sw) -{ - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} - fragment main0_out main0(main0_in in [[stage_in]], constant uint* spvSwizzleConstants [[buffer(30)]], array, 4> uSampler [[texture(0)]], array uSamplerSmplr [[sampler(0)]]) { main0_out out = {}; diff --git a/3rdparty/spirv-cross/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/3rdparty/spirv-cross/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag new file mode 100644 index 000000000..f77b8ed99 --- /dev/null +++ b/3rdparty/spirv-cross/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag @@ -0,0 +1,43 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct Buffer3 +{ + int baz; +}; + +struct Buffer +{ + int foo; + uint bar; +}; + +struct Buffer2 +{ + uint quux; +}; + +struct spvDescriptorSetBuffer0 +{ + device Buffer3* m_9 [[id(0)]]; + texture2d img4 [[id(1)]]; + texture2d img [[id(2), raster_order_group(0)]]; + texture2d img3 [[id(3), raster_order_group(0)]]; + volatile device Buffer* m_34 [[id(4), raster_order_group(0)]]; + device Buffer2* m_44 [[id(5), raster_order_group(0)]]; +}; + +fragment void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]]) +{ + (*spvDescriptorSet0.m_9).baz = 0; + spvDescriptorSet0.img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1))); + spvDescriptorSet0.img.write(spvDescriptorSet0.img3.read(uint2(int2(0))), uint2(int2(0))); + (*spvDescriptorSet0.m_34).foo += 42; + uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(*spvDescriptorSet0.m_34).bar, (*spvDescriptorSet0.m_44).quux, 
memory_order_relaxed); +} + diff --git a/3rdparty/spirv-cross/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/3rdparty/spirv-cross/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag new file mode 100644 index 000000000..803416c66 --- /dev/null +++ b/3rdparty/spirv-cross/reference/opt/shaders-msl/frag/pixel-interlock-ordered.msl2.frag @@ -0,0 +1,33 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct Buffer3 +{ + int baz; +}; + +struct Buffer +{ + int foo; + uint bar; +}; + +struct Buffer2 +{ + uint quux; +}; + +fragment void main0(device Buffer3& _9 [[buffer(0)]], volatile device Buffer& _34 [[buffer(1), raster_order_group(0)]], device Buffer2& _44 [[buffer(2), raster_order_group(0)]], texture2d img4 [[texture(0)]], texture2d img [[texture(1), raster_order_group(0)]], texture2d img3 [[texture(2), raster_order_group(0)]]) +{ + _9.baz = 0; + img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1))); + img.write(img3.read(uint2(int2(0))), uint2(int2(0))); + _34.foo += 42; + uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&_34.bar, _44.quux, memory_order_relaxed); +} + diff --git a/3rdparty/spirv-cross/reference/opt/shaders/frag/pixel-interlock-ordered.frag b/3rdparty/spirv-cross/reference/opt/shaders/frag/pixel-interlock-ordered.frag new file mode 100644 index 000000000..46cca96c6 --- /dev/null +++ b/3rdparty/spirv-cross/reference/opt/shaders/frag/pixel-interlock-ordered.frag @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + 
_30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + endInvocationInterlockARB(); +} + diff --git a/3rdparty/spirv-cross/reference/opt/shaders/frag/pixel-interlock-unordered.frag b/3rdparty/spirv-cross/reference/opt/shaders/frag/pixel-interlock-unordered.frag new file mode 100644 index 000000000..d60cd1451 --- /dev/null +++ b/3rdparty/spirv-cross/reference/opt/shaders/frag/pixel-interlock-unordered.frag @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_unordered) in; + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + endInvocationInterlockARB(); +} + diff --git a/3rdparty/spirv-cross/reference/opt/shaders/frag/sample-interlock-ordered.frag b/3rdparty/spirv-cross/reference/opt/shaders/frag/sample-interlock-ordered.frag new file mode 100644 index 000000000..67ca5560e --- /dev/null +++ b/3rdparty/spirv-cross/reference/opt/shaders/frag/sample-interlock-ordered.frag @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(sample_interlock_ordered) in; + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _47 = atomicAnd(_30.bar, uint(gl_SampleMaskIn[0])); + endInvocationInterlockARB(); +} + diff --git a/3rdparty/spirv-cross/reference/opt/shaders/frag/sample-interlock-unordered.frag 
b/3rdparty/spirv-cross/reference/opt/shaders/frag/sample-interlock-unordered.frag new file mode 100644 index 000000000..ea74397d6 --- /dev/null +++ b/3rdparty/spirv-cross/reference/opt/shaders/frag/sample-interlock-unordered.frag @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(sample_interlock_unordered) in; + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + endInvocationInterlockARB(); +} + diff --git a/3rdparty/spirv-cross/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag b/3rdparty/spirv-cross/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag new file mode 100644 index 000000000..3268995c8 --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag @@ -0,0 +1,32 @@ +RasterizerOrderedByteAddressBuffer _7 : register(u1, space0); +RWByteAddressBuffer _9 : register(u0, space0); + +static float4 gl_FragCoord; +struct SPIRV_Cross_Input +{ + float4 gl_FragCoord : SV_Position; +}; + +void callee2() +{ + int _31 = int(gl_FragCoord.x); + _7.Store(_31 * 4 + 0, _7.Load(_31 * 4 + 0) + 1u); +} + +void callee() +{ + int _39 = int(gl_FragCoord.x); + _9.Store(_39 * 4 + 0, _9.Load(_39 * 4 + 0) + 1u); + callee2(); +} + +void frag_main() +{ + callee(); +} + +void main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + frag_main(); +} diff --git a/3rdparty/spirv-cross/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag 
b/3rdparty/spirv-cross/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag new file mode 100644 index 000000000..692771215 --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag @@ -0,0 +1,42 @@ +RasterizerOrderedByteAddressBuffer _7 : register(u1, space0); +RWByteAddressBuffer _13 : register(u2, space0); +RasterizerOrderedByteAddressBuffer _9 : register(u0, space0); + +static float4 gl_FragCoord; +struct SPIRV_Cross_Input +{ + float4 gl_FragCoord : SV_Position; +}; + +void callee2() +{ + int _44 = int(gl_FragCoord.x); + _7.Store(_44 * 4 + 0, _7.Load(_44 * 4 + 0) + 1u); +} + +void callee() +{ + int _52 = int(gl_FragCoord.x); + _9.Store(_52 * 4 + 0, _9.Load(_52 * 4 + 0) + 1u); + callee2(); + if (true) + { + } +} + +void _35() +{ + _13.Store(int(gl_FragCoord.x) * 4 + 0, 4u); +} + +void frag_main() +{ + callee(); + _35(); +} + +void main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + frag_main(); +} diff --git a/3rdparty/spirv-cross/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag b/3rdparty/spirv-cross/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag new file mode 100644 index 000000000..bd963a74d --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag @@ -0,0 +1,42 @@ +RasterizerOrderedByteAddressBuffer _7 : register(u1, space0); +RasterizerOrderedByteAddressBuffer _9 : register(u0, space0); + +static float4 gl_FragCoord; +struct SPIRV_Cross_Input +{ + float4 gl_FragCoord : SV_Position; +}; + +void callee2() +{ + int _37 = int(gl_FragCoord.x); + _7.Store(_37 * 4 + 0, _7.Load(_37 * 4 + 0) + 1u); +} + +void callee() +{ + int _45 = int(gl_FragCoord.x); + _9.Store(_45 * 4 + 0, _9.Load(_45 * 4 + 0) + 1u); + callee2(); +} + +void _29() +{ +} + +void _31() +{ +} + 
+void frag_main() +{ + callee(); + _29(); + _31(); +} + +void main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + frag_main(); +} diff --git a/3rdparty/spirv-cross/reference/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag b/3rdparty/spirv-cross/reference/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag new file mode 100644 index 000000000..55b71de21 --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag @@ -0,0 +1,32 @@ +RasterizerOrderedByteAddressBuffer _14 : register(u1, space0); +RasterizerOrderedByteAddressBuffer _35 : register(u0, space0); + +static float4 gl_FragCoord; +struct SPIRV_Cross_Input +{ + float4 gl_FragCoord : SV_Position; +}; + +void callee2() +{ + int _25 = int(gl_FragCoord.x); + _14.Store(_25 * 4 + 0, _14.Load(_25 * 4 + 0) + 1u); +} + +void callee() +{ + int _38 = int(gl_FragCoord.x); + _35.Store(_38 * 4 + 0, _35.Load(_38 * 4 + 0) + 1u); + callee2(); +} + +void frag_main() +{ + callee(); +} + +void main(SPIRV_Cross_Input stage_input) +{ + gl_FragCoord = stage_input.gl_FragCoord; + frag_main(); +} diff --git a/3rdparty/spirv-cross/reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag b/3rdparty/spirv-cross/reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag new file mode 100644 index 000000000..8923f96a7 --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag @@ -0,0 +1,24 @@ +RWByteAddressBuffer _9 : register(u6, space0); +globallycoherent RasterizerOrderedByteAddressBuffer _42 : register(u3, space0); +RasterizerOrderedByteAddressBuffer _52 : register(u4, space0); +RWTexture2D img4 : register(u5, space0); +RasterizerOrderedTexture2D img : register(u0, space0); +RasterizerOrderedTexture2D img3 : register(u2, space0); +RasterizerOrderedTexture2D img2 : register(u1, space0); + +void frag_main() +{ + 
_9.Store(0, uint(0)); + img4[int2(1, 1)] = float4(1.0f, 0.0f, 0.0f, 1.0f); + img[int2(0, 0)] = img3[int2(0, 0)]; + uint _39; + InterlockedAdd(img2[int2(0, 0)], 1u, _39); + _42.Store(0, uint(int(_42.Load(0)) + 42)); + uint _55; + _42.InterlockedAnd(4, _52.Load(0), _55); +} + +void main() +{ + frag_main(); +} diff --git a/3rdparty/spirv-cross/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag b/3rdparty/spirv-cross/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag new file mode 100644 index 000000000..1b6af2a36 --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag @@ -0,0 +1,35 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO1 +{ + uint values1[1]; +}; + +struct SSBO0 +{ + uint values0[1]; +}; + +inline void callee2(thread float4& gl_FragCoord, device SSBO1& v_7) +{ + int _31 = int(gl_FragCoord.x); + v_7.values1[_31]++; +} + +inline void callee(thread float4& gl_FragCoord, device SSBO1& v_7, device SSBO0& v_9) +{ + int _39 = int(gl_FragCoord.x); + v_9.values0[_39]++; + callee2(gl_FragCoord, v_7); +} + +fragment void main0(device SSBO1& v_7 [[buffer(0), raster_order_group(0)]], device SSBO0& v_9 [[buffer(1)]], float4 gl_FragCoord [[position]]) +{ + callee(gl_FragCoord, v_7, v_9); +} + diff --git a/3rdparty/spirv-cross/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag b/3rdparty/spirv-cross/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag new file mode 100644 index 000000000..dded6a145 --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag @@ -0,0 +1,49 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO1 +{ + uint values1[1]; +}; + +struct _12 +{ + uint _m0[1]; +}; + +struct SSBO0 
+{ + uint values0[1]; +}; + +inline void callee2(thread float4& gl_FragCoord, device SSBO1& v_7) +{ + int _44 = int(gl_FragCoord.x); + v_7.values1[_44]++; +} + +inline void callee(thread float4& gl_FragCoord, device SSBO1& v_7, device SSBO0& v_9) +{ + int _52 = int(gl_FragCoord.x); + v_9.values0[_52]++; + callee2(gl_FragCoord, v_7); + if (true) + { + } +} + +inline void _35(thread float4& gl_FragCoord, device _12& v_13) +{ + v_13._m0[int(gl_FragCoord.x)] = 4u; +} + +fragment void main0(device SSBO1& v_7 [[buffer(0), raster_order_group(0)]], device _12& v_13 [[buffer(1)]], device SSBO0& v_9 [[buffer(2), raster_order_group(0)]], float4 gl_FragCoord [[position]]) +{ + callee(gl_FragCoord, v_7, v_9); + _35(gl_FragCoord, v_13); +} + diff --git a/3rdparty/spirv-cross/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag b/3rdparty/spirv-cross/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag new file mode 100644 index 000000000..5fe65f3b0 --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag @@ -0,0 +1,45 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO1 +{ + uint values1[1]; +}; + +struct SSBO0 +{ + uint values0[1]; +}; + +inline void callee2(thread float4& gl_FragCoord, device SSBO1& v_7) +{ + int _37 = int(gl_FragCoord.x); + v_7.values1[_37]++; +} + +inline void callee(thread float4& gl_FragCoord, device SSBO1& v_7, device SSBO0& v_9) +{ + int _45 = int(gl_FragCoord.x); + v_9.values0[_45]++; + callee2(gl_FragCoord, v_7); +} + +inline void _29() +{ +} + +inline void _31() +{ +} + +fragment void main0(device SSBO1& v_7 [[buffer(0), raster_order_group(0)]], device SSBO0& v_9 [[buffer(1), raster_order_group(0)]], float4 gl_FragCoord [[position]]) +{ + callee(gl_FragCoord, v_7, v_9); + _29(); + _31(); +} + diff --git 
a/3rdparty/spirv-cross/reference/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag b/3rdparty/spirv-cross/reference/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag index afebb421c..78b33afde 100644 --- a/3rdparty/spirv-cross/reference/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag +++ b/3rdparty/spirv-cross/reference/shaders-msl-no-opt/asm/frag/texture-access.swizzle.asm.frag @@ -11,17 +11,6 @@ inline uint2 spvTexelBufferCoord(uint tc) return uint2(tc % 4096, tc / 4096); } -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -34,6 +23,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -72,8 +72,8 @@ inline T spvTextureSwizzle(T x, uint s) } // Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) +template class Tex, typename... Ts> +inline vec spvGatherSwizzle(const thread Tex& t, sampler s, uint sw, component c, Ts... params) METAL_CONST_ARG(c) { if (sw) { @@ -109,8 +109,8 @@ inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, } // Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) +template class Tex, typename... Ts> +inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler s, uint sw, Ts... 
params) { if (sw) { @@ -177,13 +177,13 @@ fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d c = spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), tex3dSwzl); c = spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), tex2dArraySwzl); c = texBuffer.read(spvTexelBufferCoord(0)); - c = spvGatherSwizzle, float2, int2>(tex2dSamp, tex2d, float2(0.0), int2(0), component::x, tex2dSwzl); - c = spvGatherSwizzle, float3>(texCubeSamp, texCube, float3(0.0), component::y, texCubeSwzl); - c = spvGatherSwizzle, float2, uint, int2>(tex2dArraySamp, tex2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0), component::z, tex2dArraySwzl); - c = spvGatherSwizzle, float3, uint>(texCubeArraySamp, texCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), component::w, texCubeArraySwzl); - c = spvGatherCompareSwizzle, float2, float>(depth2dSamp, depth2d, float2(0.0), 1.0, depth2dSwzl); - c = spvGatherCompareSwizzle, float3, float>(depthCubeSamp, depthCube, float3(0.0), 1.0, depthCubeSwzl); - c = spvGatherCompareSwizzle, float2, uint, float>(depth2dArraySamp, depth2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0, depth2dArraySwzl); - c = spvGatherCompareSwizzle, float3, uint, float>(depthCubeArraySamp, depthCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0, depthCubeArraySwzl); + c = spvGatherSwizzle(tex2d, tex2dSamp, tex2dSwzl, component::x, float2(0.0), int2(0)); + c = spvGatherSwizzle(texCube, texCubeSamp, texCubeSwzl, component::y, float3(0.0)); + c = spvGatherSwizzle(tex2dArray, tex2dArraySamp, tex2dArraySwzl, component::z, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0)); + c = spvGatherSwizzle(texCubeArray, texCubeArraySamp, texCubeArraySwzl, component::w, float4(0.0).xyz, uint(round(float4(0.0).w))); + c = spvGatherCompareSwizzle(depth2d, depth2dSamp, depth2dSwzl, float2(0.0), 1.0); + c = spvGatherCompareSwizzle(depthCube, depthCubeSamp, depthCubeSwzl, float3(0.0), 1.0); + c = 
spvGatherCompareSwizzle(depth2dArray, depth2dArraySamp, depth2dArraySwzl, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0); + c = spvGatherCompareSwizzle(depthCubeArray, depthCubeArraySamp, depthCubeArraySwzl, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0); } diff --git a/3rdparty/spirv-cross/reference/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag b/3rdparty/spirv-cross/reference/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag new file mode 100644 index 000000000..716ba251f --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag @@ -0,0 +1,35 @@ +#pragma clang diagnostic ignored "-Wmissing-prototypes" + +#include +#include + +using namespace metal; + +struct SSBO1 +{ + uint values1[1]; +}; + +struct SSBO0 +{ + uint values0[1]; +}; + +inline void callee2(device SSBO1& v_14, thread float4& gl_FragCoord) +{ + int _25 = int(gl_FragCoord.x); + v_14.values1[_25]++; +} + +inline void callee(device SSBO1& v_14, thread float4& gl_FragCoord, device SSBO0& v_35) +{ + int _38 = int(gl_FragCoord.x); + v_35.values0[_38]++; + callee2(v_14, gl_FragCoord); +} + +fragment void main0(device SSBO1& v_14 [[buffer(0), raster_order_group(0)]], device SSBO0& v_35 [[buffer(1), raster_order_group(0)]], float4 gl_FragCoord [[position]]) +{ + callee(v_14, gl_FragCoord, v_35); +} + diff --git a/3rdparty/spirv-cross/reference/shaders-msl-no-opt/frag/texture-access-int.swizzle.frag b/3rdparty/spirv-cross/reference/shaders-msl-no-opt/frag/texture-access-int.swizzle.frag index b8f431344..d1c52aef4 100644 --- a/3rdparty/spirv-cross/reference/shaders-msl-no-opt/frag/texture-access-int.swizzle.frag +++ b/3rdparty/spirv-cross/reference/shaders-msl-no-opt/frag/texture-access-int.swizzle.frag @@ -11,17 +11,6 @@ inline uint2 spvTexelBufferCoord(uint tc) return uint2(tc % 4096, tc / 4096); } -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template 
struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -34,6 +23,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -72,8 +72,8 @@ inline T spvTextureSwizzle(T x, uint s) } // Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) +template class Tex, typename... Ts> +inline vec spvGatherSwizzle(const thread Tex& t, sampler s, uint sw, component c, Ts... params) METAL_CONST_ARG(c) { if (sw) { @@ -108,29 +108,6 @@ inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, } } -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... 
params, uint sw) -{ - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} - fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d tex1d [[texture(0)]], texture2d tex2d [[texture(1)]], texture3d tex3d [[texture(2)]], texturecube texCube [[texture(3)]], texture2d_array tex2dArray [[texture(4)]], texturecube_array texCubeArray [[texture(5)]], texture2d texBuffer [[texture(6)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]]) { constant uint& tex1dSwzl = spvSwizzleConstants[0]; @@ -162,9 +139,9 @@ fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d c = float4(spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), tex3dSwzl)); c = float4(spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), tex2dArraySwzl)); c = float4(texBuffer.read(spvTexelBufferCoord(0))); - c = float4(spvGatherSwizzle, float2, int2>(tex2dSmplr, tex2d, float2(0.0), int2(0), component::x, tex2dSwzl)); - c = float4(spvGatherSwizzle, float3>(texCubeSmplr, texCube, float3(0.0), component::y, texCubeSwzl)); - c = float4(spvGatherSwizzle, float2, uint, int2>(tex2dArraySmplr, tex2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0), component::z, tex2dArraySwzl)); - c = float4(spvGatherSwizzle, float3, uint>(texCubeArraySmplr, texCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), component::w, texCubeArraySwzl)); + c = float4(spvGatherSwizzle(tex2d, tex2dSmplr, tex2dSwzl, component::x, float2(0.0), int2(0))); + c = float4(spvGatherSwizzle(texCube, texCubeSmplr, 
texCubeSwzl, component::y, float3(0.0))); + c = float4(spvGatherSwizzle(tex2dArray, tex2dArraySmplr, tex2dArraySwzl, component::z, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0))); + c = float4(spvGatherSwizzle(texCubeArray, texCubeArraySmplr, texCubeArraySwzl, component::w, float4(0.0).xyz, uint(round(float4(0.0).w)))); } diff --git a/3rdparty/spirv-cross/reference/shaders-msl-no-opt/frag/texture-access-leaf.swizzle.frag b/3rdparty/spirv-cross/reference/shaders-msl-no-opt/frag/texture-access-leaf.swizzle.frag index 3d3a15311..4666702b4 100644 --- a/3rdparty/spirv-cross/reference/shaders-msl-no-opt/frag/texture-access-leaf.swizzle.frag +++ b/3rdparty/spirv-cross/reference/shaders-msl-no-opt/frag/texture-access-leaf.swizzle.frag @@ -11,17 +11,6 @@ inline uint2 spvTexelBufferCoord(uint tc) return uint2(tc % 4096, tc / 4096); } -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -34,6 +23,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -72,8 +72,8 @@ inline T spvTextureSwizzle(T x, uint s) } // Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) +template class Tex, typename... Ts> +inline vec spvGatherSwizzle(const thread Tex& t, sampler s, uint sw, component c, Ts... params) METAL_CONST_ARG(c) { if (sw) { @@ -109,8 +109,8 @@ inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, } // Wrapper function that swizzles depth texture gathers. 
-template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) +template class Tex, typename... Ts> +inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler s, uint sw, Ts... params) { if (sw) { @@ -167,14 +167,14 @@ inline float4 doSwizzle(thread texture1d tex1d, thread const sampler tex1 c = spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), tex3dSwzl); c = spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), tex2dArraySwzl); c = texBuffer.read(spvTexelBufferCoord(0)); - c = spvGatherSwizzle, float2, int2>(tex2dSmplr, tex2d, float2(0.0), int2(0), component::x, tex2dSwzl); - c = spvGatherSwizzle, float3>(texCubeSmplr, texCube, float3(0.0), component::y, texCubeSwzl); - c = spvGatherSwizzle, float2, uint, int2>(tex2dArraySmplr, tex2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0), component::z, tex2dArraySwzl); - c = spvGatherSwizzle, float3, uint>(texCubeArraySmplr, texCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), component::w, texCubeArraySwzl); - c = spvGatherCompareSwizzle, float2, float>(depth2dSmplr, depth2d, float2(0.0), 1.0, depth2dSwzl); - c = spvGatherCompareSwizzle, float3, float>(depthCubeSmplr, depthCube, float3(0.0), 1.0, depthCubeSwzl); - c = spvGatherCompareSwizzle, float2, uint, float>(depth2dArraySmplr, depth2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0, depth2dArraySwzl); - c = spvGatherCompareSwizzle, float3, uint, float>(depthCubeArraySmplr, depthCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0, depthCubeArraySwzl); + c = spvGatherSwizzle(tex2d, tex2dSmplr, tex2dSwzl, component::x, float2(0.0), int2(0)); + c = spvGatherSwizzle(texCube, texCubeSmplr, texCubeSwzl, component::y, float3(0.0)); + c = spvGatherSwizzle(tex2dArray, tex2dArraySmplr, tex2dArraySwzl, component::z, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0)); + c = spvGatherSwizzle(texCubeArray, texCubeArraySmplr, texCubeArraySwzl, component::w, float4(0.0).xyz, 
uint(round(float4(0.0).w))); + c = spvGatherCompareSwizzle(depth2d, depth2dSmplr, depth2dSwzl, float2(0.0), 1.0); + c = spvGatherCompareSwizzle(depthCube, depthCubeSmplr, depthCubeSwzl, float3(0.0), 1.0); + c = spvGatherCompareSwizzle(depth2dArray, depth2dArraySmplr, depth2dArraySwzl, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0); + c = spvGatherCompareSwizzle(depthCubeArray, depthCubeArraySmplr, depthCubeArraySwzl, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0); return c; } diff --git a/3rdparty/spirv-cross/reference/shaders-msl-no-opt/frag/texture-access-uint.swizzle.frag b/3rdparty/spirv-cross/reference/shaders-msl-no-opt/frag/texture-access-uint.swizzle.frag index 505bffa17..1063b8ab3 100644 --- a/3rdparty/spirv-cross/reference/shaders-msl-no-opt/frag/texture-access-uint.swizzle.frag +++ b/3rdparty/spirv-cross/reference/shaders-msl-no-opt/frag/texture-access-uint.swizzle.frag @@ -11,17 +11,6 @@ inline uint2 spvTexelBufferCoord(uint tc) return uint2(tc % 4096, tc / 4096); } -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -34,6 +23,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -72,8 +72,8 @@ inline T spvTextureSwizzle(T x, uint s) } // Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) +template class Tex, typename... Ts> +inline vec spvGatherSwizzle(const thread Tex& t, sampler s, uint sw, component c, Ts... 
params) METAL_CONST_ARG(c) { if (sw) { @@ -108,29 +108,6 @@ inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, } } -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) -{ - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} - fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d tex1d [[texture(0)]], texture2d tex2d [[texture(1)]], texture3d tex3d [[texture(2)]], texturecube texCube [[texture(3)]], texture2d_array tex2dArray [[texture(4)]], texturecube_array texCubeArray [[texture(5)]], texture2d texBuffer [[texture(6)]], sampler tex1dSmplr [[sampler(0)]], sampler tex2dSmplr [[sampler(1)]], sampler tex3dSmplr [[sampler(2)]], sampler texCubeSmplr [[sampler(3)]], sampler tex2dArraySmplr [[sampler(4)]], sampler texCubeArraySmplr [[sampler(5)]]) { constant uint& tex1dSwzl = spvSwizzleConstants[0]; @@ -162,9 +139,9 @@ fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d c = float4(spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), tex3dSwzl)); c = float4(spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), tex2dArraySwzl)); c = float4(texBuffer.read(spvTexelBufferCoord(0))); - c = float4(spvGatherSwizzle, float2, int2>(tex2dSmplr, tex2d, float2(0.0), int2(0), component::x, tex2dSwzl)); - c = float4(spvGatherSwizzle, float3>(texCubeSmplr, texCube, float3(0.0), component::y, texCubeSwzl)); - c = float4(spvGatherSwizzle, float2, uint, int2>(tex2dArraySmplr, tex2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0), component::z, tex2dArraySwzl)); - c = 
float4(spvGatherSwizzle, float3, uint>(texCubeArraySmplr, texCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), component::w, texCubeArraySwzl)); + c = float4(spvGatherSwizzle(tex2d, tex2dSmplr, tex2dSwzl, component::x, float2(0.0), int2(0))); + c = float4(spvGatherSwizzle(texCube, texCubeSmplr, texCubeSwzl, component::y, float3(0.0))); + c = float4(spvGatherSwizzle(tex2dArray, tex2dArraySmplr, tex2dArraySwzl, component::z, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0))); + c = float4(spvGatherSwizzle(texCubeArray, texCubeArraySmplr, texCubeArraySwzl, component::w, float4(0.0).xyz, uint(round(float4(0.0).w)))); } diff --git a/3rdparty/spirv-cross/reference/shaders-msl-no-opt/frag/texture-access.swizzle.frag b/3rdparty/spirv-cross/reference/shaders-msl-no-opt/frag/texture-access.swizzle.frag index 35886f434..389b3a68a 100644 --- a/3rdparty/spirv-cross/reference/shaders-msl-no-opt/frag/texture-access.swizzle.frag +++ b/3rdparty/spirv-cross/reference/shaders-msl-no-opt/frag/texture-access.swizzle.frag @@ -11,17 +11,6 @@ inline uint2 spvTexelBufferCoord(uint tc) return uint2(tc % 4096, tc / 4096); } -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -34,6 +23,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -72,8 +72,8 @@ inline T spvTextureSwizzle(T x, uint s) } // Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) +template class Tex, typename... 
Ts> +inline vec spvGatherSwizzle(const thread Tex& t, sampler s, uint sw, component c, Ts... params) METAL_CONST_ARG(c) { if (sw) { @@ -109,8 +109,8 @@ inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, } // Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) +template class Tex, typename... Ts> +inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler s, uint sw, Ts... params) { if (sw) { @@ -177,13 +177,13 @@ fragment void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture1d c = spvTextureSwizzle(tex3d.read(uint3(int3(0)), 0), tex3dSwzl); c = spvTextureSwizzle(tex2dArray.read(uint2(int3(0).xy), uint(int3(0).z), 0), tex2dArraySwzl); c = texBuffer.read(spvTexelBufferCoord(0)); - c = spvGatherSwizzle, float2, int2>(tex2dSmplr, tex2d, float2(0.0), int2(0), component::x, tex2dSwzl); - c = spvGatherSwizzle, float3>(texCubeSmplr, texCube, float3(0.0), component::y, texCubeSwzl); - c = spvGatherSwizzle, float2, uint, int2>(tex2dArraySmplr, tex2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0), component::z, tex2dArraySwzl); - c = spvGatherSwizzle, float3, uint>(texCubeArraySmplr, texCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), component::w, texCubeArraySwzl); - c = spvGatherCompareSwizzle, float2, float>(depth2dSmplr, depth2d, float2(0.0), 1.0, depth2dSwzl); - c = spvGatherCompareSwizzle, float3, float>(depthCubeSmplr, depthCube, float3(0.0), 1.0, depthCubeSwzl); - c = spvGatherCompareSwizzle, float2, uint, float>(depth2dArraySmplr, depth2dArray, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0, depth2dArraySwzl); - c = spvGatherCompareSwizzle, float3, uint, float>(depthCubeArraySmplr, depthCubeArray, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0, depthCubeArraySwzl); + c = spvGatherSwizzle(tex2d, tex2dSmplr, tex2dSwzl, component::x, float2(0.0), int2(0)); + c = spvGatherSwizzle(texCube, texCubeSmplr, 
texCubeSwzl, component::y, float3(0.0)); + c = spvGatherSwizzle(tex2dArray, tex2dArraySmplr, tex2dArraySwzl, component::z, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0)); + c = spvGatherSwizzle(texCubeArray, texCubeArraySmplr, texCubeArraySwzl, component::w, float4(0.0).xyz, uint(round(float4(0.0).w))); + c = spvGatherCompareSwizzle(depth2d, depth2dSmplr, depth2dSwzl, float2(0.0), 1.0); + c = spvGatherCompareSwizzle(depthCube, depthCubeSmplr, depthCubeSwzl, float3(0.0), 1.0); + c = spvGatherCompareSwizzle(depth2dArray, depth2dArraySmplr, depth2dArraySwzl, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0); + c = spvGatherCompareSwizzle(depthCubeArray, depthCubeArraySmplr, depthCubeArraySwzl, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0); } diff --git a/3rdparty/spirv-cross/reference/shaders-msl-no-opt/vulkan/frag/texture-access-function.swizzle.vk.frag b/3rdparty/spirv-cross/reference/shaders-msl-no-opt/vulkan/frag/texture-access-function.swizzle.vk.frag index e7a740379..0fcca3af8 100644 --- a/3rdparty/spirv-cross/reference/shaders-msl-no-opt/vulkan/frag/texture-access-function.swizzle.vk.frag +++ b/3rdparty/spirv-cross/reference/shaders-msl-no-opt/vulkan/frag/texture-access-function.swizzle.vk.frag @@ -16,17 +16,6 @@ inline uint2 spvTexelBufferCoord(uint tc) return uint2(tc % 4096, tc / 4096); } -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -39,6 +28,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -77,8 +77,8 @@ inline T spvTextureSwizzle(T x, uint s) } // Wrapper function that swizzles texture gathers. 
-template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) METAL_CONST_ARG(c) +template class Tex, typename... Ts> +inline vec spvGatherSwizzle(const thread Tex& t, sampler s, uint sw, component c, Ts... params) METAL_CONST_ARG(c) { if (sw) { @@ -114,8 +114,8 @@ inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, } // Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) +template class Tex, typename... Ts> +inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler s, uint sw, Ts... params) { if (sw) { @@ -172,14 +172,14 @@ inline float4 do_samples(thread const texture1d t1, thread const sampler c = spvTextureSwizzle(t3.read(uint3(int3(0)), 0), t3Swzl); c = spvTextureSwizzle(t2a.read(uint2(int3(0).xy), uint(int3(0).z), 0), t2aSwzl); c = tb.read(spvTexelBufferCoord(0)); - c = spvGatherSwizzle, float2, int2>(defaultSampler, t2, float2(0.0), int2(0), component::x, t2Swzl); - c = spvGatherSwizzle, float3>(defaultSampler, tc, float3(0.0), component::y, tcSwzl); - c = spvGatherSwizzle, float2, uint, int2>(t2aSmplr, t2a, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0), component::z, t2aSwzl); - c = spvGatherSwizzle, float3, uint>(tcaSmplr, tca, float4(0.0).xyz, uint(round(float4(0.0).w)), component::w, tcaSwzl); - c = spvGatherCompareSwizzle, float2, float>(d2Smplr, d2, float2(0.0), 1.0, d2Swzl); - c = spvGatherCompareSwizzle, float3, float>(dcSmplr, dc, float3(0.0), 1.0, dcSwzl); - c = spvGatherCompareSwizzle, float2, uint, float>(shadowSampler, d2a, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0, d2aSwzl); - c = spvGatherCompareSwizzle, float3, uint, float>(dcaSmplr, dca, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0, dcaSwzl); + c = spvGatherSwizzle(t2, defaultSampler, t2Swzl, component::x, float2(0.0), int2(0)); + c = spvGatherSwizzle(tc, defaultSampler, tcSwzl, component::y, 
float3(0.0)); + c = spvGatherSwizzle(t2a, t2aSmplr, t2aSwzl, component::z, float3(0.0).xy, uint(round(float3(0.0).z)), int2(0)); + c = spvGatherSwizzle(tca, tcaSmplr, tcaSwzl, component::w, float4(0.0).xyz, uint(round(float4(0.0).w))); + c = spvGatherCompareSwizzle(d2, d2Smplr, d2Swzl, float2(0.0), 1.0); + c = spvGatherCompareSwizzle(dc, dcSmplr, dcSwzl, float3(0.0), 1.0); + c = spvGatherCompareSwizzle(d2a, shadowSampler, d2aSwzl, float3(0.0).xy, uint(round(float3(0.0).z)), 1.0); + c = spvGatherCompareSwizzle(dca, dcaSmplr, dcaSwzl, float4(0.0).xyz, uint(round(float4(0.0).w)), 1.0); return c; } diff --git a/3rdparty/spirv-cross/reference/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp b/3rdparty/spirv-cross/reference/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp new file mode 100644 index 000000000..c23a9d1d0 --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp @@ -0,0 +1,11 @@ +#include +#include + +using namespace metal; + +kernel void main0(texture2d uImage [[texture(0)]], texture2d uImageRead [[texture(1)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + int2 coord = int2(gl_GlobalInvocationID.xy); + uImage.write(uImageRead.read(uint2(coord)), uint2(coord)); +} + diff --git a/3rdparty/spirv-cross/reference/shaders-msl/comp/basic.dynamic-buffer.msl2.comp b/3rdparty/spirv-cross/reference/shaders-msl/comp/basic.dynamic-buffer.msl2.comp new file mode 100644 index 000000000..ae8c5b029 --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders-msl/comp/basic.dynamic-buffer.msl2.comp @@ -0,0 +1,90 @@ +#include +#include + +using namespace metal; + +struct Baz +{ + int e; + int f; +}; + +struct Foo +{ + int a; + int b; +}; + +struct Bar +{ + int c; + int d; +}; + +constant uint3 gl_WorkGroupSize [[maybe_unused]] = uint3(3u, 3u, 2u); + +struct spvDescriptorSetBuffer0 +{ + constant Foo* m_34 [[id(0)]]; + constant Bar* m_40 
[[id(1)]]; +}; + +struct spvDescriptorSetBuffer1 +{ + device Baz* baz [[id(0)]][3][3][2]; +}; + +kernel void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]], constant spvDescriptorSetBuffer1& spvDescriptorSet1 [[buffer(1)]], constant uint* spvDynamicOffsets [[buffer(23)]], uint3 gl_GlobalInvocationID [[thread_position_in_grid]]) +{ + constant auto& _34 = *(constant Foo* )((constant char* )spvDescriptorSet0.m_34 + spvDynamicOffsets[0]); + device Baz* baz[3][3][2] = + { + { + { + (device Baz* )((device char* )spvDescriptorSet1.baz[0][0][0] + spvDynamicOffsets[1]), + (device Baz* )((device char* )spvDescriptorSet1.baz[0][0][1] + spvDynamicOffsets[2]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[0][1][0] + spvDynamicOffsets[3]), + (device Baz* )((device char* )spvDescriptorSet1.baz[0][1][1] + spvDynamicOffsets[4]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[0][2][0] + spvDynamicOffsets[5]), + (device Baz* )((device char* )spvDescriptorSet1.baz[0][2][1] + spvDynamicOffsets[6]), + }, + }, + { + { + (device Baz* )((device char* )spvDescriptorSet1.baz[1][0][0] + spvDynamicOffsets[7]), + (device Baz* )((device char* )spvDescriptorSet1.baz[1][0][1] + spvDynamicOffsets[8]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[1][1][0] + spvDynamicOffsets[9]), + (device Baz* )((device char* )spvDescriptorSet1.baz[1][1][1] + spvDynamicOffsets[10]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[1][2][0] + spvDynamicOffsets[11]), + (device Baz* )((device char* )spvDescriptorSet1.baz[1][2][1] + spvDynamicOffsets[12]), + }, + }, + { + { + (device Baz* )((device char* )spvDescriptorSet1.baz[2][0][0] + spvDynamicOffsets[13]), + (device Baz* )((device char* )spvDescriptorSet1.baz[2][0][1] + spvDynamicOffsets[14]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[2][1][0] + spvDynamicOffsets[15]), + (device Baz* )((device char* )spvDescriptorSet1.baz[2][1][1] + 
spvDynamicOffsets[16]), + }, + { + (device Baz* )((device char* )spvDescriptorSet1.baz[2][2][0] + spvDynamicOffsets[17]), + (device Baz* )((device char* )spvDescriptorSet1.baz[2][2][1] + spvDynamicOffsets[18]), + }, + }, + }; + + uint3 coords = gl_GlobalInvocationID; + baz[coords.x][coords.y][coords.z]->e = _34.a + (*spvDescriptorSet0.m_40).c; + baz[coords.x][coords.y][coords.z]->f = _34.b * (*spvDescriptorSet0.m_40).d; +} + diff --git a/3rdparty/spirv-cross/reference/shaders-msl/comp/force-recompile-hooks.swizzle.comp b/3rdparty/spirv-cross/reference/shaders-msl/comp/force-recompile-hooks.swizzle.comp index f9608de34..856efeaba 100644 --- a/3rdparty/spirv-cross/reference/shaders-msl/comp/force-recompile-hooks.swizzle.comp +++ b/3rdparty/spirv-cross/reference/shaders-msl/comp/force-recompile-hooks.swizzle.comp @@ -5,17 +5,6 @@ using namespace metal; -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -28,6 +17,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -65,66 +65,6 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -// Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... 
params, component c, uint sw) METAL_CONST_ARG(c) -{ - if (sw) - { - switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF)) - { - case spvSwizzle::none: - break; - case spvSwizzle::zero: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - case spvSwizzle::red: - return t.gather(s, spvForward(params)..., component::x); - case spvSwizzle::green: - return t.gather(s, spvForward(params)..., component::y); - case spvSwizzle::blue: - return t.gather(s, spvForward(params)..., component::z); - case spvSwizzle::alpha: - return t.gather(s, spvForward(params)..., component::w); - } - } - switch (c) - { - case component::x: - return t.gather(s, spvForward(params)..., component::x); - case component::y: - return t.gather(s, spvForward(params)..., component::y); - case component::z: - return t.gather(s, spvForward(params)..., component::z); - case component::w: - return t.gather(s, spvForward(params)..., component::w); - } -} - -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... 
params, uint sw) -{ - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} - kernel void main0(constant uint* spvSwizzleConstants [[buffer(30)]], texture2d foo [[texture(0)]], texture2d bar [[texture(1)]], sampler fooSmplr [[sampler(0)]]) { constant uint& fooSwzl = spvSwizzleConstants[0]; diff --git a/3rdparty/spirv-cross/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag b/3rdparty/spirv-cross/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag index 6578bab1c..e06824407 100644 --- a/3rdparty/spirv-cross/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag +++ b/3rdparty/spirv-cross/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.argument.discrete.swizzle.frag @@ -22,17 +22,6 @@ struct main0_in float2 vUV [[user(locn0)]]; }; -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -45,6 +34,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -82,66 +82,6 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -// Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... 
params, component c, uint sw) METAL_CONST_ARG(c) -{ - if (sw) - { - switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF)) - { - case spvSwizzle::none: - break; - case spvSwizzle::zero: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - case spvSwizzle::red: - return t.gather(s, spvForward(params)..., component::x); - case spvSwizzle::green: - return t.gather(s, spvForward(params)..., component::y); - case spvSwizzle::blue: - return t.gather(s, spvForward(params)..., component::z); - case spvSwizzle::alpha: - return t.gather(s, spvForward(params)..., component::w); - } - } - switch (c) - { - case component::x: - return t.gather(s, spvForward(params)..., component::x); - case component::y: - return t.gather(s, spvForward(params)..., component::y); - case component::z: - return t.gather(s, spvForward(params)..., component::z); - case component::w: - return t.gather(s, spvForward(params)..., component::w); - } -} - -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... 
params, uint sw) -{ - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} - inline float4 sample_in_func_1(thread const array, 4> uSampler0, thread const array uSampler0Smplr, constant uint* uSampler0Swzl, thread float2& vUV) { return spvTextureSwizzle(uSampler0[2].sample(uSampler0Smplr[2], vUV), uSampler0Swzl[2]); diff --git a/3rdparty/spirv-cross/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag b/3rdparty/spirv-cross/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag index 55632d1d5..19d030063 100644 --- a/3rdparty/spirv-cross/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag +++ b/3rdparty/spirv-cross/reference/shaders-msl/frag/array-of-texture-swizzle.msl2.swizzle.frag @@ -15,17 +15,6 @@ struct main0_in float2 vUV [[user(locn0)]]; }; -enum class spvSwizzle : uint -{ - none = 0, - zero, - one, - red, - green, - blue, - alpha -}; - template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; template struct spvRemoveReference { typedef T type; }; @@ -38,6 +27,17 @@ template inline constexpr thread T&& spvForward(thread typename spvR return static_cast(x); } +enum class spvSwizzle : uint +{ + none = 0, + zero, + one, + red, + green, + blue, + alpha +}; + template inline T spvGetSwizzle(vec x, T c, spvSwizzle s) { @@ -75,66 +75,6 @@ inline T spvTextureSwizzle(T x, uint s) return spvTextureSwizzle(vec(x, 0, 0, 1), s).x; } -// Wrapper function that swizzles texture gathers. -template -inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... 
params, component c, uint sw) METAL_CONST_ARG(c) -{ - if (sw) - { - switch (spvSwizzle((sw >> (uint(c) * 8)) & 0xFF)) - { - case spvSwizzle::none: - break; - case spvSwizzle::zero: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - case spvSwizzle::red: - return t.gather(s, spvForward(params)..., component::x); - case spvSwizzle::green: - return t.gather(s, spvForward(params)..., component::y); - case spvSwizzle::blue: - return t.gather(s, spvForward(params)..., component::z); - case spvSwizzle::alpha: - return t.gather(s, spvForward(params)..., component::w); - } - } - switch (c) - { - case component::x: - return t.gather(s, spvForward(params)..., component::x); - case component::y: - return t.gather(s, spvForward(params)..., component::y); - case component::z: - return t.gather(s, spvForward(params)..., component::z); - case component::w: - return t.gather(s, spvForward(params)..., component::w); - } -} - -// Wrapper function that swizzles depth texture gathers. -template -inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... 
params, uint sw) -{ - if (sw) - { - switch (spvSwizzle(sw & 0xFF)) - { - case spvSwizzle::none: - case spvSwizzle::red: - break; - case spvSwizzle::zero: - case spvSwizzle::green: - case spvSwizzle::blue: - case spvSwizzle::alpha: - return vec(0, 0, 0, 0); - case spvSwizzle::one: - return vec(1, 1, 1, 1); - } - } - return t.gather_compare(s, spvForward(params)...); -} - inline float4 sample_in_func(thread const array, 4> uSampler, thread const array uSamplerSmplr, constant uint* uSamplerSwzl, thread float2& vUV) { return spvTextureSwizzle(uSampler[2].sample(uSamplerSmplr[2], vUV), uSamplerSwzl[2]); diff --git a/3rdparty/spirv-cross/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/3rdparty/spirv-cross/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag new file mode 100644 index 000000000..f77b8ed99 --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag @@ -0,0 +1,43 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct Buffer3 +{ + int baz; +}; + +struct Buffer +{ + int foo; + uint bar; +}; + +struct Buffer2 +{ + uint quux; +}; + +struct spvDescriptorSetBuffer0 +{ + device Buffer3* m_9 [[id(0)]]; + texture2d img4 [[id(1)]]; + texture2d img [[id(2), raster_order_group(0)]]; + texture2d img3 [[id(3), raster_order_group(0)]]; + volatile device Buffer* m_34 [[id(4), raster_order_group(0)]]; + device Buffer2* m_44 [[id(5), raster_order_group(0)]]; +}; + +fragment void main0(constant spvDescriptorSetBuffer0& spvDescriptorSet0 [[buffer(0)]]) +{ + (*spvDescriptorSet0.m_9).baz = 0; + spvDescriptorSet0.img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1))); + spvDescriptorSet0.img.write(spvDescriptorSet0.img3.read(uint2(int2(0))), uint2(int2(0))); + (*spvDescriptorSet0.m_34).foo += 42; + uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&(*spvDescriptorSet0.m_34).bar, 
(*spvDescriptorSet0.m_44).quux, memory_order_relaxed); +} + diff --git a/3rdparty/spirv-cross/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/3rdparty/spirv-cross/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag new file mode 100644 index 000000000..803416c66 --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders-msl/frag/pixel-interlock-ordered.msl2.frag @@ -0,0 +1,33 @@ +#pragma clang diagnostic ignored "-Wunused-variable" + +#include +#include +#include + +using namespace metal; + +struct Buffer3 +{ + int baz; +}; + +struct Buffer +{ + int foo; + uint bar; +}; + +struct Buffer2 +{ + uint quux; +}; + +fragment void main0(device Buffer3& _9 [[buffer(0)]], volatile device Buffer& _34 [[buffer(1), raster_order_group(0)]], device Buffer2& _44 [[buffer(2), raster_order_group(0)]], texture2d img4 [[texture(0)]], texture2d img [[texture(1), raster_order_group(0)]], texture2d img3 [[texture(2), raster_order_group(0)]]) +{ + _9.baz = 0; + img4.write(float4(1.0, 0.0, 0.0, 1.0), uint2(int2(1))); + img.write(img3.read(uint2(int2(0))), uint2(int2(0))); + _34.foo += 42; + uint _49 = atomic_fetch_and_explicit((volatile device atomic_uint*)&_34.bar, _44.quux, memory_order_relaxed); +} + diff --git a/3rdparty/spirv-cross/reference/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag b/3rdparty/spirv-cross/reference/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag new file mode 100644 index 000000000..948803c67 --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag @@ -0,0 +1,39 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +} _7; + +layout(binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +} _9; + +void callee2() +{ + int _31 = int(gl_FragCoord.x); + _7.values1[_31]++; +} + +void callee() +{ + int _39 = int(gl_FragCoord.x); + _9.values0[_39]++; + 
callee2(); +} + +void spvMainInterlockedBody() +{ + callee(); +} + +void main() +{ + // Interlocks were used in a way not compatible with GLSL, this is very slow. + beginInvocationInterlockARB(); + spvMainInterlockedBody(); + endInvocationInterlockARB(); +} diff --git a/3rdparty/spirv-cross/reference/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag b/3rdparty/spirv-cross/reference/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag new file mode 100644 index 000000000..72dca0d7f --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag @@ -0,0 +1,53 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +} _7; + +layout(binding = 2, std430) buffer _12_13 +{ + uint _m0[]; +} _13; + +layout(binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +} _9; + +void callee2() +{ + int _44 = int(gl_FragCoord.x); + _7.values1[_44]++; +} + +void callee() +{ + int _52 = int(gl_FragCoord.x); + _9.values0[_52]++; + callee2(); + if (true) + { + } +} + +void _35() +{ + _13._m0[int(gl_FragCoord.x)] = 4u; +} + +void spvMainInterlockedBody() +{ + callee(); + _35(); +} + +void main() +{ + // Interlocks were used in a way not compatible with GLSL, this is very slow. 
+ beginInvocationInterlockARB(); + spvMainInterlockedBody(); + endInvocationInterlockARB(); +} diff --git a/3rdparty/spirv-cross/reference/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag b/3rdparty/spirv-cross/reference/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag new file mode 100644 index 000000000..b09eb6667 --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag @@ -0,0 +1,49 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +} _7; + +layout(binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +} _9; + +void callee2() +{ + int _37 = int(gl_FragCoord.x); + _7.values1[_37]++; +} + +void callee() +{ + int _45 = int(gl_FragCoord.x); + _9.values0[_45]++; + callee2(); +} + +void _29() +{ +} + +void _31() +{ +} + +void spvMainInterlockedBody() +{ + callee(); + _29(); + _31(); +} + +void main() +{ + // Interlocks were used in a way not compatible with GLSL, this is very slow. 
+ beginInvocationInterlockARB(); + spvMainInterlockedBody(); + endInvocationInterlockARB(); +} diff --git a/3rdparty/spirv-cross/reference/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag b/3rdparty/spirv-cross/reference/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag new file mode 100644 index 000000000..151ed01d4 --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag @@ -0,0 +1,34 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +} _14; + +layout(binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +} _35; + +void callee2() +{ + int _25 = int(gl_FragCoord.x); + _14.values1[_25]++; +} + +void callee() +{ + int _38 = int(gl_FragCoord.x); + _35.values0[_38]++; + callee2(); +} + +void main() +{ + beginInvocationInterlockARB(); + callee(); + endInvocationInterlockARB(); +} + diff --git a/3rdparty/spirv-cross/reference/shaders/frag/pixel-interlock-ordered.frag b/3rdparty/spirv-cross/reference/shaders/frag/pixel-interlock-ordered.frag new file mode 100644 index 000000000..46cca96c6 --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders/frag/pixel-interlock-ordered.frag @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + endInvocationInterlockARB(); +} + diff --git a/3rdparty/spirv-cross/reference/shaders/frag/pixel-interlock-unordered.frag 
b/3rdparty/spirv-cross/reference/shaders/frag/pixel-interlock-unordered.frag new file mode 100644 index 000000000..d60cd1451 --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders/frag/pixel-interlock-unordered.frag @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_unordered) in; + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + endInvocationInterlockARB(); +} + diff --git a/3rdparty/spirv-cross/reference/shaders/frag/sample-interlock-ordered.frag b/3rdparty/spirv-cross/reference/shaders/frag/sample-interlock-ordered.frag new file mode 100644 index 000000000..67ca5560e --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders/frag/sample-interlock-ordered.frag @@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(sample_interlock_ordered) in; + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _47 = atomicAnd(_30.bar, uint(gl_SampleMaskIn[0])); + endInvocationInterlockARB(); +} + diff --git a/3rdparty/spirv-cross/reference/shaders/frag/sample-interlock-unordered.frag b/3rdparty/spirv-cross/reference/shaders/frag/sample-interlock-unordered.frag new file mode 100644 index 000000000..ea74397d6 --- /dev/null +++ b/3rdparty/spirv-cross/reference/shaders/frag/sample-interlock-unordered.frag 
@@ -0,0 +1,23 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(sample_interlock_unordered) in; + +layout(binding = 2, std430) coherent buffer Buffer +{ + int foo; + uint bar; +} _30; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0), vec4(1.0, 0.0, 0.0, 1.0)); + uint _27 = imageAtomicAdd(img2, ivec2(0), 1u); + _30.foo += 42; + uint _41 = atomicAnd(_30.bar, 255u); + endInvocationInterlockARB(); +} + diff --git a/3rdparty/spirv-cross/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag b/3rdparty/spirv-cross/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag new file mode 100644 index 000000000..ebd8d6bab --- /dev/null +++ b/3rdparty/spirv-cross/shaders-hlsl-no-opt/asm/frag/pixel-interlock-callstack.sm51.fxconly.asm.frag @@ -0,0 +1,89 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 
ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + OpReturn + OpFunctionEnd + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + OpBeginInvocationInterlockEXT + %43 = OpFunctionCall %void %callee2_ + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd diff --git 
a/3rdparty/spirv-cross/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag b/3rdparty/spirv-cross/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag new file mode 100644 index 000000000..69b8f9112 --- /dev/null +++ b/3rdparty/spirv-cross/shaders-hlsl-no-opt/asm/frag/pixel-interlock-control-flow.sm51.fxconly.asm.frag @@ -0,0 +1,121 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + + OpMemberDecorate %SSBO2 0 Offset 0 + OpDecorate %SSBO2 BufferBlock + OpDecorate %ssbo2 DescriptorSet 0 + OpDecorate %ssbo2 Binding 2 + + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint + %SSBO2 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 +%_ptr_Uniform_SSBO2 = OpTypePointer 
Uniform %SSBO2 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %ssbo2 = OpVariable %_ptr_Uniform_SSBO2 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_4 = OpConstant %uint 4 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %bool = OpTypeBool + %true = OpConstantTrue %bool +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %callee3_res = OpFunctionCall %void %callee3_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %calle3_block = OpLabel + %frag_coord_x_ptr = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %frag_coord_x = OpLoad %float %frag_coord_x_ptr + %frag_coord_int = OpConvertFToS %int %frag_coord_x + %ssbo_ptr = OpAccessChain %_ptr_Uniform_uint %ssbo2 %int_0 %frag_coord_int + OpStore %ssbo_ptr %uint_4 + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = 
OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + + OpSelectionMerge %merged_block None + OpBranchConditional %true %dummy_block %merged_block + %dummy_block = OpLabel + OpBeginInvocationInterlockEXT + OpEndInvocationInterlockEXT + OpBranch %merged_block + + %merged_block = OpLabel + OpReturn + + OpFunctionEnd diff --git a/3rdparty/spirv-cross/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag b/3rdparty/spirv-cross/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag new file mode 100644 index 000000000..7c0fe9a2b --- /dev/null +++ b/3rdparty/spirv-cross/shaders-hlsl-no-opt/asm/frag/pixel-interlock-split-functions.sm51.fxconly.asm.frag @@ -0,0 +1,102 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction 
%void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %call3res = OpFunctionCall %void %callee3_ + %call4res = OpFunctionCall %void %callee4_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %begin3 = OpLabel + OpBeginInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee4_ = OpFunction %void None %3 + %begin4 = OpLabel + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = 
OpFunctionCall %void %callee2_ + OpReturn + OpFunctionEnd diff --git a/3rdparty/spirv-cross/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag b/3rdparty/spirv-cross/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag new file mode 100644 index 000000000..59079fe58 --- /dev/null +++ b/3rdparty/spirv-cross/shaders-hlsl-no-opt/frag/pixel-interlock-simple-callstack.sm51.fxconly.frag @@ -0,0 +1,31 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(set = 0, binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +}; + +layout(set = 0, binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +}; + +void callee2() +{ + values1[int(gl_FragCoord.x)] += 1; +} + +void callee() +{ + values0[int(gl_FragCoord.x)] += 1; + callee2(); +} + +void main() +{ + beginInvocationInterlockARB(); + callee(); + endInvocationInterlockARB(); +} diff --git a/3rdparty/spirv-cross/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag b/3rdparty/spirv-cross/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag new file mode 100644 index 000000000..ceac8cc50 --- /dev/null +++ b/3rdparty/spirv-cross/shaders-hlsl/frag/pixel-interlock-ordered.sm51.fxconly.frag @@ -0,0 +1,36 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(pixel_interlock_ordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2, rgba8) uniform readonly image2D img3; +layout(binding = 3) coherent buffer Buffer +{ + int foo; + uint bar; +}; +layout(binding = 4) buffer Buffer2 +{ + uint quux; +}; + +layout(binding = 5, rgba8) uniform writeonly image2D img4; +layout(binding = 6) buffer Buffer3 +{ + int baz; +}; + +void main() +{ + // Deliberately outside the critical section to test usage tracking. 
+ baz = 0; + imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0)); + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0))); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, quux); + endInvocationInterlockARB(); +} diff --git a/3rdparty/spirv-cross/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag b/3rdparty/spirv-cross/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag new file mode 100644 index 000000000..ebd8d6bab --- /dev/null +++ b/3rdparty/spirv-cross/shaders-msl-no-opt/asm/frag/pixel-interlock-callstack.msl2.asm.frag @@ -0,0 +1,89 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = 
OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + OpReturn + OpFunctionEnd + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + OpBeginInvocationInterlockEXT + %43 = OpFunctionCall %void %callee2_ + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd diff --git a/3rdparty/spirv-cross/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag b/3rdparty/spirv-cross/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag new file mode 100644 index 000000000..69b8f9112 --- /dev/null +++ b/3rdparty/spirv-cross/shaders-msl-no-opt/asm/frag/pixel-interlock-control-flow.msl2.asm.frag @@ 
-0,0 +1,121 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + + OpMemberDecorate %SSBO2 0 Offset 0 + OpDecorate %SSBO2 BufferBlock + OpDecorate %ssbo2 DescriptorSet 0 + OpDecorate %ssbo2 Binding 2 + + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint + %SSBO2 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 +%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %ssbo2 = OpVariable %_ptr_Uniform_SSBO2 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_4 = OpConstant %uint 4 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 + %bool = OpTypeBool + %true = OpConstantTrue %bool +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = 
OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %callee3_res = OpFunctionCall %void %callee3_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %calle3_block = OpLabel + %frag_coord_x_ptr = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %frag_coord_x = OpLoad %float %frag_coord_x_ptr + %frag_coord_int = OpConvertFToS %int %frag_coord_x + %ssbo_ptr = OpAccessChain %_ptr_Uniform_uint %ssbo2 %int_0 %frag_coord_int + OpStore %ssbo_ptr %uint_4 + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + + OpSelectionMerge %merged_block None + OpBranchConditional %true %dummy_block %merged_block + %dummy_block = OpLabel + OpBeginInvocationInterlockEXT + OpEndInvocationInterlockEXT + OpBranch %merged_block + + %merged_block = OpLabel + OpReturn + + OpFunctionEnd diff --git 
a/3rdparty/spirv-cross/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag b/3rdparty/spirv-cross/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag new file mode 100644 index 000000000..7c0fe9a2b --- /dev/null +++ b/3rdparty/spirv-cross/shaders-msl-no-opt/asm/frag/pixel-interlock-split-functions.msl2.asm.frag @@ -0,0 +1,102 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable 
%_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %call3res = OpFunctionCall %void %callee3_ + %call4res = OpFunctionCall %void %callee4_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %begin3 = OpLabel + OpBeginInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee4_ = OpFunction %void None %3 + %begin4 = OpLabel + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + OpReturn + OpFunctionEnd diff --git a/3rdparty/spirv-cross/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag b/3rdparty/spirv-cross/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag new file mode 100644 index 000000000..59079fe58 --- /dev/null +++ b/3rdparty/spirv-cross/shaders-msl-no-opt/frag/pixel-interlock-simple-callstack.msl2.frag @@ -0,0 +1,31 
@@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(set = 0, binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +}; + +layout(set = 0, binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +}; + +void callee2() +{ + values1[int(gl_FragCoord.x)] += 1; +} + +void callee() +{ + values0[int(gl_FragCoord.x)] += 1; + callee2(); +} + +void main() +{ + beginInvocationInterlockARB(); + callee(); + endInvocationInterlockARB(); +} diff --git a/3rdparty/spirv-cross/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp b/3rdparty/spirv-cross/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp new file mode 100644 index 000000000..72ca8899a --- /dev/null +++ b/3rdparty/spirv-cross/shaders-msl/comp/argument-buffers-image-load-store.ios.msl2.argument.comp @@ -0,0 +1,10 @@ +#version 450 + +layout(set = 0, binding = 1, r32f) writeonly uniform image2D uImage; +layout(set = 0, binding = 2, r32f) readonly uniform image2D uImageRead; + +void main() +{ + ivec2 coord = ivec2(gl_GlobalInvocationID.xy); + imageStore(uImage, coord, imageLoad(uImageRead, coord)); +} diff --git a/3rdparty/spirv-cross/shaders-msl/comp/basic.dynamic-buffer.msl2.comp b/3rdparty/spirv-cross/shaders-msl/comp/basic.dynamic-buffer.msl2.comp new file mode 100644 index 000000000..c2965731e --- /dev/null +++ b/3rdparty/spirv-cross/shaders-msl/comp/basic.dynamic-buffer.msl2.comp @@ -0,0 +1,27 @@ +#version 450 +layout(local_size_x = 3, local_size_y = 3, local_size_z = 2) in; + +layout(set = 0, binding = 0) uniform Foo +{ + int a; + int b; +}; + +layout(set = 0, binding = 1) uniform Bar +{ + int c; + int d; +}; + +layout(set = 1, binding = 2) buffer Baz +{ + int e; + int f; +} baz[3][3][2]; + +void main() +{ + uvec3 coords = gl_GlobalInvocationID; + baz[coords.x][coords.y][coords.z].e = a + c; + baz[coords.x][coords.y][coords.z].f = b * d; +} diff --git 
a/3rdparty/spirv-cross/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag b/3rdparty/spirv-cross/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag new file mode 100644 index 000000000..04886a672 --- /dev/null +++ b/3rdparty/spirv-cross/shaders-msl/frag/pixel-interlock-ordered.msl2.argument.frag @@ -0,0 +1,36 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(pixel_interlock_ordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +//layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2, rgba8) uniform readonly image2D img3; +layout(binding = 3) coherent buffer Buffer +{ + int foo; + uint bar; +}; +layout(binding = 4) buffer Buffer2 +{ + uint quux; +}; + +layout(binding = 5, rgba8) uniform writeonly image2D img4; +layout(binding = 6) buffer Buffer3 +{ + int baz; +}; + +void main() +{ + // Deliberately outside the critical section to test usage tracking. + baz = 0; + imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0)); + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0))); + //imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, quux); + endInvocationInterlockARB(); +} diff --git a/3rdparty/spirv-cross/shaders-msl/frag/pixel-interlock-ordered.msl2.frag b/3rdparty/spirv-cross/shaders-msl/frag/pixel-interlock-ordered.msl2.frag new file mode 100644 index 000000000..04886a672 --- /dev/null +++ b/3rdparty/spirv-cross/shaders-msl/frag/pixel-interlock-ordered.msl2.frag @@ -0,0 +1,36 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(pixel_interlock_ordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +//layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2, rgba8) uniform readonly image2D img3; +layout(binding = 3) coherent buffer Buffer +{ + int foo; + uint bar; +}; +layout(binding = 4) buffer Buffer2 +{ + uint quux; +}; + +layout(binding = 5, rgba8) uniform 
writeonly image2D img4; +layout(binding = 6) buffer Buffer3 +{ + int baz; +}; + +void main() +{ + // Deliberately outside the critical section to test usage tracking. + baz = 0; + imageStore(img4, ivec2(1, 1), vec4(1.0, 0.0, 0.0, 1.0)); + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), imageLoad(img3, ivec2(0, 0))); + //imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, quux); + endInvocationInterlockARB(); +} diff --git a/3rdparty/spirv-cross/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag b/3rdparty/spirv-cross/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag new file mode 100644 index 000000000..ebd8d6bab --- /dev/null +++ b/3rdparty/spirv-cross/shaders-no-opt/asm/frag/pixel-interlock-callstack.asm.frag @@ -0,0 +1,89 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction 
%void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + OpReturn + OpFunctionEnd + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + OpBeginInvocationInterlockEXT + %43 = OpFunctionCall %void %callee2_ + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd diff --git a/3rdparty/spirv-cross/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag b/3rdparty/spirv-cross/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag new file mode 100644 index 
000000000..69b8f9112 --- /dev/null +++ b/3rdparty/spirv-cross/shaders-no-opt/asm/frag/pixel-interlock-control-flow.asm.frag @@ -0,0 +1,121 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + + OpMemberDecorate %SSBO2 0 Offset 0 + OpDecorate %SSBO2 BufferBlock + OpDecorate %ssbo2 DescriptorSet 0 + OpDecorate %ssbo2 Binding 2 + + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint + %SSBO2 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 +%_ptr_Uniform_SSBO2 = OpTypePointer Uniform %SSBO2 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %ssbo2 = OpVariable %_ptr_Uniform_SSBO2 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %uint_4 = OpConstant %uint 4 + %float = OpTypeFloat 32 + %v4float = OpTypeVector %float 4 
+ %bool = OpTypeBool + %true = OpConstantTrue %bool +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %callee3_res = OpFunctionCall %void %callee3_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %calle3_block = OpLabel + %frag_coord_x_ptr = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %frag_coord_x = OpLoad %float %frag_coord_x_ptr + %frag_coord_int = OpConvertFToS %int %frag_coord_x + %ssbo_ptr = OpAccessChain %_ptr_Uniform_uint %ssbo2 %int_0 %frag_coord_int + OpStore %ssbo_ptr %uint_4 + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + + OpSelectionMerge %merged_block None + OpBranchConditional %true %dummy_block %merged_block + %dummy_block = OpLabel + OpBeginInvocationInterlockEXT + 
OpEndInvocationInterlockEXT + OpBranch %merged_block + + %merged_block = OpLabel + OpReturn + + OpFunctionEnd diff --git a/3rdparty/spirv-cross/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag b/3rdparty/spirv-cross/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag new file mode 100644 index 000000000..7c0fe9a2b --- /dev/null +++ b/3rdparty/spirv-cross/shaders-no-opt/asm/frag/pixel-interlock-split-functions.asm.frag @@ -0,0 +1,102 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 7 +; Bound: 45 +; Schema: 0 + OpCapability Shader + OpCapability FragmentShaderPixelInterlockEXT + OpExtension "SPV_EXT_fragment_shader_interlock" + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %gl_FragCoord + OpExecutionMode %main OriginUpperLeft + OpExecutionMode %main PixelInterlockOrderedEXT + OpSource GLSL 450 + OpSourceExtension "GL_ARB_fragment_shader_interlock" + OpName %main "main" + OpName %callee2_ "callee2(" + OpName %callee_ "callee(" + OpName %SSBO1 "SSBO1" + OpMemberName %SSBO1 0 "values1" + OpName %_ "" + OpName %gl_FragCoord "gl_FragCoord" + OpName %SSBO0 "SSBO0" + OpMemberName %SSBO0 0 "values0" + OpName %__0 "" + OpDecorate %_runtimearr_uint ArrayStride 4 + OpMemberDecorate %SSBO1 0 Offset 0 + OpDecorate %SSBO1 BufferBlock + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %gl_FragCoord BuiltIn FragCoord + OpDecorate %_runtimearr_uint_0 ArrayStride 4 + OpMemberDecorate %SSBO0 0 Offset 0 + OpDecorate %SSBO0 BufferBlock + OpDecorate %__0 DescriptorSet 0 + OpDecorate %__0 Binding 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %uint = OpTypeInt 32 0 +%_runtimearr_uint = OpTypeRuntimeArray %uint + %SSBO1 = OpTypeStruct %_runtimearr_uint +%_ptr_Uniform_SSBO1 = OpTypePointer Uniform %SSBO1 + %_ = OpVariable %_ptr_Uniform_SSBO1 Uniform + %int = OpTypeInt 32 1 + %int_0 = OpConstant %int 0 + %float = OpTypeFloat 32 + %v4float = 
OpTypeVector %float 4 +%_ptr_Input_v4float = OpTypePointer Input %v4float +%gl_FragCoord = OpVariable %_ptr_Input_v4float Input + %uint_0 = OpConstant %uint 0 +%_ptr_Input_float = OpTypePointer Input %float + %uint_1 = OpConstant %uint 1 +%_ptr_Uniform_uint = OpTypePointer Uniform %uint +%_runtimearr_uint_0 = OpTypeRuntimeArray %uint + %SSBO0 = OpTypeStruct %_runtimearr_uint_0 +%_ptr_Uniform_SSBO0 = OpTypePointer Uniform %SSBO0 + %__0 = OpVariable %_ptr_Uniform_SSBO0 Uniform + %main = OpFunction %void None %3 + %5 = OpLabel + %44 = OpFunctionCall %void %callee_ + %call3res = OpFunctionCall %void %callee3_ + %call4res = OpFunctionCall %void %callee4_ + OpReturn + OpFunctionEnd + + %callee3_ = OpFunction %void None %3 + %begin3 = OpLabel + OpBeginInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee4_ = OpFunction %void None %3 + %begin4 = OpLabel + OpEndInvocationInterlockEXT + OpReturn + OpFunctionEnd + + %callee2_ = OpFunction %void None %3 + %7 = OpLabel + %23 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %24 = OpLoad %float %23 + %25 = OpConvertFToS %int %24 + %28 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + %29 = OpLoad %uint %28 + %30 = OpIAdd %uint %29 %uint_1 + %31 = OpAccessChain %_ptr_Uniform_uint %_ %int_0 %25 + OpStore %31 %30 + OpReturn + OpFunctionEnd + %callee_ = OpFunction %void None %3 + %9 = OpLabel + %36 = OpAccessChain %_ptr_Input_float %gl_FragCoord %uint_0 + %37 = OpLoad %float %36 + %38 = OpConvertFToS %int %37 + %39 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + %40 = OpLoad %uint %39 + %41 = OpIAdd %uint %40 %uint_1 + %42 = OpAccessChain %_ptr_Uniform_uint %__0 %int_0 %38 + OpStore %42 %41 + %43 = OpFunctionCall %void %callee2_ + OpReturn + OpFunctionEnd diff --git a/3rdparty/spirv-cross/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag b/3rdparty/spirv-cross/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag new file mode 100644 index 000000000..59079fe58 --- /dev/null +++ 
b/3rdparty/spirv-cross/shaders-no-opt/frag/pixel-interlock-simple-callstack.frag @@ -0,0 +1,31 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require +layout(pixel_interlock_ordered) in; + +layout(set = 0, binding = 0, std430) buffer SSBO0 +{ + uint values0[]; +}; + +layout(set = 0, binding = 1, std430) buffer SSBO1 +{ + uint values1[]; +}; + +void callee2() +{ + values1[int(gl_FragCoord.x)] += 1; +} + +void callee() +{ + values0[int(gl_FragCoord.x)] += 1; + callee2(); +} + +void main() +{ + beginInvocationInterlockARB(); + callee(); + endInvocationInterlockARB(); +} diff --git a/3rdparty/spirv-cross/shaders/frag/pixel-interlock-ordered.frag b/3rdparty/spirv-cross/shaders/frag/pixel-interlock-ordered.frag new file mode 100644 index 000000000..4439f0672 --- /dev/null +++ b/3rdparty/spirv-cross/shaders/frag/pixel-interlock-ordered.frag @@ -0,0 +1,22 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(pixel_interlock_ordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2) coherent buffer Buffer +{ + int foo; + uint bar; +}; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0)); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, 0xff); + endInvocationInterlockARB(); +} diff --git a/3rdparty/spirv-cross/shaders/frag/pixel-interlock-unordered.frag b/3rdparty/spirv-cross/shaders/frag/pixel-interlock-unordered.frag new file mode 100644 index 000000000..f8fd468c1 --- /dev/null +++ b/3rdparty/spirv-cross/shaders/frag/pixel-interlock-unordered.frag @@ -0,0 +1,22 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(pixel_interlock_unordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2) coherent buffer Buffer +{ + int foo; + uint bar; +}; + +void 
main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0)); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, 0xff); + endInvocationInterlockARB(); +} diff --git a/3rdparty/spirv-cross/shaders/frag/sample-interlock-ordered.frag b/3rdparty/spirv-cross/shaders/frag/sample-interlock-ordered.frag new file mode 100644 index 000000000..fa80dc9f8 --- /dev/null +++ b/3rdparty/spirv-cross/shaders/frag/sample-interlock-ordered.frag @@ -0,0 +1,22 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(sample_interlock_ordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2) coherent buffer Buffer +{ + int foo; + uint bar; +}; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0)); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, gl_SampleMaskIn[0]); + endInvocationInterlockARB(); +} diff --git a/3rdparty/spirv-cross/shaders/frag/sample-interlock-unordered.frag b/3rdparty/spirv-cross/shaders/frag/sample-interlock-unordered.frag new file mode 100644 index 000000000..6fe5437f3 --- /dev/null +++ b/3rdparty/spirv-cross/shaders/frag/sample-interlock-unordered.frag @@ -0,0 +1,22 @@ +#version 450 +#extension GL_ARB_fragment_shader_interlock : require + +layout(sample_interlock_unordered) in; + +layout(binding = 0, rgba8) uniform writeonly image2D img; +layout(binding = 1, r32ui) uniform uimage2D img2; +layout(binding = 2) coherent buffer Buffer +{ + int foo; + uint bar; +}; + +void main() +{ + beginInvocationInterlockARB(); + imageStore(img, ivec2(0, 0), vec4(1.0, 0.0, 0.0, 1.0)); + imageAtomicAdd(img2, ivec2(0, 0), 1u); + foo += 42; + atomicAnd(bar, 0xff); + endInvocationInterlockARB(); +} diff --git a/3rdparty/spirv-cross/spirv_cfg.cpp b/3rdparty/spirv-cross/spirv_cfg.cpp index de7906695..df1a7fd82 100644 --- 
a/3rdparty/spirv-cross/spirv_cfg.cpp +++ b/3rdparty/spirv-cross/spirv_cfg.cpp @@ -237,13 +237,13 @@ uint32_t CFG::find_loop_dominator(uint32_t block_id) const for (auto &pred : itr->second) { auto &pred_block = compiler.get(pred); - if (pred_block.merge == SPIRBlock::MergeLoop && pred_block.merge_block == block_id) + if (pred_block.merge == SPIRBlock::MergeLoop && pred_block.merge_block == ID(block_id)) { pred_block_id = pred; ignore_loop_header = true; break; } - else if (pred_block.merge == SPIRBlock::MergeSelection && pred_block.next_block == block_id) + else if (pred_block.merge == SPIRBlock::MergeSelection && pred_block.next_block == ID(block_id)) { pred_block_id = pred; break; @@ -268,14 +268,14 @@ uint32_t CFG::find_loop_dominator(uint32_t block_id) const return block_id; } -bool CFG::node_terminates_control_flow_in_sub_graph(uint32_t from, uint32_t to) const +bool CFG::node_terminates_control_flow_in_sub_graph(BlockID from, BlockID to) const { // Walk backwards, starting from "to" block. // Only follow pred edges if they have a 1:1 relationship, or a merge relationship. // If we cannot find a path to "from", we must assume that to is inside control flow in some way. 
auto &from_block = compiler.get(from); - uint32_t ignore_block_id = 0; + BlockID ignore_block_id = 0; if (from_block.merge == SPIRBlock::MergeLoop) ignore_block_id = from_block.merge_block; diff --git a/3rdparty/spirv-cross/spirv_cfg.hpp b/3rdparty/spirv-cross/spirv_cfg.hpp index fd3c0e6e7..5f62cca30 100644 --- a/3rdparty/spirv-cross/spirv_cfg.hpp +++ b/3rdparty/spirv-cross/spirv_cfg.hpp @@ -97,7 +97,7 @@ public: uint32_t find_loop_dominator(uint32_t block) const; - bool node_terminates_control_flow_in_sub_graph(uint32_t from, uint32_t to) const; + bool node_terminates_control_flow_in_sub_graph(BlockID from, BlockID to) const; private: struct VisitOrder diff --git a/3rdparty/spirv-cross/spirv_common.hpp b/3rdparty/spirv-cross/spirv_common.hpp index 1641fdaa5..c1c6fc812 100644 --- a/3rdparty/spirv-cross/spirv_common.hpp +++ b/3rdparty/spirv-cross/spirv_common.hpp @@ -20,6 +20,7 @@ #include "spirv.hpp" #include "spirv_cross_containers.hpp" #include "spirv_cross_error_handling.hpp" +#include // A bit crude, but allows projects which embed SPIRV-Cross statically to // effectively hide all the symbols from other projects. @@ -270,20 +271,6 @@ struct Instruction uint32_t length = 0; }; -// Helper for Variant interface. -struct IVariant -{ - virtual ~IVariant() = default; - virtual IVariant *clone(ObjectPoolBase *pool) = 0; - uint32_t self = 0; -}; - -#define SPIRV_CROSS_DECLARE_CLONE(T) \ - IVariant *clone(ObjectPoolBase *pool) override \ - { \ - return static_cast *>(pool)->allocate(*this); \ - } - enum Types { TypeNone, @@ -303,6 +290,136 @@ enum Types TypeCount }; +template +class TypedID; + +template <> +class TypedID +{ +public: + TypedID() = default; + TypedID(uint32_t id_) + : id(id_) + { + } + + template + TypedID(const TypedID &other) + { + *this = other; + } + + template + TypedID &operator=(const TypedID &other) + { + id = uint32_t(other); + return *this; + } + + // Implicit conversion to u32 is desired here. 
+ // As long as we block implicit conversion between TypedID and TypedID we're good. + operator uint32_t() const + { + return id; + } + + template + operator TypedID() const + { + return TypedID(*this); + } + + bool operator==(const TypedID &other) const + { + return id == other.id; + } + + bool operator!=(const TypedID &other) const + { + return id != other.id; + } + + template + bool operator==(const TypedID &other) const + { + return id == uint32_t(other); + } + + template + bool operator!=(const TypedID &other) const + { + return id != uint32_t(other); + } + +private: + uint32_t id = 0; +}; + +template +class TypedID +{ +public: + TypedID() = default; + TypedID(uint32_t id_) + : id(id_) + { + } + + explicit TypedID(const TypedID &other) + : id(uint32_t(other)) + { + } + + operator uint32_t() const + { + return id; + } + + bool operator==(const TypedID &other) const + { + return id == other.id; + } + + bool operator!=(const TypedID &other) const + { + return id != other.id; + } + + bool operator==(const TypedID &other) const + { + return id == uint32_t(other); + } + + bool operator!=(const TypedID &other) const + { + return id != uint32_t(other); + } + +private: + uint32_t id = 0; +}; + +using VariableID = TypedID; +using TypeID = TypedID; +using ConstantID = TypedID; +using FunctionID = TypedID; +using BlockID = TypedID; +using ID = TypedID; + +// Helper for Variant interface. 
+struct IVariant +{ + virtual ~IVariant() = default; + virtual IVariant *clone(ObjectPoolBase *pool) = 0; + ID self = 0; +}; + +#define SPIRV_CROSS_DECLARE_CLONE(T) \ + IVariant *clone(ObjectPoolBase *pool) override \ + { \ + return static_cast *>(pool)->allocate(*this); \ + } + struct SPIRUndef : IVariant { enum @@ -310,11 +427,11 @@ struct SPIRUndef : IVariant type = TypeUndef }; - explicit SPIRUndef(uint32_t basetype_) + explicit SPIRUndef(TypeID basetype_) : basetype(basetype_) { } - uint32_t basetype; + TypeID basetype; SPIRV_CROSS_DECLARE_CLONE(SPIRUndef) }; @@ -344,15 +461,15 @@ struct SPIRCombinedImageSampler : IVariant { type = TypeCombinedImageSampler }; - SPIRCombinedImageSampler(uint32_t type_, uint32_t image_, uint32_t sampler_) + SPIRCombinedImageSampler(TypeID type_, VariableID image_, VariableID sampler_) : combined_type(type_) , image(image_) , sampler(sampler_) { } - uint32_t combined_type; - uint32_t image; - uint32_t sampler; + TypeID combined_type; + VariableID image; + VariableID sampler; SPIRV_CROSS_DECLARE_CLONE(SPIRCombinedImageSampler) }; @@ -364,16 +481,18 @@ struct SPIRConstantOp : IVariant type = TypeConstantOp }; - SPIRConstantOp(uint32_t result_type, spv::Op op, const uint32_t *args, uint32_t length) + SPIRConstantOp(TypeID result_type, spv::Op op, const uint32_t *args, uint32_t length) : opcode(op) - , arguments(args, args + length) , basetype(result_type) { + arguments.reserve(length); + for (uint32_t i = 0; i < length; i++) + arguments.push_back(args[i]); } spv::Op opcode; SmallVector arguments; - uint32_t basetype; + TypeID basetype; SPIRV_CROSS_DECLARE_CLONE(SPIRConstantOp) }; @@ -436,11 +555,11 @@ struct SPIRType : IVariant spv::StorageClass storage = spv::StorageClassGeneric; - SmallVector member_types; + SmallVector member_types; struct ImageType { - uint32_t type; + TypeID type; spv::Dim dim; bool depth; bool arrayed; @@ -453,11 +572,11 @@ struct SPIRType : IVariant // Structs can be declared multiple times if they are used 
as part of interface blocks. // We want to detect this so that we only emit the struct definition once. // Since we cannot rely on OpName to be equal, we need to figure out aliases. - uint32_t type_alias = 0; + TypeID type_alias = 0; // Denotes the type which this type is based on. // Allows the backend to traverse how a complex type is built up during access chains. - uint32_t parent_type = 0; + TypeID parent_type = 0; // Used in backends to avoid emitting members with conflicting names. std::unordered_set member_name_cache; @@ -496,7 +615,7 @@ struct SPIRExtension : IVariant // so in order to avoid conflicts, we can't stick them in the ids array. struct SPIREntryPoint { - SPIREntryPoint(uint32_t self_, spv::ExecutionModel execution_model, const std::string &entry_name) + SPIREntryPoint(FunctionID self_, spv::ExecutionModel execution_model, const std::string &entry_name) : self(self_) , name(entry_name) , orig_name(entry_name) @@ -505,10 +624,10 @@ struct SPIREntryPoint } SPIREntryPoint() = default; - uint32_t self = 0; + FunctionID self = 0; std::string name; std::string orig_name; - SmallVector interface_variables; + SmallVector interface_variables; Bitset flags; struct @@ -529,7 +648,7 @@ struct SPIRExpression : IVariant }; // Only created by the backend target to avoid creating tons of temporaries. - SPIRExpression(std::string expr, uint32_t expression_type_, bool immutable_) + SPIRExpression(std::string expr, TypeID expression_type_, bool immutable_) : expression(move(expr)) , expression_type(expression_type_) , immutable(immutable_) @@ -539,14 +658,14 @@ struct SPIRExpression : IVariant // If non-zero, prepend expression with to_expression(base_expression). // Used in amortizing multiple calls to to_expression() // where in certain cases that would quickly force a temporary when not needed. 
- uint32_t base_expression = 0; + ID base_expression = 0; std::string expression; - uint32_t expression_type = 0; + TypeID expression_type = 0; // If this expression is a forwarded load, // allow us to reference the original variable. - uint32_t loaded_from = 0; + ID loaded_from = 0; // If this expression will never change, we can avoid lots of temporaries // in high level source. @@ -562,11 +681,11 @@ struct SPIRExpression : IVariant bool access_chain = false; // A list of expressions which this expression depends on. - SmallVector expression_dependencies; + SmallVector expression_dependencies; // By reading this expression, we implicitly read these expressions as well. // Used by access chain Store and Load since we read multiple expressions in this case. - SmallVector implied_read_expressions; + SmallVector implied_read_expressions; SPIRV_CROSS_DECLARE_CLONE(SPIRExpression) }; @@ -578,12 +697,12 @@ struct SPIRFunctionPrototype : IVariant type = TypeFunctionPrototype }; - explicit SPIRFunctionPrototype(uint32_t return_type_) + explicit SPIRFunctionPrototype(TypeID return_type_) : return_type(return_type_) { } - uint32_t return_type; + TypeID return_type; SmallVector parameter_types; SPIRV_CROSS_DECLARE_CLONE(SPIRFunctionPrototype) @@ -658,23 +777,23 @@ struct SPIRBlock : IVariant Terminator terminator = Unknown; Merge merge = MergeNone; Hints hint = HintNone; - uint32_t next_block = 0; - uint32_t merge_block = 0; - uint32_t continue_block = 0; + BlockID next_block = 0; + BlockID merge_block = 0; + BlockID continue_block = 0; - uint32_t return_value = 0; // If 0, return nothing (void). - uint32_t condition = 0; - uint32_t true_block = 0; - uint32_t false_block = 0; - uint32_t default_block = 0; + ID return_value = 0; // If 0, return nothing (void). + ID condition = 0; + BlockID true_block = 0; + BlockID false_block = 0; + BlockID default_block = 0; SmallVector ops; struct Phi { - uint32_t local_variable; // flush local variable ... 
- uint32_t parent; // If we're in from_block and want to branch into this block ... - uint32_t function_variable; // to this function-global "phi" variable first. + ID local_variable; // flush local variable ... + BlockID parent; // If we're in from_block and want to branch into this block ... + VariableID function_variable; // to this function-global "phi" variable first. }; // Before entering this block flush out local variables to magical "phi" variables. @@ -682,16 +801,16 @@ struct SPIRBlock : IVariant // Declare these temporaries before beginning the block. // Used for handling complex continue blocks which have side effects. - SmallVector> declare_temporary; + SmallVector> declare_temporary; // Declare these temporaries, but only conditionally if this block turns out to be // a complex loop header. - SmallVector> potential_declare_temporary; + SmallVector> potential_declare_temporary; struct Case { uint32_t value; - uint32_t block; + BlockID block; }; SmallVector cases; @@ -707,25 +826,25 @@ struct SPIRBlock : IVariant // If marked, we have explicitly handled Phi from this block, so skip any flushes related to that on a branch. // Used to handle an edge case with switch and case-label fallthrough where fall-through writes to Phi. - uint32_t ignore_phi_from_block = 0; + BlockID ignore_phi_from_block = 0; // The dominating block which this block might be within. // Used in continue; blocks to determine if we really need to write continue. - uint32_t loop_dominator = 0; + BlockID loop_dominator = 0; // All access to these variables are dominated by this block, // so before branching anywhere we need to make sure that we declare these variables. - SmallVector dominated_variables; + SmallVector dominated_variables; // These are variables which should be declared in a for loop header, if we // fail to use a classic for-loop, // we remove these variables, and fall back to regular variables outside the loop. 
- SmallVector loop_variables; + SmallVector loop_variables; // Some expressions are control-flow dependent, i.e. any instruction which relies on derivatives or // sub-group-like operations. // Make sure that we only use these expressions in the original block. - SmallVector invalidate_expressions; + SmallVector invalidate_expressions; SPIRV_CROSS_DECLARE_CLONE(SPIRBlock) }; @@ -737,7 +856,7 @@ struct SPIRFunction : IVariant type = TypeFunction }; - SPIRFunction(uint32_t return_type_, uint32_t function_type_) + SPIRFunction(TypeID return_type_, TypeID function_type_) : return_type(return_type_) , function_type(function_type_) { @@ -745,8 +864,8 @@ struct SPIRFunction : IVariant struct Parameter { - uint32_t type; - uint32_t id; + TypeID type; + ID id; uint32_t read_count; uint32_t write_count; @@ -768,25 +887,25 @@ struct SPIRFunction : IVariant // or a global ID. struct CombinedImageSamplerParameter { - uint32_t id; - uint32_t image_id; - uint32_t sampler_id; + VariableID id; + VariableID image_id; + VariableID sampler_id; bool global_image; bool global_sampler; bool depth; }; - uint32_t return_type; - uint32_t function_type; + TypeID return_type; + TypeID function_type; SmallVector arguments; // Can be used by backends to add magic arguments. // Currently used by combined image/sampler implementation. SmallVector shadow_arguments; - SmallVector local_variables; - uint32_t entry_block = 0; - SmallVector blocks; + SmallVector local_variables; + BlockID entry_block = 0; + SmallVector blocks; SmallVector combined_parameters; struct EntryLine @@ -796,12 +915,12 @@ struct SPIRFunction : IVariant }; EntryLine entry_line; - void add_local_variable(uint32_t id) + void add_local_variable(VariableID id) { local_variables.push_back(id); } - void add_parameter(uint32_t parameter_type, uint32_t id, bool alias_global_variable = false) + void add_parameter(TypeID parameter_type, ID id, bool alias_global_variable = false) { // Arguments are read-only until proven otherwise. 
arguments.push_back({ parameter_type, id, 0u, 0u, alias_global_variable }); @@ -822,7 +941,7 @@ struct SPIRFunction : IVariant // On function entry, make sure to copy a constant array into thread addr space to work around // the case where we are passing a constant array by value to a function on backends which do not // consider arrays value types. - SmallVector constant_arrays_needed_on_stack; + SmallVector constant_arrays_needed_on_stack; bool active = false; bool flush_undeclared = true; @@ -838,7 +957,7 @@ struct SPIRAccessChain : IVariant type = TypeAccessChain }; - SPIRAccessChain(uint32_t basetype_, spv::StorageClass storage_, std::string base_, std::string dynamic_index_, + SPIRAccessChain(TypeID basetype_, spv::StorageClass storage_, std::string base_, std::string dynamic_index_, int32_t static_index_) : basetype(basetype_) , storage(storage_) @@ -853,20 +972,20 @@ struct SPIRAccessChain : IVariant // which has no usable buffer type ala GLSL SSBOs. // StructuredBuffer is too limited, so our only option is to deal with ByteAddressBuffer which works with raw addresses. - uint32_t basetype; + TypeID basetype; spv::StorageClass storage; std::string base; std::string dynamic_index; int32_t static_index; - uint32_t loaded_from = 0; + VariableID loaded_from = 0; uint32_t matrix_stride = 0; bool row_major_matrix = false; bool immutable = false; // By reading this expression, we implicitly read these expressions as well. // Used by access chain Store and Load since we read multiple expressions in this case. 
- SmallVector implied_read_expressions; + SmallVector implied_read_expressions; SPIRV_CROSS_DECLARE_CLONE(SPIRAccessChain) }; @@ -879,7 +998,7 @@ struct SPIRVariable : IVariant }; SPIRVariable() = default; - SPIRVariable(uint32_t basetype_, spv::StorageClass storage_, uint32_t initializer_ = 0, uint32_t basevariable_ = 0) + SPIRVariable(TypeID basetype_, spv::StorageClass storage_, ID initializer_ = 0, VariableID basevariable_ = 0) : basetype(basetype_) , storage(storage_) , initializer(initializer_) @@ -887,11 +1006,11 @@ struct SPIRVariable : IVariant { } - uint32_t basetype = 0; + TypeID basetype = 0; spv::StorageClass storage = spv::StorageClassGeneric; uint32_t decoration = 0; - uint32_t initializer = 0; - uint32_t basevariable = 0; + ID initializer = 0; + VariableID basevariable = 0; SmallVector dereference_chain; bool compat_builtin = false; @@ -901,10 +1020,10 @@ struct SPIRVariable : IVariant // When we read the variable as an expression, just forward // shadowed_id as the expression. bool statically_assigned = false; - uint32_t static_expression = 0; + ID static_expression = 0; // Temporaries which can remain forwarded as long as this variable is not modified. - SmallVector dependees; + SmallVector dependees; bool forwardable = true; bool deferred_declaration = false; @@ -917,7 +1036,7 @@ struct SPIRVariable : IVariant uint32_t remapped_components = 0; // The block which dominates all access to this variable. - uint32_t dominator = 0; + BlockID dominator = 0; // If true, this variable is a loop variable, when accessing the variable // outside a loop, // we should statically forward it. @@ -952,15 +1071,12 @@ struct SPIRConstant : IVariant { Constant r[4]; // If != 0, this element is a specialization constant, and we should keep track of it as such. - uint32_t id[4]; + ID id[4]; uint32_t vecsize = 1; - // Workaround for MSVC 2013, initializing an array breaks. 
ConstantVector() { memset(r, 0, sizeof(r)); - for (unsigned i = 0; i < 4; i++) - id[i] = 0; } }; @@ -968,15 +1084,8 @@ struct SPIRConstant : IVariant { ConstantVector c[4]; // If != 0, this column is a specialization constant, and we should keep track of it as such. - uint32_t id[4]; + ID id[4]; uint32_t columns = 1; - - // Workaround for MSVC 2013, initializing an array breaks. - ConstantMatrix() - { - for (unsigned i = 0; i < 4; i++) - id[i] = 0; - } }; static inline float f16_to_f32(uint16_t u16_value) @@ -1141,16 +1250,18 @@ struct SPIRConstant : IVariant SPIRConstant() = default; - SPIRConstant(uint32_t constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized) + SPIRConstant(TypeID constant_type_, const uint32_t *elements, uint32_t num_elements, bool specialized) : constant_type(constant_type_) , specialization(specialized) { - subconstants.insert(std::end(subconstants), elements, elements + num_elements); + subconstants.reserve(num_elements); + for (uint32_t i = 0; i < num_elements; i++) + subconstants.push_back(elements[i]); specialization = specialized; } // Construct scalar (32-bit). - SPIRConstant(uint32_t constant_type_, uint32_t v0, bool specialized) + SPIRConstant(TypeID constant_type_, uint32_t v0, bool specialized) : constant_type(constant_type_) , specialization(specialized) { @@ -1160,7 +1271,7 @@ struct SPIRConstant : IVariant } // Construct scalar (64-bit). - SPIRConstant(uint32_t constant_type_, uint64_t v0, bool specialized) + SPIRConstant(TypeID constant_type_, uint64_t v0, bool specialized) : constant_type(constant_type_) , specialization(specialized) { @@ -1170,7 +1281,7 @@ struct SPIRConstant : IVariant } // Construct vectors and matrices. 
- SPIRConstant(uint32_t constant_type_, const SPIRConstant *const *vector_elements, uint32_t num_elements, + SPIRConstant(TypeID constant_type_, const SPIRConstant *const *vector_elements, uint32_t num_elements, bool specialized) : constant_type(constant_type_) , specialization(specialized) @@ -1202,7 +1313,7 @@ struct SPIRConstant : IVariant } } - uint32_t constant_type = 0; + TypeID constant_type = 0; ConstantMatrix m; // If this constant is a specialization constant (i.e. created with OpSpecConstant*). @@ -1214,7 +1325,7 @@ struct SPIRConstant : IVariant bool is_used_as_lut = false; // For composites which are constant arrays, etc. - SmallVector subconstants; + SmallVector subconstants; // Non-Vulkan GLSL, HLSL and sometimes MSL emits defines for each specialization constant, // and uses them to initialize the constant. This allows the user @@ -1349,9 +1460,9 @@ public: return type; } - uint32_t get_id() const + ID get_id() const { - return holder ? holder->self : 0; + return holder ? holder->self : ID(0); } bool empty() const @@ -1431,6 +1542,9 @@ enum ExtendedDecorations // Used for decorations like resource indices for samplers when part of combined image samplers. // A variable might need to hold two resource indices in this case. SPIRVCrossDecorationResourceIndexSecondary, + // Used for resource indices for multiplanar images when part of combined image samplers. + SPIRVCrossDecorationResourceIndexTertiary, + SPIRVCrossDecorationResourceIndexQuaternary, // Marks a buffer block for using explicit offsets (GLSL/HLSL). SPIRVCrossDecorationExplicitOffset, @@ -1439,6 +1553,12 @@ enum ExtendedDecorations // In MSL, this is used to adjust the WorkgroupId and GlobalInvocationId variables. SPIRVCrossDecorationBuiltInDispatchBase, + // Apply to a variable that is a function parameter; marks it as being a "dynamic" + // combined image-sampler. 
In MSL, this is used when a function parameter might hold + // either a regular combined image-sampler or one that has an attached sampler + // Y'CbCr conversion. + SPIRVCrossDecorationDynamicImageSampler, + SPIRVCrossDecorationCount }; @@ -1582,4 +1702,16 @@ static inline bool opcode_is_sign_invariant(spv::Op opcode) } } // namespace SPIRV_CROSS_NAMESPACE +namespace std +{ +template +struct hash> +{ + size_t operator()(const SPIRV_CROSS_NAMESPACE::TypedID &value) const + { + return std::hash()(value); + } +}; +} // namespace std + #endif diff --git a/3rdparty/spirv-cross/spirv_cross.cpp b/3rdparty/spirv-cross/spirv_cross.cpp index 166a9e0a3..2a6b96438 100644 --- a/3rdparty/spirv-cross/spirv_cross.cpp +++ b/3rdparty/spirv-cross/spirv_cross.cpp @@ -578,7 +578,7 @@ ShaderResources Compiler::get_shader_resources() const return get_shader_resources(nullptr); } -ShaderResources Compiler::get_shader_resources(const unordered_set &active_variables) const +ShaderResources Compiler::get_shader_resources(const unordered_set &active_variables) const { return get_shader_resources(&active_variables); } @@ -735,16 +735,16 @@ bool Compiler::InterfaceVariableAccessHandler::handle(Op opcode, const uint32_t return true; } -unordered_set Compiler::get_active_interface_variables() const +unordered_set Compiler::get_active_interface_variables() const { // Traverse the call graph and find all interface variables which are in use. - unordered_set variables; + unordered_set variables; InterfaceVariableAccessHandler handler(*this, variables); traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); // Make sure we preserve output variables which are only initialized, but never accessed by any code. 
ir.for_each_typed_id([&](uint32_t, const SPIRVariable &var) { - if (var.storage == StorageClassOutput && var.initializer != 0) + if (var.storage == StorageClassOutput && var.initializer != ID(0)) variables.insert(var.self); }); @@ -755,13 +755,13 @@ unordered_set Compiler::get_active_interface_variables() const return variables; } -void Compiler::set_enabled_interface_variables(std::unordered_set active_variables) +void Compiler::set_enabled_interface_variables(std::unordered_set active_variables) { active_interface_variables = move(active_variables); check_active_interface_variables = true; } -ShaderResources Compiler::get_shader_resources(const unordered_set *active_variables) const +ShaderResources Compiler::get_shader_resources(const unordered_set *active_variables) const { ShaderResources res; @@ -978,17 +978,17 @@ void Compiler::update_name_cache(unordered_set &cache, string &name) update_name_cache(cache, cache, name); } -void Compiler::set_name(uint32_t id, const std::string &name) +void Compiler::set_name(ID id, const std::string &name) { ir.set_name(id, name); } -const SPIRType &Compiler::get_type(uint32_t id) const +const SPIRType &Compiler::get_type(TypeID id) const { return get(id); } -const SPIRType &Compiler::get_type_from_variable(uint32_t id) const +const SPIRType &Compiler::get_type_from_variable(VariableID id) const { return get(get(id).basetype); } @@ -1059,23 +1059,23 @@ bool Compiler::is_sampled_image_type(const SPIRType &type) type.image.dim != DimBuffer; } -void Compiler::set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration, +void Compiler::set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration, const std::string &argument) { ir.set_member_decoration_string(id, index, decoration, argument); } -void Compiler::set_member_decoration(uint32_t id, uint32_t index, Decoration decoration, uint32_t argument) +void Compiler::set_member_decoration(TypeID id, uint32_t index, Decoration 
decoration, uint32_t argument) { ir.set_member_decoration(id, index, decoration, argument); } -void Compiler::set_member_name(uint32_t id, uint32_t index, const std::string &name) +void Compiler::set_member_name(TypeID id, uint32_t index, const std::string &name) { ir.set_member_name(id, index, name); } -const std::string &Compiler::get_member_name(uint32_t id, uint32_t index) const +const std::string &Compiler::get_member_name(TypeID id, uint32_t index) const { return ir.get_member_name(id, index); } @@ -1091,7 +1091,7 @@ void Compiler::set_member_qualified_name(uint32_t type_id, uint32_t index, const ir.meta[type_id].members[index].qualified_alias = name; } -const string &Compiler::get_member_qualified_name(uint32_t type_id, uint32_t index) const +const string &Compiler::get_member_qualified_name(TypeID type_id, uint32_t index) const { auto *m = ir.find_meta(type_id); if (m && index < m->members.size()) @@ -1100,32 +1100,32 @@ const string &Compiler::get_member_qualified_name(uint32_t type_id, uint32_t ind return ir.get_empty_string(); } -uint32_t Compiler::get_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +uint32_t Compiler::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const { return ir.get_member_decoration(id, index, decoration); } -const Bitset &Compiler::get_member_decoration_bitset(uint32_t id, uint32_t index) const +const Bitset &Compiler::get_member_decoration_bitset(TypeID id, uint32_t index) const { return ir.get_member_decoration_bitset(id, index); } -bool Compiler::has_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +bool Compiler::has_member_decoration(TypeID id, uint32_t index, Decoration decoration) const { return ir.has_member_decoration(id, index, decoration); } -void Compiler::unset_member_decoration(uint32_t id, uint32_t index, Decoration decoration) +void Compiler::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration) { 
ir.unset_member_decoration(id, index, decoration); } -void Compiler::set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument) +void Compiler::set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument) { ir.set_decoration_string(id, decoration, argument); } -void Compiler::set_decoration(uint32_t id, Decoration decoration, uint32_t argument) +void Compiler::set_decoration(ID id, Decoration decoration, uint32_t argument) { ir.set_decoration(id, decoration, argument); } @@ -1152,6 +1152,8 @@ static uint32_t get_default_extended_decoration(ExtendedDecorations decoration) { case SPIRVCrossDecorationResourceIndexPrimary: case SPIRVCrossDecorationResourceIndexSecondary: + case SPIRVCrossDecorationResourceIndexTertiary: + case SPIRVCrossDecorationResourceIndexQuaternary: case SPIRVCrossDecorationInterfaceMemberIndex: return ~(0u); @@ -1227,22 +1229,22 @@ void Compiler::unset_extended_member_decoration(uint32_t type, uint32_t index, E dec.extended.values[decoration] = 0; } -StorageClass Compiler::get_storage_class(uint32_t id) const +StorageClass Compiler::get_storage_class(VariableID id) const { return get(id).storage; } -const std::string &Compiler::get_name(uint32_t id) const +const std::string &Compiler::get_name(ID id) const { return ir.get_name(id); } -const std::string Compiler::get_fallback_name(uint32_t id) const +const std::string Compiler::get_fallback_name(ID id) const { return join("_", id); } -const std::string Compiler::get_block_fallback_name(uint32_t id) const +const std::string Compiler::get_block_fallback_name(VariableID id) const { auto &var = get(id); if (get_name(id).empty()) @@ -1251,37 +1253,37 @@ const std::string Compiler::get_block_fallback_name(uint32_t id) const return get_name(id); } -const Bitset &Compiler::get_decoration_bitset(uint32_t id) const +const Bitset &Compiler::get_decoration_bitset(ID id) const { return ir.get_decoration_bitset(id); } -bool 
Compiler::has_decoration(uint32_t id, Decoration decoration) const +bool Compiler::has_decoration(ID id, Decoration decoration) const { return ir.has_decoration(id, decoration); } -const string &Compiler::get_decoration_string(uint32_t id, Decoration decoration) const +const string &Compiler::get_decoration_string(ID id, Decoration decoration) const { return ir.get_decoration_string(id, decoration); } -const string &Compiler::get_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration) const +const string &Compiler::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const { return ir.get_member_decoration_string(id, index, decoration); } -uint32_t Compiler::get_decoration(uint32_t id, Decoration decoration) const +uint32_t Compiler::get_decoration(ID id, Decoration decoration) const { return ir.get_decoration(id, decoration); } -void Compiler::unset_decoration(uint32_t id, Decoration decoration) +void Compiler::unset_decoration(ID id, Decoration decoration) { ir.unset_decoration(id, decoration); } -bool Compiler::get_binary_offset_for_decoration(uint32_t id, spv::Decoration decoration, uint32_t &word_offset) const +bool Compiler::get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const { auto *m = ir.find_meta(id); if (!m) @@ -1460,7 +1462,7 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc if (block.merge == SPIRBlock::MergeLoop) return SPIRBlock::WhileLoop; - if (block.loop_dominator == SPIRBlock::NoDominator) + if (block.loop_dominator == BlockID(SPIRBlock::NoDominator)) { // Continue block is never reached from CFG. 
return SPIRBlock::ComplexLoop; @@ -1505,6 +1507,7 @@ SPIRBlock::ContinueBlockType Compiler::continue_block_type(const SPIRBlock &bloc bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHandler &handler) const { handler.set_current_block(block); + handler.rearm_current_block(block); // Ideally, perhaps traverse the CFG instead of all blocks in order to eliminate dead blocks, // but this shouldn't be a problem in practice unless the SPIR-V is doing insane things like recursing @@ -1528,6 +1531,8 @@ bool Compiler::traverse_all_reachable_opcodes(const SPIRBlock &block, OpcodeHand return false; if (!handler.end_function_scope(ops, i.length)) return false; + + handler.rearm_current_block(block); } } } @@ -1724,7 +1729,7 @@ bool Compiler::BufferAccessHandler::handle(Op opcode, const uint32_t *args, uint return true; } -SmallVector Compiler::get_active_buffer_ranges(uint32_t id) const +SmallVector Compiler::get_active_buffer_ranges(VariableID id) const { SmallVector ranges; BufferAccessHandler handler(*this, ranges, id); @@ -1817,19 +1822,19 @@ uint32_t Compiler::get_work_group_size_specialization_constants(SpecializationCo { auto &c = get(execution.workgroup_size.constant); - if (c.m.c[0].id[0] != 0) + if (c.m.c[0].id[0] != ID(0)) { x.id = c.m.c[0].id[0]; x.constant_id = get_decoration(c.m.c[0].id[0], DecorationSpecId); } - if (c.m.c[0].id[1] != 0) + if (c.m.c[0].id[1] != ID(0)) { y.id = c.m.c[0].id[1]; y.constant_id = get_decoration(c.m.c[0].id[1], DecorationSpecId); } - if (c.m.c[0].id[2] != 0) + if (c.m.c[0].id[2] != ID(0)) { z.id = c.m.c[0].id[2]; z.constant_id = get_decoration(c.m.c[0].id[2], DecorationSpecId); @@ -1884,36 +1889,36 @@ bool Compiler::is_tessellation_shader() const return is_tessellation_shader(get_execution_model()); } -void Compiler::set_remapped_variable_state(uint32_t id, bool remap_enable) +void Compiler::set_remapped_variable_state(VariableID id, bool remap_enable) { get(id).remapped_variable = remap_enable; } -bool 
Compiler::get_remapped_variable_state(uint32_t id) const +bool Compiler::get_remapped_variable_state(VariableID id) const { return get(id).remapped_variable; } -void Compiler::set_subpass_input_remapped_components(uint32_t id, uint32_t components) +void Compiler::set_subpass_input_remapped_components(VariableID id, uint32_t components) { get(id).remapped_components = components; } -uint32_t Compiler::get_subpass_input_remapped_components(uint32_t id) const +uint32_t Compiler::get_subpass_input_remapped_components(VariableID id) const { return get(id).remapped_components; } void Compiler::add_implied_read_expression(SPIRExpression &e, uint32_t source) { - auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), source); + auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source)); if (itr == end(e.implied_read_expressions)) e.implied_read_expressions.push_back(source); } void Compiler::add_implied_read_expression(SPIRAccessChain &e, uint32_t source) { - auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), source); + auto itr = find(begin(e.implied_read_expressions), end(e.implied_read_expressions), ID(source)); if (itr == end(e.implied_read_expressions)) e.implied_read_expressions.push_back(source); } @@ -2054,7 +2059,7 @@ bool Compiler::interface_variable_exists_in_entry_point(uint32_t id) const return true; auto &execution = get_entry_point(); - return find(begin(execution.interface_variables), end(execution.interface_variables), id) != + return find(begin(execution.interface_variables), end(execution.interface_variables), VariableID(id)) != end(execution.interface_variables); } @@ -2134,8 +2139,8 @@ bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *a { for (auto ¶m : params) { - uint32_t image_id = param.global_image ? param.image_id : args[param.image_id]; - uint32_t sampler_id = param.global_sampler ? 
param.sampler_id : args[param.sampler_id]; + VariableID image_id = param.global_image ? param.image_id : VariableID(args[param.image_id]); + VariableID sampler_id = param.global_sampler ? param.sampler_id : VariableID(args[param.sampler_id]); auto *i = compiler.maybe_get_backing_variable(image_id); auto *s = compiler.maybe_get_backing_variable(sampler_id); @@ -2152,8 +2157,8 @@ bool Compiler::CombinedImageSamplerHandler::end_function_scope(const uint32_t *a } void Compiler::CombinedImageSamplerHandler::register_combined_image_sampler(SPIRFunction &caller, - uint32_t combined_module_id, - uint32_t image_id, uint32_t sampler_id, + VariableID combined_module_id, + VariableID image_id, VariableID sampler_id, bool depth) { // We now have a texture ID and a sampler ID which will either be found as a global @@ -2440,8 +2445,8 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar // This information is statically known from the current place in the call stack. // Function parameters are not necessarily pointers, so if we don't have a backing variable, remapping will know // which backing variable the image/sample came from. - uint32_t image_id = remap_parameter(args[2]); - uint32_t sampler_id = is_fetch ? compiler.dummy_sampler_id : remap_parameter(args[3]); + VariableID image_id = remap_parameter(args[2]); + VariableID sampler_id = is_fetch ? 
compiler.dummy_sampler_id : remap_parameter(args[3]); auto itr = find_if(begin(compiler.combined_image_samplers), end(compiler.combined_image_samplers), [image_id, sampler_id](const CombinedImageSampler &combined) { @@ -2510,7 +2515,7 @@ bool Compiler::CombinedImageSamplerHandler::handle(Op opcode, const uint32_t *ar return true; } -uint32_t Compiler::build_dummy_sampler_for_combined_images() +VariableID Compiler::build_dummy_sampler_for_combined_images() { DummySamplerForCombinedImageHandler handler(*this); traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); @@ -2564,12 +2569,12 @@ SmallVector Compiler::get_specialization_constants() con return spec_consts; } -SPIRConstant &Compiler::get_constant(uint32_t id) +SPIRConstant &Compiler::get_constant(ConstantID id) { return get(id); } -const SPIRConstant &Compiler::get_constant(uint32_t id) const +const SPIRConstant &Compiler::get_constant(ConstantID id) const { return get(id); } @@ -2753,7 +2758,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 if (length < 2) return false; - uint32_t ptr = args[0]; + ID ptr = args[0]; auto *var = compiler.maybe_get_backing_variable(ptr); // If we store through an access chain, we have a partial write. @@ -2798,7 +2803,7 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 // The result of an access chain is a fixed expression and is not really considered a temporary. auto &e = compiler.set(args[1], "", args[0], true); auto *backing_variable = compiler.maybe_get_backing_variable(ptr); - e.loaded_from = backing_variable ? backing_variable->self : 0; + e.loaded_from = backing_variable ? VariableID(backing_variable->self) : VariableID(0); // Other backends might use SPIRAccessChain for this later. 
compiler.ir.ids[args[1]].set_allow_type_rewrite(); @@ -2811,8 +2816,8 @@ bool Compiler::AnalyzeVariableScopeAccessHandler::handle(spv::Op op, const uint3 if (length < 2) return false; - uint32_t lhs = args[0]; - uint32_t rhs = args[1]; + ID lhs = args[0]; + ID rhs = args[1]; auto *var = compiler.maybe_get_backing_variable(lhs); // If we store through an access chain, we have a partial write. @@ -3177,7 +3182,8 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA for (auto &var : handler.accessed_variables_to_block) { // Only deal with variables which are considered local variables in this function. - if (find(begin(entry.local_variables), end(entry.local_variables), var.first) == end(entry.local_variables)) + if (find(begin(entry.local_variables), end(entry.local_variables), VariableID(var.first)) == + end(entry.local_variables)) continue; DominatorBuilder builder(cfg); @@ -3218,7 +3224,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA builder.lift_continue_block_dominator(); // Add it to a per-block list of variables. - uint32_t dominating_block = builder.get_dominator(); + BlockID dominating_block = builder.get_dominator(); // For variables whose dominating block is inside a loop, there is a risk that these variables // actually need to be preserved across loop iterations. We can express this by adding @@ -3236,7 +3242,7 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA if (preserve) { // Find the outermost loop scope. 
- while (block->loop_dominator != SPIRBlock::NoDominator) + while (block->loop_dominator != BlockID(SPIRBlock::NoDominator)) block = &get(block->loop_dominator); if (block->self != dominating_block) @@ -3356,17 +3362,17 @@ void Compiler::analyze_variable_scope(SPIRFunction &entry, AnalyzeVariableScopeA { auto &var = get(loop_variable.first); auto dominator = var.dominator; - auto block = loop_variable.second; + BlockID block = loop_variable.second; // The variable was accessed in multiple continue blocks, ignore. - if (block == ~(0u) || block == 0) + if (block == BlockID(~(0u)) || block == BlockID(0)) continue; // Dead code. - if (dominator == 0) + if (dominator == ID(0)) continue; - uint32_t header = 0; + BlockID header = 0; // Find the loop header for this block if we are a continue block. { @@ -3517,7 +3523,7 @@ bool Compiler::may_read_undefined_variable_in_block(const SPIRBlock &block, uint return true; } -Bitset Compiler::get_buffer_block_flags(uint32_t id) const +Bitset Compiler::get_buffer_block_flags(VariableID id) const { return ir.get_buffer_block_flags(get(id)); } @@ -3796,7 +3802,12 @@ bool Compiler::CombinedImageSamplerDrefHandler::handle(spv::Op opcode, const uin const CFG &Compiler::get_cfg_for_current_function() const { assert(current_function); - auto cfg_itr = function_cfgs.find(current_function->self); + return get_cfg_for_function(current_function->self); +} + +const CFG &Compiler::get_cfg_for_function(uint32_t id) const +{ + auto cfg_itr = function_cfgs.find(id); assert(cfg_itr != end(function_cfgs)); assert(cfg_itr->second); return *cfg_itr->second; @@ -3951,13 +3962,13 @@ bool Compiler::CombinedImageSamplerUsageHandler::handle(Op opcode, const uint32_ return true; } -bool Compiler::buffer_is_hlsl_counter_buffer(uint32_t id) const +bool Compiler::buffer_is_hlsl_counter_buffer(VariableID id) const { auto *m = ir.find_meta(id); return m && m->hlsl_is_magic_counter_buffer; } -bool Compiler::buffer_get_hlsl_counter_buffer(uint32_t id, uint32_t 
&counter_id) const +bool Compiler::buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const { auto *m = ir.find_meta(id); @@ -4022,7 +4033,7 @@ const SmallVector &Compiler::get_declared_extensions() const return ir.declared_extensions; } -std::string Compiler::get_remapped_declared_block_name(uint32_t id) const +std::string Compiler::get_remapped_declared_block_name(VariableID id) const { return get_remapped_declared_block_name(id, false); } @@ -4247,6 +4258,316 @@ void Compiler::analyze_non_block_pointer_types() sort(begin(physical_storage_non_block_pointer_types), end(physical_storage_non_block_pointer_types)); } +bool Compiler::InterlockedResourceAccessPrepassHandler::handle(Op op, const uint32_t *, uint32_t) +{ + if (op == OpBeginInvocationInterlockEXT || op == OpEndInvocationInterlockEXT) + { + if (interlock_function_id != 0 && interlock_function_id != call_stack.back()) + { + // Most complex case, we have no sensible way of dealing with this + // other than taking the 100% conservative approach, exit early. + split_function_case = true; + return false; + } + else + { + interlock_function_id = call_stack.back(); + // If this call is performed inside control flow we have a problem. 
+ auto &cfg = compiler.get_cfg_for_function(interlock_function_id); + + uint32_t from_block_id = compiler.get(interlock_function_id).entry_block; + bool outside_control_flow = cfg.node_terminates_control_flow_in_sub_graph(from_block_id, current_block_id); + if (!outside_control_flow) + control_flow_interlock = true; + } + } + return true; +} + +void Compiler::InterlockedResourceAccessPrepassHandler::rearm_current_block(const SPIRBlock &block) +{ + current_block_id = block.self; +} + +bool Compiler::InterlockedResourceAccessPrepassHandler::begin_function_scope(const uint32_t *args, uint32_t length) +{ + if (length < 3) + return false; + call_stack.push_back(args[2]); + return true; +} + +bool Compiler::InterlockedResourceAccessPrepassHandler::end_function_scope(const uint32_t *, uint32_t) +{ + call_stack.pop_back(); + return true; +} + +bool Compiler::InterlockedResourceAccessHandler::begin_function_scope(const uint32_t *args, uint32_t length) +{ + if (length < 3) + return false; + + if (args[2] == interlock_function_id) + call_stack_is_interlocked = true; + + call_stack.push_back(args[2]); + return true; +} + +bool Compiler::InterlockedResourceAccessHandler::end_function_scope(const uint32_t *, uint32_t) +{ + if (call_stack.back() == interlock_function_id) + call_stack_is_interlocked = false; + + call_stack.pop_back(); + return true; +} + +void Compiler::InterlockedResourceAccessHandler::access_potential_resource(uint32_t id) +{ + if ((use_critical_section && in_crit_sec) || (control_flow_interlock && call_stack_is_interlocked) || + split_function_case) + { + compiler.interlocked_resources.insert(id); + } +} + +bool Compiler::InterlockedResourceAccessHandler::handle(Op opcode, const uint32_t *args, uint32_t length) +{ + // Only care about critical section analysis if we have simple case. 
+ if (use_critical_section) + { + if (opcode == OpBeginInvocationInterlockEXT) + { + in_crit_sec = true; + return true; + } + + if (opcode == OpEndInvocationInterlockEXT) + { + // End critical section--nothing more to do. + return false; + } + } + + // We need to figure out where images and buffers are loaded from, so do only the bare bones compilation we need. + switch (opcode) + { + case OpLoad: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + + // We're only concerned with buffer and image memory here. + if (!var) + break; + + switch (var->storage) + { + default: + break; + + case StorageClassUniformConstant: + { + uint32_t result_type = args[0]; + uint32_t id = args[1]; + compiler.set(id, "", result_type, true); + compiler.register_read(id, ptr, true); + break; + } + + case StorageClassUniform: + // Must have BufferBlock; we only care about SSBOs. + if (!compiler.has_decoration(compiler.get(var->basetype).self, DecorationBufferBlock)) + break; + // fallthrough + case StorageClassStorageBuffer: + access_potential_resource(var->self); + break; + } + break; + } + + case OpInBoundsAccessChain: + case OpAccessChain: + case OpPtrAccessChain: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + + auto &type = compiler.get(result_type); + if (type.storage == StorageClassUniform || type.storage == StorageClassUniformConstant || + type.storage == StorageClassStorageBuffer) + { + uint32_t id = args[1]; + uint32_t ptr = args[2]; + compiler.set(id, "", result_type, true); + compiler.register_read(id, ptr, true); + compiler.ir.ids[id].set_allow_type_rewrite(); + } + break; + } + + case OpImageTexelPointer: + { + if (length < 3) + return false; + + uint32_t result_type = args[0]; + uint32_t id = args[1]; + uint32_t ptr = args[2]; + auto &e = compiler.set(id, "", result_type, true); + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var) + e.loaded_from = var->self; + 
break; + } + + case OpStore: + case OpImageWrite: + case OpAtomicStore: + { + if (length < 1) + return false; + + uint32_t ptr = args[0]; + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant || + var->storage == StorageClassStorageBuffer)) + { + access_potential_resource(var->self); + } + + break; + } + + case OpCopyMemory: + { + if (length < 2) + return false; + + uint32_t dst = args[0]; + uint32_t src = args[1]; + auto *dst_var = compiler.maybe_get_backing_variable(dst); + auto *src_var = compiler.maybe_get_backing_variable(src); + + if (dst_var && (dst_var->storage == StorageClassUniform || dst_var->storage == StorageClassStorageBuffer)) + access_potential_resource(dst_var->self); + + if (src_var) + { + if (src_var->storage != StorageClassUniform && src_var->storage != StorageClassStorageBuffer) + break; + + if (src_var->storage == StorageClassUniform && + !compiler.has_decoration(compiler.get(src_var->basetype).self, DecorationBufferBlock)) + { + break; + } + + access_potential_resource(src_var->self); + } + + break; + } + + case OpImageRead: + case OpAtomicLoad: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + + // We're only concerned with buffer and image memory here. + if (!var) + break; + + switch (var->storage) + { + default: + break; + + case StorageClassUniform: + // Must have BufferBlock; we only care about SSBOs. 
+ if (!compiler.has_decoration(compiler.get(var->basetype).self, DecorationBufferBlock)) + break; + // fallthrough + case StorageClassUniformConstant: + case StorageClassStorageBuffer: + access_potential_resource(var->self); + break; + } + break; + } + + case OpAtomicExchange: + case OpAtomicCompareExchange: + case OpAtomicIIncrement: + case OpAtomicIDecrement: + case OpAtomicIAdd: + case OpAtomicISub: + case OpAtomicSMin: + case OpAtomicUMin: + case OpAtomicSMax: + case OpAtomicUMax: + case OpAtomicAnd: + case OpAtomicOr: + case OpAtomicXor: + { + if (length < 3) + return false; + + uint32_t ptr = args[2]; + auto *var = compiler.maybe_get_backing_variable(ptr); + if (var && (var->storage == StorageClassUniform || var->storage == StorageClassUniformConstant || + var->storage == StorageClassStorageBuffer)) + { + access_potential_resource(var->self); + } + + break; + } + + default: + break; + } + + return true; +} + +void Compiler::analyze_interlocked_resource_usage() +{ + if (get_execution_model() == ExecutionModelFragment && + (get_entry_point().flags.get(ExecutionModePixelInterlockOrderedEXT) || + get_entry_point().flags.get(ExecutionModePixelInterlockUnorderedEXT) || + get_entry_point().flags.get(ExecutionModeSampleInterlockOrderedEXT) || + get_entry_point().flags.get(ExecutionModeSampleInterlockUnorderedEXT))) + { + InterlockedResourceAccessPrepassHandler prepass_handler(*this, ir.default_entry_point); + traverse_all_reachable_opcodes(get(ir.default_entry_point), prepass_handler); + + InterlockedResourceAccessHandler handler(*this, ir.default_entry_point); + handler.interlock_function_id = prepass_handler.interlock_function_id; + handler.split_function_case = prepass_handler.split_function_case; + handler.control_flow_interlock = prepass_handler.control_flow_interlock; + handler.use_critical_section = !handler.split_function_case && !handler.control_flow_interlock; + + traverse_all_reachable_opcodes(get(ir.default_entry_point), handler); + + // For GLSL. 
If we hit any of these cases, we have to fall back to conservative approach. + interlocked_is_complex = + !handler.use_critical_section || handler.interlock_function_id != ir.default_entry_point; + } +} + bool Compiler::type_is_array_of_pointers(const SPIRType &type) const { if (!type.pointer) @@ -4256,7 +4577,7 @@ bool Compiler::type_is_array_of_pointers(const SPIRType &type) const return type.pointer_depth == get(type.parent_type).pointer_depth; } -bool Compiler::flush_phi_required(uint32_t from, uint32_t to) const +bool Compiler::flush_phi_required(BlockID from, BlockID to) const { auto &child = get(to); for (auto &phi : child.phi_variables) diff --git a/3rdparty/spirv-cross/spirv_cross.hpp b/3rdparty/spirv-cross/spirv_cross.hpp index 90cc99566..7385a6c33 100644 --- a/3rdparty/spirv-cross/spirv_cross.hpp +++ b/3rdparty/spirv-cross/spirv_cross.hpp @@ -27,18 +27,18 @@ struct Resource { // Resources are identified with their SPIR-V ID. // This is the ID of the OpVariable. - uint32_t id; + ID id; // The type ID of the variable which includes arrays and all type modifications. // This type ID is not suitable for parsing OpMemberDecoration of a struct and other decorations in general // since these modifications typically happen on the base_type_id. - uint32_t type_id; + TypeID type_id; // The base type of the declared resource. // This type is the base type which ignores pointers and arrays of the type_id. // This is mostly useful to parse decorations of the underlying type. // base_type_id can also be obtained with get_type(get_type(type_id).self). - uint32_t base_type_id; + TypeID base_type_id; // The declared name (OpName) of the resource. // For Buffer blocks, the name actually reflects the externally @@ -77,17 +77,17 @@ struct ShaderResources struct CombinedImageSampler { // The ID of the sampler2D variable. - uint32_t combined_id; + VariableID combined_id; // The ID of the texture2D variable. 
- uint32_t image_id; + VariableID image_id; // The ID of the sampler variable. - uint32_t sampler_id; + VariableID sampler_id; }; struct SpecializationConstant { // The ID of the specialization constant. - uint32_t id; + ConstantID id; // The constant ID of the constant, used in Vulkan during pipeline creation. uint32_t constant_id; }; @@ -142,81 +142,81 @@ public: virtual std::string compile(); // Gets the identifier (OpName) of an ID. If not defined, an empty string will be returned. - const std::string &get_name(uint32_t id) const; + const std::string &get_name(ID id) const; // Applies a decoration to an ID. Effectively injects OpDecorate. - void set_decoration(uint32_t id, spv::Decoration decoration, uint32_t argument = 0); - void set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument); + void set_decoration(ID id, spv::Decoration decoration, uint32_t argument = 0); + void set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument); // Overrides the identifier OpName of an ID. // Identifiers beginning with underscores or identifiers which contain double underscores // are reserved by the implementation. - void set_name(uint32_t id, const std::string &name); + void set_name(ID id, const std::string &name); // Gets a bitmask for the decorations which are applied to ID. // I.e. (1ull << spv::DecorationFoo) | (1ull << spv::DecorationBar) - const Bitset &get_decoration_bitset(uint32_t id) const; + const Bitset &get_decoration_bitset(ID id) const; // Returns whether the decoration has been applied to the ID. - bool has_decoration(uint32_t id, spv::Decoration decoration) const; + bool has_decoration(ID id, spv::Decoration decoration) const; // Gets the value for decorations which take arguments. // If the decoration is a boolean (i.e. spv::DecorationNonWritable), // 1 will be returned. // If decoration doesn't exist or decoration is not recognized, // 0 will be returned. 
- uint32_t get_decoration(uint32_t id, spv::Decoration decoration) const; - const std::string &get_decoration_string(uint32_t id, spv::Decoration decoration) const; + uint32_t get_decoration(ID id, spv::Decoration decoration) const; + const std::string &get_decoration_string(ID id, spv::Decoration decoration) const; // Removes the decoration for an ID. - void unset_decoration(uint32_t id, spv::Decoration decoration); + void unset_decoration(ID id, spv::Decoration decoration); // Gets the SPIR-V type associated with ID. // Mostly used with Resource::type_id and Resource::base_type_id to parse the underlying type of a resource. - const SPIRType &get_type(uint32_t id) const; + const SPIRType &get_type(TypeID id) const; // Gets the SPIR-V type of a variable. - const SPIRType &get_type_from_variable(uint32_t id) const; + const SPIRType &get_type_from_variable(VariableID id) const; // Gets the underlying storage class for an OpVariable. - spv::StorageClass get_storage_class(uint32_t id) const; + spv::StorageClass get_storage_class(VariableID id) const; // If get_name() is an empty string, get the fallback name which will be used // instead in the disassembled source. - virtual const std::string get_fallback_name(uint32_t id) const; + virtual const std::string get_fallback_name(ID id) const; // If get_name() of a Block struct is an empty string, get the fallback name. // This needs to be per-variable as multiple variables can use the same block type. - virtual const std::string get_block_fallback_name(uint32_t id) const; + virtual const std::string get_block_fallback_name(VariableID id) const; // Given an OpTypeStruct in ID, obtain the identifier for member number "index". // This may be an empty string. - const std::string &get_member_name(uint32_t id, uint32_t index) const; + const std::string &get_member_name(TypeID id, uint32_t index) const; // Given an OpTypeStruct in ID, obtain the OpMemberDecoration for member number "index". 
- uint32_t get_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const; - const std::string &get_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration) const; + uint32_t get_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const; + const std::string &get_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration) const; // Sets the member identifier for OpTypeStruct ID, member number "index". - void set_member_name(uint32_t id, uint32_t index, const std::string &name); + void set_member_name(TypeID id, uint32_t index, const std::string &name); // Returns the qualified member identifier for OpTypeStruct ID, member number "index", // or an empty string if no qualified alias exists - const std::string &get_member_qualified_name(uint32_t type_id, uint32_t index) const; + const std::string &get_member_qualified_name(TypeID type_id, uint32_t index) const; // Gets the decoration mask for a member of a struct, similar to get_decoration_mask. - const Bitset &get_member_decoration_bitset(uint32_t id, uint32_t index) const; + const Bitset &get_member_decoration_bitset(TypeID id, uint32_t index) const; // Returns whether the decoration has been applied to a member of a struct. - bool has_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const; + bool has_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const; // Similar to set_decoration, but for struct members. 
- void set_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0); - void set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration, + void set_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0); + void set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration, const std::string &argument); // Unsets a member decoration, similar to unset_decoration. - void unset_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration); + void unset_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration); // Gets the fallback name for a member, similar to get_fallback_name. virtual const std::string get_fallback_member_name(uint32_t index) const @@ -228,7 +228,7 @@ public: // SPIR-V shader. The granularity of this analysis is per-member of a struct. // This can be used for Buffer (UBO), BufferBlock/StorageBuffer (SSBO) and PushConstant blocks. // ID is the Resource::id obtained from get_shader_resources(). - SmallVector get_active_buffer_ranges(uint32_t id) const; + SmallVector get_active_buffer_ranges(VariableID id) const; // Returns the effective size of a buffer block. size_t get_declared_struct_size(const SPIRType &struct_type) const; @@ -256,12 +256,12 @@ public: // // To use the returned set as the filter for which variables are used during compilation, // this set can be moved to set_enabled_interface_variables(). - std::unordered_set get_active_interface_variables() const; + std::unordered_set get_active_interface_variables() const; // Sets the interface variables which are used during compilation. // By default, all variables are used. // Once set, compile() will only consider the set in active_variables. 
- void set_enabled_interface_variables(std::unordered_set active_variables); + void set_enabled_interface_variables(std::unordered_set active_variables); // Query shader resources, use ids with reflection interface to modify or query binding points, etc. ShaderResources get_shader_resources() const; @@ -269,19 +269,19 @@ public: // Query shader resources, but only return the variables which are part of active_variables. // E.g.: get_shader_resources(get_active_variables()) to only return the variables which are statically // accessed. - ShaderResources get_shader_resources(const std::unordered_set &active_variables) const; + ShaderResources get_shader_resources(const std::unordered_set &active_variables) const; // Remapped variables are considered built-in variables and a backend will // not emit a declaration for this variable. // This is mostly useful for making use of builtins which are dependent on extensions. - void set_remapped_variable_state(uint32_t id, bool remap_enable); - bool get_remapped_variable_state(uint32_t id) const; + void set_remapped_variable_state(VariableID id, bool remap_enable); + bool get_remapped_variable_state(VariableID id) const; // For subpassInput variables which are remapped to plain variables, // the number of components in the remapped // variable must be specified as the backing type of subpass inputs are opaque. - void set_subpass_input_remapped_components(uint32_t id, uint32_t components); - uint32_t get_subpass_input_remapped_components(uint32_t id) const; + void set_subpass_input_remapped_components(VariableID id, uint32_t components); + uint32_t get_subpass_input_remapped_components(VariableID id) const; // All operations work on the current entry point. // Entry points can be swapped out with set_entry_point(). @@ -362,7 +362,7 @@ public: // If the returned ID is non-zero, it can be decorated with set/bindings as desired before calling compile(). 
// Calling this function also invalidates get_active_interface_variables(), so this should be called // before that function. - uint32_t build_dummy_sampler_for_combined_images(); + VariableID build_dummy_sampler_for_combined_images(); // Analyzes all separate image and samplers used from the currently selected entry point, // and re-routes them all to a combined image sampler instead. @@ -411,8 +411,8 @@ public: // constant_type is the SPIRType for the specialization constant, // which can be queried to determine which fields in the unions should be poked at. SmallVector get_specialization_constants() const; - SPIRConstant &get_constant(uint32_t id); - const SPIRConstant &get_constant(uint32_t id) const; + SPIRConstant &get_constant(ConstantID id); + const SPIRConstant &get_constant(ConstantID id) const; uint32_t get_current_id_bound() const { @@ -435,7 +435,7 @@ public: // If the decoration was declared, sets the word_offset to an offset into the provided SPIR-V binary buffer and returns true, // otherwise, returns false. // If the decoration does not have any value attached to it (e.g. DecorationRelaxedPrecision), this function will also return false. - bool get_binary_offset_for_decoration(uint32_t id, spv::Decoration decoration, uint32_t &word_offset) const; + bool get_binary_offset_for_decoration(VariableID id, spv::Decoration decoration, uint32_t &word_offset) const; // HLSL counter buffer reflection interface. // Append/Consume/Increment/Decrement in HLSL is implemented as two "neighbor" buffer objects where @@ -450,7 +450,7 @@ public: // only return true if OpSource was reported HLSL. // To rely on this functionality, ensure that the SPIR-V module is not stripped. - bool buffer_is_hlsl_counter_buffer(uint32_t id) const; + bool buffer_is_hlsl_counter_buffer(VariableID id) const; // Queries if a buffer object has a neighbor "counter" buffer. // If so, the ID of that counter buffer will be returned in counter_id. 
@@ -458,7 +458,7 @@ public: // Otherwise, this query is purely based on OpName identifiers as found in the SPIR-V module, and will // only return true if OpSource was reported HLSL. // To rely on this functionality, ensure that the SPIR-V module is not stripped. - bool buffer_get_hlsl_counter_buffer(uint32_t id, uint32_t &counter_id) const; + bool buffer_get_hlsl_counter_buffer(VariableID id, uint32_t &counter_id) const; // Gets the list of all SPIR-V Capabilities which were declared in the SPIR-V module. const SmallVector &get_declared_capabilities() const; @@ -479,13 +479,13 @@ public: // ID is the name of a variable as returned by Resource::id, and must be a variable with a Block-like type. // // This also applies to HLSL cbuffers. - std::string get_remapped_declared_block_name(uint32_t id) const; + std::string get_remapped_declared_block_name(VariableID id) const; // For buffer block variables, get the decorations for that variable. // Sometimes, decorations for buffer blocks are found in member decorations instead // of direct decorations on the variable itself. // The most common use here is to check if a buffer is readonly or writeonly. 
- Bitset get_buffer_block_flags(uint32_t id) const; + Bitset get_buffer_block_flags(VariableID id) const; protected: const uint32_t *stream(const Instruction &instr) const @@ -509,7 +509,7 @@ protected: SPIRFunction *current_function = nullptr; SPIRBlock *current_block = nullptr; - std::unordered_set active_interface_variables; + std::unordered_set active_interface_variables; bool check_active_interface_variables = false; // If our IDs are out of range here as part of opcodes, throw instead of @@ -549,7 +549,9 @@ protected: template const T *maybe_get(uint32_t id) const { - if (ir.ids[id].get_type() == static_cast(T::type)) + if (id >= ir.ids.size()) + return nullptr; + else if (ir.ids[id].get_type() == static_cast(T::type)) return &get(id); else return nullptr; @@ -618,7 +620,7 @@ protected: inline bool is_single_block_loop(uint32_t next) const { auto &block = get(next); - return block.merge == SPIRBlock::MergeLoop && block.continue_block == next; + return block.merge == SPIRBlock::MergeLoop && block.continue_block == ID(next); } inline bool is_break(uint32_t next) const @@ -710,6 +712,13 @@ protected: { } + // Called after returning from a function or when entering a block, + // can be called multiple times per block, + // while set_current_block is only called on block entry. 
+ virtual void rearm_current_block(const SPIRBlock &) + { + } + virtual bool begin_function_scope(const uint32_t *, uint32_t) { return true; @@ -741,7 +750,7 @@ protected: struct InterfaceVariableAccessHandler : OpcodeHandler { - InterfaceVariableAccessHandler(const Compiler &compiler_, std::unordered_set &variables_) + InterfaceVariableAccessHandler(const Compiler &compiler_, std::unordered_set &variables_) : compiler(compiler_) , variables(variables_) { @@ -750,7 +759,7 @@ protected: bool handle(spv::Op opcode, const uint32_t *args, uint32_t length) override; const Compiler &compiler; - std::unordered_set &variables; + std::unordered_set &variables; }; struct CombinedImageSamplerHandler : OpcodeHandler @@ -772,8 +781,8 @@ protected: uint32_t remap_parameter(uint32_t id); void push_remap_parameters(const SPIRFunction &func, const uint32_t *args, uint32_t length); void pop_remap_parameters(); - void register_combined_image_sampler(SPIRFunction &caller, uint32_t combined_id, uint32_t texture_id, - uint32_t sampler_id, bool depth); + void register_combined_image_sampler(SPIRFunction &caller, VariableID combined_id, VariableID texture_id, + VariableID sampler_id, bool depth); }; struct DummySamplerForCombinedImageHandler : OpcodeHandler @@ -806,7 +815,7 @@ protected: // This must be an ordered data structure so we always pick the same type aliases. 
SmallVector global_struct_cache; - ShaderResources get_shader_resources(const std::unordered_set *active_variables) const; + ShaderResources get_shader_resources(const std::unordered_set *active_variables) const; VariableTypeRemapCallback variable_remap_callback; @@ -884,10 +893,11 @@ protected: void build_function_control_flow_graphs_and_analyze(); std::unordered_map> function_cfgs; const CFG &get_cfg_for_current_function() const; + const CFG &get_cfg_for_function(uint32_t id) const; struct CFGBuilder : OpcodeHandler { - CFGBuilder(Compiler &compiler_); + explicit CFGBuilder(Compiler &compiler_); bool follow_function_call(const SPIRFunction &func) override; bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; @@ -932,7 +942,7 @@ protected: struct PhysicalStorageBufferPointerHandler : OpcodeHandler { - PhysicalStorageBufferPointerHandler(Compiler &compiler_); + explicit PhysicalStorageBufferPointerHandler(Compiler &compiler_); bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; Compiler &compiler; std::unordered_set types; @@ -945,6 +955,61 @@ protected: bool single_function); bool may_read_undefined_variable_in_block(const SPIRBlock &block, uint32_t var); + // Finds all resources that are written to from inside the critical section, if present. + // The critical section is delimited by OpBeginInvocationInterlockEXT and + // OpEndInvocationInterlockEXT instructions. In MSL and HLSL, any resources written + // while inside the critical section must be placed in a raster order group. 
+ struct InterlockedResourceAccessHandler : OpcodeHandler + { + InterlockedResourceAccessHandler(Compiler &compiler_, uint32_t entry_point_id) + : compiler(compiler_) + { + call_stack.push_back(entry_point_id); + } + + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + bool begin_function_scope(const uint32_t *args, uint32_t length) override; + bool end_function_scope(const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + bool in_crit_sec = false; + + uint32_t interlock_function_id = 0; + bool split_function_case = false; + bool control_flow_interlock = false; + bool use_critical_section = false; + bool call_stack_is_interlocked = false; + SmallVector call_stack; + + void access_potential_resource(uint32_t id); + }; + + struct InterlockedResourceAccessPrepassHandler : OpcodeHandler + { + InterlockedResourceAccessPrepassHandler(Compiler &compiler_, uint32_t entry_point_id) + : compiler(compiler_) + { + call_stack.push_back(entry_point_id); + } + + void rearm_current_block(const SPIRBlock &block) override; + bool handle(spv::Op op, const uint32_t *args, uint32_t length) override; + bool begin_function_scope(const uint32_t *args, uint32_t length) override; + bool end_function_scope(const uint32_t *args, uint32_t length) override; + + Compiler &compiler; + uint32_t interlock_function_id = 0; + uint32_t current_block_id = 0; + bool split_function_case = false; + bool control_flow_interlock = false; + SmallVector call_stack; + }; + + void analyze_interlocked_resource_usage(); + // The set of all resources written while inside the critical section, if present. 
+ std::unordered_set interlocked_resources; + bool interlocked_is_complex = false; + void make_constant_null(uint32_t id, uint32_t type); std::unordered_map declared_block_names; @@ -975,7 +1040,7 @@ protected: bool reflection_ssbo_instance_name_is_significant() const; std::string get_remapped_declared_block_name(uint32_t id, bool fallback_prefer_instance_name) const; - bool flush_phi_required(uint32_t from, uint32_t to) const; + bool flush_phi_required(BlockID from, BlockID to) const; private: // Used only to implement the old deprecated get_entry_point() interface. diff --git a/3rdparty/spirv-cross/spirv_cross_c.cpp b/3rdparty/spirv-cross/spirv_cross_c.cpp index 7add2685e..c9ac8fca3 100644 --- a/3rdparty/spirv-cross/spirv_cross_c.cpp +++ b/3rdparty/spirv-cross/spirv_cross_c.cpp @@ -162,7 +162,7 @@ struct spvc_compiler_options_s : ScratchMemoryAllocation struct spvc_set_s : ScratchMemoryAllocation { - std::unordered_set set; + std::unordered_set set; }; // Dummy-inherit to we can keep our opaque type handle type safe in C-land as well, @@ -545,6 +545,30 @@ spvc_result spvc_compiler_options_set_uint(spvc_compiler_options options, spvc_c case SPVC_COMPILER_OPTION_MSL_BUFFER_SIZE_BUFFER_INDEX: options->msl.buffer_size_buffer_index = value; break; + + case SPVC_COMPILER_OPTION_MSL_MULTIVIEW: + options->msl.multiview = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_VIEW_MASK_BUFFER_INDEX: + options->msl.view_mask_buffer_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_DEVICE_INDEX: + options->msl.device_index = value; + break; + + case SPVC_COMPILER_OPTION_MSL_VIEW_INDEX_FROM_DEVICE_INDEX: + options->msl.view_index_from_device_index = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_DISPATCH_BASE: + options->msl.dispatch_base = value != 0; + break; + + case SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX: + options->msl.dynamic_offsets_buffer_index = value; + break; #endif default: @@ -882,6 +906,27 @@ spvc_result 
spvc_compiler_msl_add_resource_binding(spvc_compiler compiler, #endif } +spvc_result spvc_compiler_msl_add_dynamic_buffer(spvc_compiler compiler, unsigned desc_set, unsigned binding, unsigned index) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + msl.add_dynamic_buffer(desc_set, binding, index); + return SPVC_SUCCESS; +#else + (void)binding; + (void)desc_set; + (void)index; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + spvc_result spvc_compiler_msl_add_discrete_descriptor_set(spvc_compiler compiler, unsigned desc_set) { #if SPIRV_CROSS_C_API_MSL @@ -960,6 +1005,22 @@ static void spvc_convert_msl_sampler(MSLConstexprSampler &samp, const spvc_msl_c samp.coord = static_cast(sampler->coord); samp.border_color = static_cast(sampler->border_color); } + +static void spvc_convert_msl_sampler_ycbcr_conversion(MSLConstexprSampler &samp, const spvc_msl_sampler_ycbcr_conversion *conv) +{ + samp.ycbcr_conversion_enable = conv != nullptr; + if (conv == nullptr) return; + samp.planes = conv->planes; + samp.resolution = static_cast(conv->resolution); + samp.chroma_filter = static_cast(conv->chroma_filter); + samp.x_chroma_offset = static_cast(conv->x_chroma_offset); + samp.y_chroma_offset = static_cast(conv->y_chroma_offset); + for (int i = 0; i < 4; i++) + samp.swizzle[i] = static_cast(conv->swizzle[i]); + samp.ycbcr_model = static_cast(conv->ycbcr_model); + samp.ycbcr_range = static_cast(conv->ycbcr_range); + samp.bpc = conv->bpc; +} #endif spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id, @@ -1010,6 +1071,60 @@ spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding(spvc_compiler c #endif } +spvc_result 
spvc_compiler_msl_remap_constexpr_sampler_ycbcr(spvc_compiler compiler, spvc_variable_id id, + const spvc_msl_constexpr_sampler *sampler, + const spvc_msl_sampler_ycbcr_conversion *conv) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + MSLConstexprSampler samp; + spvc_convert_msl_sampler(samp, sampler); + spvc_convert_msl_sampler_ycbcr_conversion(samp, conv); + msl.remap_constexpr_sampler(id, samp); + return SPVC_SUCCESS; +#else + (void)id; + (void)sampler; + (void)conv; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + +spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding_ycbcr(spvc_compiler compiler, + unsigned desc_set, unsigned binding, + const spvc_msl_constexpr_sampler *sampler, + const spvc_msl_sampler_ycbcr_conversion *conv) +{ +#if SPIRV_CROSS_C_API_MSL + if (compiler->backend != SPVC_BACKEND_MSL) + { + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; + } + + auto &msl = *static_cast(compiler->compiler.get()); + MSLConstexprSampler samp; + spvc_convert_msl_sampler(samp, sampler); + spvc_convert_msl_sampler_ycbcr_conversion(samp, conv); + msl.remap_constexpr_sampler_by_binding(desc_set, binding, samp); + return SPVC_SUCCESS; +#else + (void)desc_set; + (void)binding; + (void)sampler; + (void)conv; + compiler->context->report_error("MSL function used on a non-MSL backend."); + return SPVC_ERROR_INVALID_ARGUMENT; +#endif +} + spvc_result spvc_compiler_msl_set_fragment_output_components(spvc_compiler compiler, unsigned location, unsigned components) { @@ -1997,6 +2112,24 @@ void spvc_msl_constexpr_sampler_init(spvc_msl_constexpr_sampler *sampler) #endif } +void 
spvc_msl_sampler_ycbcr_conversion_init(spvc_msl_sampler_ycbcr_conversion *conv) +{ +#if SPIRV_CROSS_C_API_MSL + MSLConstexprSampler defaults; + conv->planes = defaults.planes; + conv->resolution = static_cast(defaults.resolution); + conv->chroma_filter = static_cast(defaults.chroma_filter); + conv->x_chroma_offset = static_cast(defaults.x_chroma_offset); + conv->y_chroma_offset = static_cast(defaults.y_chroma_offset); + for (int i = 0; i < 4; i++) + conv->swizzle[i] = static_cast(defaults.swizzle[i]); + conv->ycbcr_model = static_cast(defaults.ycbcr_model); + conv->ycbcr_range = static_cast(defaults.ycbcr_range); +#else + memset(conv, 0, sizeof(*conv)); +#endif +} + unsigned spvc_compiler_get_current_id_bound(spvc_compiler compiler) { return compiler->compiler->get_current_id_bound(); diff --git a/3rdparty/spirv-cross/spirv_cross_c.h b/3rdparty/spirv-cross/spirv_cross_c.h index 6efaf8543..c9f7a0061 100644 --- a/3rdparty/spirv-cross/spirv_cross_c.h +++ b/3rdparty/spirv-cross/spirv_cross_c.h @@ -33,7 +33,7 @@ extern "C" { /* Bumped if ABI or API breaks backwards compatibility. */ #define SPVC_C_API_VERSION_MAJOR 0 /* Bumped if APIs or enumerations are added in a backwards compatible way. */ -#define SPVC_C_API_VERSION_MINOR 16 +#define SPVC_C_API_VERSION_MINOR 18 /* Bumped if internal implementation details change. */ #define SPVC_C_API_VERSION_PATCH 0 @@ -370,6 +370,55 @@ typedef enum spvc_msl_sampler_border_color SPVC_MSL_SAMPLER_BORDER_COLOR_INT_MAX = 0x7fffffff } spvc_msl_sampler_border_color; +/* Maps to C++ API. */ +typedef enum spvc_msl_format_resolution +{ + SPVC_MSL_FORMAT_RESOLUTION_444 = 0, + SPVC_MSL_FORMAT_RESOLUTION_422, + SPVC_MSL_FORMAT_RESOLUTION_420, + SPVC_MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff +} spvc_msl_format_resolution; + +/* Maps to C++ API. 
*/ +typedef enum spvc_msl_chroma_location +{ + SPVC_MSL_CHROMA_LOCATION_COSITED_EVEN = 0, + SPVC_MSL_CHROMA_LOCATION_MIDPOINT, + SPVC_MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff +} spvc_msl_chroma_location; + +/* Maps to C++ API. */ +typedef enum spvc_msl_component_swizzle +{ + SPVC_MSL_COMPONENT_SWIZZLE_IDENTITY = 0, + SPVC_MSL_COMPONENT_SWIZZLE_ZERO, + SPVC_MSL_COMPONENT_SWIZZLE_ONE, + SPVC_MSL_COMPONENT_SWIZZLE_R, + SPVC_MSL_COMPONENT_SWIZZLE_G, + SPVC_MSL_COMPONENT_SWIZZLE_B, + SPVC_MSL_COMPONENT_SWIZZLE_A, + SPVC_MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff +} spvc_msl_component_swizzle; + +/* Maps to C++ API. */ +typedef enum spvc_msl_sampler_ycbcr_model_conversion +{ + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020, + SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff +} spvc_msl_sampler_ycbcr_model_conversion; + +/* Maps to C++ API. */ +typedef enum spvc_msl_sampler_ycbcr_range +{ + SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0, + SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW, + SPVC_MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff +} spvc_msl_sampler_ycbcr_range; + /* Maps to C++ API. */ typedef struct spvc_msl_constexpr_sampler { @@ -397,6 +446,26 @@ typedef struct spvc_msl_constexpr_sampler */ SPVC_PUBLIC_API void spvc_msl_constexpr_sampler_init(spvc_msl_constexpr_sampler *sampler); +/* Maps to the sampler Y'CbCr conversion-related portions of MSLConstexprSampler. See C++ API for defaults and details. 
*/ +typedef struct spvc_msl_sampler_ycbcr_conversion +{ + unsigned planes; + spvc_msl_format_resolution resolution; + spvc_msl_sampler_filter chroma_filter; + spvc_msl_chroma_location x_chroma_offset; + spvc_msl_chroma_location y_chroma_offset; + spvc_msl_component_swizzle swizzle[4]; + spvc_msl_sampler_ycbcr_model_conversion ycbcr_model; + spvc_msl_sampler_ycbcr_range ycbcr_range; + unsigned bpc; +} spvc_msl_sampler_ycbcr_conversion; + +/* + * Initializes the sampler Y'CbCr conversion struct. + * The defaults are non-zero. + */ +SPVC_PUBLIC_API void spvc_msl_sampler_ycbcr_conversion_init(spvc_msl_sampler_ycbcr_conversion *conv); + /* Maps to the various spirv_cross::Compiler*::Option structures. See C++ API for defaults and details. */ typedef enum spvc_compiler_option { @@ -452,6 +521,13 @@ typedef enum spvc_compiler_option SPVC_COMPILER_OPTION_EMIT_LINE_DIRECTIVES = 37 | SPVC_COMPILER_OPTION_COMMON_BIT, + SPVC_COMPILER_OPTION_MSL_MULTIVIEW = 38 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_VIEW_MASK_BUFFER_INDEX = 39 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_DEVICE_INDEX = 40 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_VIEW_INDEX_FROM_DEVICE_INDEX = 41 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_DISPATCH_BASE = 42 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_MSL_DYNAMIC_OFFSETS_BUFFER_INDEX = 43 | SPVC_COMPILER_OPTION_MSL_BIT, + SPVC_COMPILER_OPTION_INT_MAX = 0x7fffffff } spvc_compiler_option; @@ -550,11 +626,15 @@ SPVC_PUBLIC_API spvc_bool spvc_compiler_msl_is_resource_used(spvc_compiler compi unsigned binding); SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler(spvc_compiler compiler, spvc_variable_id id, const spvc_msl_constexpr_sampler *sampler); SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding(spvc_compiler compiler, unsigned desc_set, unsigned binding, const spvc_msl_constexpr_sampler *sampler); +SPVC_PUBLIC_API spvc_result 
spvc_compiler_msl_remap_constexpr_sampler_ycbcr(spvc_compiler compiler, spvc_variable_id id, const spvc_msl_constexpr_sampler *sampler, const spvc_msl_sampler_ycbcr_conversion *conv); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_remap_constexpr_sampler_by_binding_ycbcr(spvc_compiler compiler, unsigned desc_set, unsigned binding, const spvc_msl_constexpr_sampler *sampler, const spvc_msl_sampler_ycbcr_conversion *conv); SPVC_PUBLIC_API spvc_result spvc_compiler_msl_set_fragment_output_components(spvc_compiler compiler, unsigned location, unsigned components); SPVC_PUBLIC_API unsigned spvc_compiler_msl_get_automatic_resource_binding(spvc_compiler compiler, spvc_variable_id id); SPVC_PUBLIC_API unsigned spvc_compiler_msl_get_automatic_resource_binding_secondary(spvc_compiler compiler, spvc_variable_id id); +SPVC_PUBLIC_API spvc_result spvc_compiler_msl_add_dynamic_buffer(spvc_compiler compiler, unsigned desc_set, unsigned binding, unsigned index); + /* * Reflect resources. * Maps almost 1:1 to C++ API. 
diff --git a/3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp b/3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp index a5c87f45c..238726716 100644 --- a/3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp +++ b/3rdparty/spirv-cross/spirv_cross_parsed_ir.cpp @@ -162,7 +162,7 @@ static string ensure_valid_identifier(const string &name, bool member) return str; } -const string &ParsedIR::get_name(uint32_t id) const +const string &ParsedIR::get_name(ID id) const { auto *m = find_meta(id); if (m) @@ -171,7 +171,7 @@ const string &ParsedIR::get_name(uint32_t id) const return empty_string; } -const string &ParsedIR::get_member_name(uint32_t id, uint32_t index) const +const string &ParsedIR::get_member_name(TypeID id, uint32_t index) const { auto *m = find_meta(id); if (m) @@ -184,7 +184,7 @@ const string &ParsedIR::get_member_name(uint32_t id, uint32_t index) const return empty_string; } -void ParsedIR::set_name(uint32_t id, const string &name) +void ParsedIR::set_name(ID id, const string &name) { auto &str = meta[id].decoration.alias; str.clear(); @@ -199,7 +199,7 @@ void ParsedIR::set_name(uint32_t id, const string &name) str = ensure_valid_identifier(name, false); } -void ParsedIR::set_member_name(uint32_t id, uint32_t index, const string &name) +void ParsedIR::set_member_name(TypeID id, uint32_t index, const string &name) { meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1)); @@ -215,7 +215,7 @@ void ParsedIR::set_member_name(uint32_t id, uint32_t index, const string &name) str = ensure_valid_identifier(name, true); } -void ParsedIR::set_decoration_string(uint32_t id, Decoration decoration, const string &argument) +void ParsedIR::set_decoration_string(ID id, Decoration decoration, const string &argument) { auto &dec = meta[id].decoration; dec.decoration_flags.set(decoration); @@ -231,7 +231,7 @@ void ParsedIR::set_decoration_string(uint32_t id, Decoration decoration, const s } } -void ParsedIR::set_decoration(uint32_t id, Decoration decoration, uint32_t 
argument) +void ParsedIR::set_decoration(ID id, Decoration decoration, uint32_t argument) { auto &dec = meta[id].decoration; dec.decoration_flags.set(decoration); @@ -297,7 +297,7 @@ void ParsedIR::set_decoration(uint32_t id, Decoration decoration, uint32_t argum } } -void ParsedIR::set_member_decoration(uint32_t id, uint32_t index, Decoration decoration, uint32_t argument) +void ParsedIR::set_member_decoration(TypeID id, uint32_t index, Decoration decoration, uint32_t argument) { meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1)); auto &dec = meta[id].members[index]; @@ -345,7 +345,7 @@ void ParsedIR::set_member_decoration(uint32_t id, uint32_t index, Decoration dec // Recursively marks any constants referenced by the specified constant instruction as being used // as an array length. The id must be a constant instruction (SPIRConstant or SPIRConstantOp). -void ParsedIR::mark_used_as_array_length(uint32_t id) +void ParsedIR::mark_used_as_array_length(ID id) { switch (ids[id].get_type()) { @@ -356,8 +356,16 @@ void ParsedIR::mark_used_as_array_length(uint32_t id) case TypeConstantOp: { auto &cop = get(id); - for (uint32_t arg_id : cop.arguments) - mark_used_as_array_length(arg_id); + if (cop.opcode == OpCompositeExtract) + mark_used_as_array_length(cop.arguments[0]); + else if (cop.opcode == OpCompositeInsert) + { + mark_used_as_array_length(cop.arguments[0]); + mark_used_as_array_length(cop.arguments[1]); + } + else + for (uint32_t arg_id : cop.arguments) + mark_used_as_array_length(arg_id); break; } @@ -393,7 +401,7 @@ Bitset ParsedIR::get_buffer_block_flags(const SPIRVariable &var) const return base_flags; } -const Bitset &ParsedIR::get_member_decoration_bitset(uint32_t id, uint32_t index) const +const Bitset &ParsedIR::get_member_decoration_bitset(TypeID id, uint32_t index) const { auto *m = find_meta(id); if (m) @@ -406,12 +414,12 @@ const Bitset &ParsedIR::get_member_decoration_bitset(uint32_t id, uint32_t index return cleared_bitset; } 
-bool ParsedIR::has_decoration(uint32_t id, Decoration decoration) const +bool ParsedIR::has_decoration(ID id, Decoration decoration) const { return get_decoration_bitset(id).get(decoration); } -uint32_t ParsedIR::get_decoration(uint32_t id, Decoration decoration) const +uint32_t ParsedIR::get_decoration(ID id, Decoration decoration) const { auto *m = find_meta(id); if (!m) @@ -452,7 +460,7 @@ uint32_t ParsedIR::get_decoration(uint32_t id, Decoration decoration) const } } -const string &ParsedIR::get_decoration_string(uint32_t id, Decoration decoration) const +const string &ParsedIR::get_decoration_string(ID id, Decoration decoration) const { auto *m = find_meta(id); if (!m) @@ -473,7 +481,7 @@ const string &ParsedIR::get_decoration_string(uint32_t id, Decoration decoration } } -void ParsedIR::unset_decoration(uint32_t id, Decoration decoration) +void ParsedIR::unset_decoration(ID id, Decoration decoration) { auto &dec = meta[id].decoration; dec.decoration_flags.clear(decoration); @@ -535,12 +543,12 @@ void ParsedIR::unset_decoration(uint32_t id, Decoration decoration) } } -bool ParsedIR::has_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +bool ParsedIR::has_member_decoration(TypeID id, uint32_t index, Decoration decoration) const { return get_member_decoration_bitset(id, index).get(decoration); } -uint32_t ParsedIR::get_member_decoration(uint32_t id, uint32_t index, Decoration decoration) const +uint32_t ParsedIR::get_member_decoration(TypeID id, uint32_t index, Decoration decoration) const { auto *m = find_meta(id); if (!m) @@ -574,7 +582,7 @@ uint32_t ParsedIR::get_member_decoration(uint32_t id, uint32_t index, Decoration } } -const Bitset &ParsedIR::get_decoration_bitset(uint32_t id) const +const Bitset &ParsedIR::get_decoration_bitset(ID id) const { auto *m = find_meta(id); if (m) @@ -586,7 +594,7 @@ const Bitset &ParsedIR::get_decoration_bitset(uint32_t id) const return cleared_bitset; } -void 
ParsedIR::set_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration, const string &argument) +void ParsedIR::set_member_decoration_string(TypeID id, uint32_t index, Decoration decoration, const string &argument) { meta[id].members.resize(max(meta[id].members.size(), size_t(index) + 1)); auto &dec = meta[id].members[index]; @@ -603,7 +611,7 @@ void ParsedIR::set_member_decoration_string(uint32_t id, uint32_t index, Decorat } } -const string &ParsedIR::get_member_decoration_string(uint32_t id, uint32_t index, Decoration decoration) const +const string &ParsedIR::get_member_decoration_string(TypeID id, uint32_t index, Decoration decoration) const { auto *m = find_meta(id); if (m) @@ -626,7 +634,7 @@ const string &ParsedIR::get_member_decoration_string(uint32_t id, uint32_t index return empty_string; } -void ParsedIR::unset_member_decoration(uint32_t id, uint32_t index, Decoration decoration) +void ParsedIR::unset_member_decoration(TypeID id, uint32_t index, Decoration decoration) { auto &m = meta[id]; if (index >= m.members.size()) @@ -679,7 +687,7 @@ uint32_t ParsedIR::increase_bound_by(uint32_t incr_amount) return uint32_t(curr_bound); } -void ParsedIR::remove_typed_id(Types type, uint32_t id) +void ParsedIR::remove_typed_id(Types type, ID id) { auto &type_ids = ids_for_type[type]; type_ids.erase(remove(begin(type_ids), end(type_ids), id), end(type_ids)); @@ -694,7 +702,7 @@ void ParsedIR::reset_all_of_type(Types type) ids_for_type[type].clear(); } -void ParsedIR::add_typed_id(Types type, uint32_t id) +void ParsedIR::add_typed_id(Types type, ID id) { if (loop_iteration_depth_hard != 0) SPIRV_CROSS_THROW("Cannot add typed ID while looping over it."); @@ -740,7 +748,7 @@ void ParsedIR::add_typed_id(Types type, uint32_t id) } } -const Meta *ParsedIR::find_meta(uint32_t id) const +const Meta *ParsedIR::find_meta(ID id) const { auto itr = meta.find(id); if (itr != end(meta)) @@ -749,7 +757,7 @@ const Meta *ParsedIR::find_meta(uint32_t id) const 
return nullptr; } -Meta *ParsedIR::find_meta(uint32_t id) +Meta *ParsedIR::find_meta(ID id) { auto itr = meta.find(id); if (itr != end(meta)) diff --git a/3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp b/3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp index c4ea28b49..97bc9ea69 100644 --- a/3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp +++ b/3rdparty/spirv-cross/spirv_cross_parsed_ir.hpp @@ -57,19 +57,19 @@ public: SmallVector ids; // Various meta data for IDs, decorations, names, etc. - std::unordered_map meta; + std::unordered_map meta; // Holds all IDs which have a certain type. // This is needed so we can iterate through a specific kind of resource quickly, // and in-order of module declaration. - SmallVector ids_for_type[TypeCount]; + SmallVector ids_for_type[TypeCount]; // Special purpose lists which contain a union of types. // This is needed so we can declare specialization constants and structs in an interleaved fashion, // among other things. // Constants can be of struct type, and struct array sizes can use specialization constants. - SmallVector ids_for_constant_or_type; - SmallVector ids_for_constant_or_variable; + SmallVector ids_for_constant_or_type; + SmallVector ids_for_constant_or_variable; // Declared capabilities and extensions in the SPIR-V module. // Not really used except for reflection at the moment. @@ -88,12 +88,12 @@ public: }; using BlockMetaFlags = uint8_t; SmallVector block_meta; - std::unordered_map continue_block_to_loop_header; + std::unordered_map continue_block_to_loop_header; // Normally, we'd stick SPIREntryPoint in ids array, but it conflicts with SPIRFunction. // Entry points can therefore be seen as some sort of meta structure. - std::unordered_map entry_points; - uint32_t default_entry_point = 0; + std::unordered_map entry_points; + FunctionID default_entry_point = 0; struct Source { @@ -114,34 +114,34 @@ public: // Can be useful for simple "raw" reflection. 
// However, most members are here because the Parser needs most of these, // and might as well just have the whole suite of decoration/name handling in one place. - void set_name(uint32_t id, const std::string &name); - const std::string &get_name(uint32_t id) const; - void set_decoration(uint32_t id, spv::Decoration decoration, uint32_t argument = 0); - void set_decoration_string(uint32_t id, spv::Decoration decoration, const std::string &argument); - bool has_decoration(uint32_t id, spv::Decoration decoration) const; - uint32_t get_decoration(uint32_t id, spv::Decoration decoration) const; - const std::string &get_decoration_string(uint32_t id, spv::Decoration decoration) const; - const Bitset &get_decoration_bitset(uint32_t id) const; - void unset_decoration(uint32_t id, spv::Decoration decoration); + void set_name(ID id, const std::string &name); + const std::string &get_name(ID id) const; + void set_decoration(ID id, spv::Decoration decoration, uint32_t argument = 0); + void set_decoration_string(ID id, spv::Decoration decoration, const std::string &argument); + bool has_decoration(ID id, spv::Decoration decoration) const; + uint32_t get_decoration(ID id, spv::Decoration decoration) const; + const std::string &get_decoration_string(ID id, spv::Decoration decoration) const; + const Bitset &get_decoration_bitset(ID id) const; + void unset_decoration(ID id, spv::Decoration decoration); // Decoration handling methods (for members of a struct). 
- void set_member_name(uint32_t id, uint32_t index, const std::string &name); - const std::string &get_member_name(uint32_t id, uint32_t index) const; - void set_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0); - void set_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration, + void set_member_name(TypeID id, uint32_t index, const std::string &name); + const std::string &get_member_name(TypeID id, uint32_t index) const; + void set_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration, uint32_t argument = 0); + void set_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration, const std::string &argument); - uint32_t get_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const; - const std::string &get_member_decoration_string(uint32_t id, uint32_t index, spv::Decoration decoration) const; - bool has_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration) const; - const Bitset &get_member_decoration_bitset(uint32_t id, uint32_t index) const; - void unset_member_decoration(uint32_t id, uint32_t index, spv::Decoration decoration); + uint32_t get_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const; + const std::string &get_member_decoration_string(TypeID id, uint32_t index, spv::Decoration decoration) const; + bool has_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration) const; + const Bitset &get_member_decoration_bitset(TypeID id, uint32_t index) const; + void unset_member_decoration(TypeID id, uint32_t index, spv::Decoration decoration); - void mark_used_as_array_length(uint32_t id); + void mark_used_as_array_length(ID id); uint32_t increase_bound_by(uint32_t count); Bitset get_buffer_block_flags(const SPIRVariable &var) const; - void add_typed_id(Types type, uint32_t id); - void remove_typed_id(Types type, uint32_t id); + void add_typed_id(Types type, ID id); + 
void remove_typed_id(Types type, ID id); class LoopLock { @@ -198,8 +198,8 @@ public: void reset_all_of_type(Types type); - Meta *find_meta(uint32_t id); - const Meta *find_meta(uint32_t id) const; + Meta *find_meta(ID id); + const Meta *find_meta(ID id) const; const std::string &get_empty_string() const { diff --git a/3rdparty/spirv-cross/spirv_glsl.cpp b/3rdparty/spirv-cross/spirv_glsl.cpp index d390e5950..6ecdafcd8 100644 --- a/3rdparty/spirv-cross/spirv_glsl.cpp +++ b/3rdparty/spirv-cross/spirv_glsl.cpp @@ -511,6 +511,7 @@ string CompilerGLSL::compile() fixup_image_load_store_access(); update_active_builtins(); analyze_image_and_sampler_usage(); + analyze_interlocked_resource_usage(); // Shaders might cast unrelated data to pointers of non-block types. // Find all such instances and make sure we can cast the pointers to a synthesized block type. @@ -535,6 +536,25 @@ string CompilerGLSL::compile() pass_count++; } while (is_forcing_recompilation()); + // Implement the interlocked wrapper function at the end. + // The body was implemented in lieu of main(). + if (interlocked_is_complex) + { + statement("void main()"); + begin_scope(); + statement("// Interlocks were used in a way not compatible with GLSL, this is very slow."); + if (options.es) + statement("beginInvocationInterlockNV();"); + else + statement("beginInvocationInterlockARB();"); + statement("spvMainInterlockedBody();"); + if (options.es) + statement("endInvocationInterlockNV();"); + else + statement("endInvocationInterlockARB();"); + end_scope(); + } + // Entry point in GLSL is always main(). 
get_entry_point().name = "main"; @@ -605,6 +625,26 @@ void CompilerGLSL::emit_header() if (execution.flags.get(ExecutionModePostDepthCoverage)) require_extension_internal("GL_ARB_post_depth_coverage"); + // Needed for: layout({pixel,sample}_interlock_[un]ordered) in; + if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT) || + execution.flags.get(ExecutionModePixelInterlockUnorderedEXT) || + execution.flags.get(ExecutionModeSampleInterlockOrderedEXT) || + execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT)) + { + if (options.es) + { + if (options.version < 310) + SPIRV_CROSS_THROW("At least ESSL 3.10 required for fragment shader interlock."); + require_extension_internal("GL_NV_fragment_shader_interlock"); + } + else + { + if (options.version < 420) + require_extension_internal("GL_ARB_shader_image_load_store"); + require_extension_internal("GL_ARB_fragment_shader_interlock"); + } + } + for (auto &ext : forced_extensions) { if (ext == "GL_EXT_shader_explicit_arithmetic_types_float16") @@ -727,7 +767,8 @@ void CompilerGLSL::emit_header() // If there are any spec constants on legacy GLSL, defer declaration, we need to set up macro // declarations before we can emit the work group size. 
- if (options.vulkan_semantics || ((wg_x.id == 0) && (wg_y.id == 0) && (wg_z.id == 0))) + if (options.vulkan_semantics || + ((wg_x.id == ConstantID(0)) && (wg_y.id == ConstantID(0)) && (wg_z.id == ConstantID(0)))) build_workgroup_size(inputs, wg_x, wg_y, wg_z); } else @@ -784,6 +825,15 @@ void CompilerGLSL::emit_header() if (execution.flags.get(ExecutionModePostDepthCoverage)) inputs.push_back("post_depth_coverage"); + if (execution.flags.get(ExecutionModePixelInterlockOrderedEXT)) + inputs.push_back("pixel_interlock_ordered"); + else if (execution.flags.get(ExecutionModePixelInterlockUnorderedEXT)) + inputs.push_back("pixel_interlock_unordered"); + else if (execution.flags.get(ExecutionModeSampleInterlockOrderedEXT)) + inputs.push_back("sample_interlock_ordered"); + else if (execution.flags.get(ExecutionModeSampleInterlockUnorderedEXT)) + inputs.push_back("sample_interlock_unordered"); + if (!options.es && execution.flags.get(ExecutionModeDepthGreater)) statement("layout(depth_greater) out float gl_FragDepth;"); else if (!options.es && execution.flags.get(ExecutionModeDepthLess)) @@ -815,7 +865,8 @@ void CompilerGLSL::emit_struct(SPIRType &type) // Type-punning with these types is legal, which complicates things // when we are storing struct and array types in an SSBO for example. // If the type master is packed however, we can no longer assume that the struct declaration will be redundant. 
- if (type.type_alias != 0 && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) + if (type.type_alias != TypeID(0) && + !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) return; add_resource_name(type.self); @@ -2083,7 +2134,7 @@ void CompilerGLSL::emit_constant(const SPIRConstant &constant) auto name = to_name(constant.self); SpecializationConstant wg_x, wg_y, wg_z; - uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); // This specialization constant is implicitly declared by emitting layout() in; if (constant.self == workgroup_size_id) @@ -2092,7 +2143,8 @@ void CompilerGLSL::emit_constant(const SPIRConstant &constant) // These specialization constants are implicitly declared by emitting layout() in; // In legacy GLSL, we will still need to emit macros for these, so a layout() in; declaration // later can use macro overrides for work group size. 
- bool is_workgroup_size_constant = constant.self == wg_x.id || constant.self == wg_y.id || constant.self == wg_z.id; + bool is_workgroup_size_constant = ConstantID(constant.self) == wg_x.id || ConstantID(constant.self) == wg_y.id || + ConstantID(constant.self) == wg_z.id; if (options.vulkan_semantics && is_workgroup_size_constant) { @@ -2442,7 +2494,7 @@ void CompilerGLSL::declare_undefined_values() bool CompilerGLSL::variable_is_lut(const SPIRVariable &var) const { - bool statically_assigned = var.statically_assigned && var.static_expression != 0 && var.remapped_variable; + bool statically_assigned = var.statically_assigned && var.static_expression != ID(0) && var.remapped_variable; if (statically_assigned) { @@ -2571,7 +2623,7 @@ void CompilerGLSL::emit_resources() SpecializationConstant wg_x, wg_y, wg_z; get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); - if ((wg_x.id != 0) || (wg_y.id != 0) || (wg_z.id != 0)) + if ((wg_x.id != ConstantID(0)) || (wg_y.id != ConstantID(0)) || (wg_z.id != ConstantID(0))) { SmallVector inputs; build_workgroup_size(inputs, wg_x, wg_y, wg_z); @@ -2719,7 +2771,7 @@ void CompilerGLSL::emit_resources() // Returns a string representation of the ID, usable as a function arg. // Default is to simply return the expression representation fo the arg ID. // Subclasses may override to modify the return value. -string CompilerGLSL::to_func_call_arg(uint32_t id) +string CompilerGLSL::to_func_call_arg(const SPIRFunction::Parameter &, uint32_t id) { // Make sure that we use the name of the original variable, and not the parameter alias. 
uint32_t name_id = id; @@ -4683,7 +4735,7 @@ void CompilerGLSL::emit_mix_op(uint32_t result_type, uint32_t id, uint32_t left, emit_trinary_func_op(result_type, id, left, right, lerp, "mix"); } -string CompilerGLSL::to_combined_image_sampler(uint32_t image_id, uint32_t samp_id) +string CompilerGLSL::to_combined_image_sampler(VariableID image_id, VariableID samp_id) { // Keep track of the array indices we have used to load the image. // We'll need to use the same array index into the combined image sampler array. @@ -4705,18 +4757,18 @@ string CompilerGLSL::to_combined_image_sampler(uint32_t image_id, uint32_t samp_ samp_id = samp->self; auto image_itr = find_if(begin(args), end(args), - [image_id](const SPIRFunction::Parameter &param) { return param.id == image_id; }); + [image_id](const SPIRFunction::Parameter &param) { return image_id == param.id; }); auto sampler_itr = find_if(begin(args), end(args), - [samp_id](const SPIRFunction::Parameter &param) { return param.id == samp_id; }); + [samp_id](const SPIRFunction::Parameter &param) { return samp_id == param.id; }); if (image_itr != end(args) || sampler_itr != end(args)) { // If any parameter originates from a parameter, we will find it in our argument list. bool global_image = image_itr == end(args); bool global_sampler = sampler_itr == end(args); - uint32_t iid = global_image ? image_id : uint32_t(image_itr - begin(args)); - uint32_t sid = global_sampler ? samp_id : uint32_t(sampler_itr - begin(args)); + VariableID iid = global_image ? image_id : VariableID(image_itr - begin(args)); + VariableID sid = global_sampler ?
samp_id : VariableID(sampler_itr - begin(args)); auto &combined = current_function->combined_parameters; auto itr = find_if(begin(combined), end(combined), [=](const SPIRFunction::CombinedImageSamplerParameter &p) { @@ -4796,13 +4848,41 @@ void CompilerGLSL::emit_texture_op(const Instruction &i) { auto *ops = stream(i); auto op = static_cast(i.op); - uint32_t length = i.length; SmallVector inherited_expressions; uint32_t result_type_id = ops[0]; uint32_t id = ops[1]; - uint32_t img = ops[2]; + + bool forward = false; + string expr = to_texture_op(i, &forward, inherited_expressions); + emit_op(result_type_id, id, expr, forward); + for (auto &inherit : inherited_expressions) + inherit_expression_dependencies(id, inherit); + + switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleProjDrefImplicitLod: + register_control_dependent_expression(id); + break; + + default: + break; + } +} + +std::string CompilerGLSL::to_texture_op(const Instruction &i, bool *forward, + SmallVector &inherited_expressions) +{ + auto *ops = stream(i); + auto op = static_cast(i.op); + uint32_t length = i.length; + + uint32_t result_type_id = ops[0]; + VariableID img = ops[2]; uint32_t coord = ops[3]; uint32_t dref = 0; uint32_t comp = 0; @@ -4942,12 +5022,11 @@ void CompilerGLSL::emit_texture_op(const Instruction &i) test(minlod, ImageOperandsMinLodMask); string expr; - bool forward = false; expr += to_function_name(img, imgtype, !!fetch, !!gather, !!proj, !!coffsets, (!!coffset || !!offset), (!!grad_x || !!grad_y), !!dref, lod, minlod); expr += "("; expr += to_function_args(img, imgtype, fetch, gather, proj, coord, coord_components, dref, grad_x, grad_y, lod, - coffset, offset, bias, comp, sample, minlod, &forward); + coffset, offset, bias, comp, sample, minlod, forward); expr += ")"; // texture(samplerXShadow) returns float. shadowX() returns vec4. Swizzle here. 
@@ -4960,7 +5039,7 @@ void CompilerGLSL::emit_texture_op(const Instruction &i) { bool image_is_depth = false; const auto *combined = maybe_get(img); - uint32_t image_id = combined ? combined->image : img; + VariableID image_id = combined ? combined->image : img; if (combined && image_is_comparison(imgtype, combined->image)) image_is_depth = true; @@ -4989,22 +5068,7 @@ void CompilerGLSL::emit_texture_op(const Instruction &i) if (op == OpImageRead) expr = remap_swizzle(result_type, 4, expr); - emit_op(result_type_id, id, expr, forward); - for (auto &inherit : inherited_expressions) - inherit_expression_dependencies(id, inherit); - - switch (op) - { - case OpImageSampleDrefImplicitLod: - case OpImageSampleImplicitLod: - case OpImageSampleProjImplicitLod: - case OpImageSampleProjDrefImplicitLod: - register_control_dependent_expression(id); - break; - - default: - break; - } + return expr; } bool CompilerGLSL::expression_is_constant_null(uint32_t id) const @@ -5017,7 +5081,7 @@ bool CompilerGLSL::expression_is_constant_null(uint32_t id) const // Returns the function name for a texture sampling function for the specified image and sampling characteristics. // For some subclasses, the function is a method on the specified image. -string CompilerGLSL::to_function_name(uint32_t tex, const SPIRType &imgtype, bool is_fetch, bool is_gather, +string CompilerGLSL::to_function_name(VariableID tex, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, bool has_array_offsets, bool has_offset, bool has_grad, bool, uint32_t lod, uint32_t minlod) { @@ -5108,7 +5172,7 @@ std::string CompilerGLSL::convert_separate_image_to_expression(uint32_t id) } // Returns the function args for a texture sampling function for the specified image and sampling characteristics. 
-string CompilerGLSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, +string CompilerGLSL::to_function_args(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref, uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, uint32_t comp, uint32_t sample, uint32_t /*minlod*/, @@ -7526,7 +7590,7 @@ bool CompilerGLSL::remove_unity_swizzle(uint32_t base, string &op) string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32_t *elems, uint32_t length) { - uint32_t base = 0; + ID base = 0; string op; string subop; @@ -7587,7 +7651,7 @@ string CompilerGLSL::build_composite_combiner(uint32_t return_type, const uint32 subop = to_composite_constructor_expression(elems[i]); } - base = e ? e->base_expression : 0; + base = e ? e->base_expression : ID(0); } if (swizzle_optimization) @@ -7949,7 +8013,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) auto &expr = set(ops[1], move(e), ops[0], should_forward(ops[2])); auto *backing_variable = maybe_get_backing_variable(ops[2]); - expr.loaded_from = backing_variable ? backing_variable->self : ops[2]; + expr.loaded_from = backing_variable ? backing_variable->self : ID(ops[2]); expr.need_transpose = meta.need_transpose; expr.access_chain = true; @@ -8078,13 +8142,13 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (skip_argument(arg[i])) continue; - arglist.push_back(to_func_call_arg(arg[i])); + arglist.push_back(to_func_call_arg(callee.arguments[i], arg[i])); } for (auto &combined : callee.combined_parameters) { - uint32_t image_id = combined.global_image ? combined.image_id : arg[combined.image_id]; - uint32_t sampler_id = combined.global_sampler ? combined.sampler_id : arg[combined.sampler_id]; + auto image_id = combined.global_image ? 
combined.image_id : VariableID(arg[combined.image_id]); + auto sampler_id = combined.global_sampler ? combined.sampler_id : VariableID(arg[combined.sampler_id]); arglist.push_back(to_combined_image_sampler(image_id, sampler_id)); } @@ -8391,7 +8455,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) if (pointer) { auto *var = maybe_get_backing_variable(rhs); - e.loaded_from = var ? var->self : 0; + e.loaded_from = var ? var->self : ID(0); } // If we're copying an access chain, need to inherit the read expressions. @@ -8446,7 +8510,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // a value we might not need, and bog down codegen. SPIRConstant c; c.constant_type = type0.parent_type; - assert(type0.parent_type != 0); + assert(type0.parent_type != ID(0)); args.push_back(constant_expression(c)); } else if (elems[i] >= type0.vecsize) @@ -9327,7 +9391,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // When using the image, we need to know which variable it is actually loaded from. auto *var = maybe_get_backing_variable(ops[2]); - e.loaded_from = var ? var->self : 0; + e.loaded_from = var ? var->self : ID(0); break; } @@ -9550,7 +9614,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) // When using the pointer, we need to know which variable it is actually loaded from. auto *var = maybe_get_backing_variable(ops[2]); - e.loaded_from = var ? var->self : 0; + e.loaded_from = var ? var->self : ID(0); break; } @@ -10097,6 +10161,34 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction) emit_op(ops[0], ops[1], "helperInvocationEXT()", false); break; + case OpBeginInvocationInterlockEXT: + // If the interlock is complex, we emit this elsewhere. 
+ if (!interlocked_is_complex) + { + if (options.es) + statement("beginInvocationInterlockNV();"); + else + statement("beginInvocationInterlockARB();"); + + flush_all_active_variables(); + // Make sure forwarding doesn't propagate outside interlock region. + } + break; + + case OpEndInvocationInterlockEXT: + // If the interlock is complex, we emit this elsewhere. + if (!interlocked_is_complex) + { + if (options.es) + statement("endInvocationInterlockNV();"); + else + statement("endInvocationInterlockARB();"); + + flush_all_active_variables(); + // Make sure forwarding doesn't propagate outside interlock region. + } + break; + default: statement("// unimplemented op ", instruction.op); break; @@ -10126,7 +10218,7 @@ void CompilerGLSL::append_global_func_args(const SPIRFunction &func, uint32_t in if (var_id) flush_variable_declaration(var_id); - arglist.push_back(to_func_call_arg(arg.id)); + arglist.push_back(to_func_call_arg(arg, arg.id)); } } @@ -10894,7 +10986,7 @@ void CompilerGLSL::require_extension_internal(const string &ext) } } -void CompilerGLSL::flatten_buffer_block(uint32_t id) +void CompilerGLSL::flatten_buffer_block(VariableID id) { auto &var = get(id); auto &type = get(var.basetype); @@ -11010,7 +11102,13 @@ void CompilerGLSL::emit_function_prototype(SPIRFunction &func, const Bitset &ret if (func.self == ir.default_entry_point) { - decl += "main"; + // If we need complex fallback in GLSL, we just wrap main() in a function + // and interlock the entire shader ... 
+ if (interlocked_is_complex) + decl += "spvMainInterlockedBody"; + else + decl += "main"; + processing_entry_point = true; } else @@ -11209,7 +11307,7 @@ void CompilerGLSL::emit_fixup() } } -void CompilerGLSL::flush_phi(uint32_t from, uint32_t to) +void CompilerGLSL::flush_phi(BlockID from, BlockID to) { auto &child = get(to); if (child.ignore_phi_from_block == from) @@ -11238,7 +11336,7 @@ void CompilerGLSL::flush_phi(uint32_t from, uint32_t to) // This is judged to be extremely rare, so deal with it here using a simple, but suboptimal algorithm. bool need_saved_temporary = find_if(itr + 1, end(child.phi_variables), [&](const SPIRBlock::Phi &future_phi) -> bool { - return future_phi.local_variable == phi.function_variable && future_phi.parent == from; + return future_phi.local_variable == ID(phi.function_variable) && future_phi.parent == from; }) != end(child.phi_variables); if (need_saved_temporary) @@ -11273,7 +11371,7 @@ void CompilerGLSL::flush_phi(uint32_t from, uint32_t to) } } -void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to) +void CompilerGLSL::branch_to_continue(BlockID from, BlockID to) { auto &to_block = get(to); if (from == to) @@ -11303,7 +11401,7 @@ void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to) // so just use "self" here. 
loop_dominator = from; } - else if (from_block.loop_dominator != SPIRBlock::NoDominator) + else if (from_block.loop_dominator != BlockID(SPIRBlock::NoDominator)) { loop_dominator = from_block.loop_dominator; } @@ -11327,7 +11425,7 @@ void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to) } } -void CompilerGLSL::branch(uint32_t from, uint32_t to) +void CompilerGLSL::branch(BlockID from, BlockID to) { flush_phi(from, to); flush_control_dependent_expressions(from); @@ -11349,7 +11447,8 @@ void CompilerGLSL::branch(uint32_t from, uint32_t to) // Only sensible solution is to make a ladder variable, which we declare at the top of the switch block, // write to the ladder here, and defer the break. // The loop we're breaking out of must dominate the switch block, or there is no ladder breaking case. - if (current_emitting_switch && is_loop_break(to) && current_emitting_switch->loop_dominator != ~0u && + if (current_emitting_switch && is_loop_break(to) && + current_emitting_switch->loop_dominator != BlockID(SPIRBlock::NoDominator) && get(current_emitting_switch->loop_dominator).merge_block == to) { if (!current_emitting_switch->need_ladder_break) @@ -11391,10 +11490,10 @@ void CompilerGLSL::branch(uint32_t from, uint32_t to) // Inner scope always takes precedence. } -void CompilerGLSL::branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block) +void CompilerGLSL::branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block) { auto &from_block = get(from); - uint32_t merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : 0; + BlockID merge_block = from_block.merge == SPIRBlock::MergeSelection ? from_block.next_block : BlockID(0); // If we branch directly to a selection merge target, we don't need a code path. // This covers both merge out of if () / else () as well as a break for switch blocks. 
@@ -11793,12 +11892,12 @@ void CompilerGLSL::flush_undeclared_variables(SPIRBlock &block) flush_variable_declaration(v); } -void CompilerGLSL::emit_hoisted_temporaries(SmallVector> &temporaries) +void CompilerGLSL::emit_hoisted_temporaries(SmallVector> &temporaries) { // If we need to force temporaries for certain IDs due to continue blocks, do it before starting loop header. // Need to sort these to ensure that reference output is stable. sort(begin(temporaries), end(temporaries), - [](const pair &a, const pair &b) { return a.second < b.second; }); + [](const pair &a, const pair &b) { return a.second < b.second; }); for (auto &tmp : temporaries) { @@ -12252,7 +12351,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) } if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) || - block.loop_dominator != SPIRBlock::NoDominator) + block.loop_dominator != BlockID(SPIRBlock::NoDominator)) { statement("return;"); } @@ -12265,7 +12364,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) } } else if (!cfg.node_terminates_control_flow_in_sub_graph(current_function->entry_block, block.self) || - block.loop_dominator != SPIRBlock::NoDominator) + block.loop_dominator != BlockID(SPIRBlock::NoDominator)) { // If this block is the very final block and not called from control flow, // we do not need an explicit return which looks out of place. Just end the function here. 
@@ -12312,7 +12411,7 @@ void CompilerGLSL::emit_block_chain(SPIRBlock &block) assert(block.merge == SPIRBlock::MergeSelection); branch_to_continue(block.self, block.next_block); } - else if (block.self != block.next_block) + else if (BlockID(block.self) != block.next_block) emit_block_chain(get(block.next_block)); } } @@ -12388,6 +12487,14 @@ void CompilerGLSL::end_scope() statement("}"); } +void CompilerGLSL::end_scope(const string &trailer) +{ + if (!indent) + SPIRV_CROSS_THROW("Popping empty indent stack."); + indent--; + statement("}", trailer); +} + void CompilerGLSL::end_scope_decl() { if (!indent) @@ -12679,10 +12786,11 @@ void CompilerGLSL::reorder_type_alias() for (auto alias_itr = begin(type_ids); alias_itr != end(type_ids); ++alias_itr) { auto &type = get(*alias_itr); - if (type.type_alias != 0 && !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) + if (type.type_alias != TypeID(0) && + !has_extended_decoration(type.type_alias, SPIRVCrossDecorationBufferBlockRepacked)) { // We will skip declaring this type, so make sure the type_alias type comes before. - auto master_itr = find(begin(type_ids), end(type_ids), type.type_alias); + auto master_itr = find(begin(type_ids), end(type_ids), ID(type.type_alias)); assert(master_itr != end(type_ids)); if (alias_itr < master_itr) diff --git a/3rdparty/spirv-cross/spirv_glsl.hpp b/3rdparty/spirv-cross/spirv_glsl.hpp index 4c273edb4..e989a7904 100644 --- a/3rdparty/spirv-cross/spirv_glsl.hpp +++ b/3rdparty/spirv-cross/spirv_glsl.hpp @@ -209,7 +209,7 @@ public: // For this to work, all types in the block must be the same basic type, e.g. mixing vec2 and vec4 is fine, but // mixing int and float is not. // The name of the uniform array will be the same as the interface block name. 
- void flatten_buffer_block(uint32_t id); + void flatten_buffer_block(VariableID id); protected: void reset(); @@ -244,6 +244,8 @@ protected: virtual void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id); virtual void emit_texture_op(const Instruction &i); + virtual std::string to_texture_op(const Instruction &i, bool *forward, + SmallVector &inherited_expressions); virtual void emit_subgroup_op(const Instruction &i); virtual std::string type_to_glsl(const SPIRType &type, uint32_t id = 0); virtual std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage); @@ -256,11 +258,11 @@ protected: virtual std::string constant_expression_vector(const SPIRConstant &c, uint32_t vector); virtual void emit_fixup(); virtual std::string variable_decl(const SPIRType &type, const std::string &name, uint32_t id = 0); - virtual std::string to_func_call_arg(uint32_t id); - virtual std::string to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, + virtual std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id); + virtual std::string to_function_name(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, bool has_array_offsets, bool has_offset, bool has_grad, bool has_dref, uint32_t lod, uint32_t minlod); - virtual std::string to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, + virtual std::string to_function_args(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref, uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, uint32_t comp, uint32_t sample, @@ -330,6 +332,7 @@ protected: void begin_scope(); void end_scope(); + void end_scope(const std::string &trailer); void end_scope_decl(); void end_scope_decl(const std::string &decl); @@ -425,17 +428,17 @@ protected: 
void emit_interface_block(const SPIRVariable &type); void emit_flattened_io_block(const SPIRVariable &var, const char *qual); void emit_block_chain(SPIRBlock &block); - void emit_hoisted_temporaries(SmallVector> &temporaries); + void emit_hoisted_temporaries(SmallVector> &temporaries); std::string constant_value_macro_name(uint32_t id); void emit_constant(const SPIRConstant &constant); void emit_specialization_constant_op(const SPIRConstantOp &constant); std::string emit_continue_block(uint32_t continue_block, bool follow_true_block, bool follow_false_block); bool attempt_emit_loop_header(SPIRBlock &block, SPIRBlock::Method method); - void branch(uint32_t from, uint32_t to); - void branch_to_continue(uint32_t from, uint32_t to); - void branch(uint32_t from, uint32_t cond, uint32_t true_block, uint32_t false_block); - void flush_phi(uint32_t from, uint32_t to); + void branch(BlockID from, BlockID to); + void branch_to_continue(BlockID from, BlockID to); + void branch(BlockID from, uint32_t cond, BlockID true_block, BlockID false_block); + void flush_phi(BlockID from, BlockID to); void flush_variable_declaration(uint32_t id); void flush_undeclared_variables(SPIRBlock &block); void emit_variable_temporary_copies(const SPIRVariable &var); @@ -540,7 +543,7 @@ protected: virtual std::string layout_for_member(const SPIRType &type, uint32_t index); virtual std::string to_interpolation_qualifiers(const Bitset &flags); std::string layout_for_variable(const SPIRVariable &variable); - std::string to_combined_image_sampler(uint32_t image_id, uint32_t samp_id); + std::string to_combined_image_sampler(VariableID image_id, VariableID samp_id); virtual bool skip_argument(uint32_t id) const; virtual void emit_array_copy(const std::string &lhs, uint32_t rhs_id, spv::StorageClass lhs_storage, spv::StorageClass rhs_storage); diff --git a/3rdparty/spirv-cross/spirv_hlsl.cpp b/3rdparty/spirv-cross/spirv_hlsl.cpp index 1af127f2f..1946096e1 100644 --- a/3rdparty/spirv-cross/spirv_hlsl.cpp 
+++ b/3rdparty/spirv-cross/spirv_hlsl.cpp @@ -203,7 +203,7 @@ static string image_format_to_type(ImageFormat fmt, SPIRType::BaseType basetype) } } -string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t) +string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t id) { auto &imagetype = get(type.image.type); const char *dim = nullptr; @@ -235,7 +235,12 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t) if (type.image.sampled == 1) return join("Buffer<", type_to_glsl(imagetype), components, ">"); else if (type.image.sampled == 2) + { + if (interlocked_resources.count(id)) + return join("RasterizerOrderedBuffer<", image_format_to_type(type.image.format, imagetype.basetype), + ">"); return join("RWBuffer<", image_format_to_type(type.image.format, imagetype.basetype), ">"); + } else SPIRV_CROSS_THROW("Sampler buffers must be either sampled or unsampled. Cannot deduce in runtime."); case DimSubpassData: @@ -248,6 +253,8 @@ string CompilerHLSL::image_type_hlsl_modern(const SPIRType &type, uint32_t) const char *arrayed = type.image.arrayed ? "Array" : ""; const char *ms = type.image.ms ? "MS" : ""; const char *rw = typed_load ? "RW" : ""; + if (typed_load && interlocked_resources.count(id)) + rw = "RasterizerOrdered"; return join(rw, "Texture", dim, ms, arrayed, "<", typed_load ? 
image_format_to_type(type.image.format, imagetype.basetype) : join(type_to_glsl(imagetype), components), @@ -1038,7 +1045,7 @@ void CompilerHLSL::emit_specialization_constants_and_structs() { bool emitted = false; SpecializationConstant wg_x, wg_y, wg_z; - uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); auto loop_lock = ir.create_loop_hard_lock(); for (auto &id_ : ir.ids_for_constant_or_type) @@ -1848,9 +1855,13 @@ void CompilerHLSL::emit_buffer_block(const SPIRVariable &var) Bitset flags = ir.get_buffer_block_flags(var); bool is_readonly = flags.get(DecorationNonWritable); bool is_coherent = flags.get(DecorationCoherent); + bool is_interlocked = interlocked_resources.count(var.self) > 0; + const char *type_name = "ByteAddressBuffer "; + if (!is_readonly) + type_name = is_interlocked ? "RasterizerOrderedByteAddressBuffer " : "RWByteAddressBuffer "; add_resource_name(var.self); - statement(is_coherent ? "globallycoherent " : "", is_readonly ? "ByteAddressBuffer " : "RWByteAddressBuffer ", - to_name(var.self), type_to_array_glsl(type), to_resource_binding(var), ";"); + statement(is_coherent ? 
"globallycoherent " : "", type_name, to_name(var.self), type_to_array_glsl(type), + to_resource_binding(var), ";"); } else { @@ -2014,9 +2025,9 @@ void CompilerHLSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_i } } -string CompilerHLSL::to_func_call_arg(uint32_t id) +string CompilerHLSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) { - string arg_str = CompilerGLSL::to_func_call_arg(id); + string arg_str = CompilerGLSL::to_func_call_arg(arg, id); if (hlsl_options.shader_model <= 30) return arg_str; @@ -2478,7 +2489,7 @@ void CompilerHLSL::emit_texture_op(const Instruction &i) uint32_t result_type = ops[0]; uint32_t id = ops[1]; - uint32_t img = ops[2]; + VariableID img = ops[2]; uint32_t coord = ops[3]; uint32_t dref = 0; uint32_t comp = 0; @@ -3713,7 +3724,7 @@ void CompilerHLSL::emit_access_chain(const Instruction &instruction) e.row_major_matrix = row_major_matrix; e.matrix_stride = matrix_stride; e.immutable = should_forward(ops[2]); - e.loaded_from = backing_variable ? backing_variable->self : 0; + e.loaded_from = backing_variable ? backing_variable->self : ID(0); if (chain) { @@ -4483,7 +4494,7 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) // When using the pointer, we need to know which variable it is actually loaded from. auto *var = maybe_get_backing_variable(ops[2]); - e.loaded_from = var ? var->self : 0; + e.loaded_from = var ? 
var->self : ID(0); break; } @@ -4673,6 +4684,12 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction) case OpIsHelperInvocationEXT: SPIRV_CROSS_THROW("helperInvocationEXT() is not supported in HLSL."); + case OpBeginInvocationInterlockEXT: + case OpEndInvocationInterlockEXT: + if (hlsl_options.shader_model < 51) + SPIRV_CROSS_THROW("Rasterizer order views require Shader Model 5.1."); + break; // Nothing to do in the body + default: CompilerGLSL::emit_instruction(instruction); break; @@ -4747,7 +4764,7 @@ void CompilerHLSL::add_vertex_attribute_remap(const HLSLVertexAttributeRemap &ve remap_vertex_attributes.push_back(vertex_attributes); } -uint32_t CompilerHLSL::remap_num_workgroups_builtin() +VariableID CompilerHLSL::remap_num_workgroups_builtin() { update_active_builtins(); @@ -4850,6 +4867,7 @@ string CompilerHLSL::compile() validate_shader_model(); update_active_builtins(); analyze_image_and_sampler_usage(); + analyze_interlocked_resource_usage(); // Subpass input needs SV_Position. if (need_subpass_input) diff --git a/3rdparty/spirv-cross/spirv_hlsl.hpp b/3rdparty/spirv-cross/spirv_hlsl.hpp index 6f1014145..eb968f003 100644 --- a/3rdparty/spirv-cross/spirv_hlsl.hpp +++ b/3rdparty/spirv-cross/spirv_hlsl.hpp @@ -114,7 +114,7 @@ public: // If non-zero, this returns the variable ID of a cbuffer which corresponds to // the cbuffer declared above. By default, no binding or descriptor set decoration is set, // so the calling application should declare explicit bindings on this ID before calling compile(). 
- uint32_t remap_num_workgroups_builtin(); + VariableID remap_num_workgroups_builtin(); private: std::string type_to_glsl(const SPIRType &type, uint32_t id = 0) override; @@ -145,7 +145,7 @@ private: std::string layout_for_member(const SPIRType &type, uint32_t index) override; std::string to_interpolation_qualifiers(const Bitset &flags) override; std::string bitcast_glsl_op(const SPIRType &result_type, const SPIRType &argument_type) override; - std::string to_func_call_arg(uint32_t id) override; + std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override; std::string to_sampler_expression(uint32_t id); std::string to_resource_binding(const SPIRVariable &var); std::string to_resource_binding_sampler(const SPIRVariable &var); diff --git a/3rdparty/spirv-cross/spirv_msl.cpp b/3rdparty/spirv-cross/spirv_msl.cpp index 4dbb3ddb1..9c7bc2328 100644 --- a/3rdparty/spirv-cross/spirv_msl.cpp +++ b/3rdparty/spirv-cross/spirv_msl.cpp @@ -61,6 +61,12 @@ void CompilerMSL::add_msl_resource_binding(const MSLResourceBinding &binding) resource_bindings[tuple] = { binding, false }; } +void CompilerMSL::add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index) +{ + SetBindingPair pair = { desc_set, binding }; + buffers_requiring_dynamic_offset[pair] = { index, 0 }; +} + void CompilerMSL::add_discrete_descriptor_set(uint32_t desc_set) { if (desc_set < kMaxArgumentBuffers) @@ -89,6 +95,16 @@ uint32_t CompilerMSL::get_automatic_msl_resource_binding_secondary(uint32_t id) return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexSecondary); } +uint32_t CompilerMSL::get_automatic_msl_resource_binding_tertiary(uint32_t id) const +{ + return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexTertiary); +} + +uint32_t CompilerMSL::get_automatic_msl_resource_binding_quaternary(uint32_t id) const +{ + return get_extended_decoration(id, SPIRVCrossDecorationResourceIndexQuaternary); +} + void 
CompilerMSL::set_fragment_output_components(uint32_t location, uint32_t components) { fragment_output_components[location] = components; @@ -538,6 +554,18 @@ void CompilerMSL::build_implicit_builtins() set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, msl_options.view_mask_buffer_index); view_mask_buffer_id = var_id; } + + if (!buffers_requiring_dynamic_offset.empty()) + { + uint32_t var_id = build_constant_uint_array_pointer(); + set_name(var_id, "spvDynamicOffsets"); + // This should never match anything. + set_decoration(var_id, DecorationDescriptorSet, ~(5u)); + set_decoration(var_id, DecorationBinding, msl_options.dynamic_offsets_buffer_index); + set_extended_decoration(var_id, SPIRVCrossDecorationResourceIndexPrimary, + msl_options.dynamic_offsets_buffer_index); + dynamic_offsets_buffer_id = var_id; + } } void CompilerMSL::mark_implicit_builtin(StorageClass storage, BuiltIn builtin, uint32_t id) @@ -764,9 +792,82 @@ void CompilerMSL::emit_entry_point_declarations() convert_to_string(s.lod_clamp_max, current_locale_radix_character), ")")); } - statement("constexpr sampler ", - type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first), - "(", merge(args), ");"); + // If we would emit no arguments, then omit the parentheses entirely. Otherwise, + // we'll wind up with a "most vexing parse" situation. + if (args.empty()) + statement("constexpr sampler ", + type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first), + ";"); + else + statement("constexpr sampler ", + type.basetype == SPIRType::SampledImage ? to_sampler_expression(samp.first) : to_name(samp.first), + "(", merge(args), ");"); + } + + // Emit dynamic buffers here. + for (auto &dynamic_buffer : buffers_requiring_dynamic_offset) + { + if (!dynamic_buffer.second.second) + { + // Could happen if no buffer was used at requested binding point. 
+ continue; + } + + const auto &var = get(dynamic_buffer.second.second); + uint32_t var_id = var.self; + const auto &type = get_variable_data_type(var); + string name = to_name(var.self); + uint32_t desc_set = get_decoration(var.self, DecorationDescriptorSet); + uint32_t arg_id = argument_buffer_ids[desc_set]; + uint32_t base_index = dynamic_buffer.second.first; + + if (!type.array.empty()) + { + // This is complicated, because we need to support arrays of arrays. + // And it's even worse if the outermost dimension is a runtime array, because now + // all this complicated goop has to go into the shader itself. (FIXME) + if (!type.array[type.array.size() - 1]) + SPIRV_CROSS_THROW("Runtime arrays with dynamic offsets are not supported yet."); + else + { + statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id), name, + type_to_array_glsl(type), " ="); + uint32_t dim = uint32_t(type.array.size()); + uint32_t j = 0; + for (SmallVector indices(type.array.size()); + indices[type.array.size() - 1] < to_array_size_literal(type); j++) + { + while (dim > 0) + { + begin_scope(); + --dim; + } + + string arrays; + for (uint32_t i = uint32_t(type.array.size()); i; --i) + arrays += join("[", indices[i - 1], "]"); + statement("(", get_argument_address_space(var), " ", type_to_glsl(type), "* ", + to_restrict(var_id, false), ")((", get_argument_address_space(var), " char* ", + to_restrict(var_id, false), ")", to_name(arg_id), ".", ensure_valid_name(name, "m"), + arrays, " + ", to_name(dynamic_offsets_buffer_id), "[", base_index + j, "]),"); + + while (++indices[dim] >= to_array_size_literal(type, dim) && dim < type.array.size() - 1) + { + end_scope(","); + indices[dim++] = 0; + } + } + end_scope_decl(); + statement_no_indent(""); + } + } + else + { + statement(get_argument_address_space(var), " auto& ", to_restrict(var_id), name, " = *(", + get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(var_id, false), ")((", + 
get_argument_address_space(var), " char* ", to_restrict(var_id, false), ")", to_name(arg_id), ".", + ensure_valid_name(name, "m"), " + ", to_name(dynamic_offsets_buffer_id), "[", base_index, "]);"); + } } // Emit buffer arrays here. @@ -777,8 +878,8 @@ void CompilerMSL::emit_entry_point_declarations() string name = to_name(array_id); statement(get_argument_address_space(var), " ", type_to_glsl(type), "* ", to_restrict(array_id), name, "[] ="); begin_scope(); - for (uint32_t i = 0; i < type.array[0]; ++i) - statement(name + "_" + convert_to_string(i) + ","); + for (uint32_t i = 0; i < to_array_size_literal(type); ++i) + statement(name, "_", i, ","); end_scope_decl(); statement_no_indent(""); } @@ -835,6 +936,7 @@ string CompilerMSL::compile() update_active_builtins(); analyze_image_and_sampler_usage(); analyze_sampled_image_usage(); + analyze_interlocked_resource_usage(); preprocess_op_codes(); build_implicit_builtins(); @@ -847,6 +949,8 @@ string CompilerMSL::compile() active_interface_variables.insert(buffer_size_buffer_id); if (view_mask_buffer_id) active_interface_variables.insert(view_mask_buffer_id); + if (dynamic_offsets_buffer_id) + active_interface_variables.insert(dynamic_offsets_buffer_id); if (builtin_layer_id) active_interface_variables.insert(builtin_layer_id); if (builtin_dispatch_base_id && !msl_options.supports_msl_version(1, 2)) @@ -1329,7 +1433,7 @@ void CompilerMSL::add_plain_variable_to_interface_block(StorageClass storage, co else if (!strip_array) ir.meta[var.self].decoration.qualified_alias = qual_var_name; - if (var.storage == StorageClassOutput && var.initializer != 0) + if (var.storage == StorageClassOutput && var.initializer != ID(0)) { entry_func.fixup_hooks_in.push_back( [=, &var]() { statement(qual_var_name, " = ", to_expression(var.initializer), ";"); }); @@ -2549,7 +2653,7 @@ void CompilerMSL::mark_scalar_layout_structs(const SPIRType &type) void CompilerMSL::align_struct(SPIRType &ib_type, unordered_set &aligned_structs) { // We 
align structs recursively, so stop any redundant work. - uint32_t &ib_type_id = ib_type.self; + ID &ib_type_id = ib_type.self; if (aligned_structs.count(ib_type_id)) return; aligned_structs.insert(ib_type_id); @@ -2828,8 +2932,9 @@ void CompilerMSL::emit_store_statement(uint32_t lhs_expression, uint32_t rhs_exp // Special handling when storing to a remapped physical type. // This is mostly to deal with std140 padded matrices or vectors. - uint32_t physical_type_id = - lhs_remapped_type ? get_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypeID) : type.self; + TypeID physical_type_id = lhs_remapped_type ? + ID(get_extended_decoration(lhs_expression, SPIRVCrossDecorationPhysicalTypeID)) : + type.self; auto &physical_type = get(physical_type_id); @@ -3109,6 +3214,39 @@ void CompilerMSL::emit_custom_functions() if (spv_function_implementations.count(static_cast(SPVFuncImplArrayCopyMultidimBase + i))) spv_function_implementations.insert(static_cast(SPVFuncImplArrayCopyMultidimBase + i - 1)); + if (spv_function_implementations.count(SPVFuncImplDynamicImageSampler)) + { + // Unfortunately, this one needs a lot of the other functions to compile OK. 
+ if (!msl_options.supports_msl_version(2)) + SPIRV_CROSS_THROW( + "spvDynamicImageSampler requires default-constructible texture objects, which require MSL 2.0."); + spv_function_implementations.insert(SPVFuncImplForwardArgs); + spv_function_implementations.insert(SPVFuncImplTextureSwizzle); + if (msl_options.swizzle_texture_samples) + spv_function_implementations.insert(SPVFuncImplGatherSwizzle); + for (uint32_t i = SPVFuncImplChromaReconstructNearest2Plane; + i <= SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane; i++) + spv_function_implementations.insert(static_cast(i)); + spv_function_implementations.insert(SPVFuncImplExpandITUFullRange); + spv_function_implementations.insert(SPVFuncImplExpandITUNarrowRange); + spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT709); + spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT601); + spv_function_implementations.insert(SPVFuncImplConvertYCbCrBT2020); + } + + for (uint32_t i = SPVFuncImplChromaReconstructNearest2Plane; + i <= SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane; i++) + if (spv_function_implementations.count(static_cast(i))) + spv_function_implementations.insert(SPVFuncImplForwardArgs); + + if (spv_function_implementations.count(SPVFuncImplTextureSwizzle) || + spv_function_implementations.count(SPVFuncImplGatherSwizzle) || + spv_function_implementations.count(SPVFuncImplGatherCompareSwizzle)) + { + spv_function_implementations.insert(SPVFuncImplForwardArgs); + spv_function_implementations.insert(SPVFuncImplGetSwizzle); + } + for (auto &spv_func : spv_function_implementations) { switch (spv_func) @@ -3391,18 +3529,7 @@ void CompilerMSL::emit_custom_functions() statement(""); break; - case SPVFuncImplTextureSwizzle: - statement("enum class spvSwizzle : uint"); - begin_scope(); - statement("none = 0,"); - statement("zero,"); - statement("one,"); - statement("red,"); - statement("green,"); - statement("blue,"); - statement("alpha"); - end_scope_decl(); - 
statement(""); + case SPVFuncImplForwardArgs: statement("template struct spvRemoveReference { typedef T type; };"); statement("template struct spvRemoveReference { typedef T type; };"); statement("template struct spvRemoveReference { typedef T type; };"); @@ -3417,6 +3544,20 @@ void CompilerMSL::emit_custom_functions() statement("return static_cast(x);"); end_scope(); statement(""); + break; + + case SPVFuncImplGetSwizzle: + statement("enum class spvSwizzle : uint"); + begin_scope(); + statement("none = 0,"); + statement("zero,"); + statement("one,"); + statement("red,"); + statement("green,"); + statement("blue,"); + statement("alpha"); + end_scope_decl(); + statement(""); statement("template"); statement("inline T spvGetSwizzle(vec x, T c, spvSwizzle s)"); begin_scope(); @@ -3439,6 +3580,9 @@ void CompilerMSL::emit_custom_functions() end_scope(); end_scope(); statement(""); + break; + + case SPVFuncImplTextureSwizzle: statement("// Wrapper function that swizzles texture samples and fetches."); statement("template"); statement("inline vec spvTextureSwizzle(vec x, uint s)"); @@ -3457,11 +3601,14 @@ void CompilerMSL::emit_custom_functions() statement("return spvTextureSwizzle(vec(x, 0, 0, 1), s).x;"); end_scope(); statement(""); + break; + + case SPVFuncImplGatherSwizzle: statement("// Wrapper function that swizzles texture gathers."); - statement("template"); - statement( - "inline vec spvGatherSwizzle(sampler s, const thread Tex& t, Ts... params, component c, uint sw) " - "METAL_CONST_ARG(c)"); + statement("template class Tex, " + "typename... Ts>"); + statement("inline vec spvGatherSwizzle(const thread Tex& t, sampler s, " + "uint sw, component c, Ts... 
params) METAL_CONST_ARG(c)"); begin_scope(); statement("if (sw)"); begin_scope(); @@ -3498,10 +3645,14 @@ void CompilerMSL::emit_custom_functions() end_scope(); end_scope(); statement(""); + break; + + case SPVFuncImplGatherCompareSwizzle: statement("// Wrapper function that swizzles depth texture gathers."); - statement("template"); - statement( - "inline vec spvGatherCompareSwizzle(sampler s, const thread Tex& t, Ts... params, uint sw) "); + statement("template class Tex, " + "typename... Ts>"); + statement("inline vec spvGatherCompareSwizzle(const thread Tex& t, sampler " + "s, uint sw, Ts... params) "); begin_scope(); statement("if (sw)"); begin_scope(); @@ -3647,6 +3798,704 @@ void CompilerMSL::emit_custom_functions() statement(""); break; + case SPVFuncImplChromaReconstructNearest2Plane: + statement("template"); + statement("inline vec spvChromaReconstructNearest(texture2d plane0, texture2d plane1, sampler " + "samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.br = plane1.sample(samp, coord, spvForward(options)...).rg;"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructNearest3Plane: + statement("template"); + statement("inline vec spvChromaReconstructNearest(texture2d plane0, texture2d plane1, " + "texture2d plane2, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.b = plane1.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.r = plane2.sample(samp, coord, spvForward(options)...).r;"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear422CositedEven2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear422CositedEven(texture2d plane0, texture2d " + "plane1, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("if (fract(coord.x * plane1.get_width()) != 0.0)"); + begin_scope(); + statement("ycbcr.br = vec(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), 0.5).rg);"); + end_scope(); + statement("else"); + begin_scope(); + statement("ycbcr.br = plane1.sample(samp, coord, spvForward(options)...).rg;"); + end_scope(); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear422CositedEven3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear422CositedEven(texture2d plane0, texture2d " + "plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("if (fract(coord.x * plane1.get_width()) != 0.0)"); + begin_scope(); + statement("ycbcr.b = T(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), 0.5).r);"); + statement("ycbcr.r = T(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), 0.5).r);"); + end_scope(); + statement("else"); + begin_scope(); + statement("ycbcr.b = plane1.sample(samp, coord, spvForward(options)...).r;"); + statement("ycbcr.r = plane2.sample(samp, coord, spvForward(options)...).r;"); + end_scope(); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear422Midpoint2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear422Midpoint(texture2d plane0, texture2d " + "plane1, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("int2 offs = int2(fract(coord.x * plane1.get_width()) != 0.0 ? 1 : -1, 0);"); + statement("ycbcr.br = vec(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., offs), 0.25).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear422Midpoint3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear422Midpoint(texture2d plane0, texture2d " + "plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("int2 offs = int2(fract(coord.x * plane1.get_width()) != 0.0 ? 1 : -1, 0);"); + statement("ycbcr.b = T(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., offs), 0.25).r);"); + statement("ycbcr.r = T(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., offs), 0.25).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XCositedEvenYCositedEven(texture2d plane0, " + "texture2d plane1, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract(round(coord * float2(plane0.get_width(), plane0.get_height())) * 0.5);"); + statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XCositedEvenYCositedEven(texture2d plane0, " + "texture2d plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract(round(coord * float2(plane0.get_width(), plane0.get_height())) * 0.5);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XMidpointYCositedEven(texture2d plane0, " + "texture2d plane1, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " + "0)) * 0.5);"); + statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XMidpointYCositedEven(texture2d plane0, " + "texture2d plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " + "0)) * 0.5);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case 
SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XCositedEvenYMidpoint(texture2d plane0, " + "texture2d plane1, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0, " + "0.5)) * 0.5);"); + statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XCositedEvenYMidpoint(texture2d plane0, " + "texture2d plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0, " + "0.5)) * 0.5);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XMidpointYMidpoint(texture2d plane0, " + "texture2d plane1, sampler samp, float2 coord, LodOptions... 
options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " + "0.5)) * 0.5);"); + statement("ycbcr.br = vec(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).rg);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane: + statement("template"); + statement("inline vec spvChromaReconstructLinear420XMidpointYMidpoint(texture2d plane0, " + "texture2d plane1, texture2d plane2, sampler samp, float2 coord, LodOptions... options)"); + begin_scope(); + statement("vec ycbcr = vec(0, 0, 0, 1);"); + statement("ycbcr.g = plane0.sample(samp, coord, spvForward(options)...).r;"); + statement("float2 ab = fract((round(coord * float2(plane0.get_width(), plane0.get_height())) - float2(0.5, " + "0.5)) * 0.5);"); + statement("ycbcr.b = T(mix(mix(plane1.sample(samp, coord, spvForward(options)...), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane1.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane1.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("ycbcr.r = T(mix(mix(plane2.sample(samp, coord, spvForward(options)...), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 0)), ab.x), " + "mix(plane2.sample(samp, coord, spvForward(options)..., int2(0, 1)), " + "plane2.sample(samp, coord, spvForward(options)..., int2(1, 1)), ab.x), ab.y).r);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplExpandITUFullRange: + 
statement("template"); + statement("inline vec spvExpandITUFullRange(vec ycbcr, int n)"); + begin_scope(); + statement("ycbcr.br -= exp2(T(n-1))/(exp2(T(n))-1);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplExpandITUNarrowRange: + statement("template"); + statement("inline vec spvExpandITUNarrowRange(vec ycbcr, int n)"); + begin_scope(); + statement("ycbcr.g = (ycbcr.g * (exp2(T(n)) - 1) - ldexp(T(16), n - 8))/ldexp(T(219), n - 8);"); + statement("ycbcr.br = (ycbcr.br * (exp2(T(n)) - 1) - ldexp(T(128), n - 8))/ldexp(T(224), n - 8);"); + statement("return ycbcr;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplConvertYCbCrBT709: + statement("// cf. Khronos Data Format Specification, section 15.1.1"); + statement("constant float3x3 spvBT709Factors = {{1, 1, 1}, {0, -0.13397432/0.7152, 1.8556}, {1.5748, " + "-0.33480248/0.7152, 0}};"); + statement(""); + statement("template"); + statement("inline vec spvConvertYCbCrBT709(vec ycbcr)"); + begin_scope(); + statement("vec rgba;"); + statement("rgba.rgb = vec(spvBT709Factors * ycbcr.gbr);"); + statement("rgba.a = ycbcr.a;"); + statement("return rgba;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplConvertYCbCrBT601: + statement("// cf. Khronos Data Format Specification, section 15.1.2"); + statement("constant float3x3 spvBT601Factors = {{1, 1, 1}, {0, -0.202008/0.587, 1.772}, {1.402, " + "-0.419198/0.587, 0}};"); + statement(""); + statement("template"); + statement("inline vec spvConvertYCbCrBT601(vec ycbcr)"); + begin_scope(); + statement("vec rgba;"); + statement("rgba.rgb = vec(spvBT601Factors * ycbcr.gbr);"); + statement("rgba.a = ycbcr.a;"); + statement("return rgba;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplConvertYCbCrBT2020: + statement("// cf. 
Khronos Data Format Specification, section 15.1.3"); + statement("constant float3x3 spvBT2020Factors = {{1, 1, 1}, {0, -0.11156702/0.6780, 1.8814}, {1.4746, " + "-0.38737742/0.6780, 0}};"); + statement(""); + statement("template"); + statement("inline vec spvConvertYCbCrBT2020(vec ycbcr)"); + begin_scope(); + statement("vec rgba;"); + statement("rgba.rgb = vec(spvBT2020Factors * ycbcr.gbr);"); + statement("rgba.a = ycbcr.a;"); + statement("return rgba;"); + end_scope(); + statement(""); + break; + + case SPVFuncImplDynamicImageSampler: + statement("enum class spvFormatResolution"); + begin_scope(); + statement("_444 = 0,"); + statement("_422,"); + statement("_420"); + end_scope_decl(); + statement(""); + statement("enum class spvChromaFilter"); + begin_scope(); + statement("nearest = 0,"); + statement("linear"); + end_scope_decl(); + statement(""); + statement("enum class spvXChromaLocation"); + begin_scope(); + statement("cosited_even = 0,"); + statement("midpoint"); + end_scope_decl(); + statement(""); + statement("enum class spvYChromaLocation"); + begin_scope(); + statement("cosited_even = 0,"); + statement("midpoint"); + end_scope_decl(); + statement(""); + statement("enum class spvYCbCrModelConversion"); + begin_scope(); + statement("rgb_identity = 0,"); + statement("ycbcr_identity,"); + statement("ycbcr_bt_709,"); + statement("ycbcr_bt_601,"); + statement("ycbcr_bt_2020"); + end_scope_decl(); + statement(""); + statement("enum class spvYCbCrRange"); + begin_scope(); + statement("itu_full = 0,"); + statement("itu_narrow"); + end_scope_decl(); + statement(""); + statement("struct spvComponentBits"); + begin_scope(); + statement("constexpr explicit spvComponentBits(int v) thread : value(v) {}"); + statement("uchar value : 6;"); + end_scope_decl(); + statement("// A class corresponding to metal::sampler which holds sampler"); + statement("// Y'CbCr conversion info."); + statement("struct spvYCbCrSampler"); + begin_scope(); + statement("constexpr 
spvYCbCrSampler() thread : val(build()) {}"); + statement("template"); + statement("constexpr spvYCbCrSampler(Ts... t) thread : val(build(t...)) {}"); + statement("constexpr spvYCbCrSampler(const thread spvYCbCrSampler& s) thread = default;"); + statement(""); + statement("spvFormatResolution get_resolution() const thread"); + begin_scope(); + statement("return spvFormatResolution((val & resolution_mask) >> resolution_base);"); + end_scope(); + statement("spvChromaFilter get_chroma_filter() const thread"); + begin_scope(); + statement("return spvChromaFilter((val & chroma_filter_mask) >> chroma_filter_base);"); + end_scope(); + statement("spvXChromaLocation get_x_chroma_offset() const thread"); + begin_scope(); + statement("return spvXChromaLocation((val & x_chroma_off_mask) >> x_chroma_off_base);"); + end_scope(); + statement("spvYChromaLocation get_y_chroma_offset() const thread"); + begin_scope(); + statement("return spvYChromaLocation((val & y_chroma_off_mask) >> y_chroma_off_base);"); + end_scope(); + statement("spvYCbCrModelConversion get_ycbcr_model() const thread"); + begin_scope(); + statement("return spvYCbCrModelConversion((val & ycbcr_model_mask) >> ycbcr_model_base);"); + end_scope(); + statement("spvYCbCrRange get_ycbcr_range() const thread"); + begin_scope(); + statement("return spvYCbCrRange((val & ycbcr_range_mask) >> ycbcr_range_base);"); + end_scope(); + statement("int get_bpc() const thread { return (val & bpc_mask) >> bpc_base; }"); + statement(""); + statement("private:"); + statement("ushort val;"); + statement(""); + statement("constexpr static constant ushort resolution_bits = 2;"); + statement("constexpr static constant ushort chroma_filter_bits = 2;"); + statement("constexpr static constant ushort x_chroma_off_bit = 1;"); + statement("constexpr static constant ushort y_chroma_off_bit = 1;"); + statement("constexpr static constant ushort ycbcr_model_bits = 3;"); + statement("constexpr static constant ushort ycbcr_range_bit = 1;"); + 
statement("constexpr static constant ushort bpc_bits = 6;"); + statement(""); + statement("constexpr static constant ushort resolution_base = 0;"); + statement("constexpr static constant ushort chroma_filter_base = 2;"); + statement("constexpr static constant ushort x_chroma_off_base = 4;"); + statement("constexpr static constant ushort y_chroma_off_base = 5;"); + statement("constexpr static constant ushort ycbcr_model_base = 6;"); + statement("constexpr static constant ushort ycbcr_range_base = 9;"); + statement("constexpr static constant ushort bpc_base = 10;"); + statement(""); + statement( + "constexpr static constant ushort resolution_mask = ((1 << resolution_bits) - 1) << resolution_base;"); + statement("constexpr static constant ushort chroma_filter_mask = ((1 << chroma_filter_bits) - 1) << " + "chroma_filter_base;"); + statement("constexpr static constant ushort x_chroma_off_mask = ((1 << x_chroma_off_bit) - 1) << " + "x_chroma_off_base;"); + statement("constexpr static constant ushort y_chroma_off_mask = ((1 << y_chroma_off_bit) - 1) << " + "y_chroma_off_base;"); + statement("constexpr static constant ushort ycbcr_model_mask = ((1 << ycbcr_model_bits) - 1) << " + "ycbcr_model_base;"); + statement("constexpr static constant ushort ycbcr_range_mask = ((1 << ycbcr_range_bit) - 1) << " + "ycbcr_range_base;"); + statement("constexpr static constant ushort bpc_mask = ((1 << bpc_bits) - 1) << bpc_base;"); + statement(""); + statement("static constexpr ushort build()"); + begin_scope(); + statement("return 0;"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvFormatResolution res, Ts... t)"); + begin_scope(); + statement("return (ushort(res) << resolution_base) | (build(t...) & ~resolution_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvChromaFilter filt, Ts... 
t)"); + begin_scope(); + statement("return (ushort(filt) << chroma_filter_base) | (build(t...) & ~chroma_filter_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvXChromaLocation loc, Ts... t)"); + begin_scope(); + statement("return (ushort(loc) << x_chroma_off_base) | (build(t...) & ~x_chroma_off_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvYChromaLocation loc, Ts... t)"); + begin_scope(); + statement("return (ushort(loc) << y_chroma_off_base) | (build(t...) & ~y_chroma_off_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvYCbCrModelConversion model, Ts... t)"); + begin_scope(); + statement("return (ushort(model) << ycbcr_model_base) | (build(t...) & ~ycbcr_model_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvYCbCrRange range, Ts... t)"); + begin_scope(); + statement("return (ushort(range) << ycbcr_range_base) | (build(t...) & ~ycbcr_range_mask);"); + end_scope(); + statement(""); + statement("template"); + statement("static constexpr ushort build(spvComponentBits bpc, Ts... t)"); + begin_scope(); + statement("return (ushort(bpc.value) << bpc_base) | (build(t...) 
& ~bpc_mask);"); + end_scope(); + end_scope_decl(); + statement(""); + statement("// A class which can hold up to three textures and a sampler, including"); + statement("// Y'CbCr conversion info, used to pass combined image-samplers"); + statement("// dynamically to functions."); + statement("template"); + statement("struct spvDynamicImageSampler"); + begin_scope(); + statement("texture2d plane0;"); + statement("texture2d plane1;"); + statement("texture2d plane2;"); + statement("sampler samp;"); + statement("spvYCbCrSampler ycbcr_samp;"); + statement("uint swizzle = 0;"); + statement(""); + if (msl_options.swizzle_texture_samples) + { + statement("constexpr spvDynamicImageSampler(texture2d tex, sampler samp, uint sw) thread :"); + statement(" plane0(tex), samp(samp), swizzle(sw) {}"); + } + else + { + statement("constexpr spvDynamicImageSampler(texture2d tex, sampler samp) thread :"); + statement(" plane0(tex), samp(samp) {}"); + } + statement("constexpr spvDynamicImageSampler(texture2d tex, sampler samp, spvYCbCrSampler ycbcr_samp, " + "uint sw) thread :"); + statement(" plane0(tex), samp(samp), ycbcr_samp(ycbcr_samp), swizzle(sw) {}"); + statement("constexpr spvDynamicImageSampler(texture2d plane0, texture2d plane1,"); + statement(" sampler samp, spvYCbCrSampler ycbcr_samp, uint sw) thread :"); + statement(" plane0(plane0), plane1(plane1), samp(samp), ycbcr_samp(ycbcr_samp), swizzle(sw) {}"); + statement( + "constexpr spvDynamicImageSampler(texture2d plane0, texture2d plane1, texture2d plane2,"); + statement(" sampler samp, spvYCbCrSampler ycbcr_samp, uint sw) thread :"); + statement(" plane0(plane0), plane1(plane1), plane2(plane2), samp(samp), ycbcr_samp(ycbcr_samp), " + "swizzle(sw) {}"); + statement(""); + // XXX This is really hard to follow... I've left comments to make it a bit easier. + statement("template"); + statement("vec do_sample(float2 coord, LodOptions... 
options) const thread"); + begin_scope(); + statement("if (!is_null_texture(plane1))"); + begin_scope(); + statement("if (ycbcr_samp.get_resolution() == spvFormatResolution::_444 ||"); + statement(" ycbcr_samp.get_chroma_filter() == spvChromaFilter::nearest)"); + begin_scope(); + statement("if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructNearest(plane0, plane1, plane2, samp, coord,"); + statement(" spvForward(options)...);"); + statement( + "return spvChromaReconstructNearest(plane0, plane1, samp, coord, spvForward(options)...);"); + end_scope(); // if (resolution == 444 || chroma_filter == nearest) + statement("switch (ycbcr_samp.get_resolution())"); + begin_scope(); + statement("case spvFormatResolution::_444: break;"); + statement("case spvFormatResolution::_422:"); + begin_scope(); + statement("switch (ycbcr_samp.get_x_chroma_offset())"); + begin_scope(); + statement("case spvXChromaLocation::cosited_even:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear422CositedEven("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear422CositedEven("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward(options)...);"); + statement("case spvXChromaLocation::midpoint:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear422Midpoint("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear422Midpoint("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward(options)...);"); + end_scope(); // switch (x_chroma_offset) + end_scope(); // case 422: + statement("case spvFormatResolution::_420:"); + begin_scope(); + statement("switch (ycbcr_samp.get_x_chroma_offset())"); + begin_scope(); + statement("case spvXChromaLocation::cosited_even:"); + 
begin_scope(); + statement("switch (ycbcr_samp.get_y_chroma_offset())"); + begin_scope(); + statement("case spvYChromaLocation::cosited_even:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear420XCositedEvenYCositedEven("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear420XCositedEvenYCositedEven("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward(options)...);"); + statement("case spvYChromaLocation::midpoint:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear420XCositedEvenYMidpoint("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear420XCositedEvenYMidpoint("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward(options)...);"); + end_scope(); // switch (y_chroma_offset) + end_scope(); // case x::cosited_even: + statement("case spvXChromaLocation::midpoint:"); + begin_scope(); + statement("switch (ycbcr_samp.get_y_chroma_offset())"); + begin_scope(); + statement("case spvYChromaLocation::cosited_even:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear420XMidpointYCositedEven("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear420XMidpointYCositedEven("); + statement(" plane0, plane1, samp, coord,"); + statement(" spvForward(options)...);"); + statement("case spvYChromaLocation::midpoint:"); + statement(" if (!is_null_texture(plane2))"); + statement(" return spvChromaReconstructLinear420XMidpointYMidpoint("); + statement(" plane0, plane1, plane2, samp,"); + statement(" coord, spvForward(options)...);"); + statement(" return spvChromaReconstructLinear420XMidpointYMidpoint("); + statement(" plane0, 
plane1, samp, coord,"); + statement(" spvForward(options)...);"); + end_scope(); // switch (y_chroma_offset) + end_scope(); // case x::midpoint + end_scope(); // switch (x_chroma_offset) + end_scope(); // case 420: + end_scope(); // switch (resolution) + end_scope(); // if (multiplanar) + statement("return plane0.sample(samp, coord, spvForward(options)...);"); + end_scope(); // do_sample() + statement("template "); + statement("vec sample(float2 coord, LodOptions... options) const thread"); + begin_scope(); + statement( + "vec s = spvTextureSwizzle(do_sample(coord, spvForward(options)...), swizzle);"); + statement("if (ycbcr_samp.get_ycbcr_model() == spvYCbCrModelConversion::rgb_identity)"); + statement(" return s;"); + statement(""); + statement("switch (ycbcr_samp.get_ycbcr_range())"); + begin_scope(); + statement("case spvYCbCrRange::itu_full:"); + statement(" s = spvExpandITUFullRange(s, ycbcr_samp.get_bpc());"); + statement(" break;"); + statement("case spvYCbCrRange::itu_narrow:"); + statement(" s = spvExpandITUNarrowRange(s, ycbcr_samp.get_bpc());"); + statement(" break;"); + end_scope(); + statement(""); + statement("switch (ycbcr_samp.get_ycbcr_model())"); + begin_scope(); + statement("case spvYCbCrModelConversion::rgb_identity:"); // Silence Clang warning + statement("case spvYCbCrModelConversion::ycbcr_identity:"); + statement(" return s;"); + statement("case spvYCbCrModelConversion::ycbcr_bt_709:"); + statement(" return spvConvertYCbCrBT709(s);"); + statement("case spvYCbCrModelConversion::ycbcr_bt_601:"); + statement(" return spvConvertYCbCrBT601(s);"); + statement("case spvYCbCrModelConversion::ycbcr_bt_2020:"); + statement(" return spvConvertYCbCrBT2020(s);"); + end_scope(); + end_scope(); + statement(""); + // Sampler Y'CbCr conversion forbids offsets. 
+ statement("vec sample(float2 coord, int2 offset) const thread"); + begin_scope(); + if (msl_options.swizzle_texture_samples) + statement("return spvTextureSwizzle(plane0.sample(samp, coord, offset), swizzle);"); + else + statement("return plane0.sample(samp, coord, offset);"); + end_scope(); + statement("template"); + statement("vec sample(float2 coord, lod_options options, int2 offset) const thread"); + begin_scope(); + if (msl_options.swizzle_texture_samples) + statement("return spvTextureSwizzle(plane0.sample(samp, coord, options, offset), swizzle);"); + else + statement("return plane0.sample(samp, coord, options, offset);"); + end_scope(); + statement("#if __HAVE_MIN_LOD_CLAMP__"); + statement("vec sample(float2 coord, bias b, min_lod_clamp min_lod, int2 offset) const thread"); + begin_scope(); + statement("return plane0.sample(samp, coord, b, min_lod, offset);"); + end_scope(); + statement( + "vec sample(float2 coord, gradient2d grad, min_lod_clamp min_lod, int2 offset) const thread"); + begin_scope(); + statement("return plane0.sample(samp, coord, grad, min_lod, offset);"); + end_scope(); + statement("#endif"); + statement(""); + // Y'CbCr conversion forbids all operations but sampling. 
+ statement("vec read(uint2 coord, uint lod = 0) const thread"); + begin_scope(); + statement("return plane0.read(coord, lod);"); + end_scope(); + statement(""); + statement("vec gather(float2 coord, int2 offset = int2(0), component c = component::x) const thread"); + begin_scope(); + if (msl_options.swizzle_texture_samples) + statement("return spvGatherSwizzle(plane0, samp, swizzle, c, coord, offset);"); + else + statement("return plane0.gather(samp, coord, offset, c);"); + end_scope(); + end_scope_decl(); + statement(""); + default: break; } @@ -3707,7 +4556,7 @@ void CompilerMSL::emit_resources() void CompilerMSL::emit_specialization_constants_and_structs() { SpecializationConstant wg_x, wg_y, wg_z; - uint32_t workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); + ID workgroup_size_id = get_work_group_size_specialization_constants(wg_x, wg_y, wg_z); bool emitted = false; unordered_set declared_structs; @@ -3801,7 +4650,7 @@ void CompilerMSL::emit_specialization_constants_and_structs() // Output non-builtin interface structs. These include local function structs // and structs nested within uniform and read-write buffers. 
auto &type = id.get(); - uint32_t type_id = type.self; + TypeID type_id = type.self; bool is_struct = (type.basetype == SPIRType::Struct) && type.array.empty(); bool is_block = @@ -3940,7 +4789,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l } else if (is_array(mbr_type)) { - for (uint32_t k = 0; k < mbr_type.array[0]; k++, index++) + for (uint32_t k = 0; k < to_array_size_literal(mbr_type, 0); k++, index++) { set(const_mbr_id, type_id, index, false); auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), mbr_type, nullptr, @@ -3969,7 +4818,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l else // Must be an array { assert(is_array(*type)); - for (uint32_t j = 0; j < type->array[0]; j++, index++) + for (uint32_t j = 0; j < to_array_size_literal(*type, 0); j++, index++) { set(const_mbr_id, type_id, index, false); auto e = access_chain(ptr, indices.data(), uint32_t(indices.size()), *type, nullptr, true); @@ -4037,7 +4886,7 @@ bool CompilerMSL::emit_tessellation_access_chain(const uint32_t *ops, uint32_t l // expression so we don't try to dereference it as a variable pointer. // Don't do this if the index is a constant 1, though. We need to drop stores // to that one. - auto *m = ir.find_meta(var ? var->self : 0); + auto *m = ir.find_meta(var ? 
var->self : ID(0)); if (get_execution_model() == ExecutionModelTessellationControl && var && m && m->decoration.builtin_type == BuiltInTessLevelInner && get_entry_point().flags.get(ExecutionModeTriangles)) { @@ -4547,10 +5396,14 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) } else { - auto &e = emit_op(result_type, id, to_expression(ops[2]), true, true); auto *var = maybe_get_backing_variable(ops[2]); + SPIRExpression *e; + if (var && has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler)) + e = &emit_op(result_type, id, join(to_expression(ops[2]), ".plane0"), true, true); + else + e = &emit_op(result_type, id, to_expression(ops[2]), true, true); if (var) - e.loaded_from = var->self; + e->loaded_from = var->self; } break; } @@ -4776,6 +5629,12 @@ void CompilerMSL::emit_instruction(const Instruction &instruction) emit_op(ops[0], ops[1], "simd_is_helper_thread()", false); break; + case OpBeginInvocationInterlockEXT: + case OpEndInvocationInterlockEXT: + if (!msl_options.supports_msl_version(2, 0)) + SPIRV_CROSS_THROW("Raster order groups require MSL 2.0."); + break; // Nothing to do in the body + default: CompilerGLSL::emit_instruction(instruction); break; @@ -4909,19 +5768,10 @@ void CompilerMSL::emit_array_copy(const string &lhs, uint32_t rhs_id, StorageCla if (type.array.size() > SPVFuncImplArrayCopyMultidimMax) SPIRV_CROSS_THROW("Cannot support this many dimensions for arrays of arrays."); auto func = static_cast(SPVFuncImplArrayCopyMultidimBase + type.array.size()); - if (spv_function_implementations.count(func) == 0) - { - spv_function_implementations.insert(func); - suppress_missing_prototypes = true; - force_recompile(); - } - } - else if (spv_function_implementations.count(SPVFuncImplArrayCopy) == 0) - { - spv_function_implementations.insert(SPVFuncImplArrayCopy); - suppress_missing_prototypes = true; - force_recompile(); + add_spv_func_and_recompile(func); } + else + 
add_spv_func_and_recompile(SPVFuncImplArrayCopy); bool lhs_thread = lhs_storage == StorageClassFunction || lhs_storage == StorageClassGeneric || lhs_storage == StorageClassPrivate; @@ -5370,7 +6220,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) local_variable_names = resource_names; - processing_entry_point = (func.self == ir.default_entry_point); + processing_entry_point = func.self == ir.default_entry_point; string decl = processing_entry_point ? "" : "inline "; @@ -5415,7 +6265,7 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) for (auto var_id : vars_needing_early_declaration) { auto &ed_var = get(var_id); - uint32_t &initializer = ed_var.initializer; + ID &initializer = ed_var.initializer; if (!initializer) initializer = ir.increase_bound_by(1); @@ -5444,13 +6294,27 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) decl += argument_decl(arg); - // Manufacture automatic sampler arg for SampledImage texture + bool is_dynamic_img_sampler = has_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler); + auto &arg_type = get(arg.type); - if (arg_type.basetype == SPIRType::SampledImage && arg_type.image.dim != DimBuffer) - decl += join(", thread const ", sampler_type(arg_type), " ", to_sampler_expression(arg.id)); + if (arg_type.basetype == SPIRType::SampledImage && !is_dynamic_img_sampler) + { + // Manufacture automatic plane args for multiplanar texture + uint32_t planes = 1; + if (auto *constexpr_sampler = find_constexpr_sampler(name_id)) + if (constexpr_sampler->ycbcr_conversion_enable) + planes = constexpr_sampler->planes; + for (uint32_t i = 1; i < planes; i++) + decl += join(", ", argument_decl(arg), plane_name_suffix, i); + + // Manufacture automatic sampler arg for SampledImage texture + if (arg_type.image.dim != DimBuffer) + decl += join(", thread const ", sampler_type(arg_type), " ", to_sampler_expression(arg.id)); + } // Manufacture automatic 
swizzle arg. - if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(arg_type)) + if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(arg_type) && + !is_dynamic_img_sampler) { bool arg_is_array = !arg_type.array.empty(); decl += join(", constant uint", arg_is_array ? "* " : "& ", to_swizzle_expression(arg.id)); @@ -5470,60 +6334,157 @@ void CompilerMSL::emit_function_prototype(SPIRFunction &func, const Bitset &) statement(decl); } -// Returns the texture sampling function string for the specified image and sampling characteristics. -string CompilerMSL::to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool, bool, - bool has_offset, bool, bool has_dref, uint32_t, uint32_t) +static bool needs_chroma_reconstruction(const MSLConstexprSampler *constexpr_sampler) { + // For now, only multiplanar images need explicit reconstruction. GBGR and BGRG images + // use implicit reconstruction. + return constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && constexpr_sampler->planes > 1; +} + +// Returns the texture sampling function string for the specified image and sampling characteristics. +string CompilerMSL::to_function_name(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool, bool, + bool, bool, bool has_dref, uint32_t, uint32_t) +{ + const MSLConstexprSampler *constexpr_sampler = nullptr; + bool is_dynamic_img_sampler = false; + if (auto *var = maybe_get_backing_variable(img)) + { + constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self)); + is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler); + } + // Special-case gather. We have to alter the component being looked up // in the swizzle case. 
- if (msl_options.swizzle_texture_samples && is_gather) + if (msl_options.swizzle_texture_samples && is_gather && !is_dynamic_img_sampler && + (!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable)) { - string fname = imgtype.image.depth ? "spvGatherCompareSwizzle" : "spvGatherSwizzle"; - fname += "<" + type_to_glsl(get(imgtype.image.type)) + ", metal::" + type_to_glsl(imgtype); - // Add the arg types ourselves. Yes, this sucks, but Clang can't - // deduce template pack parameters in the middle of an argument list. - switch (imgtype.image.dim) - { - case Dim2D: - fname += ", float2"; - if (imgtype.image.arrayed) - fname += ", uint"; - if (imgtype.image.depth) - fname += ", float"; - if (!imgtype.image.depth || has_offset) - fname += ", int2"; - break; - case DimCube: - fname += ", float3"; - if (imgtype.image.arrayed) - fname += ", uint"; - if (imgtype.image.depth) - fname += ", float"; - break; - default: - SPIRV_CROSS_THROW("Invalid texture dimension for gather op."); - } - fname += ">"; - return fname; + add_spv_func_and_recompile(imgtype.image.depth ? SPVFuncImplGatherCompareSwizzle : SPVFuncImplGatherSwizzle); + return imgtype.image.depth ? "spvGatherCompareSwizzle" : "spvGatherSwizzle"; } auto *combined = maybe_get(img); // Texture reference - string fname = to_expression(combined ? combined->image : img) + "."; - if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype)) - fname = "spvTextureSwizzle(" + fname; - - // Texture function and sampler - if (is_fetch) - fname += "read"; - else if (is_gather) - fname += "gather"; + string fname; + if (needs_chroma_reconstruction(constexpr_sampler) && !is_dynamic_img_sampler) + { + if (constexpr_sampler->planes != 2 && constexpr_sampler->planes != 3) + SPIRV_CROSS_THROW("Unhandled number of color image planes!"); + // 444 images aren't downsampled, so we don't need to do linear filtering. 
+ if (constexpr_sampler->resolution == MSL_FORMAT_RESOLUTION_444 || + constexpr_sampler->chroma_filter == MSL_SAMPLER_FILTER_NEAREST) + { + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile(SPVFuncImplChromaReconstructNearest2Plane); + else + add_spv_func_and_recompile(SPVFuncImplChromaReconstructNearest3Plane); + fname = "spvChromaReconstructNearest"; + } + else // Linear with a downsampled format + { + fname = "spvChromaReconstructLinear"; + switch (constexpr_sampler->resolution) + { + case MSL_FORMAT_RESOLUTION_444: + assert(false); + break; // not reached + case MSL_FORMAT_RESOLUTION_422: + switch (constexpr_sampler->x_chroma_offset) + { + case MSL_CHROMA_LOCATION_COSITED_EVEN: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422CositedEven2Plane); + else + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422CositedEven3Plane); + fname += "422CositedEven"; + break; + case MSL_CHROMA_LOCATION_MIDPOINT: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422Midpoint2Plane); + else + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear422Midpoint3Plane); + fname += "422Midpoint"; + break; + default: + SPIRV_CROSS_THROW("Invalid chroma location."); + } + break; + case MSL_FORMAT_RESOLUTION_420: + fname += "420"; + switch (constexpr_sampler->x_chroma_offset) + { + case MSL_CHROMA_LOCATION_COSITED_EVEN: + switch (constexpr_sampler->y_chroma_offset) + { + case MSL_CHROMA_LOCATION_COSITED_EVEN: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane); + else + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane); + fname += "XCositedEvenYCositedEven"; + break; + case MSL_CHROMA_LOCATION_MIDPOINT: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile( + 
SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane); + else + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane); + fname += "XCositedEvenYMidpoint"; + break; + default: + SPIRV_CROSS_THROW("Invalid Y chroma location."); + } + break; + case MSL_CHROMA_LOCATION_MIDPOINT: + switch (constexpr_sampler->y_chroma_offset) + { + case MSL_CHROMA_LOCATION_COSITED_EVEN: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane); + else + add_spv_func_and_recompile( + SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane); + fname += "XMidpointYCositedEven"; + break; + case MSL_CHROMA_LOCATION_MIDPOINT: + if (constexpr_sampler->planes == 2) + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane); + else + add_spv_func_and_recompile(SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane); + fname += "XMidpointYMidpoint"; + break; + default: + SPIRV_CROSS_THROW("Invalid Y chroma location."); + } + break; + default: + SPIRV_CROSS_THROW("Invalid X chroma location."); + } + break; + default: + SPIRV_CROSS_THROW("Invalid format resolution."); + } + } + } else - fname += "sample"; + { + fname = to_expression(combined ? combined->image : img) + "."; - if (has_dref) - fname += "_compare"; + // Texture function and sampler + if (is_fetch) + fname += "read"; + else if (is_gather) + fname += "gather"; + else + fname += "sample"; + + if (has_dref) + fname += "_compare"; + } return fname; } @@ -5544,26 +6505,66 @@ static inline bool sampling_type_needs_f32_conversion(const SPIRType &type) } // Returns the function args for a texture sampling function for the specified image and sampling characteristics. 
-string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, - uint32_t coord, uint32_t, uint32_t dref, uint32_t grad_x, uint32_t grad_y, - uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, uint32_t comp, - uint32_t sample, uint32_t minlod, bool *p_forward) +string CompilerMSL::to_function_args(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, + bool is_proj, uint32_t coord, uint32_t, uint32_t dref, uint32_t grad_x, + uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, + uint32_t comp, uint32_t sample, uint32_t minlod, bool *p_forward) { - string farg_str; - if (!is_fetch) - farg_str += to_sampler_expression(img); - - if (msl_options.swizzle_texture_samples && is_gather) + const MSLConstexprSampler *constexpr_sampler = nullptr; + bool is_dynamic_img_sampler = false; + if (auto *var = maybe_get_backing_variable(img)) { - if (!farg_str.empty()) - farg_str += ", "; + constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self)); + is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler); + } - auto *combined = maybe_get(img); - farg_str += to_expression(combined ? combined->image : img); + string farg_str; + bool forward = true; + + if (!is_dynamic_img_sampler) + { + // Texture reference (for some cases) + if (needs_chroma_reconstruction(constexpr_sampler)) + { + // Multiplanar images need two or three textures. + farg_str += to_expression(img); + for (uint32_t i = 1; i < constexpr_sampler->planes; i++) + farg_str += join(", ", to_expression(img), plane_name_suffix, i); + } + else if ((!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable) && + msl_options.swizzle_texture_samples && is_gather) + { + auto *combined = maybe_get(img); + farg_str += to_expression(combined ? 
combined->image : img); + } + + // Sampler reference + if (!is_fetch) + { + if (!farg_str.empty()) + farg_str += ", "; + farg_str += to_sampler_expression(img); + } + + if ((!constexpr_sampler || !constexpr_sampler->ycbcr_conversion_enable) && + msl_options.swizzle_texture_samples && is_gather) + { + // Add the swizzle constant from the swizzle buffer. + farg_str += ", " + to_swizzle_expression(img); + used_swizzle_buffer = true; + } + + // Swizzled gather puts the component before the other args, to allow template + // deduction to work. + if (comp && msl_options.swizzle_texture_samples) + { + forward = should_forward(comp); + farg_str += ", " + to_component_argument(comp); + } } // Texture coordinates - bool forward = should_forward(coord); + forward = forward && should_forward(coord); auto coord_expr = to_enclosed_expression(coord); auto &coord_type = expression_type(coord); bool coord_is_fp = type_is_floating_point(coord_type); @@ -5882,8 +6883,11 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool if (imgtype.image.dim == Dim2D && offset_expr.empty()) farg_str += ", int2(0)"; - forward = forward && should_forward(comp); - farg_str += ", " + to_component_argument(comp); + if (!msl_options.swizzle_texture_samples || is_dynamic_img_sampler) + { + forward = forward && should_forward(comp); + farg_str += ", " + to_component_argument(comp); + } } if (sample) @@ -5893,15 +6897,6 @@ string CompilerMSL::to_function_args(uint32_t img, const SPIRType &imgtype, bool farg_str += to_expression(sample); } - if (msl_options.swizzle_texture_samples && is_sampled_image_type(imgtype)) - { - // Add the swizzle constant from the swizzle buffer. 
- if (!is_gather) - farg_str += ")"; - farg_str += ", " + to_swizzle_expression(img); - used_swizzle_buffer = true; - } - *p_forward = forward; return farg_str; @@ -5948,12 +6943,216 @@ void CompilerMSL::emit_sampled_image_op(uint32_t result_type, uint32_t result_id set(result_id, result_type, image_id, samp_id); } +string CompilerMSL::to_texture_op(const Instruction &i, bool *forward, SmallVector &inherited_expressions) +{ + auto *ops = stream(i); + uint32_t result_type_id = ops[0]; + uint32_t img = ops[2]; + auto &result_type = get(result_type_id); + auto op = static_cast(i.op); + bool is_gather = (op == OpImageGather || op == OpImageDrefGather); + + // Bypass pointers because we need the real image struct + auto &type = expression_type(img); + auto &imgtype = get(type.self); + + const MSLConstexprSampler *constexpr_sampler = nullptr; + bool is_dynamic_img_sampler = false; + if (auto *var = maybe_get_backing_variable(img)) + { + constexpr_sampler = find_constexpr_sampler(var->basevariable ? var->basevariable : VariableID(var->self)); + is_dynamic_img_sampler = has_extended_decoration(var->self, SPIRVCrossDecorationDynamicImageSampler); + } + + string expr; + if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && !is_dynamic_img_sampler) + { + // If this needs sampler Y'CbCr conversion, we need to do some additional + // processing. 
+ switch (constexpr_sampler->ycbcr_model) + { + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709: + add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT709); + expr += "spvConvertYCbCrBT709("; + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601: + add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT601); + expr += "spvConvertYCbCrBT601("; + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020: + add_spv_func_and_recompile(SPVFuncImplConvertYCbCrBT2020); + expr += "spvConvertYCbCrBT2020("; + break; + default: + SPIRV_CROSS_THROW("Invalid Y'CbCr model conversion."); + } + + if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) + { + switch (constexpr_sampler->ycbcr_range) + { + case MSL_SAMPLER_YCBCR_RANGE_ITU_FULL: + add_spv_func_and_recompile(SPVFuncImplExpandITUFullRange); + expr += "spvExpandITUFullRange("; + break; + case MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW: + add_spv_func_and_recompile(SPVFuncImplExpandITUNarrowRange); + expr += "spvExpandITUNarrowRange("; + break; + default: + SPIRV_CROSS_THROW("Invalid Y'CbCr range."); + } + } + } + else if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype) && + !is_dynamic_img_sampler) + { + add_spv_func_and_recompile(SPVFuncImplTextureSwizzle); + expr += "spvTextureSwizzle("; + } + + string inner_expr = CompilerGLSL::to_texture_op(i, forward, inherited_expressions); + + if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable && !is_dynamic_img_sampler) + { + if (!constexpr_sampler->swizzle_is_identity()) + { + static const char swizzle_names[] = "rgba"; + if (!constexpr_sampler->swizzle_has_one_or_zero()) + { + // If we can, do it inline. 
+ expr += inner_expr + "."; + for (uint32_t c = 0; c < 4; c++) + { + switch (constexpr_sampler->swizzle[c]) + { + case MSL_COMPONENT_SWIZZLE_IDENTITY: + expr += swizzle_names[c]; + break; + case MSL_COMPONENT_SWIZZLE_R: + case MSL_COMPONENT_SWIZZLE_G: + case MSL_COMPONENT_SWIZZLE_B: + case MSL_COMPONENT_SWIZZLE_A: + expr += swizzle_names[constexpr_sampler->swizzle[c] - MSL_COMPONENT_SWIZZLE_R]; + break; + default: + SPIRV_CROSS_THROW("Invalid component swizzle."); + } + } + } + else + { + // Otherwise, we need to emit a temporary and swizzle that. + uint32_t temp_id = ir.increase_bound_by(1); + emit_op(result_type_id, temp_id, inner_expr, false); + for (auto &inherit : inherited_expressions) + inherit_expression_dependencies(temp_id, inherit); + inherited_expressions.clear(); + inherited_expressions.push_back(temp_id); + + switch (op) + { + case OpImageSampleDrefImplicitLod: + case OpImageSampleImplicitLod: + case OpImageSampleProjImplicitLod: + case OpImageSampleProjDrefImplicitLod: + register_control_dependent_expression(temp_id); + break; + + default: + break; + } + expr += type_to_glsl(result_type) + "("; + for (uint32_t c = 0; c < 4; c++) + { + switch (constexpr_sampler->swizzle[c]) + { + case MSL_COMPONENT_SWIZZLE_IDENTITY: + expr += to_expression(temp_id) + "." + swizzle_names[c]; + break; + case MSL_COMPONENT_SWIZZLE_ZERO: + expr += "0"; + break; + case MSL_COMPONENT_SWIZZLE_ONE: + expr += "1"; + break; + case MSL_COMPONENT_SWIZZLE_R: + case MSL_COMPONENT_SWIZZLE_G: + case MSL_COMPONENT_SWIZZLE_B: + case MSL_COMPONENT_SWIZZLE_A: + expr += to_expression(temp_id) + "." 
+ + swizzle_names[constexpr_sampler->swizzle[c] - MSL_COMPONENT_SWIZZLE_R]; + break; + default: + SPIRV_CROSS_THROW("Invalid component swizzle."); + } + if (c < 3) + expr += ", "; + } + expr += ")"; + } + } + else + expr += inner_expr; + if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY) + { + expr += join(", ", constexpr_sampler->bpc, ")"); + if (constexpr_sampler->ycbcr_model != MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY) + expr += ")"; + } + } + else + { + expr += inner_expr; + if (msl_options.swizzle_texture_samples && !is_gather && is_sampled_image_type(imgtype) && + !is_dynamic_img_sampler) + { + // Add the swizzle constant from the swizzle buffer. + expr += ", " + to_swizzle_expression(img) + ")"; + used_swizzle_buffer = true; + } + } + + return expr; +} + +static string create_swizzle(MSLComponentSwizzle swizzle) +{ + switch (swizzle) + { + case MSL_COMPONENT_SWIZZLE_IDENTITY: + return "spvSwizzle::none"; + case MSL_COMPONENT_SWIZZLE_ZERO: + return "spvSwizzle::zero"; + case MSL_COMPONENT_SWIZZLE_ONE: + return "spvSwizzle::one"; + case MSL_COMPONENT_SWIZZLE_R: + return "spvSwizzle::red"; + case MSL_COMPONENT_SWIZZLE_G: + return "spvSwizzle::green"; + case MSL_COMPONENT_SWIZZLE_B: + return "spvSwizzle::blue"; + case MSL_COMPONENT_SWIZZLE_A: + return "spvSwizzle::alpha"; + default: + SPIRV_CROSS_THROW("Invalid component swizzle."); + return ""; + } +} + // Returns a string representation of the ID, usable as a function arg. // Manufacture automatic sampler arg for SampledImage texture. -string CompilerMSL::to_func_call_arg(uint32_t id) +string CompilerMSL::to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) { string arg_str; + auto &type = expression_type(id); + bool is_dynamic_img_sampler = has_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler); + // If the argument *itself* is a "dynamic" combined-image sampler, then we can just pass that around. 
+ bool arg_is_dynamic_img_sampler = has_extended_decoration(id, SPIRVCrossDecorationDynamicImageSampler); + if (is_dynamic_img_sampler && !arg_is_dynamic_img_sampler) + arg_str = join("spvDynamicImageSampler<", type_to_glsl(get(type.image.type)), ">("); + auto *c = maybe_get(id); if (c && !get(c->constant_type).array.empty()) { @@ -5968,7 +7167,7 @@ string CompilerMSL::to_func_call_arg(uint32_t id) // so just create a thread local copy in the current function. arg_str = join("_", id, "_array_copy"); auto &constants = current_function->constant_arrays_needed_on_stack; - auto itr = find(begin(constants), end(constants), id); + auto itr = find(begin(constants), end(constants), ID(id)); if (itr == end(constants)) { force_recompile(); @@ -5976,35 +7175,107 @@ string CompilerMSL::to_func_call_arg(uint32_t id) } } else - arg_str = CompilerGLSL::to_func_call_arg(id); + arg_str += CompilerGLSL::to_func_call_arg(arg, id); - // Manufacture automatic sampler arg if the arg is a SampledImage texture. - auto &type = expression_type(id); - if (type.basetype == SPIRType::SampledImage && type.image.dim != DimBuffer) + if (!arg_is_dynamic_img_sampler) { // Need to check the base variable in case we need to apply a qualified alias. uint32_t var_id = 0; - auto *sampler_var = maybe_get(id); - if (sampler_var) - var_id = sampler_var->basevariable; + auto *var = maybe_get(id); + if (var) + var_id = var->basevariable; - arg_str += ", " + to_sampler_expression(var_id ? var_id : id); + auto *constexpr_sampler = find_constexpr_sampler(var_id ? 
var_id : id); + if (type.basetype == SPIRType::SampledImage) + { + // Manufacture automatic plane args for multiplanar texture + uint32_t planes = 1; + if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable) + { + planes = constexpr_sampler->planes; + // If this parameter isn't aliasing a global, then we need to use + // the special "dynamic image-sampler" class to pass it--and we need + // to use it for *every* non-alias parameter, in case a combined + // image-sampler with a Y'CbCr conversion is passed. Hopefully, this + // pathological case is so rare that it should never be hit in practice. + if (!arg.alias_global_variable) + add_spv_func_and_recompile(SPVFuncImplDynamicImageSampler); + } + for (uint32_t i = 1; i < planes; i++) + arg_str += join(", ", CompilerGLSL::to_func_call_arg(arg, id), plane_name_suffix, i); + // Manufacture automatic sampler arg if the arg is a SampledImage texture. + if (type.image.dim != DimBuffer) + arg_str += ", " + to_sampler_expression(var_id ? 
var_id : id); + + // Add sampler Y'CbCr conversion info if we have it + if (is_dynamic_img_sampler && constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable) + { + SmallVector samp_args; + + switch (constexpr_sampler->resolution) + { + case MSL_FORMAT_RESOLUTION_444: + // Default + break; + case MSL_FORMAT_RESOLUTION_422: + samp_args.push_back("spvFormatResolution::_422"); + break; + case MSL_FORMAT_RESOLUTION_420: + samp_args.push_back("spvFormatResolution::_420"); + break; + default: + SPIRV_CROSS_THROW("Invalid format resolution."); + } + + if (constexpr_sampler->chroma_filter != MSL_SAMPLER_FILTER_NEAREST) + samp_args.push_back("spvChromaFilter::linear"); + + if (constexpr_sampler->x_chroma_offset != MSL_CHROMA_LOCATION_COSITED_EVEN) + samp_args.push_back("spvXChromaLocation::midpoint"); + if (constexpr_sampler->y_chroma_offset != MSL_CHROMA_LOCATION_COSITED_EVEN) + samp_args.push_back("spvYChromaLocation::midpoint"); + switch (constexpr_sampler->ycbcr_model) + { + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY: + // Default + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY: + samp_args.push_back("spvYCbCrModelConversion::ycbcr_identity"); + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709: + samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_709"); + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601: + samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_601"); + break; + case MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020: + samp_args.push_back("spvYCbCrModelConversion::ycbcr_bt_2020"); + break; + default: + SPIRV_CROSS_THROW("Invalid Y'CbCr model conversion."); + } + if (constexpr_sampler->ycbcr_range != MSL_SAMPLER_YCBCR_RANGE_ITU_FULL) + samp_args.push_back("spvYCbCrRange::itu_narrow"); + samp_args.push_back(join("spvComponentBits(", constexpr_sampler->bpc, ")")); + arg_str += join(", spvYCbCrSampler(", merge(samp_args), ")"); + } + } + + if (is_dynamic_img_sampler && constexpr_sampler && 
constexpr_sampler->ycbcr_conversion_enable) + arg_str += join(", (uint(", create_swizzle(constexpr_sampler->swizzle[3]), ") << 24) | (uint(", + create_swizzle(constexpr_sampler->swizzle[2]), ") << 16) | (uint(", + create_swizzle(constexpr_sampler->swizzle[1]), ") << 8) | uint(", + create_swizzle(constexpr_sampler->swizzle[0]), ")"); + else if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type)) + arg_str += ", " + to_swizzle_expression(var_id ? var_id : id); + + if (buffers_requiring_array_length.count(var_id)) + arg_str += ", " + to_buffer_size_expression(var_id ? var_id : id); + + if (is_dynamic_img_sampler) + arg_str += ")"; } - uint32_t var_id = 0; - auto *var = maybe_get(id); - if (var) - var_id = var->basevariable; - - if (msl_options.swizzle_texture_samples && has_sampled_images && is_sampled_image_type(type)) - { - // Need to check the base variable in case we need to apply a qualified alias. - arg_str += ", " + to_swizzle_expression(var_id ? var_id : id); - } - - if (buffers_requiring_array_length.count(var_id)) - arg_str += ", " + to_buffer_size_expression(var_id ? var_id : id); - return arg_str; } @@ -6014,7 +7285,7 @@ string CompilerMSL::to_func_call_arg(uint32_t id) string CompilerMSL::to_sampler_expression(uint32_t id) { auto *combined = maybe_get(id); - auto expr = to_expression(combined ? combined->image : id); + auto expr = to_expression(combined ? combined->image : VariableID(id)); auto index = expr.find_first_of('['); uint32_t samp_id = 0; @@ -6035,7 +7306,7 @@ string CompilerMSL::to_swizzle_expression(uint32_t id) { auto *combined = maybe_get(id); - auto expr = to_expression(combined ? combined->image : id); + auto expr = to_expression(combined ? combined->image : VariableID(id)); auto index = expr.find_first_of('['); // If an image is part of an argument buffer translate this to a legal identifier. 
@@ -6259,8 +7530,15 @@ string CompilerMSL::member_attribute_qualifier(const SPIRType &type, uint32_t in bool is_builtin = is_member_builtin(type, index, &builtin); if (has_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary)) - return join(" [[id(", - get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")]]"); + { + string quals = join( + " [[id(", get_extended_member_decoration(type.self, index, SPIRVCrossDecorationResourceIndexPrimary), ")"); + if (interlocked_resources.count( + get_extended_member_decoration(type.self, index, SPIRVCrossDecorationInterfaceOrigID))) + quals += ", raster_order_group(0)"; + quals += "]]"; + return quals; + } // Vertex function inputs if (execution.model == ExecutionModelVertex && type.storage == StorageClassInput) @@ -6965,19 +8243,24 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) string name; SPIRType::BaseType basetype; uint32_t index; + uint32_t plane; }; SmallVector resources; - ir.for_each_typed_id([&](uint32_t, SPIRVariable &var) { + ir.for_each_typed_id([&](uint32_t var_id, SPIRVariable &var) { if ((var.storage == StorageClassUniform || var.storage == StorageClassUniformConstant || var.storage == StorageClassPushConstant || var.storage == StorageClassStorageBuffer) && !is_hidden_variable(var)) { auto &type = get_variable_data_type(var); - uint32_t var_id = var.self; - if (var.storage != StorageClassPushConstant) + // Very specifically, image load-store in argument buffers are disallowed on MSL on iOS. + // But we won't know when the argument buffer is encoded whether this image will have + // a NonWritable decoration. So just use discrete arguments for all storage images + // on iOS. 
+ if (!(msl_options.is_ios() && type.basetype == SPIRType::Image && type.image.sampled == 2) && + var.storage != StorageClassPushConstant) { uint32_t desc_set = get_decoration(var_id, DecorationDescriptorSet); if (descriptor_set_is_argument_buffer(desc_set)) @@ -6998,13 +8281,19 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) if (type.basetype == SPIRType::SampledImage) { add_resource_name(var_id); - resources.push_back( - { &var, to_name(var_id), SPIRType::Image, get_metal_resource_index(var, SPIRType::Image) }); + + uint32_t plane_count = 1; + if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable) + plane_count = constexpr_sampler->planes; + + for (uint32_t i = 0; i < plane_count; i++) + resources.push_back({ &var, to_name(var_id), SPIRType::Image, + get_metal_resource_index(var, SPIRType::Image, i), i }); if (type.image.dim != DimBuffer && !constexpr_sampler) { resources.push_back({ &var, to_sampler_expression(var_id), SPIRType::Sampler, - get_metal_resource_index(var, SPIRType::Sampler) }); + get_metal_resource_index(var, SPIRType::Sampler), 0 }); } } else if (!constexpr_sampler) @@ -7012,7 +8301,7 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) // constexpr samplers are not declared as resources. 
add_resource_name(var_id); resources.push_back( - { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype) }); + { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype), 0 }); } } }); @@ -7055,7 +8344,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) ep_args += ", "; ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "* " + to_restrict(var_id) + r.name + "_" + convert_to_string(i); - ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")]]"; + ep_args += " [[buffer(" + convert_to_string(r.index + i) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; } } else @@ -7064,7 +8356,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) ep_args += ", "; ep_args += get_argument_address_space(var) + " " + type_to_glsl(type) + "& " + to_restrict(var_id) + r.name; - ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]"; + ep_args += " [[buffer(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; } break; } @@ -7078,7 +8373,12 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) if (!ep_args.empty()) ep_args += ", "; ep_args += image_type_glsl(type, var_id) + " " + r.name; - ep_args += " [[texture(" + convert_to_string(r.index) + ")]]"; + if (r.plane > 0) + ep_args += join(plane_name_suffix, r.plane); + ep_args += " [[texture(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; break; default: if (!ep_args.empty()) @@ -7088,7 +8388,10 @@ void CompilerMSL::entry_point_args_discrete_descriptors(string &ep_args) type_to_glsl(type, var_id) + "& " + r.name; else ep_args += type_to_glsl(type, var_id) + " " + r.name; - ep_args += " [[buffer(" + convert_to_string(r.index) + ")]]"; + ep_args += 
" [[buffer(" + convert_to_string(r.index) + ")"; + if (interlocked_resources.count(var_id)) + ep_args += ", raster_order_group(0)"; + ep_args += "]]"; break; } } @@ -7444,7 +8747,7 @@ void CompilerMSL::fix_up_shader_inputs_outputs() } // Returns the Metal index of the resource of the specified type as used by the specified variable. -uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype) +uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane) { auto &execution = get_entry_point(); auto &var_dec = ir.meta[var.self].decoration; @@ -7458,6 +8761,10 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base auto resource_decoration = var_type.basetype == SPIRType::SampledImage && basetype == SPIRType::Sampler ? SPIRVCrossDecorationResourceIndexSecondary : SPIRVCrossDecorationResourceIndexPrimary; + if (plane == 1) + resource_decoration = SPIRVCrossDecorationResourceIndexTertiary; + if (plane == 2) + resource_decoration = SPIRVCrossDecorationResourceIndexQuaternary; if (itr != end(resource_bindings)) { @@ -7466,8 +8773,8 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base switch (basetype) { case SPIRType::Image: - set_extended_decoration(var.self, resource_decoration, remap.first.msl_texture); - return remap.first.msl_texture; + set_extended_decoration(var.self, resource_decoration, remap.first.msl_texture + plane); + return remap.first.msl_texture + plane; case SPIRType::Sampler: set_extended_decoration(var.self, resource_decoration, remap.first.msl_sampler); return remap.first.msl_sampler; @@ -7487,7 +8794,7 @@ uint32_t CompilerMSL::get_metal_resource_index(SPIRVariable &var, SPIRType::Base uint32_t binding_stride = 1; auto &type = get(var.basetype); for (uint32_t i = 0; i < uint32_t(type.array.size()); i++) - binding_stride *= type.array_size_literal[i] ? 
type.array[i] : get(type.array[i]).scalar(); + binding_stride *= to_array_size_literal(type, i); assert(binding_stride != 0); @@ -7560,13 +8867,28 @@ string CompilerMSL::argument_decl(const SPIRFunction::Parameter &arg) if (constref) decl += "const "; + // If this is a combined image-sampler for a 2D image with floating-point type, + // we emitted the 'spvDynamicImageSampler' type, and this is *not* an alias parameter + // for a global, then we need to emit a "dynamic" combined image-sampler. + // Unfortunately, this is necessary to properly support passing around + // combined image-samplers with Y'CbCr conversions on them. + bool is_dynamic_img_sampler = !arg.alias_global_variable && type.basetype == SPIRType::SampledImage && + type.image.dim == Dim2D && type_is_floating_point(get(type.image.type)) && + spv_function_implementations.count(SPVFuncImplDynamicImageSampler); + bool builtin = is_builtin_variable(var); - if (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id) + if (var.basevariable && (var.basevariable == stage_in_ptr_var_id || var.basevariable == stage_out_ptr_var_id)) decl += type_to_glsl(type, arg.id); else if (builtin) decl += builtin_type_decl(static_cast(get_decoration(arg.id, DecorationBuiltIn)), arg.id); else if ((storage == StorageClassUniform || storage == StorageClassStorageBuffer) && is_array(type)) decl += join(type_to_glsl(type, arg.id), "*"); + else if (is_dynamic_img_sampler) + { + decl += join("spvDynamicImageSampler<", type_to_glsl(get(type.image.type)), ">"); + // Mark the variable so that we can handle passing it to another function. + set_extended_decoration(arg.id, SPIRVCrossDecorationDynamicImageSampler); + } else decl += type_to_glsl(type, arg.id); @@ -9245,6 +10567,17 @@ bool CompilerMSL::SampledImageScanner::handle(spv::Op opcode, const uint32_t *ar return true; } +// If a needed custom function wasn't added before, add it and force a recompile. 
+void CompilerMSL::add_spv_func_and_recompile(SPVFuncImpl spv_func) +{ + if (spv_function_implementations.count(spv_func) == 0) + { + spv_function_implementations.insert(spv_func); + suppress_missing_prototypes = true; + force_recompile(); + } +} + bool CompilerMSL::OpCodePreprocessor::handle(Op opcode, const uint32_t *args, uint32_t length) { // Since MSL exists in a single execution scope, function prototype declarations are not @@ -9422,27 +10755,9 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o uint32_t tid = result_types[args[opcode == OpImageWrite ? 0 : 2]]; if (tid && compiler.get(tid).image.dim == DimBuffer && !compiler.msl_options.texture_buffer_native) return SPVFuncImplTexelBufferCoords; - - if (opcode == OpImageFetch && compiler.msl_options.swizzle_texture_samples) - return SPVFuncImplTextureSwizzle; - break; } - case OpImageSampleExplicitLod: - case OpImageSampleProjExplicitLod: - case OpImageSampleDrefExplicitLod: - case OpImageSampleProjDrefExplicitLod: - case OpImageSampleImplicitLod: - case OpImageSampleProjImplicitLod: - case OpImageSampleDrefImplicitLod: - case OpImageSampleProjDrefImplicitLod: - case OpImageGather: - case OpImageDrefGather: - if (compiler.msl_options.swizzle_texture_samples) - return SPVFuncImplTextureSwizzle; - break; - case OpExtInst: { uint32_t extension_set = args[2]; @@ -9468,24 +10783,21 @@ CompilerMSL::SPVFuncImpl CompilerMSL::OpCodePreprocessor::get_spv_func_impl(Op o auto &type = compiler.get(args[0]); if (type.vecsize == 1) return SPVFuncImplReflectScalar; - else - return SPVFuncImplNone; + break; } case GLSLstd450Refract: { auto &type = compiler.get(args[0]); if (type.vecsize == 1) return SPVFuncImplRefractScalar; - else - return SPVFuncImplNone; + break; } case GLSLstd450FaceForward: { auto &type = compiler.get(args[0]); if (type.vecsize == 1) return SPVFuncImplFaceForwardScalar; - else - return SPVFuncImplNone; + break; } case GLSLstd450MatrixInverse: { @@ -9593,7 +10905,7 @@ 
CompilerMSL::MemberSorter::MemberSorter(SPIRType &t, Meta &m, SortAspect sa) meta.members.resize(max(type.member_types.size(), meta.members.size())); } -void CompilerMSL::remap_constexpr_sampler(uint32_t id, const MSLConstexprSampler &sampler) +void CompilerMSL::remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler) { auto &type = get(get(id).basetype); if (type.basetype != SPIRType::SampledImage && type.basetype != SPIRType::Sampler) @@ -9753,6 +11065,7 @@ void CompilerMSL::analyze_argument_buffers() string name; SPIRType::BaseType basetype; uint32_t index; + uint32_t plane; }; SmallVector resources_in_set[kMaxArgumentBuffers]; @@ -9791,23 +11104,33 @@ void CompilerMSL::analyze_argument_buffers() { add_resource_name(var_id); - uint32_t image_resource_index = get_metal_resource_index(var, SPIRType::Image); - uint32_t sampler_resource_index = get_metal_resource_index(var, SPIRType::Sampler); + uint32_t plane_count = 1; + if (constexpr_sampler && constexpr_sampler->ycbcr_conversion_enable) + plane_count = constexpr_sampler->planes; - resources_in_set[desc_set].push_back({ &var, to_name(var_id), SPIRType::Image, image_resource_index }); + for (uint32_t i = 0; i < plane_count; i++) + { + uint32_t image_resource_index = get_metal_resource_index(var, SPIRType::Image, i); + resources_in_set[desc_set].push_back( + { &var, to_name(var_id), SPIRType::Image, image_resource_index, i }); + } if (type.image.dim != DimBuffer && !constexpr_sampler) { + uint32_t sampler_resource_index = get_metal_resource_index(var, SPIRType::Sampler); resources_in_set[desc_set].push_back( - { &var, to_sampler_expression(var_id), SPIRType::Sampler, sampler_resource_index }); + { &var, to_sampler_expression(var_id), SPIRType::Sampler, sampler_resource_index, 0 }); } } else if (!constexpr_sampler) { // constexpr samplers are not declared as resources. 
- add_resource_name(var_id); - resources_in_set[desc_set].push_back( - { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype) }); + if (!msl_options.is_ios() || type.basetype != SPIRType::Image || type.image.sampled != 2) + { + add_resource_name(var_id); + resources_in_set[desc_set].push_back( + { &var, to_name(var_id), type.basetype, get_metal_resource_index(var, type.basetype), 0 }); + } } // Check if this descriptor set needs a swizzle buffer. @@ -9860,7 +11183,7 @@ void CompilerMSL::analyze_argument_buffers() set_decoration(var_id, DecorationDescriptorSet, desc_set); set_decoration(var_id, DecorationBinding, kSwizzleBufferBinding); resources_in_set[desc_set].push_back( - { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt) }); + { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0 }); } if (set_needs_buffer_sizes[desc_set]) @@ -9871,7 +11194,7 @@ void CompilerMSL::analyze_argument_buffers() set_decoration(var_id, DecorationDescriptorSet, desc_set); set_decoration(var_id, DecorationBinding, kBufferSizeBufferBinding); resources_in_set[desc_set].push_back( - { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt) }); + { &var, to_name(var_id), SPIRType::UInt, get_metal_resource_index(var, SPIRType::UInt), 0 }); } } } @@ -9915,6 +11238,8 @@ void CompilerMSL::analyze_argument_buffers() auto &var = *resource.var; auto &type = get_variable_data_type(var); string mbr_name = ensure_valid_name(resource.name, "m"); + if (resource.plane > 0) + mbr_name += join(plane_name_suffix, resource.plane); set_member_name(buffer_type.self, member_index, mbr_name); if (resource.basetype == SPIRType::Sampler && type.basetype != SPIRType::Sampler) @@ -9942,12 +11267,22 @@ void CompilerMSL::analyze_argument_buffers() } else { + uint32_t binding = get_decoration(var.self, DecorationBinding); + SetBindingPair pair = { desc_set, binding }; + if 
(resource.basetype == SPIRType::Image || resource.basetype == SPIRType::Sampler || resource.basetype == SPIRType::SampledImage) { // Drop pointer information when we emit the resources into a struct. buffer_type.member_types.push_back(get_variable_data_type_id(var)); - set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name)); + if (resource.plane == 0) + set_qualified_name(var.self, join(to_name(buffer_variable_id), ".", mbr_name)); + } + else if (buffers_requiring_dynamic_offset.count(pair)) + { + // Don't set the qualified name here; we'll define a variable holding the corrected buffer address later. + buffer_type.member_types.push_back(var.basetype); + buffers_requiring_dynamic_offset[pair].second = var.self; } else { @@ -9974,6 +11309,11 @@ bool CompilerMSL::SetBindingPair::operator==(const SetBindingPair &other) const return desc_set == other.desc_set && binding == other.binding; } +bool CompilerMSL::SetBindingPair::operator<(const SetBindingPair &other) const +{ + return desc_set < other.desc_set || (desc_set == other.desc_set && binding < other.binding); +} + bool CompilerMSL::StageSetBinding::operator==(const StageSetBinding &other) const { return model == other.model && desc_set == other.desc_set && binding == other.binding; diff --git a/3rdparty/spirv-cross/spirv_msl.hpp b/3rdparty/spirv-cross/spirv_msl.hpp index 7666c743b..481960761 100644 --- a/3rdparty/spirv-cross/spirv_msl.hpp +++ b/3rdparty/spirv-cross/spirv_msl.hpp @@ -54,9 +54,9 @@ struct MSLVertexAttr // Matches the binding index of a MSL resource for a binding within a descriptor set. // Taken together, the stage, desc_set and binding combine to form a reference to a resource // descriptor used in a particular shading stage. -// If using MSL 2.0 argument buffers, and the descriptor set is not marked as a discrete descriptor set, -// the binding reference we remap to will become an [[id(N)]] attribute within -// the "descriptor set" argument buffer structure. 
+// If using MSL 2.0 argument buffers, the descriptor set is not marked as a discrete descriptor set, +// and (for iOS only) the resource is not a storage image (sampled != 2), the binding reference we +// remap to will become an [[id(N)]] attribute within the "descriptor set" argument buffer structure. // For resources which are bound in the "classic" MSL 1.0 way or discrete descriptors, the remap will become a // [[buffer(N)]], [[texture(N)]] or [[sampler(N)]] depending on the resource types used. struct MSLResourceBinding @@ -122,6 +122,50 @@ enum MSLSamplerBorderColor MSL_SAMPLER_BORDER_COLOR_INT_MAX = 0x7fffffff }; +enum MSLFormatResolution +{ + MSL_FORMAT_RESOLUTION_444 = 0, + MSL_FORMAT_RESOLUTION_422, + MSL_FORMAT_RESOLUTION_420, + MSL_FORMAT_RESOLUTION_INT_MAX = 0x7fffffff +}; + +enum MSLChromaLocation +{ + MSL_CHROMA_LOCATION_COSITED_EVEN = 0, + MSL_CHROMA_LOCATION_MIDPOINT, + MSL_CHROMA_LOCATION_INT_MAX = 0x7fffffff +}; + +enum MSLComponentSwizzle +{ + MSL_COMPONENT_SWIZZLE_IDENTITY = 0, + MSL_COMPONENT_SWIZZLE_ZERO, + MSL_COMPONENT_SWIZZLE_ONE, + MSL_COMPONENT_SWIZZLE_R, + MSL_COMPONENT_SWIZZLE_G, + MSL_COMPONENT_SWIZZLE_B, + MSL_COMPONENT_SWIZZLE_A, + MSL_COMPONENT_SWIZZLE_INT_MAX = 0x7fffffff +}; + +enum MSLSamplerYCbCrModelConversion +{ + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY = 0, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_IDENTITY, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_709, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_601, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020, + MSL_SAMPLER_YCBCR_MODEL_CONVERSION_INT_MAX = 0x7fffffff +}; + +enum MSLSamplerYCbCrRange +{ + MSL_SAMPLER_YCBCR_RANGE_ITU_FULL = 0, + MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW, + MSL_SAMPLER_YCBCR_RANGE_INT_MAX = 0x7fffffff +}; + struct MSLConstexprSampler { MSLSamplerCoord coord = MSL_SAMPLER_COORD_NORMALIZED; @@ -137,9 +181,39 @@ struct MSLConstexprSampler float lod_clamp_max = 1000.0f; int max_anisotropy = 1; + // Sampler Y'CbCr conversion parameters + uint32_t 
planes = 0; + MSLFormatResolution resolution = MSL_FORMAT_RESOLUTION_444; + MSLSamplerFilter chroma_filter = MSL_SAMPLER_FILTER_NEAREST; + MSLChromaLocation x_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN; + MSLChromaLocation y_chroma_offset = MSL_CHROMA_LOCATION_COSITED_EVEN; + MSLComponentSwizzle swizzle[4]; // IDENTITY, IDENTITY, IDENTITY, IDENTITY + MSLSamplerYCbCrModelConversion ycbcr_model = MSL_SAMPLER_YCBCR_MODEL_CONVERSION_RGB_IDENTITY; + MSLSamplerYCbCrRange ycbcr_range = MSL_SAMPLER_YCBCR_RANGE_ITU_FULL; + uint32_t bpc = 8; + bool compare_enable = false; bool lod_clamp_enable = false; bool anisotropy_enable = false; + bool ycbcr_conversion_enable = false; + + MSLConstexprSampler() + { + for (uint32_t i = 0; i < 4; i++) + swizzle[i] = MSL_COMPONENT_SWIZZLE_IDENTITY; + } + bool swizzle_is_identity() const + { + return (swizzle[0] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[1] == MSL_COMPONENT_SWIZZLE_IDENTITY && + swizzle[2] == MSL_COMPONENT_SWIZZLE_IDENTITY && swizzle[3] == MSL_COMPONENT_SWIZZLE_IDENTITY); + } + bool swizzle_has_one_or_zero() const + { + return (swizzle[0] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[0] == MSL_COMPONENT_SWIZZLE_ONE || + swizzle[1] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[1] == MSL_COMPONENT_SWIZZLE_ONE || + swizzle[2] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[2] == MSL_COMPONENT_SWIZZLE_ONE || + swizzle[3] == MSL_COMPONENT_SWIZZLE_ZERO || swizzle[3] == MSL_COMPONENT_SWIZZLE_ONE); + } }; // Special constant used in a MSLResourceBinding desc_set @@ -189,6 +263,7 @@ public: uint32_t shader_tess_factor_buffer_index = 26; uint32_t buffer_size_buffer_index = 25; uint32_t view_mask_buffer_index = 24; + uint32_t dynamic_offsets_buffer_index = 23; uint32_t shader_input_wg_index = 0; uint32_t device_index = 0; bool enable_point_size_builtin = true; @@ -288,21 +363,21 @@ public: // buffer if the shader needs it. 
bool needs_output_buffer() const { - return capture_output_to_buffer && stage_out_var_id != 0; + return capture_output_to_buffer && stage_out_var_id != ID(0); } // Provide feedback to calling API to allow it to pass a patch output // buffer if the shader needs it. bool needs_patch_output_buffer() const { - return capture_output_to_buffer && patch_stage_out_var_id != 0; + return capture_output_to_buffer && patch_stage_out_var_id != ID(0); } // Provide feedback to calling API to allow it to pass an input threadgroup // buffer if the shader needs it. bool needs_input_threadgroup_mem() const { - return capture_output_to_buffer && stage_in_var_id != 0; + return capture_output_to_buffer && stage_in_var_id != ID(0); } explicit CompilerMSL(std::vector spirv); @@ -323,6 +398,14 @@ public: // the set/binding combination was used by the MSL code. void add_msl_resource_binding(const MSLResourceBinding &resource); + // desc_set and binding are the SPIR-V descriptor set and binding of a buffer resource + // in this shader. index is the index within the dynamic offset buffer to use. This + // function marks that resource as using a dynamic offset (VK_DESCRIPTOR_TYPE_UNIFORM_BUFFER_DYNAMIC + // or VK_DESCRIPTOR_TYPE_STORAGE_BUFFER_DYNAMIC). This function only has any effect if argument buffers + // are enabled. If so, the buffer will have its address adjusted at the beginning of the shader with + // an offset taken from the dynamic offset buffer. + void add_dynamic_buffer(uint32_t desc_set, uint32_t binding, uint32_t index); + // When using MSL argument buffers, we can force "classic" MSL 1.0 binding schemes for certain descriptor sets. // This corresponds to VK_KHR_push_descriptor in Vulkan. void add_discrete_descriptor_set(uint32_t desc_set); @@ -349,6 +432,14 @@ public: // sampler's binding is returned instead. For any other resource type, -1 is returned. 
uint32_t get_automatic_msl_resource_binding_secondary(uint32_t id) const; + // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for multiplanar images, + // in which case the second plane's binding is returned instead. For any other resource type, -1 is returned. + uint32_t get_automatic_msl_resource_binding_tertiary(uint32_t id) const; + + // Same as get_automatic_msl_resource_binding, but should only be used for combined image samplers for triplanar images, + // in which case the third plane's binding is returned instead. For any other resource type, -1 is returned. + uint32_t get_automatic_msl_resource_binding_quaternary(uint32_t id) const; + // Compiles the SPIR-V code into Metal Shading Language. std::string compile() override; @@ -359,7 +450,7 @@ public: // This can be used on both combined image/samplers (sampler2D) or standalone samplers. // The remapped sampler must not be an array of samplers. // Prefer remap_constexpr_sampler_by_binding unless you're also doing reflection anyways. - void remap_constexpr_sampler(uint32_t id, const MSLConstexprSampler &sampler); + void remap_constexpr_sampler(VariableID id, const MSLConstexprSampler &sampler); // Same as remap_constexpr_sampler, except you provide set/binding, rather than variable ID. // Remaps based on ID take priority over set/binding remaps. @@ -395,7 +486,14 @@ protected: SPVFuncImplInverse4x4, SPVFuncImplInverse3x3, SPVFuncImplInverse2x2, + // It is very important that this come before *Swizzle and ChromaReconstruct*, to ensure it's + // emitted before them. + SPVFuncImplForwardArgs, + // Likewise, this must come before *Swizzle. 
+ SPVFuncImplGetSwizzle, SPVFuncImplTextureSwizzle, + SPVFuncImplGatherSwizzle, + SPVFuncImplGatherCompareSwizzle, SPVFuncImplSubgroupBallot, SPVFuncImplSubgroupBallotBitExtract, SPVFuncImplSubgroupBallotFindLSB, @@ -405,6 +503,27 @@ protected: SPVFuncImplReflectScalar, SPVFuncImplRefractScalar, SPVFuncImplFaceForwardScalar, + SPVFuncImplChromaReconstructNearest2Plane, + SPVFuncImplChromaReconstructNearest3Plane, + SPVFuncImplChromaReconstructLinear422CositedEven2Plane, + SPVFuncImplChromaReconstructLinear422CositedEven3Plane, + SPVFuncImplChromaReconstructLinear422Midpoint2Plane, + SPVFuncImplChromaReconstructLinear422Midpoint3Plane, + SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven2Plane, + SPVFuncImplChromaReconstructLinear420XCositedEvenYCositedEven3Plane, + SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven2Plane, + SPVFuncImplChromaReconstructLinear420XMidpointYCositedEven3Plane, + SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint2Plane, + SPVFuncImplChromaReconstructLinear420XCositedEvenYMidpoint3Plane, + SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint2Plane, + SPVFuncImplChromaReconstructLinear420XMidpointYMidpoint3Plane, + SPVFuncImplExpandITUFullRange, + SPVFuncImplExpandITUNarrowRange, + SPVFuncImplConvertYCbCrBT709, + SPVFuncImplConvertYCbCrBT601, + SPVFuncImplConvertYCbCrBT2020, + SPVFuncImplDynamicImageSampler, + SPVFuncImplArrayCopyMultidimMax = 6 }; @@ -418,6 +537,8 @@ protected: void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags) override; void emit_sampled_image_op(uint32_t result_type, uint32_t result_id, uint32_t image_id, uint32_t samp_id) override; void emit_subgroup_op(const Instruction &i) override; + std::string to_texture_op(const Instruction &i, bool *forward, + SmallVector &inherited_expressions) override; void emit_fixup() override; std::string to_struct_member(const SPIRType &type, uint32_t member_type_id, uint32_t index, const std::string &qualifier = ""); @@ -428,12 
+549,12 @@ protected: std::string image_type_glsl(const SPIRType &type, uint32_t id = 0) override; std::string sampler_type(const SPIRType &type); std::string builtin_to_glsl(spv::BuiltIn builtin, spv::StorageClass storage) override; - std::string to_func_call_arg(uint32_t id) override; + std::string to_func_call_arg(const SPIRFunction::Parameter &arg, uint32_t id) override; std::string to_name(uint32_t id, bool allow_alias = true) const override; - std::string to_function_name(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, + std::string to_function_name(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, bool has_array_offsets, bool has_offset, bool has_grad, bool has_dref, uint32_t lod, uint32_t minlod) override; - std::string to_function_args(uint32_t img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, + std::string to_function_args(VariableID img, const SPIRType &imgtype, bool is_fetch, bool is_gather, bool is_proj, uint32_t coord, uint32_t coord_components, uint32_t dref, uint32_t grad_x, uint32_t grad_y, uint32_t lod, uint32_t coffset, uint32_t offset, uint32_t bias, uint32_t comp, uint32_t sample, uint32_t minlod, bool *p_forward) override; @@ -513,7 +634,7 @@ protected: std::string member_attribute_qualifier(const SPIRType &type, uint32_t index); std::string argument_decl(const SPIRFunction::Parameter &arg); std::string round_fp_tex_coords(std::string tex_coords, bool coord_is_fp); - uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype); + uint32_t get_metal_resource_index(SPIRVariable &var, SPIRType::BaseType basetype, uint32_t plane = 0); uint32_t get_ordered_member_location(uint32_t type_id, uint32_t index, uint32_t *comp = nullptr); // MSL packing rules. These compute the effective packing rules as observed by the MSL compiler in the MSL output. 
@@ -576,6 +697,7 @@ protected: uint32_t swizzle_buffer_id = 0; uint32_t buffer_size_buffer_id = 0; uint32_t view_mask_buffer_id = 0; + uint32_t dynamic_offsets_buffer_id = 0; void bitcast_to_builtin_store(uint32_t target_id, std::string &expr, const SPIRType &expr_type) override; void bitcast_from_builtin_load(uint32_t source_id, std::string &expr, const SPIRType &expr_type) override; @@ -605,6 +727,7 @@ protected: uint32_t desc_set; uint32_t binding; bool operator==(const SetBindingPair &other) const; + bool operator<(const SetBindingPair &other) const; }; struct StageSetBinding @@ -629,12 +752,12 @@ protected: // Intentionally uninitialized, works around MSVC 2013 bug. uint32_t next_metal_resource_ids[kMaxArgumentBuffers]; - uint32_t stage_in_var_id = 0; - uint32_t stage_out_var_id = 0; - uint32_t patch_stage_in_var_id = 0; - uint32_t patch_stage_out_var_id = 0; - uint32_t stage_in_ptr_var_id = 0; - uint32_t stage_out_ptr_var_id = 0; + VariableID stage_in_var_id = 0; + VariableID stage_out_var_id = 0; + VariableID patch_stage_in_var_id = 0; + VariableID patch_stage_out_var_id = 0; + VariableID stage_in_ptr_var_id = 0; + VariableID stage_out_ptr_var_id = 0; bool has_sampled_images = false; bool needs_vertex_idx_arg = false; bool needs_instance_idx_arg = false; @@ -652,6 +775,7 @@ protected: std::string sampler_name_suffix = "Smplr"; std::string swizzle_name_suffix = "Swzl"; std::string buffer_size_name_suffix = "BufferSize"; + std::string plane_name_suffix = "Plane"; std::string input_wg_var_name = "gl_in"; std::string output_buffer_var_name = "spvOut"; std::string patch_output_buffer_var_name = "spvPatchOut"; @@ -666,6 +790,9 @@ protected: std::unordered_set buffers_requiring_array_length; SmallVector buffer_arrays; + // Must be ordered since array is in a specific order. 
+ std::map> buffers_requiring_dynamic_offset; + uint32_t argument_buffer_ids[kMaxArgumentBuffers]; uint32_t argument_buffer_discrete_mask = 0; void analyze_argument_buffers(); @@ -676,6 +803,8 @@ protected: bool suppress_missing_prototypes = false; + void add_spv_func_and_recompile(SPVFuncImpl spv_func); + // OpcodeHandler that handles several MSL preprocessing operations. struct OpCodePreprocessor : OpcodeHandler { diff --git a/3rdparty/spirv-cross/spirv_parser.cpp b/3rdparty/spirv-cross/spirv_parser.cpp index d5a16337d..34a0d9026 100644 --- a/3rdparty/spirv-cross/spirv_parser.cpp +++ b/3rdparty/spirv-cross/spirv_parser.cpp @@ -278,7 +278,9 @@ void Parser::parse(const Instruction &instruction) // Strings need nul-terminator and consume the whole word. uint32_t strlen_words = uint32_t((e.name.size() + 1 + 3) >> 2); - e.interface_variables.insert(end(e.interface_variables), ops + strlen_words + 2, ops + instruction.length); + + for (uint32_t i = strlen_words + 2; i < instruction.length; i++) + e.interface_variables.push_back(ops[i]); // Set the name of the entry point in case OpName is not provided later. ir.set_name(ops[1], e.name); @@ -658,7 +660,7 @@ void Parser::parse(const Instruction &instruction) } } - if (type.type_alias == 0) + if (type.type_alias == TypeID(0)) global_struct_cache.push_back(id); } break; @@ -1008,12 +1010,12 @@ void Parser::parse(const Instruction &instruction) ir.block_meta[current_block->self] |= ParsedIR::BLOCK_META_LOOP_HEADER_BIT; ir.block_meta[current_block->merge_block] |= ParsedIR::BLOCK_META_LOOP_MERGE_BIT; - ir.continue_block_to_loop_header[current_block->continue_block] = current_block->self; + ir.continue_block_to_loop_header[current_block->continue_block] = BlockID(current_block->self); // Don't add loop headers to continue blocks, // which would make it impossible branch into the loop header since // they are treated as continues. 
- if (current_block->continue_block != current_block->self) + if (current_block->continue_block != BlockID(current_block->self)) ir.block_meta[current_block->continue_block] |= ParsedIR::BLOCK_META_CONTINUE_BIT; if (length >= 3) diff --git a/3rdparty/spirv-cross/spirv_reflect.cpp b/3rdparty/spirv-cross/spirv_reflect.cpp index b187a7fa6..f3eac7c49 100644 --- a/3rdparty/spirv-cross/spirv_reflect.cpp +++ b/3rdparty/spirv-cross/spirv_reflect.cpp @@ -285,7 +285,7 @@ void CompilerReflection::emit_type(const SPIRType &type, bool &emitted_open_tag) { auto name = type_to_glsl(type); - if (type.type_alias != 0) + if (type.type_alias != TypeID(0)) return; if (!emitted_open_tag) @@ -468,7 +468,7 @@ void CompilerReflection::emit_resources(const char *tag, const SmallVectorbegin_json_object(); diff --git a/3rdparty/spirv-cross/test_shaders.py b/3rdparty/spirv-cross/test_shaders.py index 27466b398..9709e2cdd 100755 --- a/3rdparty/spirv-cross/test_shaders.py +++ b/3rdparty/spirv-cross/test_shaders.py @@ -209,6 +209,14 @@ def cross_compile_msl(shader, spirv, opt, iterations, paths): msl_args.append('--msl-view-index-from-device-index') if '.dispatchbase.' in shader: msl_args.append('--msl-dispatch-base') + if '.dynamic-buffer.' in shader: + # Arbitrary for testing purposes. + msl_args.append('--msl-dynamic-buffer') + msl_args.append('0') + msl_args.append('0') + msl_args.append('--msl-dynamic-buffer') + msl_args.append('1') + msl_args.append('2') subprocess.check_call(msl_args) diff --git a/3rdparty/spirv-cross/tests-other/msl_ycbcr_conversion_test.cpp b/3rdparty/spirv-cross/tests-other/msl_ycbcr_conversion_test.cpp new file mode 100644 index 000000000..deab27bec --- /dev/null +++ b/3rdparty/spirv-cross/tests-other/msl_ycbcr_conversion_test.cpp @@ -0,0 +1,103 @@ +// Testbench for MSL constexpr samplers, with Y'CbCr conversion. +// It does not validate output, but it's useful for ad-hoc testing. 
+ +#ifdef _MSC_VER +#define _CRT_SECURE_NO_WARNINGS +#endif + +#include +#include +#include +#include + +#define SPVC_CHECKED_CALL(x) do { \ + if ((x) != SPVC_SUCCESS) { \ + fprintf(stderr, "Failed at line %d.\n", __LINE__); \ + exit(1); \ + } \ +} while(0) +#define SPVC_CHECKED_CALL_NEGATIVE(x) do { \ + g_fail_on_error = SPVC_FALSE; \ + if ((x) == SPVC_SUCCESS) { \ + fprintf(stderr, "Failed at line %d.\n", __LINE__); \ + exit(1); \ + } \ + g_fail_on_error = SPVC_TRUE; \ +} while(0) + +static std::vector read_file(const char *path) +{ + long len; + FILE *file = fopen(path, "rb"); + + if (!file) + return {}; + + fseek(file, 0, SEEK_END); + len = ftell(file); + rewind(file); + + std::vector buffer(len / sizeof(SpvId)); + if (fread(buffer.data(), 1, len, file) != (size_t)len) + { + fclose(file); + return {}; + } + + fclose(file); + return buffer; +} + +int main(int argc, char **argv) +{ + if (argc != 2) + return EXIT_FAILURE; + + auto buffer = read_file(argv[1]); + if (buffer.empty()) + return EXIT_FAILURE; + + spvc_context ctx; + spvc_parsed_ir parsed_ir; + spvc_compiler compiler; + spvc_compiler_options options; + + SPVC_CHECKED_CALL(spvc_context_create(&ctx)); + SPVC_CHECKED_CALL(spvc_context_parse_spirv(ctx, buffer.data(), buffer.size(), &parsed_ir)); + SPVC_CHECKED_CALL(spvc_context_create_compiler(ctx, SPVC_BACKEND_MSL, parsed_ir, SPVC_CAPTURE_MODE_TAKE_OWNERSHIP, &compiler)); + SPVC_CHECKED_CALL(spvc_compiler_create_compiler_options(compiler, &options)); + SPVC_CHECKED_CALL(spvc_compiler_options_set_uint(options, SPVC_COMPILER_OPTION_MSL_VERSION, SPVC_MAKE_MSL_VERSION(2, 0, 0))); + SPVC_CHECKED_CALL(spvc_compiler_install_compiler_options(compiler, options)); + + spvc_msl_resource_binding binding; + spvc_msl_resource_binding_init(&binding); + binding.desc_set = 1; + binding.binding = 2; + binding.stage = SpvExecutionModelFragment; + binding.msl_texture = 0; + binding.msl_sampler = 0; + SPVC_CHECKED_CALL(spvc_compiler_msl_add_resource_binding(compiler, 
&binding)); + + spvc_msl_constexpr_sampler samp; + spvc_msl_sampler_ycbcr_conversion conv; + spvc_msl_constexpr_sampler_init(&samp); + spvc_msl_sampler_ycbcr_conversion_init(&conv); + conv.planes = 3; + conv.resolution = SPVC_MSL_FORMAT_RESOLUTION_422; + conv.chroma_filter = SPVC_MSL_SAMPLER_FILTER_LINEAR; + conv.x_chroma_offset = SPVC_MSL_CHROMA_LOCATION_MIDPOINT; + conv.ycbcr_model = SPVC_MSL_SAMPLER_YCBCR_MODEL_CONVERSION_YCBCR_BT_2020; + conv.ycbcr_range = SPVC_MSL_SAMPLER_YCBCR_RANGE_ITU_NARROW; + conv.bpc = 8; + SPVC_CHECKED_CALL(spvc_compiler_msl_remap_constexpr_sampler_by_binding_ycbcr(compiler, 1, 2, &samp, &conv)); + + const char *str; + SPVC_CHECKED_CALL(spvc_compiler_compile(compiler, &str)); + + // Should be marked, as a sanity check. + if (!spvc_compiler_msl_is_resource_used(compiler, SpvExecutionModelFragment, 1, 2)) + return EXIT_FAILURE; + + fprintf(stderr, "Output:\n%s\n", str); +} + diff --git a/3rdparty/spirv-cross/tests-other/msl_ycbcr_conversion_test.spv b/3rdparty/spirv-cross/tests-other/msl_ycbcr_conversion_test.spv new file mode 100644 index 000000000..62372d5c6 Binary files /dev/null and b/3rdparty/spirv-cross/tests-other/msl_ycbcr_conversion_test.spv differ diff --git a/3rdparty/spirv-cross/tests-other/msl_ycbcr_conversion_test_2.spv b/3rdparty/spirv-cross/tests-other/msl_ycbcr_conversion_test_2.spv new file mode 100644 index 000000000..10fa7690d Binary files /dev/null and b/3rdparty/spirv-cross/tests-other/msl_ycbcr_conversion_test_2.spv differ diff --git a/3rdparty/spirv-cross/tests-other/typed_id_test.cpp b/3rdparty/spirv-cross/tests-other/typed_id_test.cpp new file mode 100644 index 000000000..e8ecb16cc --- /dev/null +++ b/3rdparty/spirv-cross/tests-other/typed_id_test.cpp @@ -0,0 +1,49 @@ +#include "spirv_common.hpp" + +using namespace SPIRV_CROSS_NAMESPACE; + +int main() +{ + // Construct from uint32_t. + VariableID var_id = 10; + TypeID type_id = 20; + ConstantID constant_id = 30; + + // Assign from uint32_t. 
+ var_id = 100; + type_id = 40; + constant_id = 60; + + // Construct generic ID. + ID generic_var_id = var_id; + ID generic_type_id = type_id; + ID generic_constant_id = constant_id; + + // Assign generic id. + generic_var_id = var_id; + generic_type_id = type_id; + generic_constant_id = constant_id; + + // Assign generic ID to typed ID + var_id = generic_var_id; + type_id = generic_type_id; + constant_id = generic_constant_id; + + // Implicit conversion to uint32_t. + uint32_t a; + a = var_id; + a = type_id; + a = constant_id; + a = generic_var_id; + a = generic_type_id; + a = generic_constant_id; + + // Copy assignment. + var_id = VariableID(10); + type_id = TypeID(10); + constant_id = ConstantID(10); + + // These operations are blocked, assign or construction from mismatched types. + //var_id = type_id; + //var_id = TypeID(100); +} \ No newline at end of file