From 555f813c4fa378a39ca4f16f97140947be5a9dad Mon Sep 17 00:00:00 2001 From: Jaebaek Seo Date: Tue, 7 Sep 2021 23:32:16 -0400 Subject: [PATCH] [spirv] add -fspv-reduce-load-size option (#3931) Since loading a big object increases memory pressure, reducing the load size can have some performance benefit. In particular, it is useful for mobile GPUs. `-fspv-reduce-load-size` removes OpLoad/OpCompositeExtract of struct/array types by running the spirv-opt --reduce-load-size pass. Fixes #3889 --- external/SPIRV-Headers | 2 +- external/SPIRV-Tools | 2 +- include/dxc/Support/HLSLOptions.td | 2 ++ include/dxc/Support/SPIRVOptions.h | 1 + lib/DxcSupport/HLSLOptions.cpp | 3 ++ tools/clang/lib/SPIRV/SpirvEmitter.cpp | 17 +++++++--- .../test/CodeGenSPIRV/reduce.load.size.hlsl | 32 +++++++++++++++++++ .../unittests/SPIRV/CodeGenSpirvTest.cpp | 2 ++ 8 files changed, 54 insertions(+), 7 deletions(-) create mode 100644 tools/clang/test/CodeGenSPIRV/reduce.load.size.hlsl diff --git a/external/SPIRV-Headers b/external/SPIRV-Headers index 449bc986b..6cae8216a 160000 --- a/external/SPIRV-Headers +++ b/external/SPIRV-Headers @@ -1 +1 @@ -Subproject commit 449bc986ba6f4c5e10e32828783f9daef2a77644 +Subproject commit 6cae8216a6ea19ff3f237af01e54378c1ff81fcd diff --git a/external/SPIRV-Tools b/external/SPIRV-Tools index bd3a271ce..0c09258e0 160000 --- a/external/SPIRV-Tools +++ b/external/SPIRV-Tools @@ -1 +1 @@ -Subproject commit bd3a271ce3fcc8b9ed8e6eb2e94392d1b220adc9 +Subproject commit 0c09258e073bdbae19d2de8386125b35b783928b diff --git a/include/dxc/Support/HLSLOptions.td b/include/dxc/Support/HLSLOptions.td index 4ed7577b0..fd6901c61 100644 --- a/include/dxc/Support/HLSLOptions.td +++ b/include/dxc/Support/HLSLOptions.td @@ -339,6 +339,8 @@ def fspv_target_env_EQ : Joined<["-"], "fspv-target-env=">, Group, HelpText<"Specify the target environment: vulkan1.0 (default) or vulkan1.1">; def fspv_flatten_resource_arrays: Flag<["-"], "fspv-flatten-resource-arrays">, Group,
Flags<[CoreOption, DriverOption]>, HelpText<"Flatten arrays of resources so each array element takes one binding number">; +def fspv_reduce_load_size: Flag<["-"], "fspv-reduce-load-size">, Group, Flags<[CoreOption, DriverOption]>, + HelpText<"Replaces loads of composite objects to reduce memory pressure for the loads">; def fvk_auto_shift_bindings: Flag<["-"], "fvk-auto-shift-bindings">, Group, Flags<[CoreOption, DriverOption]>, HelpText<"Apply fvk-*-shift to resources without an explicit register assignment.">; def Wno_vk_ignored_features : Joined<["-"], "Wno-vk-ignored-features">, Group, Flags<[CoreOption, DriverOption, HelpHidden]>, diff --git a/include/dxc/Support/SPIRVOptions.h b/include/dxc/Support/SPIRVOptions.h index d2a31ca9e..440be2784 100644 --- a/include/dxc/Support/SPIRVOptions.h +++ b/include/dxc/Support/SPIRVOptions.h @@ -55,6 +55,7 @@ struct SpirvCodeGenOptions { bool useGlLayout; bool useScalarLayout; bool flattenResourceArrays; + bool reduceLoadSize; bool autoShiftBindings; bool supportNonzeroBaseInstance; SpirvLayoutRule cBufferLayoutRule; diff --git a/lib/DxcSupport/HLSLOptions.cpp b/lib/DxcSupport/HLSLOptions.cpp index 9c39130d3..1fa7f20ac 100644 --- a/lib/DxcSupport/HLSLOptions.cpp +++ b/lib/DxcSupport/HLSLOptions.cpp @@ -859,6 +859,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude, opts.SpirvOptions.noWarnEmulatedFeatures = Args.hasFlag(OPT_Wno_vk_emulated_features, OPT_INVALID, false); opts.SpirvOptions.flattenResourceArrays = Args.hasFlag(OPT_fspv_flatten_resource_arrays, OPT_INVALID, false); + opts.SpirvOptions.reduceLoadSize = + Args.hasFlag(OPT_fspv_reduce_load_size, OPT_INVALID, false); opts.SpirvOptions.autoShiftBindings = Args.hasFlag(OPT_fvk_auto_shift_bindings, OPT_INVALID, false); if (!handleVkShiftArgs(Args, OPT_fvk_b_shift, "b", &opts.SpirvOptions.bShift, errors) || @@ -947,6 +949,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude, Args.hasFlag(OPT_fvk_use_dx_layout, OPT_INVALID, 
false) || Args.hasFlag(OPT_fvk_use_scalar_layout, OPT_INVALID, false) || Args.hasFlag(OPT_fspv_flatten_resource_arrays, OPT_INVALID, false) || + Args.hasFlag(OPT_fspv_reduce_load_size, OPT_INVALID, false) || Args.hasFlag(OPT_fspv_reflect, OPT_INVALID, false) || Args.hasFlag(OPT_Wno_vk_ignored_features, OPT_INVALID, false) || Args.hasFlag(OPT_Wno_vk_emulated_features, OPT_INVALID, false) || diff --git a/tools/clang/lib/SPIRV/SpirvEmitter.cpp b/tools/clang/lib/SPIRV/SpirvEmitter.cpp index 561ed92cb..c9a22fa47 100644 --- a/tools/clang/lib/SPIRV/SpirvEmitter.cpp +++ b/tools/clang/lib/SPIRV/SpirvEmitter.cpp @@ -740,11 +740,11 @@ void SpirvEmitter::HandleTranslationUnit(ASTContext &context) { // In order to flatten composite resources, we must also unroll loops. // Therefore we should run legalization before optimization. - needsLegalization = needsLegalization || - declIdMapper.requiresLegalization() || - spirvOptions.flattenResourceArrays || - declIdMapper.requiresFlatteningCompositeResources() || - !dsetbindingsToCombineImageSampler.empty(); + needsLegalization = + needsLegalization || declIdMapper.requiresLegalization() || + spirvOptions.flattenResourceArrays || spirvOptions.reduceLoadSize || + declIdMapper.requiresFlatteningCompositeResources() || + !dsetbindingsToCombineImageSampler.empty(); if (spirvOptions.codeGenHighLevel) { beforeHlslLegalization = needsLegalization; @@ -12466,6 +12466,13 @@ bool SpirvEmitter::spirvToolsLegalize(std::vector *mod, // types. optimizer.RegisterPass(spvtools::CreateAggressiveDCEPass()); } + if (spirvOptions.reduceLoadSize) { + // The threshold must be bigger than 1.0 to reduce all possible loads. + optimizer.RegisterPass(spvtools::CreateReduceLoadSizePass(1.1)); + // ADCE should be run after reduce-load-size pass in order to remove + // dead instructions. 
+ optimizer.RegisterPass(spvtools::CreateAggressiveDCEPass()); + } optimizer.RegisterPass(spvtools::CreateReplaceInvalidOpcodePass()); optimizer.RegisterPass(spvtools::CreateCompactIdsPass()); diff --git a/tools/clang/test/CodeGenSPIRV/reduce.load.size.hlsl b/tools/clang/test/CodeGenSPIRV/reduce.load.size.hlsl new file mode 100644 index 000000000..4059d56ac --- /dev/null +++ b/tools/clang/test/CodeGenSPIRV/reduce.load.size.hlsl @@ -0,0 +1,32 @@ +// Run: %dxc -T cs_6_0 -E main -fspv-reduce-load-size -O0 + +struct S { + uint f; +}; + +cbuffer gBuffer { uint a[6]; }; + +RWStructuredBuffer gRWSBuffer; + +// CHECK-NOT: OpCompositeExtract + +// CHECK: [[p0:%\w+]] = OpAccessChain %_ptr_Uniform_uint {{%\w+}} %uint_0 +// CHECK: OpLoad %uint [[p0]] +// CHECK: [[p1:%\w+]] = OpAccessChain %_ptr_Uniform_uint {{%\w+}} %uint_1 +// CHECK: OpLoad %uint [[p1]] +// CHECK: [[p2:%\w+]] = OpAccessChain %_ptr_Uniform_uint {{%\w+}} %uint_2 +// CHECK: OpLoad %uint [[p2]] +// CHECK: [[p3:%\w+]] = OpAccessChain %_ptr_Uniform_uint {{%\w+}} %uint_3 +// CHECK: OpLoad %uint [[p3]] +// CHECK: [[p4:%\w+]] = OpAccessChain %_ptr_Uniform_uint {{%\w+}} %uint_4 +// CHECK: OpLoad %uint [[p4]] +// CHECK: [[p5:%\w+]] = OpAccessChain %_ptr_Uniform_uint {{%\w+}} %uint_5 +// CHECK: OpLoad %uint [[p5]] +uint foo(uint p[6]) { + return p[0] + p[1] + p[2] + p[3] + p[4] + p[5]; +} + +[numthreads(1,1,1)] +void main() { + gRWSBuffer[0].f = foo(a); +} diff --git a/tools/clang/unittests/SPIRV/CodeGenSpirvTest.cpp b/tools/clang/unittests/SPIRV/CodeGenSpirvTest.cpp index 75ed5d9ae..922c54675 100644 --- a/tools/clang/unittests/SPIRV/CodeGenSpirvTest.cpp +++ b/tools/clang/unittests/SPIRV/CodeGenSpirvTest.cpp @@ -2656,6 +2656,8 @@ TEST_F(FileTest, MeshShadingNVAmplificationError4) { runFileTest("meshshading.nv.error3.amplification.hlsl", Expect::Failure); } +TEST_F(FileTest, ReduceLoadSize) { runFileTest("reduce.load.size.hlsl"); } + // Test OpEntryPoint in the Vulkan1.2 target environment TEST_F(FileTest, 
Vk1p2EntryPoint) { runFileTest("vk.1p2.entry-point.hlsl");