[spirv] add -fspv-reduce-load-size option (#3931)

Loading a large object increases memory pressure, so reducing the load
size can improve performance. This is particularly useful for mobile
GPUs. `-fspv-reduce-load-size` removes OpLoad/OpCompositeExtract of
struct/array types by running the spirv-opt --reduce-load-size pass.

Fixes #3889
This commit is contained in:
Jaebaek Seo 2021-09-07 23:32:16 -04:00 коммит произвёл GitHub
Родитель 614472fb4d
Коммит 555f813c4f
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
8 изменённых файлов: 54 добавлений и 7 удалений

2
external/SPIRV-Headers поставляемый

@ -1 +1 @@
Subproject commit 449bc986ba6f4c5e10e32828783f9daef2a77644
Subproject commit 6cae8216a6ea19ff3f237af01e54378c1ff81fcd

2
external/SPIRV-Tools поставляемый

@ -1 +1 @@
Subproject commit bd3a271ce3fcc8b9ed8e6eb2e94392d1b220adc9
Subproject commit 0c09258e073bdbae19d2de8386125b35b783928b

Просмотреть файл

@ -339,6 +339,8 @@ def fspv_target_env_EQ : Joined<["-"], "fspv-target-env=">, Group<spirv_Group>,
HelpText<"Specify the target environment: vulkan1.0 (default) or vulkan1.1">;
def fspv_flatten_resource_arrays: Flag<["-"], "fspv-flatten-resource-arrays">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
HelpText<"Flatten arrays of resources so each array element takes one binding number">;
// -fspv-reduce-load-size: a plain on/off flag (no value) in the SPIR-V option group.
def fspv_reduce_load_size: Flag<["-"], "fspv-reduce-load-size">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
HelpText<"Replaces loads of composite objects to reduce memory pressure for the loads">;
def fvk_auto_shift_bindings: Flag<["-"], "fvk-auto-shift-bindings">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
HelpText<"Apply fvk-*-shift to resources without an explicit register assignment.">;
def Wno_vk_ignored_features : Joined<["-"], "Wno-vk-ignored-features">, Group<spirv_Group>, Flags<[CoreOption, DriverOption, HelpHidden]>,

Просмотреть файл

@ -55,6 +55,7 @@ struct SpirvCodeGenOptions {
bool useGlLayout;
bool useScalarLayout;
bool flattenResourceArrays;
bool reduceLoadSize;
bool autoShiftBindings;
bool supportNonzeroBaseInstance;
SpirvLayoutRule cBufferLayoutRule;

Просмотреть файл

@ -859,6 +859,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
opts.SpirvOptions.noWarnEmulatedFeatures = Args.hasFlag(OPT_Wno_vk_emulated_features, OPT_INVALID, false);
opts.SpirvOptions.flattenResourceArrays =
Args.hasFlag(OPT_fspv_flatten_resource_arrays, OPT_INVALID, false);
opts.SpirvOptions.reduceLoadSize =
Args.hasFlag(OPT_fspv_reduce_load_size, OPT_INVALID, false);
opts.SpirvOptions.autoShiftBindings = Args.hasFlag(OPT_fvk_auto_shift_bindings, OPT_INVALID, false);
if (!handleVkShiftArgs(Args, OPT_fvk_b_shift, "b", &opts.SpirvOptions.bShift, errors) ||
@ -947,6 +949,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
Args.hasFlag(OPT_fvk_use_dx_layout, OPT_INVALID, false) ||
Args.hasFlag(OPT_fvk_use_scalar_layout, OPT_INVALID, false) ||
Args.hasFlag(OPT_fspv_flatten_resource_arrays, OPT_INVALID, false) ||
Args.hasFlag(OPT_fspv_reduce_load_size, OPT_INVALID, false) ||
Args.hasFlag(OPT_fspv_reflect, OPT_INVALID, false) ||
Args.hasFlag(OPT_Wno_vk_ignored_features, OPT_INVALID, false) ||
Args.hasFlag(OPT_Wno_vk_emulated_features, OPT_INVALID, false) ||

Просмотреть файл

@ -740,9 +740,9 @@ void SpirvEmitter::HandleTranslationUnit(ASTContext &context) {
// In order to flatten composite resources, we must also unroll loops.
// Therefore we should run legalization before optimization.
needsLegalization = needsLegalization ||
declIdMapper.requiresLegalization() ||
spirvOptions.flattenResourceArrays ||
needsLegalization =
needsLegalization || declIdMapper.requiresLegalization() ||
spirvOptions.flattenResourceArrays || spirvOptions.reduceLoadSize ||
declIdMapper.requiresFlatteningCompositeResources() ||
!dsetbindingsToCombineImageSampler.empty();
@ -12466,6 +12466,13 @@ bool SpirvEmitter::spirvToolsLegalize(std::vector<uint32_t> *mod,
// types.
optimizer.RegisterPass(spvtools::CreateAggressiveDCEPass());
}
if (spirvOptions.reduceLoadSize) {
// The threshold must be bigger than 1.0 to reduce all possible loads.
optimizer.RegisterPass(spvtools::CreateReduceLoadSizePass(1.1));
// ADCE should be run after reduce-load-size pass in order to remove
// dead instructions.
optimizer.RegisterPass(spvtools::CreateAggressiveDCEPass());
}
optimizer.RegisterPass(spvtools::CreateReplaceInvalidOpcodePass());
optimizer.RegisterPass(spvtools::CreateCompactIdsPass());

Просмотреть файл

@ -0,0 +1,32 @@
// Run: %dxc -T cs_6_0 -E main -fspv-reduce-load-size -O0
// Single-field struct used as the element type of the output buffer.
struct S {
uint f;
};
// Constant buffer holding the six-element uint array that main() passes to foo().
cbuffer gBuffer { uint a[6]; };
// Writable structured buffer; main() stores the result in element 0.
RWStructuredBuffer<S> gRWSBuffer;
// CHECK-NOT: OpCompositeExtract
// CHECK: [[p0:%\w+]] = OpAccessChain %_ptr_Uniform_uint {{%\w+}} %uint_0
// CHECK: OpLoad %uint [[p0]]
// CHECK: [[p1:%\w+]] = OpAccessChain %_ptr_Uniform_uint {{%\w+}} %uint_1
// CHECK: OpLoad %uint [[p1]]
// CHECK: [[p2:%\w+]] = OpAccessChain %_ptr_Uniform_uint {{%\w+}} %uint_2
// CHECK: OpLoad %uint [[p2]]
// CHECK: [[p3:%\w+]] = OpAccessChain %_ptr_Uniform_uint {{%\w+}} %uint_3
// CHECK: OpLoad %uint [[p3]]
// CHECK: [[p4:%\w+]] = OpAccessChain %_ptr_Uniform_uint {{%\w+}} %uint_4
// CHECK: OpLoad %uint [[p4]]
// CHECK: [[p5:%\w+]] = OpAccessChain %_ptr_Uniform_uint {{%\w+}} %uint_5
// CHECK: OpLoad %uint [[p5]]
// Returns the sum of the six elements of p. Each element is read through its
// own constant index (0 through 5), matching the six per-index access-chain
// and load pairs that the CHECK lines in this file expect. Keep the expression
// in this shape: the expected output is tied to it.
uint foo(uint p[6]) {
return p[0] + p[1] + p[2] + p[3] + p[4] + p[5];
}
// Entry point named by the Run line (-T cs_6_0 -E main), declared with a
// 1x1x1 thread group. Passes the cbuffer array `a` to foo() and stores the
// returned sum in gRWSBuffer[0].f.
[numthreads(1,1,1)]
void main() {
gRWSBuffer[0].f = foo(a);
}

Просмотреть файл

@ -2656,6 +2656,8 @@ TEST_F(FileTest, MeshShadingNVAmplificationError4) {
runFileTest("meshshading.nv.error3.amplification.hlsl", Expect::Failure);
}
// Runs the reduce.load.size.hlsl file test.
TEST_F(FileTest, ReduceLoadSize) {
  runFileTest("reduce.load.size.hlsl");
}
// Test OpEntryPoint in the Vulkan1.2 target environment
TEST_F(FileTest, Vk1p2EntryPoint) {
runFileTest("vk.1p2.entry-point.hlsl");