[spirv] add -fspv-reduce-load-size option (#3931)
Loading a large object increases memory pressure, so reducing the load size can improve performance; this is particularly useful for mobile GPUs. `-fspv-reduce-load-size` removes OpLoad/OpCompositeExtract of struct/array types by running the spirv-opt --reduce-load-size pass. Fixes #3889
This commit is contained in:
Родитель
614472fb4d
Коммит
555f813c4f
|
@ -1 +1 @@
|
|||
Subproject commit 449bc986ba6f4c5e10e32828783f9daef2a77644
|
||||
Subproject commit 6cae8216a6ea19ff3f237af01e54378c1ff81fcd
|
|
@ -1 +1 @@
|
|||
Subproject commit bd3a271ce3fcc8b9ed8e6eb2e94392d1b220adc9
|
||||
Subproject commit 0c09258e073bdbae19d2de8386125b35b783928b
|
|
@ -339,6 +339,8 @@ def fspv_target_env_EQ : Joined<["-"], "fspv-target-env=">, Group<spirv_Group>,
|
|||
HelpText<"Specify the target environment: vulkan1.0 (default) or vulkan1.1">;
|
||||
def fspv_flatten_resource_arrays: Flag<["-"], "fspv-flatten-resource-arrays">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
|
||||
HelpText<"Flatten arrays of resources so each array element takes one binding number">;
|
||||
def fspv_reduce_load_size: Flag<["-"], "fspv-reduce-load-size">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
|
||||
HelpText<"Replaces loads of composite objects to reduce memory pressure for the loads">;
|
||||
def fvk_auto_shift_bindings: Flag<["-"], "fvk-auto-shift-bindings">, Group<spirv_Group>, Flags<[CoreOption, DriverOption]>,
|
||||
HelpText<"Apply fvk-*-shift to resources without an explicit register assignment.">;
|
||||
def Wno_vk_ignored_features : Joined<["-"], "Wno-vk-ignored-features">, Group<spirv_Group>, Flags<[CoreOption, DriverOption, HelpHidden]>,
|
||||
|
|
|
@ -55,6 +55,7 @@ struct SpirvCodeGenOptions {
|
|||
bool useGlLayout;
|
||||
bool useScalarLayout;
|
||||
bool flattenResourceArrays;
|
||||
bool reduceLoadSize;
|
||||
bool autoShiftBindings;
|
||||
bool supportNonzeroBaseInstance;
|
||||
SpirvLayoutRule cBufferLayoutRule;
|
||||
|
|
|
@ -859,6 +859,8 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
|
|||
opts.SpirvOptions.noWarnEmulatedFeatures = Args.hasFlag(OPT_Wno_vk_emulated_features, OPT_INVALID, false);
|
||||
opts.SpirvOptions.flattenResourceArrays =
|
||||
Args.hasFlag(OPT_fspv_flatten_resource_arrays, OPT_INVALID, false);
|
||||
opts.SpirvOptions.reduceLoadSize =
|
||||
Args.hasFlag(OPT_fspv_reduce_load_size, OPT_INVALID, false);
|
||||
opts.SpirvOptions.autoShiftBindings = Args.hasFlag(OPT_fvk_auto_shift_bindings, OPT_INVALID, false);
|
||||
|
||||
if (!handleVkShiftArgs(Args, OPT_fvk_b_shift, "b", &opts.SpirvOptions.bShift, errors) ||
|
||||
|
@ -947,6 +949,7 @@ int ReadDxcOpts(const OptTable *optionTable, unsigned flagsToInclude,
|
|||
Args.hasFlag(OPT_fvk_use_dx_layout, OPT_INVALID, false) ||
|
||||
Args.hasFlag(OPT_fvk_use_scalar_layout, OPT_INVALID, false) ||
|
||||
Args.hasFlag(OPT_fspv_flatten_resource_arrays, OPT_INVALID, false) ||
|
||||
Args.hasFlag(OPT_fspv_reduce_load_size, OPT_INVALID, false) ||
|
||||
Args.hasFlag(OPT_fspv_reflect, OPT_INVALID, false) ||
|
||||
Args.hasFlag(OPT_Wno_vk_ignored_features, OPT_INVALID, false) ||
|
||||
Args.hasFlag(OPT_Wno_vk_emulated_features, OPT_INVALID, false) ||
|
||||
|
|
|
@ -740,9 +740,9 @@ void SpirvEmitter::HandleTranslationUnit(ASTContext &context) {
|
|||
|
||||
// In order to flatten composite resources, we must also unroll loops.
|
||||
// Therefore we should run legalization before optimization.
|
||||
needsLegalization = needsLegalization ||
|
||||
declIdMapper.requiresLegalization() ||
|
||||
spirvOptions.flattenResourceArrays ||
|
||||
needsLegalization =
|
||||
needsLegalization || declIdMapper.requiresLegalization() ||
|
||||
spirvOptions.flattenResourceArrays || spirvOptions.reduceLoadSize ||
|
||||
declIdMapper.requiresFlatteningCompositeResources() ||
|
||||
!dsetbindingsToCombineImageSampler.empty();
|
||||
|
||||
|
@ -12466,6 +12466,13 @@ bool SpirvEmitter::spirvToolsLegalize(std::vector<uint32_t> *mod,
|
|||
// types.
|
||||
optimizer.RegisterPass(spvtools::CreateAggressiveDCEPass());
|
||||
}
|
||||
if (spirvOptions.reduceLoadSize) {
|
||||
// The threshold must be bigger than 1.0 to reduce all possible loads.
|
||||
optimizer.RegisterPass(spvtools::CreateReduceLoadSizePass(1.1));
|
||||
// ADCE should be run after reduce-load-size pass in order to remove
|
||||
// dead instructions.
|
||||
optimizer.RegisterPass(spvtools::CreateAggressiveDCEPass());
|
||||
}
|
||||
optimizer.RegisterPass(spvtools::CreateReplaceInvalidOpcodePass());
|
||||
optimizer.RegisterPass(spvtools::CreateCompactIdsPass());
|
||||
|
||||
|
|
|
@ -0,0 +1,32 @@
|
|||
// Run: %dxc -T cs_6_0 -E main -fspv-reduce-load-size -O0
|
||||
|
||||
struct S {
|
||||
uint f;
|
||||
};
|
||||
|
||||
cbuffer gBuffer { uint a[6]; };
|
||||
|
||||
RWStructuredBuffer<S> gRWSBuffer;
|
||||
|
||||
// CHECK-NOT: OpCompositeExtract
|
||||
|
||||
// CHECK: [[p0:%\w+]] = OpAccessChain %_ptr_Uniform_uint {{%\w+}} %uint_0
|
||||
// CHECK: OpLoad %uint [[p0]]
|
||||
// CHECK: [[p1:%\w+]] = OpAccessChain %_ptr_Uniform_uint {{%\w+}} %uint_1
|
||||
// CHECK: OpLoad %uint [[p1]]
|
||||
// CHECK: [[p2:%\w+]] = OpAccessChain %_ptr_Uniform_uint {{%\w+}} %uint_2
|
||||
// CHECK: OpLoad %uint [[p2]]
|
||||
// CHECK: [[p3:%\w+]] = OpAccessChain %_ptr_Uniform_uint {{%\w+}} %uint_3
|
||||
// CHECK: OpLoad %uint [[p3]]
|
||||
// CHECK: [[p4:%\w+]] = OpAccessChain %_ptr_Uniform_uint {{%\w+}} %uint_4
|
||||
// CHECK: OpLoad %uint [[p4]]
|
||||
// CHECK: [[p5:%\w+]] = OpAccessChain %_ptr_Uniform_uint {{%\w+}} %uint_5
|
||||
// CHECK: OpLoad %uint [[p5]]
|
||||
uint foo(uint p[6]) {
|
||||
return p[0] + p[1] + p[2] + p[3] + p[4] + p[5];
|
||||
}
|
||||
|
||||
[numthreads(1,1,1)]
|
||||
void main() {
|
||||
gRWSBuffer[0].f = foo(a);
|
||||
}
|
|
@ -2656,6 +2656,8 @@ TEST_F(FileTest, MeshShadingNVAmplificationError4) {
|
|||
runFileTest("meshshading.nv.error3.amplification.hlsl", Expect::Failure);
|
||||
}
|
||||
|
||||
// Test the -fspv-reduce-load-size option
TEST_F(FileTest, ReduceLoadSize) {
  runFileTest("reduce.load.size.hlsl");
}
|
||||
|
||||
// Test OpEntryPoint in the Vulkan1.2 target environment
|
||||
TEST_F(FileTest, Vk1p2EntryPoint) {
|
||||
runFileTest("vk.1p2.entry-point.hlsl");
|
||||
|
|
Загрузка…
Ссылка в новой задаче