From 8841f15283b48482aa5ad770a262920393abf007 Mon Sep 17 00:00:00 2001 From: Hans-Kristian Arntzen Date: Thu, 1 Feb 2018 09:59:17 +0100 Subject: [PATCH] Fix case where loop header branches into continue block. --- .../asm/frag/loop-header-to-continue.asm.frag | 39 ++++++ .../asm/frag/loop-header-to-continue.asm.frag | 39 ++++++ .../asm/frag/loop-header-to-continue.asm.frag | 132 ++++++++++++++++++ spirv_glsl.cpp | 26 +++- 4 files changed, 232 insertions(+), 4 deletions(-) create mode 100644 reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag create mode 100644 reference/shaders/asm/frag/loop-header-to-continue.asm.frag create mode 100644 shaders/asm/frag/loop-header-to-continue.asm.frag diff --git a/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag b/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag new file mode 100644 index 00000000..f3a6b4ec --- /dev/null +++ b/reference/opt/shaders/asm/frag/loop-header-to-continue.asm.frag @@ -0,0 +1,39 @@ +#version 450 + +struct Params +{ + vec4 TextureSize; + vec4 Params1; + vec4 Params2; + vec4 Params3; + vec4 Params4; + vec4 Bloom; +}; + +layout(binding = 1, std140) uniform CB1 +{ + Params CB1; +} _8; + +uniform sampler2D SPIRV_Cross_CombinedmapTexturemapSampler; + +layout(location = 0) in vec2 IN_uv; +layout(location = 0) out vec4 _entryPointOutput; + +void main() +{ + vec4 _49 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv); + float _50 = _49.y; + float _55; + float _58; + _55 = 0.0; + _58 = 0.0; + float _64; + vec4 _72; + float _78; + for (int _60 = -3; _60 <= 3; _64 = float(_60), _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64)), _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375)), _55 += (_72.x * _78), _58 += _78, _60++) + { + } + _entryPointOutput = 
vec4(_55 / _58, _50, 0.0, 1.0); +} + diff --git a/reference/shaders/asm/frag/loop-header-to-continue.asm.frag b/reference/shaders/asm/frag/loop-header-to-continue.asm.frag new file mode 100644 index 00000000..f3a6b4ec --- /dev/null +++ b/reference/shaders/asm/frag/loop-header-to-continue.asm.frag @@ -0,0 +1,39 @@ +#version 450 + +struct Params +{ + vec4 TextureSize; + vec4 Params1; + vec4 Params2; + vec4 Params3; + vec4 Params4; + vec4 Bloom; +}; + +layout(binding = 1, std140) uniform CB1 +{ + Params CB1; +} _8; + +uniform sampler2D SPIRV_Cross_CombinedmapTexturemapSampler; + +layout(location = 0) in vec2 IN_uv; +layout(location = 0) out vec4 _entryPointOutput; + +void main() +{ + vec4 _49 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv); + float _50 = _49.y; + float _55; + float _58; + _55 = 0.0; + _58 = 0.0; + float _64; + vec4 _72; + float _78; + for (int _60 = -3; _60 <= 3; _64 = float(_60), _72 = texture(SPIRV_Cross_CombinedmapTexturemapSampler, IN_uv + (vec2(0.0, _8.CB1.TextureSize.w) * _64)), _78 = exp(((-_64) * _64) * 0.2222220003604888916015625) * float(abs(_72.y - _50) < clamp((_50 * 80.0) * 0.0007999999797903001308441162109375, 7.999999797903001308441162109375e-05, 0.008000000379979610443115234375)), _55 += (_72.x * _78), _58 += _78, _60++) + { + } + _entryPointOutput = vec4(_55 / _58, _50, 0.0, 1.0); +} + diff --git a/shaders/asm/frag/loop-header-to-continue.asm.frag b/shaders/asm/frag/loop-header-to-continue.asm.frag new file mode 100644 index 00000000..54807d91 --- /dev/null +++ b/shaders/asm/frag/loop-header-to-continue.asm.frag @@ -0,0 +1,132 @@ +; SPIR-V +; Version: 1.0 +; Generator: Khronos Glslang Reference Front End; 3 +; Bound: 279 +; Schema: 0 + OpCapability Shader + %1 = OpExtInstImport "GLSL.std.450" + OpMemoryModel Logical GLSL450 + OpEntryPoint Fragment %main "main" %IN_p %IN_uv %_entryPointOutput + OpExecutionMode %main OriginUpperLeft + OpSource HLSL 500 + OpName %main "main" + OpName %Params "Params" + OpMemberName %Params 0 
"TextureSize" + OpMemberName %Params 1 "Params1" + OpMemberName %Params 2 "Params2" + OpMemberName %Params 3 "Params3" + OpMemberName %Params 4 "Params4" + OpMemberName %Params 5 "Bloom" + OpName %CB1 "CB1" + OpMemberName %CB1 0 "CB1" + OpName %_ "" + OpName %mapSampler "mapSampler" + OpName %mapTexture "mapTexture" + OpName %IN_p "IN.p" + OpName %IN_uv "IN.uv" + OpName %_entryPointOutput "@entryPointOutput" + OpMemberDecorate %Params 0 Offset 0 + OpMemberDecorate %Params 1 Offset 16 + OpMemberDecorate %Params 2 Offset 32 + OpMemberDecorate %Params 3 Offset 48 + OpMemberDecorate %Params 4 Offset 64 + OpMemberDecorate %Params 5 Offset 80 + OpMemberDecorate %CB1 0 Offset 0 + OpDecorate %CB1 Block + OpDecorate %_ DescriptorSet 0 + OpDecorate %_ Binding 1 + OpDecorate %mapSampler DescriptorSet 1 + OpDecorate %mapSampler Binding 2 + OpDecorate %mapTexture DescriptorSet 1 + OpDecorate %mapTexture Binding 2 + OpDecorate %IN_p BuiltIn FragCoord + OpDecorate %IN_uv Location 0 + OpDecorate %_entryPointOutput Location 0 + %void = OpTypeVoid + %3 = OpTypeFunction %void + %float = OpTypeFloat 32 + %v2float = OpTypeVector %float 2 + %9 = OpTypeSampler + %11 = OpTypeImage %float 2D 0 0 0 1 Unknown + %v4float = OpTypeVector %float 4 +%float_0_222222 = OpConstant %float 0.222222 + %33 = OpTypeSampledImage %11 + %uint = OpTypeInt 32 0 + %float_80 = OpConstant %float 80 +%float_0_0008 = OpConstant %float 0.0008 +%float_8en05 = OpConstant %float 8e-05 +%float_0_008 = OpConstant %float 0.008 + %float_0 = OpConstant %float 0 + %int = OpTypeInt 32 1 + %int_n3 = OpConstant %int -3 + %int_3 = OpConstant %int 3 + %bool = OpTypeBool + %float_1 = OpConstant %float 1 + %int_1 = OpConstant %int 1 + %Params = OpTypeStruct %v4float %v4float %v4float %v4float %v4float %v4float + %CB1 = OpTypeStruct %Params +%_ptr_Uniform_CB1 = OpTypePointer Uniform %CB1 + %_ = OpVariable %_ptr_Uniform_CB1 Uniform + %int_0 = OpConstant %int 0 + %uint_3 = OpConstant %uint 3 +%_ptr_Uniform_float = OpTypePointer 
Uniform %float +%_ptr_UniformConstant_9 = OpTypePointer UniformConstant %9 + %mapSampler = OpVariable %_ptr_UniformConstant_9 UniformConstant +%_ptr_UniformConstant_11 = OpTypePointer UniformConstant %11 + %mapTexture = OpVariable %_ptr_UniformConstant_11 UniformConstant +%_ptr_Input_v4float = OpTypePointer Input %v4float + %IN_p = OpVariable %_ptr_Input_v4float Input +%_ptr_Input_v2float = OpTypePointer Input %v2float + %IN_uv = OpVariable %_ptr_Input_v2float Input +%_ptr_Output_v4float = OpTypePointer Output %v4float +%_entryPointOutput = OpVariable %_ptr_Output_v4float Output + %main = OpFunction %void None %3 + %5 = OpLabel + %158 = OpLoad %v2float %IN_uv + %178 = OpAccessChain %_ptr_Uniform_float %_ %int_0 %int_0 %uint_3 + %179 = OpLoad %float %178 + %180 = OpCompositeConstruct %v2float %float_0 %179 + %184 = OpLoad %9 %mapSampler + %185 = OpLoad %11 %mapTexture + %204 = OpSampledImage %33 %185 %184 + %206 = OpImageSampleImplicitLod %v4float %204 %158 + %207 = OpCompositeExtract %float %206 1 + %209 = OpFMul %float %207 %float_80 + %210 = OpFMul %float %209 %float_0_0008 + %211 = OpExtInst %float %1 FClamp %210 %float_8en05 %float_0_008 + OpBranch %212 + %212 = OpLabel + %276 = OpPhi %float %float_0 %5 %252 %218 + %277 = OpPhi %float %float_0 %5 %255 %218 + %278 = OpPhi %int %int_n3 %5 %257 %218 + %217 = OpSLessThanEqual %bool %278 %int_3 + OpLoopMerge %213 %218 None + OpBranchConditional %217 %218 %213 + %218 = OpLabel + %220 = OpConvertSToF %float %278 + %222 = OpFNegate %float %220 + %224 = OpFMul %float %222 %220 + %226 = OpFMul %float %224 %float_0_222222 + %227 = OpExtInst %float %1 Exp %226 + %230 = OpSampledImage %33 %185 %184 + %234 = OpVectorTimesScalar %v2float %180 %220 + %235 = OpFAdd %v2float %158 %234 + %236 = OpImageSampleImplicitLod %v4float %230 %235 + %273 = OpCompositeExtract %float %236 1 + %241 = OpFSub %float %273 %207 + %242 = OpExtInst %float %1 FAbs %241 + %244 = OpFOrdLessThan %bool %242 %211 + %245 = OpSelect %float %244 %float_1 
%float_0 + %246 = OpFMul %float %227 %245 + %275 = OpCompositeExtract %float %236 0 + %250 = OpFMul %float %275 %246 + %252 = OpFAdd %float %276 %250 + %255 = OpFAdd %float %277 %246 + %257 = OpIAdd %int %278 %int_1 + OpBranch %212 + %213 = OpLabel + %260 = OpFDiv %float %276 %277 + %190 = OpCompositeConstruct %v4float %260 %207 %float_0 %float_1 + OpStore %_entryPointOutput %190 + OpReturn + OpFunctionEnd diff --git a/spirv_glsl.cpp b/spirv_glsl.cpp index 2aacf9f4..f4773884 100644 --- a/spirv_glsl.cpp +++ b/spirv_glsl.cpp @@ -7881,11 +7881,29 @@ void CompilerGLSL::branch(uint32_t from, uint32_t to) else { auto &from_block = get<SPIRBlock>(from); - auto &dominator = get<SPIRBlock>(from_block.loop_dominator); + bool outside_control_flow = false; + uint32_t loop_dominator = 0; - // For non-complex continue blocks, we implicitly branch to the continue block - // by having the continue block be part of the loop header in for (; ; continue-block). - bool outside_control_flow = block_is_outside_flow_control_from_block(dominator, from_block); + // FIXME: Refactor this to not use the old loop_dominator tracking. + if (from_block.merge_block) + { + // If we are a loop header, we don't set the loop dominator, + // so just use "self" here. + loop_dominator = from; + } + else if (from_block.loop_dominator != -1u) + { + loop_dominator = from_block.loop_dominator; + } + + if (loop_dominator != 0) + { + auto &dominator = get<SPIRBlock>(loop_dominator); + + // For non-complex continue blocks, we implicitly branch to the continue block + // by having the continue block be part of the loop header in for (; ; continue-block). + outside_control_flow = block_is_outside_flow_control_from_block(dominator, from_block); + } // Some simplification for for-loops. We always end up with a useless continue; // statement since we branch to a loop block.