Handle control-dependent temporaries.
Derivatives, subgroup and implicit-LOD instructions all need to happen in the block in which they were created.
This commit is contained in:
Родитель
012377f811
Коммит
938c7debed
|
@ -14,8 +14,8 @@ env:
|
|||
|
||||
before_script:
|
||||
- if [[ "$TRAVIS_OS_NAME" == "osx" ]]; then brew upgrade python3; fi
|
||||
- ./checkout_glslang_spirv_tools.sh
|
||||
|
||||
script:
|
||||
- ./checkout_glslang_spirv_tools.sh
|
||||
- make -j2
|
||||
- ./test_shaders.sh
|
||||
|
|
|
@ -0,0 +1,54 @@
|
|||
Texture2D<float4> uSampler : register(t0);
|
||||
SamplerState _uSampler_sampler : register(s0);
|
||||
|
||||
static float4 FragColor;
|
||||
static float4 vInput;
|
||||
|
||||
struct SPIRV_Cross_Input
|
||||
{
|
||||
float4 vInput : TEXCOORD0;
|
||||
};
|
||||
|
||||
struct SPIRV_Cross_Output
|
||||
{
|
||||
float4 FragColor : SV_Target0;
|
||||
};
|
||||
|
||||
void frag_main()
|
||||
{
|
||||
FragColor = vInput;
|
||||
float4 _23 = uSampler.Sample(_uSampler_sampler, vInput.xy);
|
||||
float4 _26 = ddx(vInput);
|
||||
float4 _29 = ddy(vInput);
|
||||
float4 _32 = fwidth(vInput);
|
||||
float4 _35 = ddx_coarse(vInput);
|
||||
float4 _38 = ddy_coarse(vInput);
|
||||
float4 _41 = fwidth(vInput);
|
||||
float4 _44 = ddx_fine(vInput);
|
||||
float4 _47 = ddy_fine(vInput);
|
||||
float4 _50 = fwidth(vInput);
|
||||
float _56_tmp = uSampler.CalculateLevelOfDetail(_uSampler_sampler, vInput.zw);
|
||||
if (vInput.y > 10.0f)
|
||||
{
|
||||
FragColor += _23;
|
||||
FragColor += _26;
|
||||
FragColor += _29;
|
||||
FragColor += _32;
|
||||
FragColor += _35;
|
||||
FragColor += _38;
|
||||
FragColor += _41;
|
||||
FragColor += _44;
|
||||
FragColor += _47;
|
||||
FragColor += _50;
|
||||
FragColor += float2(_56_tmp, _56_tmp).xyxy;
|
||||
}
|
||||
}
|
||||
|
||||
SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
|
||||
{
|
||||
vInput = stage_input.vInput;
|
||||
frag_main();
|
||||
SPIRV_Cross_Output stage_output;
|
||||
stage_output.FragColor = FragColor;
|
||||
return stage_output;
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct main0_in
|
||||
{
|
||||
float4 vInput [[user(locn0)]];
|
||||
};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 FragColor [[color(0)]];
|
||||
};
|
||||
|
||||
fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uSampler [[texture(0)]], sampler uSamplerSmplr [[sampler(0)]])
|
||||
{
|
||||
main0_out out = {};
|
||||
out.FragColor = in.vInput;
|
||||
float4 _23 = uSampler.sample(uSamplerSmplr, in.vInput.xy);
|
||||
float4 _26 = dfdx(in.vInput);
|
||||
float4 _29 = dfdy(in.vInput);
|
||||
float4 _32 = fwidth(in.vInput);
|
||||
float4 _35 = dfdx(in.vInput);
|
||||
float4 _38 = dfdy(in.vInput);
|
||||
float4 _41 = fwidth(in.vInput);
|
||||
float4 _44 = dfdx(in.vInput);
|
||||
float4 _47 = dfdy(in.vInput);
|
||||
float4 _50 = fwidth(in.vInput);
|
||||
if (in.vInput.y > 10.0)
|
||||
{
|
||||
out.FragColor += _23;
|
||||
out.FragColor += _26;
|
||||
out.FragColor += _29;
|
||||
out.FragColor += _32;
|
||||
out.FragColor += _35;
|
||||
out.FragColor += _38;
|
||||
out.FragColor += _41;
|
||||
out.FragColor += _44;
|
||||
out.FragColor += _47;
|
||||
out.FragColor += _50;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
|
@ -18,9 +18,11 @@ void main()
|
|||
{
|
||||
float _25 = _12.inputDataArray[gl_LocalInvocationID.x];
|
||||
bool _31 = _25 > 0.0;
|
||||
uvec4 _37 = uvec4(unpackUint2x32(ballotARB(_31)), 0u, 0u);
|
||||
uint _44 = mbcntAMD(packUint2x32(uvec2(_37.xy)));
|
||||
if (_31)
|
||||
{
|
||||
_74.inputDataArray[mbcntAMD(packUint2x32(uvec2(unpackUint2x32(ballotARB(_31)).xy)))] = _25;
|
||||
_74.inputDataArray[_44] = _25;
|
||||
}
|
||||
}
|
||||
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
#version 450
|
||||
|
||||
layout(binding = 0) uniform sampler2D uSampler;
|
||||
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
layout(location = 0) in vec4 vInput;
|
||||
|
||||
void main()
|
||||
{
|
||||
FragColor = vInput;
|
||||
vec4 _23 = texture(uSampler, vInput.xy);
|
||||
vec4 _26 = dFdx(vInput);
|
||||
vec4 _29 = dFdy(vInput);
|
||||
vec4 _32 = fwidth(vInput);
|
||||
vec4 _35 = dFdxCoarse(vInput);
|
||||
vec4 _38 = dFdyCoarse(vInput);
|
||||
vec4 _41 = fwidthCoarse(vInput);
|
||||
vec4 _44 = dFdxFine(vInput);
|
||||
vec4 _47 = dFdyFine(vInput);
|
||||
vec4 _50 = fwidthFine(vInput);
|
||||
vec2 _56 = textureQueryLod(uSampler, vInput.zw);
|
||||
if (vInput.y > 10.0)
|
||||
{
|
||||
FragColor += _23;
|
||||
FragColor += _26;
|
||||
FragColor += _29;
|
||||
FragColor += _32;
|
||||
FragColor += _35;
|
||||
FragColor += _38;
|
||||
FragColor += _41;
|
||||
FragColor += _44;
|
||||
FragColor += _47;
|
||||
FragColor += _50;
|
||||
FragColor += _56.xyxy;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,55 @@
|
|||
Texture2D<float4> uSampler : register(t0);
|
||||
SamplerState _uSampler_sampler : register(s0);
|
||||
|
||||
static float4 FragColor;
|
||||
static float4 vInput;
|
||||
|
||||
struct SPIRV_Cross_Input
|
||||
{
|
||||
float4 vInput : TEXCOORD0;
|
||||
};
|
||||
|
||||
struct SPIRV_Cross_Output
|
||||
{
|
||||
float4 FragColor : SV_Target0;
|
||||
};
|
||||
|
||||
void frag_main()
|
||||
{
|
||||
FragColor = vInput;
|
||||
float4 t = uSampler.Sample(_uSampler_sampler, vInput.xy);
|
||||
float4 d0 = ddx(vInput);
|
||||
float4 d1 = ddy(vInput);
|
||||
float4 d2 = fwidth(vInput);
|
||||
float4 d3 = ddx_coarse(vInput);
|
||||
float4 d4 = ddy_coarse(vInput);
|
||||
float4 d5 = fwidth(vInput);
|
||||
float4 d6 = ddx_fine(vInput);
|
||||
float4 d7 = ddy_fine(vInput);
|
||||
float4 d8 = fwidth(vInput);
|
||||
float _56_tmp = uSampler.CalculateLevelOfDetail(_uSampler_sampler, vInput.zw);
|
||||
float2 lod = float2(_56_tmp, _56_tmp);
|
||||
if (vInput.y > 10.0f)
|
||||
{
|
||||
FragColor += t;
|
||||
FragColor += d0;
|
||||
FragColor += d1;
|
||||
FragColor += d2;
|
||||
FragColor += d3;
|
||||
FragColor += d4;
|
||||
FragColor += d5;
|
||||
FragColor += d6;
|
||||
FragColor += d7;
|
||||
FragColor += d8;
|
||||
FragColor += lod.xyxy;
|
||||
}
|
||||
}
|
||||
|
||||
SPIRV_Cross_Output main(SPIRV_Cross_Input stage_input)
|
||||
{
|
||||
vInput = stage_input.vInput;
|
||||
frag_main();
|
||||
SPIRV_Cross_Output stage_output;
|
||||
stage_output.FragColor = FragColor;
|
||||
return stage_output;
|
||||
}
|
|
@ -0,0 +1,45 @@
|
|||
#include <metal_stdlib>
|
||||
#include <simd/simd.h>
|
||||
|
||||
using namespace metal;
|
||||
|
||||
struct main0_in
|
||||
{
|
||||
float4 vInput [[user(locn0)]];
|
||||
};
|
||||
|
||||
struct main0_out
|
||||
{
|
||||
float4 FragColor [[color(0)]];
|
||||
};
|
||||
|
||||
fragment main0_out main0(main0_in in [[stage_in]], texture2d<float> uSampler [[texture(0)]], sampler uSamplerSmplr [[sampler(0)]])
|
||||
{
|
||||
main0_out out = {};
|
||||
out.FragColor = in.vInput;
|
||||
float4 t = uSampler.sample(uSamplerSmplr, in.vInput.xy);
|
||||
float4 d0 = dfdx(in.vInput);
|
||||
float4 d1 = dfdy(in.vInput);
|
||||
float4 d2 = fwidth(in.vInput);
|
||||
float4 d3 = dfdx(in.vInput);
|
||||
float4 d4 = dfdy(in.vInput);
|
||||
float4 d5 = fwidth(in.vInput);
|
||||
float4 d6 = dfdx(in.vInput);
|
||||
float4 d7 = dfdy(in.vInput);
|
||||
float4 d8 = fwidth(in.vInput);
|
||||
if (in.vInput.y > 10.0)
|
||||
{
|
||||
out.FragColor += t;
|
||||
out.FragColor += d0;
|
||||
out.FragColor += d1;
|
||||
out.FragColor += d2;
|
||||
out.FragColor += d3;
|
||||
out.FragColor += d4;
|
||||
out.FragColor += d5;
|
||||
out.FragColor += d6;
|
||||
out.FragColor += d7;
|
||||
out.FragColor += d8;
|
||||
}
|
||||
return out;
|
||||
}
|
||||
|
|
@ -18,7 +18,7 @@ void main()
|
|||
{
|
||||
float thisLaneData = _12.inputDataArray[gl_LocalInvocationID.x];
|
||||
bool laneActive = thisLaneData > 0.0;
|
||||
uint thisLaneOutputSlot = mbcntAMD(packUint2x32(uvec2(unpackUint2x32(ballotARB(laneActive)).xy)));
|
||||
uint thisLaneOutputSlot = mbcntAMD(packUint2x32(uvec2(uvec4(unpackUint2x32(ballotARB(laneActive)), 0u, 0u).xy)));
|
||||
int firstInvocation = readFirstInvocationARB(1);
|
||||
int invocation = readInvocationARB(1, 0u);
|
||||
vec3 swizzleInvocations = swizzleInvocationsAMD(vec3(0.0, 2.0, 1.0), uvec4(3u));
|
||||
|
|
|
@ -0,0 +1,37 @@
|
|||
#version 450
|
||||
|
||||
layout(binding = 0) uniform sampler2D uSampler;
|
||||
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
layout(location = 0) in vec4 vInput;
|
||||
|
||||
void main()
|
||||
{
|
||||
FragColor = vInput;
|
||||
vec4 t = texture(uSampler, vInput.xy);
|
||||
vec4 d0 = dFdx(vInput);
|
||||
vec4 d1 = dFdy(vInput);
|
||||
vec4 d2 = fwidth(vInput);
|
||||
vec4 d3 = dFdxCoarse(vInput);
|
||||
vec4 d4 = dFdyCoarse(vInput);
|
||||
vec4 d5 = fwidthCoarse(vInput);
|
||||
vec4 d6 = dFdxFine(vInput);
|
||||
vec4 d7 = dFdyFine(vInput);
|
||||
vec4 d8 = fwidthFine(vInput);
|
||||
vec2 lod = textureQueryLod(uSampler, vInput.zw);
|
||||
if (vInput.y > 10.0)
|
||||
{
|
||||
FragColor += t;
|
||||
FragColor += d0;
|
||||
FragColor += d1;
|
||||
FragColor += d2;
|
||||
FragColor += d3;
|
||||
FragColor += d4;
|
||||
FragColor += d5;
|
||||
FragColor += d6;
|
||||
FragColor += d7;
|
||||
FragColor += d8;
|
||||
FragColor += lod.xyxy;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
#version 450
|
||||
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
layout(binding = 0) uniform sampler2D uSampler;
|
||||
layout(location = 0) in vec4 vInput;
|
||||
|
||||
void main()
|
||||
{
|
||||
FragColor = vInput;
|
||||
vec4 t = texture(uSampler, vInput.xy);
|
||||
vec4 d0 = dFdx(vInput);
|
||||
vec4 d1 = dFdy(vInput);
|
||||
vec4 d2 = fwidth(vInput);
|
||||
vec4 d3 = dFdxCoarse(vInput);
|
||||
vec4 d4 = dFdyCoarse(vInput);
|
||||
vec4 d5 = fwidthCoarse(vInput);
|
||||
vec4 d6 = dFdxFine(vInput);
|
||||
vec4 d7 = dFdyFine(vInput);
|
||||
vec4 d8 = fwidthFine(vInput);
|
||||
vec2 lod = textureQueryLod(uSampler, vInput.zw);
|
||||
if (vInput.y > 10.0)
|
||||
{
|
||||
FragColor += t;
|
||||
FragColor += d0;
|
||||
FragColor += d1;
|
||||
FragColor += d2;
|
||||
FragColor += d3;
|
||||
FragColor += d4;
|
||||
FragColor += d5;
|
||||
FragColor += d6;
|
||||
FragColor += d7;
|
||||
FragColor += d8;
|
||||
FragColor += lod.xyxy;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,34 @@
|
|||
#version 450
|
||||
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
layout(binding = 0) uniform sampler2D uSampler;
|
||||
layout(location = 0) in vec4 vInput;
|
||||
|
||||
void main()
|
||||
{
|
||||
FragColor = vInput;
|
||||
vec4 t = texture(uSampler, vInput.xy);
|
||||
vec4 d0 = dFdx(vInput);
|
||||
vec4 d1 = dFdy(vInput);
|
||||
vec4 d2 = fwidth(vInput);
|
||||
vec4 d3 = dFdxCoarse(vInput);
|
||||
vec4 d4 = dFdyCoarse(vInput);
|
||||
vec4 d5 = fwidthCoarse(vInput);
|
||||
vec4 d6 = dFdxFine(vInput);
|
||||
vec4 d7 = dFdyFine(vInput);
|
||||
vec4 d8 = fwidthFine(vInput);
|
||||
if (vInput.y > 10.0)
|
||||
{
|
||||
FragColor += t;
|
||||
FragColor += d0;
|
||||
FragColor += d1;
|
||||
FragColor += d2;
|
||||
FragColor += d3;
|
||||
FragColor += d4;
|
||||
FragColor += d5;
|
||||
FragColor += d6;
|
||||
FragColor += d7;
|
||||
FragColor += d8;
|
||||
}
|
||||
}
|
||||
|
|
@ -0,0 +1,36 @@
|
|||
#version 450
|
||||
|
||||
layout(location = 0) out vec4 FragColor;
|
||||
layout(binding = 0) uniform sampler2D uSampler;
|
||||
layout(location = 0) in vec4 vInput;
|
||||
|
||||
void main()
|
||||
{
|
||||
FragColor = vInput;
|
||||
vec4 t = texture(uSampler, vInput.xy);
|
||||
vec4 d0 = dFdx(vInput);
|
||||
vec4 d1 = dFdy(vInput);
|
||||
vec4 d2 = fwidth(vInput);
|
||||
vec4 d3 = dFdxCoarse(vInput);
|
||||
vec4 d4 = dFdyCoarse(vInput);
|
||||
vec4 d5 = fwidthCoarse(vInput);
|
||||
vec4 d6 = dFdxFine(vInput);
|
||||
vec4 d7 = dFdyFine(vInput);
|
||||
vec4 d8 = fwidthFine(vInput);
|
||||
vec2 lod = textureQueryLod(uSampler, vInput.zw);
|
||||
if (vInput.y > 10.0)
|
||||
{
|
||||
FragColor += t;
|
||||
FragColor += d0;
|
||||
FragColor += d1;
|
||||
FragColor += d2;
|
||||
FragColor += d3;
|
||||
FragColor += d4;
|
||||
FragColor += d5;
|
||||
FragColor += d6;
|
||||
FragColor += d7;
|
||||
FragColor += d8;
|
||||
FragColor += lod.xyxy;
|
||||
}
|
||||
}
|
||||
|
|
@ -662,6 +662,11 @@ struct SPIRBlock : IVariant
|
|||
// fail to use a classic for-loop,
|
||||
// we remove these variables, and fall back to regular variables outside the loop.
|
||||
std::vector<uint32_t> loop_variables;
|
||||
|
||||
// Some expressions are control-flow dependent, i.e. any instruction which relies on derivatives or
|
||||
// sub-group-like operations.
|
||||
// Make sure that we only use these expressions in the original block.
|
||||
std::vector<uint32_t> invalidate_expressions;
|
||||
};
|
||||
|
||||
struct SPIRFunction : IVariant
|
||||
|
|
|
@ -351,6 +351,14 @@ void Compiler::flush_all_atomic_capable_variables()
|
|||
flush_all_aliased_variables();
|
||||
}
|
||||
|
||||
void Compiler::flush_control_dependent_expressions(uint32_t block_id)
|
||||
{
|
||||
auto &block = get<SPIRBlock>(block_id);
|
||||
for (auto &expr : block.invalidate_expressions)
|
||||
invalid_expressions.insert(expr);
|
||||
block.invalidate_expressions.clear();
|
||||
}
|
||||
|
||||
void Compiler::flush_all_active_variables()
|
||||
{
|
||||
// Invalidate all temporaries we read from variables in this block since they were forwarded.
|
||||
|
|
|
@ -612,6 +612,7 @@ protected:
|
|||
// Dependency tracking for temporaries read from variables.
|
||||
void flush_dependees(SPIRVariable &var);
|
||||
void flush_all_active_variables();
|
||||
void flush_control_dependent_expressions(uint32_t block);
|
||||
void flush_all_atomic_capable_variables();
|
||||
void flush_all_aliased_variables();
|
||||
void register_global_read_dependencies(const SPIRBlock &func, uint32_t id);
|
||||
|
|
|
@ -3753,6 +3753,19 @@ void CompilerGLSL::emit_texture_op(const Instruction &i)
|
|||
emit_op(result_type, id, expr, forward);
|
||||
for (auto &inherit : inherited_expressions)
|
||||
inherit_expression_dependencies(id, inherit);
|
||||
|
||||
switch (op)
|
||||
{
|
||||
case OpImageSampleDrefImplicitLod:
|
||||
case OpImageSampleImplicitLod:
|
||||
case OpImageSampleProjImplicitLod:
|
||||
case OpImageSampleProjDrefImplicitLod:
|
||||
register_control_dependent_expression(id);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
// Returns the function name for a texture sampling function for the specified image and sampling characteristics.
|
||||
|
@ -4307,18 +4320,22 @@ void CompilerGLSL::emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t
|
|||
{
|
||||
case SwizzleInvocationsAMD:
|
||||
emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsAMD");
|
||||
register_control_dependent_expression(id);
|
||||
break;
|
||||
|
||||
case SwizzleInvocationsMaskedAMD:
|
||||
emit_binary_func_op(result_type, id, args[0], args[1], "swizzleInvocationsMaskedAMD");
|
||||
register_control_dependent_expression(id);
|
||||
break;
|
||||
|
||||
case WriteInvocationAMD:
|
||||
emit_trinary_func_op(result_type, id, args[0], args[1], args[2], "writeInvocationAMD");
|
||||
register_control_dependent_expression(id);
|
||||
break;
|
||||
|
||||
case MbcntAMD:
|
||||
emit_unary_func_op(result_type, id, args[0], "mbcntAMD");
|
||||
register_control_dependent_expression(id);
|
||||
break;
|
||||
|
||||
default:
|
||||
|
@ -4423,6 +4440,7 @@ void CompilerGLSL::emit_spv_amd_gcn_shader_op(uint32_t result_type, uint32_t id,
|
|||
{
|
||||
string expr = "timeAMD()";
|
||||
emit_op(result_type, id, expr, true);
|
||||
register_control_dependent_expression(id);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -5570,7 +5588,16 @@ bool CompilerGLSL::optimize_read_modify_write(const string &lhs, const string &r
|
|||
return true;
|
||||
}
|
||||
|
||||
void CompilerGLSL::emit_block_instructions(const SPIRBlock &block)
|
||||
void CompilerGLSL::register_control_dependent_expression(uint32_t expr)
|
||||
{
|
||||
if (forwarded_temporaries.find(expr) == end(forwarded_temporaries))
|
||||
return;
|
||||
|
||||
assert(current_emitting_block);
|
||||
current_emitting_block->invalidate_expressions.push_back(expr);
|
||||
}
|
||||
|
||||
void CompilerGLSL::emit_block_instructions(SPIRBlock &block)
|
||||
{
|
||||
current_emitting_block = &block;
|
||||
for (auto &op : block.ops)
|
||||
|
@ -6529,12 +6556,14 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
|||
UFOP(dFdx);
|
||||
if (is_legacy_es())
|
||||
require_extension("GL_OES_standard_derivatives");
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
|
||||
case OpDPdy:
|
||||
UFOP(dFdy);
|
||||
if (is_legacy_es())
|
||||
require_extension("GL_OES_standard_derivatives");
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
|
||||
case OpDPdxFine:
|
||||
|
@ -6545,6 +6574,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
|||
}
|
||||
if (options.version < 450)
|
||||
require_extension("GL_ARB_derivative_control");
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
|
||||
case OpDPdyFine:
|
||||
|
@ -6555,6 +6585,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
|||
}
|
||||
if (options.version < 450)
|
||||
require_extension("GL_ARB_derivative_control");
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
|
||||
case OpDPdxCoarse:
|
||||
|
@ -6565,6 +6596,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
|||
UFOP(dFdxCoarse);
|
||||
if (options.version < 450)
|
||||
require_extension("GL_ARB_derivative_control");
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
|
||||
case OpDPdyCoarse:
|
||||
|
@ -6575,12 +6607,14 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
|||
}
|
||||
if (options.version < 450)
|
||||
require_extension("GL_ARB_derivative_control");
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
|
||||
case OpFwidth:
|
||||
UFOP(fwidth);
|
||||
if (is_legacy_es())
|
||||
require_extension("GL_OES_standard_derivatives");
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
|
||||
case OpFwidthCoarse:
|
||||
|
@ -6591,6 +6625,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
|||
}
|
||||
if (options.version < 450)
|
||||
require_extension("GL_ARB_derivative_control");
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
|
||||
case OpFwidthFine:
|
||||
|
@ -6601,6 +6636,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
|||
}
|
||||
if (options.version < 450)
|
||||
require_extension("GL_ARB_derivative_control");
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
|
||||
// Bitfield
|
||||
|
@ -6810,6 +6846,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
|||
SPIRV_CROSS_THROW("textureQueryLod not supported in ES profile.");
|
||||
else
|
||||
BFOP(textureQueryLod);
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -7160,7 +7197,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
|||
// We are synchronizing some memory or syncing execution,
|
||||
// so we cannot forward any loads beyond the memory barrier.
|
||||
if (semantics || opcode == OpControlBarrier)
|
||||
{
|
||||
assert(current_emitting_block);
|
||||
flush_control_dependent_expressions(current_emitting_block->self);
|
||||
flush_all_active_variables();
|
||||
}
|
||||
|
||||
if (memory == ScopeWorkgroup) // Only need to consider memory within a group
|
||||
{
|
||||
|
@ -7242,10 +7283,11 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
|||
uint32_t result_type = ops[0];
|
||||
uint32_t id = ops[1];
|
||||
string expr;
|
||||
expr = join("unpackUint2x32(ballotARB(" + to_expression(ops[2]) + "))");
|
||||
expr = join("uvec4(unpackUint2x32(ballotARB(" + to_expression(ops[2]) + ")), 0u, 0u)");
|
||||
emit_op(result_type, id, expr, true);
|
||||
|
||||
require_extension("GL_ARB_shader_ballot");
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -7256,6 +7298,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
|||
emit_unary_func_op(result_type, id, ops[2], "readFirstInvocationARB");
|
||||
|
||||
require_extension("GL_ARB_shader_ballot");
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -7266,6 +7309,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
|||
emit_binary_func_op(result_type, id, ops[2], ops[3], "readInvocationARB");
|
||||
|
||||
require_extension("GL_ARB_shader_ballot");
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -7276,6 +7320,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
|||
emit_unary_func_op(result_type, id, ops[2], "allInvocationsARB");
|
||||
|
||||
require_extension("GL_ARB_shader_group_vote");
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -7286,6 +7331,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
|||
emit_unary_func_op(result_type, id, ops[2], "anyInvocationARB");
|
||||
|
||||
require_extension("GL_ARB_shader_group_vote");
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -7296,6 +7342,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
|||
emit_unary_func_op(result_type, id, ops[2], "allInvocationsEqualARB");
|
||||
|
||||
require_extension("GL_ARB_shader_group_vote");
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -7307,6 +7354,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
|||
emit_unary_func_op(result_type, id, ops[4], "addInvocationsNonUniformAMD");
|
||||
|
||||
require_extension("GL_AMD_shader_ballot");
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -7319,6 +7367,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
|||
emit_unary_func_op(result_type, id, ops[4], "minInvocationsNonUniformAMD");
|
||||
|
||||
require_extension("GL_AMD_shader_ballot");
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -7331,6 +7380,7 @@ void CompilerGLSL::emit_instruction(const Instruction &instruction)
|
|||
emit_unary_func_op(result_type, id, ops[4], "maxInvocationsNonUniformAMD");
|
||||
|
||||
require_extension("GL_AMD_shader_ballot");
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
}
|
||||
|
||||
|
@ -8423,6 +8473,7 @@ void CompilerGLSL::branch_to_continue(uint32_t from, uint32_t to)
|
|||
void CompilerGLSL::branch(uint32_t from, uint32_t to)
|
||||
{
|
||||
flush_phi(from, to);
|
||||
flush_control_dependent_expressions(from);
|
||||
flush_all_active_variables();
|
||||
|
||||
// This is only a continue if we branch to our loop dominator.
|
||||
|
|
|
@ -199,11 +199,10 @@ protected:
|
|||
// Virtualize methods which need to be overridden by subclass targets like C++ and such.
|
||||
virtual void emit_function_prototype(SPIRFunction &func, const Bitset &return_flags);
|
||||
|
||||
// Kinda ugly way to let opcodes peek at their neighbor instructions for trivial peephole scenarios.
|
||||
const SPIRBlock *current_emitting_block = nullptr;
|
||||
SPIRBlock *current_emitting_block = nullptr;
|
||||
|
||||
virtual void emit_instruction(const Instruction &instr);
|
||||
void emit_block_instructions(const SPIRBlock &block);
|
||||
void emit_block_instructions(SPIRBlock &block);
|
||||
virtual void emit_glsl_op(uint32_t result_type, uint32_t result_id, uint32_t op, const uint32_t *args,
|
||||
uint32_t count);
|
||||
virtual void emit_spv_amd_shader_ballot_op(uint32_t result_type, uint32_t result_id, uint32_t op,
|
||||
|
@ -517,6 +516,7 @@ protected:
|
|||
bool args_will_forward(uint32_t id, const uint32_t *args, uint32_t num_args, bool pure);
|
||||
void register_call_out_argument(uint32_t id);
|
||||
void register_impure_function_call();
|
||||
void register_control_dependent_expression(uint32_t expr);
|
||||
|
||||
// GL_EXT_shader_pixel_local_storage support.
|
||||
std::vector<PlsRemap> pls_inputs;
|
||||
|
|
|
@ -2692,6 +2692,20 @@ void CompilerHLSL::emit_texture_op(const Instruction &i)
|
|||
|
||||
for (auto &inherit : inherited_expressions)
|
||||
inherit_expression_dependencies(id, inherit);
|
||||
|
||||
switch (op)
|
||||
{
|
||||
case OpImageSampleDrefImplicitLod:
|
||||
case OpImageSampleImplicitLod:
|
||||
case OpImageSampleProjImplicitLod:
|
||||
case OpImageSampleProjDrefImplicitLod:
|
||||
case OpImageQueryLod:
|
||||
register_control_dependent_expression(id);
|
||||
break;
|
||||
|
||||
default:
|
||||
break;
|
||||
}
|
||||
}
|
||||
|
||||
string CompilerHLSL::to_resource_binding(const SPIRVariable &var)
|
||||
|
@ -3599,32 +3613,39 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
|
|||
|
||||
case OpDPdx:
|
||||
UFOP(ddx);
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
|
||||
case OpDPdy:
|
||||
UFOP(ddy);
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
|
||||
case OpDPdxFine:
|
||||
UFOP(ddx_fine);
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
|
||||
case OpDPdyFine:
|
||||
UFOP(ddy_fine);
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
|
||||
case OpDPdxCoarse:
|
||||
UFOP(ddx_coarse);
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
|
||||
case OpDPdyCoarse:
|
||||
UFOP(ddy_coarse);
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
|
||||
case OpFwidth:
|
||||
case OpFwidthCoarse:
|
||||
case OpFwidthFine:
|
||||
UFOP(fwidth);
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
|
||||
case OpLogicalNot:
|
||||
|
@ -4031,7 +4052,11 @@ void CompilerHLSL::emit_instruction(const Instruction &instruction)
|
|||
// We are synchronizing some memory or syncing execution,
|
||||
// so we cannot forward any loads beyond the memory barrier.
|
||||
if (semantics || opcode == OpControlBarrier)
|
||||
{
|
||||
assert(current_emitting_block);
|
||||
flush_control_dependent_expressions(current_emitting_block->self);
|
||||
flush_all_active_variables();
|
||||
}
|
||||
|
||||
if (opcode == OpControlBarrier)
|
||||
{
|
||||
|
|
|
@ -1464,18 +1464,21 @@ void CompilerMSL::emit_instruction(const Instruction &instruction)
|
|||
case OpDPdxFine:
|
||||
case OpDPdxCoarse:
|
||||
UFOP(dfdx);
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
|
||||
case OpDPdy:
|
||||
case OpDPdyFine:
|
||||
case OpDPdyCoarse:
|
||||
UFOP(dfdy);
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
|
||||
case OpFwidth:
|
||||
case OpFwidthCoarse:
|
||||
case OpFwidthFine:
|
||||
UFOP(fwidth);
|
||||
register_control_dependent_expression(ops[1]);
|
||||
break;
|
||||
|
||||
// Bitfield
|
||||
|
@ -1874,6 +1877,10 @@ void CompilerMSL::emit_barrier(uint32_t id_exe_scope, uint32_t id_mem_scope, uin
|
|||
bar_stmt += ");";
|
||||
|
||||
statement(bar_stmt);
|
||||
|
||||
assert(current_emitting_block);
|
||||
flush_control_dependent_expressions(current_emitting_block->self);
|
||||
flush_all_active_variables();
|
||||
}
|
||||
|
||||
// Since MSL does not allow structs to be nested within the stage_in struct, the original input
|
||||
|
|
Загрузка…
Ссылка в новой задаче