Fix WriteSamplerFeedback and Gather as gradient (or not) (#2845)

* Gather should not be considered a gradient operation.
* Mark WriteSamplerFeedback[Bias] as gradient
* Remove SampleCmpLevelZero from DxilConvergent
* Add WriteSamplerFeedback[Bias] to DxilConvergent
This commit is contained in:
Tex Riddell 2020-04-22 18:37:37 -07:00 коммит произвёл GitHub
Родитель ed3d0ee0bf
Коммит d3d9b19ec8
Не найден ключ, соответствующий данной подписи
Идентификатор ключа GPG: 4AEE18F83AFDEB23
7 изменённых файлов: 256 добавлений и 158 удалений

Просмотреть файл

@ -586,10 +586,10 @@ bool OP::IsDxilOpGradient(OpCode C) {
unsigned op = (unsigned)C;
/* <py::lines('OPCODE-GRADIENT')>hctdb_instrhelp.get_instrs_pred("op", "is_gradient")</py>*/
// OPCODE-GRADIENT:BEGIN
// Instructions: Sample=60, SampleBias=61, SampleCmp=64, TextureGather=73,
// TextureGatherCmp=74, CalculateLOD=81, DerivCoarseX=83, DerivCoarseY=84,
// DerivFineX=85, DerivFineY=86
return (60 <= op && op <= 61) || op == 64 || (73 <= op && op <= 74) || op == 81 || (83 <= op && op <= 86);
// Instructions: Sample=60, SampleBias=61, SampleCmp=64, CalculateLOD=81,
// DerivCoarseX=83, DerivCoarseY=84, DerivFineX=85, DerivFineY=86,
// WriteSamplerFeedback=174, WriteSamplerFeedbackBias=175
return (60 <= op && op <= 61) || op == 64 || op == 81 || (83 <= op && op <= 86) || (174 <= op && op <= 175);
// OPCODE-GRADIENT:END
}

Просмотреть файл

@ -189,21 +189,12 @@ Value *DxilConvergentMark::FindConvergentOperand(Instruction *I) {
case IntrinsicOp::MOP_Sample:
case IntrinsicOp::MOP_SampleBias:
case IntrinsicOp::MOP_SampleCmp:
case IntrinsicOp::MOP_SampleCmpLevelZero:
case IntrinsicOp::MOP_CalculateLevelOfDetail:
case IntrinsicOp::MOP_CalculateLevelOfDetailUnclamped:
return CI->getArgOperand(HLOperandIndex::kSampleCoordArgIndex);
case IntrinsicOp::MOP_Gather:
case IntrinsicOp::MOP_GatherAlpha:
case IntrinsicOp::MOP_GatherBlue:
case IntrinsicOp::MOP_GatherCmp:
case IntrinsicOp::MOP_GatherCmpAlpha:
case IntrinsicOp::MOP_GatherCmpBlue:
case IntrinsicOp::MOP_GatherCmpGreen:
case IntrinsicOp::MOP_GatherCmpRed:
case IntrinsicOp::MOP_GatherGreen:
case IntrinsicOp::MOP_GatherRed:
return CI->getArgOperand(HLOperandIndex::kGatherCoordArgIndex);
case IntrinsicOp::MOP_WriteSamplerFeedback:
case IntrinsicOp::MOP_WriteSamplerFeedbackBias:
return CI->getArgOperand(HLOperandIndex::kWriteSamplerFeedbackCoordArgIndex);
default:
// No other ops have convergent operands.
break;

Просмотреть файл

@ -0,0 +1,23 @@
// RUN: %dxc -E main -T ps_6_5 %s | FileCheck %s
// Pixel-shader (ps_6_5) test for sampler feedback.
// The coordinate passed to WriteSamplerFeedback is computed before a branch
// and consumed only inside it. The FileCheck expectations below require both
// multiplies to appear ahead of the conditional branch, i.e. the coordinate
// computation must not be sunk into the conditional block.
FeedbackTexture2D<SAMPLER_FEEDBACK_MIN_MIP> feedbackMinMip;
Texture2D<float> texture2D;
SamplerState samp;
// CHECK: define void @main()
float main(float2 coord0 : TEXCOORD0, float2 coord1 : TEXCOORD1) : SV_Target
{
// Ensure WriteSamplerFeedback coord is considered convergent, and fmul does not sink
// CHECK: fmul
// CHECK: fmul
// a is the feedback coordinate; its only use is inside the branch below.
float2 a = coord0 * coord1;
// CHECK: br i1
if (coord0.x > 0.5) {
// CHECK: call void @dx.op.writeSamplerFeedback
feedbackMinMip.WriteSamplerFeedback(texture2D, samp, a, 1.0);
}
// CHECK: br label
// The returned value is a constant; only the feedback write matters here.
return 0;
}

Просмотреть файл

@ -18,64 +18,65 @@ float4 main(float4 a : A) : SV_Target
///////////////////////////////////////////////
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 73,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 0)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 0)
r += cube.Gather(samp, a.xyz);
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 73,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 0)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 0)
// CHECK: extractvalue %dx.types.ResRet.f32 %{{[^,]+}}, 4
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71,
r += cube.Gather(samp, a.xyz+0.05, status); r += CheckAccessFullyMapped(status);
// TextureCubeArray
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 73,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 0)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 0)
r += cubeArray.Gather(samp, a.xyzw);
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 73,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 0)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 0)
// CHECK: extractvalue %dx.types.ResRet.f32 %{{[^,]+}}, 4
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71,
r += cubeArray.Gather(samp, a.xyzw+0.05, status); r += CheckAccessFullyMapped(status);
a *= 1.125; // Prevent GatherRed from being optimized to equivalent Gather above
///////////////////////////////////////////////
// GatherRed
///////////////////////////////////////////////
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 73,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 0)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 0)
r += cube.GatherRed(samp, a.xyz);
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 73,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 0)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 0)
// CHECK: extractvalue %dx.types.ResRet.f32 %{{[^,]+}}, 4
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71,
r += cube.GatherRed(samp, a.xyz+0.05, status); r += CheckAccessFullyMapped(status);
// TextureCubeArray
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 73,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 0)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 0)
r += cubeArray.GatherRed(samp, a.xyzw);
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 73,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 0)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 0)
// CHECK: extractvalue %dx.types.ResRet.f32 %{{[^,]+}}, 4
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71,
r += cubeArray.GatherRed(samp, a.xyzw+0.05, status); r += CheckAccessFullyMapped(status);
@ -86,30 +87,30 @@ float4 main(float4 a : A) : SV_Target
///////////////////////////////////////////////
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 73,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 1)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 1)
r += cube.GatherGreen(samp, a.xyz);
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 73,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 1)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 1)
// CHECK: extractvalue %dx.types.ResRet.f32 %{{[^,]+}}, 4
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71,
r += cube.GatherGreen(samp, a.xyz+0.05, status); r += CheckAccessFullyMapped(status);
// TextureCubeArray
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 73,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 1)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 1)
r += cubeArray.GatherGreen(samp, a.xyzw);
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 73,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 1)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 1)
// CHECK: extractvalue %dx.types.ResRet.f32 %{{[^,]+}}, 4
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71,
r += cubeArray.GatherGreen(samp, a.xyzw+0.05, status); r += CheckAccessFullyMapped(status);
@ -120,30 +121,30 @@ float4 main(float4 a : A) : SV_Target
///////////////////////////////////////////////
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 73,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 2)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 2)
r += cube.GatherBlue(samp, a.xyz);
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 73,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 2)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 2)
// CHECK: extractvalue %dx.types.ResRet.f32 %{{[^,]+}}, 4
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71,
r += cube.GatherBlue(samp, a.xyz+0.05, status); r += CheckAccessFullyMapped(status);
// TextureCubeArray
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 73,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 2)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 2)
r += cubeArray.GatherBlue(samp, a.xyzw);
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 73,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 2)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 2)
// CHECK: extractvalue %dx.types.ResRet.f32 %{{[^,]+}}, 4
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71,
r += cubeArray.GatherBlue(samp, a.xyzw+0.05, status); r += CheckAccessFullyMapped(status);
@ -154,30 +155,30 @@ float4 main(float4 a : A) : SV_Target
///////////////////////////////////////////////
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 73,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 3)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 3)
r += cube.GatherAlpha(samp, a.xyz);
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 73,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 3)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 3)
// CHECK: extractvalue %dx.types.ResRet.f32 %{{[^,]+}}, 4
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71,
r += cube.GatherAlpha(samp, a.xyz+0.05, status); r += CheckAccessFullyMapped(status);
// TextureCubeArray
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 73,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 3)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 3)
r += cubeArray.GatherAlpha(samp, a.xyzw);
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 73,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 3)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 3)
// CHECK: extractvalue %dx.types.ResRet.f32 %{{[^,]+}}, 4
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71,
r += cubeArray.GatherAlpha(samp, a.xyzw+0.05, status); r += CheckAccessFullyMapped(status);
@ -188,72 +189,73 @@ float4 main(float4 a : A) : SV_Target
///////////////////////////////////////////////
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 74,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 0
// CHECK: , float 5.000000e-01)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 0
// CHECK-SAME: , float 5.000000e-01)
r += cube.GatherCmp(sampcmp, a.xyz, CMP);
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 74,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 0
// CHECK: , float 5.000000e-01)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 0
// CHECK-SAME: , float 5.000000e-01)
// CHECK: extractvalue %dx.types.ResRet.f32 %{{[^,]+}}, 4
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71,
r += cube.GatherCmp(sampcmp, a.xyz+0.05, CMP, status); r += CheckAccessFullyMapped(status);
// TextureCubeArray
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 74,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 0
// CHECK: , float 5.000000e-01)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 0
// CHECK-SAME: , float 5.000000e-01)
r += cubeArray.GatherCmp(sampcmp, a.xyzw, CMP);
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 74,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 0
// CHECK: , float 5.000000e-01)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 0
// CHECK-SAME: , float 5.000000e-01)
// CHECK: extractvalue %dx.types.ResRet.f32 %{{[^,]+}}, 4
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71,
r += cubeArray.GatherCmp(sampcmp, a.xyzw+0.05, CMP, status); r += CheckAccessFullyMapped(status);
a *= 1.125; // Prevent GatherCmpRed from being optimized to equivalent GatherCmp above
///////////////////////////////////////////////
// GatherCmpRed
///////////////////////////////////////////////
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 74,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 0
// CHECK: , float 5.000000e-01)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 0
// CHECK-SAME: , float 5.000000e-01)
r += cube.GatherCmpRed(sampcmp, a.xyz, CMP);
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 74,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 0
// CHECK: , float 5.000000e-01)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 0
// CHECK-SAME: , float 5.000000e-01)
// CHECK: extractvalue %dx.types.ResRet.f32 %{{[^,]+}}, 4
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71,
r += cube.GatherCmpRed(sampcmp, a.xyz+0.05, CMP, status); r += CheckAccessFullyMapped(status);
// TextureCubeArray
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 74,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 0
// CHECK: , float 5.000000e-01)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 0
// CHECK-SAME: , float 5.000000e-01)
r += cubeArray.GatherCmpRed(sampcmp, a.xyzw, CMP);
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 74,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 0
// CHECK: , float 5.000000e-01)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 0
// CHECK-SAME: , float 5.000000e-01)
// CHECK: extractvalue %dx.types.ResRet.f32 %{{[^,]+}}, 4
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71,
r += cubeArray.GatherCmpRed(sampcmp, a.xyzw+0.05, CMP, status); r += CheckAccessFullyMapped(status);
@ -264,34 +266,34 @@ float4 main(float4 a : A) : SV_Target
///////////////////////////////////////////////
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 74,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 1
// CHECK: , float 5.000000e-01)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 1
// CHECK-SAME: , float 5.000000e-01)
r += cube.GatherCmpGreen(sampcmp, a.xyz, CMP);
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 74,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 1
// CHECK: , float 5.000000e-01)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 1
// CHECK-SAME: , float 5.000000e-01)
// CHECK: extractvalue %dx.types.ResRet.f32 %{{[^,]+}}, 4
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71,
r += cube.GatherCmpGreen(sampcmp, a.xyz+0.05, CMP, status); r += CheckAccessFullyMapped(status);
// TextureCubeArray
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 74,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 1
// CHECK: , float 5.000000e-01)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 1
// CHECK-SAME: , float 5.000000e-01)
r += cubeArray.GatherCmpGreen(sampcmp, a.xyzw, CMP);
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 74,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 1
// CHECK: , float 5.000000e-01)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 1
// CHECK-SAME: , float 5.000000e-01)
// CHECK: extractvalue %dx.types.ResRet.f32 %{{[^,]+}}, 4
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71,
r += cubeArray.GatherCmpGreen(sampcmp, a.xyzw+0.05, CMP, status); r += CheckAccessFullyMapped(status);
@ -302,34 +304,34 @@ float4 main(float4 a : A) : SV_Target
///////////////////////////////////////////////
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 74,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 2
// CHECK: , float 5.000000e-01)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 2
// CHECK-SAME: , float 5.000000e-01)
r += cube.GatherCmpBlue(sampcmp, a.xyz, CMP);
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 74,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 2
// CHECK: , float 5.000000e-01)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 2
// CHECK-SAME: , float 5.000000e-01)
// CHECK: extractvalue %dx.types.ResRet.f32 %{{[^,]+}}, 4
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71,
r += cube.GatherCmpBlue(sampcmp, a.xyz+0.05, CMP, status); r += CheckAccessFullyMapped(status);
// TextureCubeArray
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 74,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 2
// CHECK: , float 5.000000e-01)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 2
// CHECK-SAME: , float 5.000000e-01)
r += cubeArray.GatherCmpBlue(sampcmp, a.xyzw, CMP);
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 74,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 2
// CHECK: , float 5.000000e-01)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 2
// CHECK-SAME: , float 5.000000e-01)
// CHECK: extractvalue %dx.types.ResRet.f32 %{{[^,]+}}, 4
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71,
r += cubeArray.GatherCmpBlue(sampcmp, a.xyzw+0.05, CMP, status); r += CheckAccessFullyMapped(status);
@ -340,34 +342,34 @@ float4 main(float4 a : A) : SV_Target
///////////////////////////////////////////////
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 74,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 3
// CHECK: , float 5.000000e-01)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 3
// CHECK-SAME: , float 5.000000e-01)
r += cube.GatherCmpAlpha(sampcmp, a.xyz, CMP);
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 74,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 3
// CHECK: , float 5.000000e-01)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float undef
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 3
// CHECK-SAME: , float 5.000000e-01)
// CHECK: extractvalue %dx.types.ResRet.f32 %{{[^,]+}}, 4
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71,
r += cube.GatherCmpAlpha(sampcmp, a.xyz+0.05, CMP, status); r += CheckAccessFullyMapped(status);
// TextureCubeArray
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 74,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 3
// CHECK: , float 5.000000e-01)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 3
// CHECK-SAME: , float 5.000000e-01)
r += cubeArray.GatherCmpAlpha(sampcmp, a.xyzw, CMP);
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 74,
// CHECK: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK: , i32 undef, i32 undef
// CHECK: , i32 3
// CHECK: , float 5.000000e-01)
// CHECK-SAME: , float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}, float %{{[^,]+}}
// CHECK-SAME: , i32 undef, i32 undef
// CHECK-SAME: , i32 3
// CHECK-SAME: , float 5.000000e-01)
// CHECK: extractvalue %dx.types.ResRet.f32 %{{[^,]+}}, 4
// CHECK: call i1 @dx.op.checkAccessFullyMapped.i32(i32 71,
r += cubeArray.GatherCmpAlpha(sampcmp, a.xyzw+0.05, CMP, status); r += CheckAccessFullyMapped(status);

Просмотреть файл

@ -0,0 +1,53 @@
// RUN: %dxc -E main -T cs_6_0 %s | FileCheck %s
// Compute-shader (cs_6_0) test that calls Gather and
// GatherAlpha/GatherBlue/GatherGreen/GatherRed on a Texture2D using the
// plain, single-offset and status overloads (plus the four-offset overloads
// for the per-channel variants).
// The only FileCheck expectation visible in this file is that @main is
// emitted, so it acts as a "compiles successfully" test for Gather in a
// compute shader.
SamplerState samp1 : register(s5);
Texture2D<float4> text1 : register(t3);
// Per-thread record: coord is read as the gather coordinate, result receives
// the accumulated gather output.
struct InOut {
float2 coord;
float4 result;
};
RWStructuredBuffer<InOut> Data;
// CHECK: define void @main()
[numthreads(64,1,1)]
void main(uint id : SV_GroupIndex)
{
float2 a = Data[id].coord;
uint status;
// r accumulates every gather result so none of the calls is dead code.
float4 r = 0;
r += text1.Gather(samp1, a);
r += text1.Gather(samp1, a, uint2(-5, 7));
r += text1.Gather(samp1, a, uint2(-3, 2), status); r += CheckAccessFullyMapped(status);
a *= 1.125; // Prevent GatherRed from being optimized to equivalent Gather above
r += text1.GatherAlpha(samp1, a);
r += text1.GatherAlpha(samp1, a, uint2(-3,8));
r += text1.GatherAlpha(samp1, a, uint2(-3,8),status); r += CheckAccessFullyMapped(status);
r += text1.GatherAlpha(samp1, a, uint2(-3,8),uint2(-2,3), uint2(-3,8),uint2(-2,3));
r += text1.GatherAlpha(samp1, a, uint2(-3,8),uint2(8,-3), uint2(8,-3), uint2(-3,2), status); r+=CheckAccessFullyMapped(status);
r += text1.GatherBlue(samp1, a);
r += text1.GatherBlue(samp1, a, uint2(-3,8));
r += text1.GatherBlue(samp1, a, uint2(-3,8),status); r += CheckAccessFullyMapped(status);
r += text1.GatherBlue(samp1, a, uint2(-3,8),uint2(-2,3), uint2(-3,8),uint2(-2,3));
r += text1.GatherBlue(samp1, a, uint2(-3,8),uint2(8,-3), uint2(8,-3), uint2(-3,2), status); r+=CheckAccessFullyMapped(status);
r += text1.GatherGreen(samp1, a);
r += text1.GatherGreen(samp1, a, uint2(-3,8));
r += text1.GatherGreen(samp1, a, uint2(-3,8),status); r += CheckAccessFullyMapped(status);
r += text1.GatherGreen(samp1, a, uint2(-3,8),uint2(-2,3), uint2(-3,8),uint2(-2,3));
r += text1.GatherGreen(samp1, a, uint2(-3,8),uint2(8,-3), uint2(8,-3), uint2(-3,2), status); r+=CheckAccessFullyMapped(status);
r += text1.GatherRed(samp1, a);
r += text1.GatherRed(samp1, a, uint2(-3,8));
r += text1.GatherRed(samp1, a, uint2(-3,8),status); r += CheckAccessFullyMapped(status);
r += text1.GatherRed(samp1, a, uint2(-3,8),uint2(-2,3), uint2(-3,8),uint2(-2,3));
r += text1.GatherRed(samp1, a, uint2(-3,8),uint2(8,-3), uint2(8,-3), uint2(-3,2), status); r+=CheckAccessFullyMapped(status);
// Write the accumulated value back so the gathers have an observable use.
Data[id].result = r;
}

Просмотреть файл

@ -0,0 +1,28 @@
// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
// Pixel-shader (ps_6_0) test: Gather is called inside control flow that
// depends on WaveIsFirstLane(). The FileCheck expectations below look for the
// conditional branch first and the two multiplies that build the coordinate
// after it, ahead of the textureGather call, i.e. the coordinate computation
// is allowed to sink into the conditional block.
SamplerState samp1 : register(s5);
Texture2D<float4> text1 : register(t3);
// CHECK: define void @main()
float4 main(float2 coord0 : TEXCOORD0, float2 coord1 : TEXCOORD1) : SV_Target
{
// a is the gather coordinate; its only use is inside the branch below.
float2 a = coord0 * coord1;
// NOTE(review): status is declared but never used in this test.
uint status;
float4 r = 0;
// Ensure Gather is allowed under wave-sensitive flow.
// CHECK: br i1
if (WaveIsFirstLane()) {
// Ensure Gather coord is not considered convergent,
// and fmul of coord0 * coord1 sinks
// CHECK: fmul
// CHECK: fmul
// CHECK: call %dx.types.ResRet.f32 @dx.op.textureGather.f32
r += text1.Gather(samp1, a);
}
// CHECK: br label
// CHECK: call void @dx.op.storeOutput
return r;
}

Просмотреть файл

@ -409,6 +409,7 @@ class db_dxil(object):
for i in "WriteSamplerFeedback,WriteSamplerFeedbackBias".split(","):
self.name_idx[i].category = "Sampler Feedback"
self.name_idx[i].is_feedback = True
self.name_idx[i].is_gradient = True
self.name_idx[i].shader_model = 6,5
self.name_idx[i].shader_stages = ("library", "pixel",)
for i in "WriteSamplerFeedbackLevel,WriteSamplerFeedbackGrad".split(","):
@ -1749,11 +1750,11 @@ class db_dxil(object):
# End of DXIL 1.6 opcodes.
self.set_op_count_for_version(1, 6, next_op_idx)
assert next_op_idx == 218, "next operation index is %d rather than 165 and thus opcodes are broken" % next_op_idx
assert next_op_idx == 218, "218 is expected next operation index but encountered %d and thus opcodes are broken" % next_op_idx
# Set interesting properties.
self.build_indices()
for i in "CalculateLOD,DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY,Sample,SampleBias,SampleCmp,TextureGather,TextureGatherCmp".split(","):
for i in "CalculateLOD,DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY,Sample,SampleBias,SampleCmp".split(","):
self.name_idx[i].is_gradient = True
for i in "DerivCoarseX,DerivCoarseY,DerivFineX,DerivFineY".split(","):
assert self.name_idx[i].is_gradient == True, "all derivatives are marked as requiring gradients"