Merge remote-tracking branch 'refs/remotes/origin/dxil-v1.0'

2017-01-31 13:46:27 -05:00 · 2017-01-31 13:46:27 -05:00 · 27a00a9fdc
--- a/docs/DXIL.rst
+++ b/docs/DXIL.rst
@ -1919,135 +1919,125 @@ ID  Name                          Description
 17  Atan                          returns the Atan
 18  Hcos                          returns the Hcos
 19  Hsin                          returns the Hsin
-20  Exp                           returns the Exp
-21  Frc                           returns the Frc
-22  Log                           returns the Log
-23  Sqrt                          returns the Sqrt
-24  Rsqrt                         returns the Rsqrt
-25  Round_ne                      returns the Round_ne
-26  Round_ni                      returns the Round_ni
-27  Round_pi                      returns the Round_pi
-28  Round_z                       returns the Round_z
-29  Bfrev                         returns the reverse bit pattern of the input value
-30  Countbits                     returns the Countbits
-31  FirstbitLo                    returns the FirstbitLo
-32  FirstbitHi                    returns src != 0? (BitWidth-1 - FirstbitHi) : -1
-33  FirstbitSHi                   returns src != 0? (BitWidth-1 - FirstbitSHi) : -1
-34  FMax                          returns the FMax of the input values
-35  FMin                          returns the FMin of the input values
-36  IMax                          returns the IMax of the input values
-37  IMin                          returns the IMin of the input values
-38  UMax                          returns the UMax of the input values
-39  UMin                          returns the UMin of the input values
-40  IMul                          returns the IMul of the input values
-41  UMul                          returns the UMul of the input values
-42  UDiv                          returns the UDiv of the input values
-43  IAddc                         returns the IAddc of the input values
-44  UAddc                         returns the UAddc of the input values
-45  ISubc                         returns the ISubc of the input values
-46  USubc                         returns the USubc of the input values
-47  FMad                          performs a fused multiply add (FMA) of the form a * b + c
-48  Fma                           performs a fused multiply add (FMA) of the form a * b + c
-49  IMad                          performs an integral IMad
-50  UMad                          performs an integral UMad
-51  Msad                          performs an integral Msad
-52  Ibfe                          performs an integral Ibfe
-53  Ubfe                          performs an integral Ubfe
-54  Bfi                           given a bit range from the LSB of a number, places that number of bits in another number at any offset
-55  Dot2                          two-dimensional vector dot-product
-56  Dot3                          three-dimensional vector dot-product
-57  Dot4                          four-dimensional vector dot-product
-58  CreateHandle                  creates the handle to a resource
-59  CBufferLoad                   loads a value from a constant buffer resource
-60  CBufferLoadLegacy             loads a value from a constant buffer resource
-61  Sample                        samples a texture
-62  SampleBias                    samples a texture after applying the input bias to the mipmap level
-63  SampleLevel                   samples a texture using a mipmap-level offset
-64  SampleGrad                    samples a texture using a gradient to influence the way the sample location is calculated
-65  SampleCmp                     samples a texture and compares a single component against the specified comparison value
-66  SampleCmpLevelZero            samples a texture and compares a single component against the specified comparison value
-67  TextureLoad                   reads texel data without any filtering or sampling
-68  TextureStore                  reads texel data without any filtering or sampling
-69  BufferLoad                    reads from a TypedBuffer
-70  BufferStore                   writes to a RWTypedBuffer
-71  BufferUpdateCounter           atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
-72  CheckAccessFullyMapped        determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
-73  GetDimensions                 gets texture size information
-74  TextureGather                 gathers the four texels that would be used in a bi-linear filtering operation
-75  TextureGatherCmp              same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp
-76  ToDelete5                     reserved
-77  ToDelete6                     reserved
-78  Texture2DMSGetSamplePosition  gets the position of the specified sample
-79  RenderTargetGetSamplePosition gets the position of the specified sample
-80  RenderTargetGetSampleCount    gets the number of samples for a render target
-81  AtomicBinOp                   performs an atomic operation on two operands
-82  AtomicCompareExchange         atomic compare and exchange to memory
-83  Barrier                       inserts a memory barrier in the shader
-84  CalculateLOD                  calculates the level of detail
-85  Discard                       discard the current pixel
-86  DerivCoarseX                  computes the rate of change of components per stamp
-87  DerivCoarseY                  computes the rate of change of components per stamp
-88  DerivFineX                    computes the rate of change of components per pixel
-89  DerivFineY                    computes the rate of change of components per pixel
-90  EvalSnapped                   evaluates an input attribute at pixel center with an offset
-91  EvalSampleIndex               evaluates an input attribute at a sample location
-92  EvalCentroid                  evaluates an input attribute at pixel center
-93  ThreadId                      reads the thread ID
-94  GroupId                       reads the group ID (SV_GroupID)
-95  ThreadIdInGroup               reads the thread ID within the group (SV_GroupThreadID)
-96  FlattenedThreadIdInGroup      provides a flattened index for a given thread within a given group (SV_GroupIndex)
-97  EmitStream                    emits a vertex to a given stream
-98  CutStream                     completes the current primitive topology at the specified stream
-99  EmitThenCutStream             equivalent to an EmitStream followed by a CutStream
-100 MakeDouble                    creates a double value
-101 ToDelete1                     reserved
-102 ToDelete2                     reserved
-103 SplitDouble                   splits a double into low and high parts
-104 ToDelete3                     reserved
-105 ToDelete4                     reserved
-106 LoadOutputControlPoint        LoadOutputControlPoint
-107 LoadPatchConstant             LoadPatchConstant
-108 DomainLocation                DomainLocation
-109 StorePatchConstant            StorePatchConstant
-110 OutputControlPointID          OutputControlPointID
-111 PrimitiveID                   PrimitiveID
-112 CycleCounterLegacy            CycleCounterLegacy
-113 Htan                          returns the hyperbolic tangent of the specified value
-114 WaveCaptureReserved           reserved
-115 WaveIsFirstLane               returns 1 for the first lane in the wave
-116 WaveGetLaneIndex              returns the index of the current lane in the wave
-117 WaveGetLaneCount              returns the number of lanes in the wave
-118 WaveIsHelperLaneReserved      reserved
-119 WaveAnyTrue                   returns 1 if any of the lane evaluates the value to true
-120 WaveAllTrue                   returns 1 if all the lanes evaluate the value to true
-121 WaveActiveAllEqual            returns 1 if all the lanes have the same value
-122 WaveActiveBallot              returns a struct with a bit set for each lane where the condition is true
-123 WaveReadLaneAt                returns the value from the specified lane
-124 WaveReadLaneFirst             returns the value from the first lane
-125 WaveActiveOp                  returns the result the operation across waves
-126 WaveActiveBit                 returns the result of the operation across all lanes
-127 WavePrefixOp                  returns the result of the operation on prior lanes
-128 WaveGetOrderedIndex           reserved
-129 GlobalOrderedCountIncReserved reserved
-130 QuadReadLaneAt                reads from a lane in the quad
-131 QuadOp                        returns the result of a quad-level operation
-132 BitcastI16toF16               bitcast between different sizes
-133 BitcastF16toI16               bitcast between different sizes
-134 BitcastI32toF32               bitcast between different sizes
-135 BitcastF32toI32               bitcast between different sizes
-136 BitcastI64toF64               bitcast between different sizes
-137 BitcastF64toI64               bitcast between different sizes
-138 GSInstanceID                  GSInstanceID
-139 LegacyF32ToF16                legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)
-140 LegacyF16ToF32                legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)
-141 LegacyDoubleToFloat           legacy fuction to convert double to float
-142 LegacyDoubleToSInt32          legacy fuction to convert double to int32
-143 LegacyDoubleToUInt32          legacy fuction to convert double to uint32
-144 WaveAllBitCount               returns the count of bits set to 1 across the wave
-145 WavePrefixBitCount            returns the count of bits set to 1 on prior lanes
-146 SampleIndex                   returns the sample index in a sample-frequency pixel shader
-147 Coverage                      returns the coverage mask input in a pixel shader
-148 InnerCoverage                 returns underestimated coverage input from conservative rasterization in a pixel shader
+20  Htan                          returns the Htan
+21  Exp                           returns the Exp
+22  Frc                           returns the Frc
+23  Log                           returns the Log
+24  Sqrt                          returns the Sqrt
+25  Rsqrt                         returns the Rsqrt
+26  Round_ne                      returns the Round_ne
+27  Round_ni                      returns the Round_ni
+28  Round_pi                      returns the Round_pi
+29  Round_z                       returns the Round_z
+30  Bfrev                         returns the reverse bit pattern of the input value
+31  Countbits                     returns the Countbits
+32  FirstbitLo                    returns the FirstbitLo
+33  FirstbitHi                    returns src != 0? (BitWidth-1 - FirstbitHi) : -1
+34  FirstbitSHi                   returns src != 0? (BitWidth-1 - FirstbitSHi) : -1
+35  FMax                          returns the FMax of the input values
+36  FMin                          returns the FMin of the input values
+37  IMax                          returns the IMax of the input values
+38  IMin                          returns the IMin of the input values
+39  UMax                          returns the UMax of the input values
+40  UMin                          returns the UMin of the input values
+41  IMul                          returns the IMul of the input values
+42  UMul                          returns the UMul of the input values
+43  UDiv                          returns the UDiv of the input values
+44  IAddc                         returns the IAddc of the input values
+45  UAddc                         returns the UAddc of the input values
+46  ISubc                         returns the ISubc of the input values
+47  USubc                         returns the USubc of the input values
+48  FMad                          performs a fused multiply add (FMA) of the form a * b + c
+49  Fma                           performs a fused multiply add (FMA) of the form a * b + c
+50  IMad                          performs an integral IMad
+51  UMad                          performs an integral UMad
+52  Msad                          performs an integral Msad
+53  Ibfe                          performs an integral Ibfe
+54  Ubfe                          performs an integral Ubfe
+55  Bfi                           given a bit range from the LSB of a number, places that number of bits in another number at any offset
+56  Dot2                          two-dimensional vector dot-product
+57  Dot3                          three-dimensional vector dot-product
+58  Dot4                          four-dimensional vector dot-product
+59  CreateHandle                  creates the handle to a resource
+60  CBufferLoad                   loads a value from a constant buffer resource
+61  CBufferLoadLegacy             loads a value from a constant buffer resource
+62  Sample                        samples a texture
+63  SampleBias                    samples a texture after applying the input bias to the mipmap level
+64  SampleLevel                   samples a texture using a mipmap-level offset
+65  SampleGrad                    samples a texture using a gradient to influence the way the sample location is calculated
+66  SampleCmp                     samples a texture and compares a single component against the specified comparison value
+67  SampleCmpLevelZero            samples a texture and compares a single component against the specified comparison value
+68  TextureLoad                   reads texel data without any filtering or sampling
+69  TextureStore                  writes texel data to a texture
+70  BufferLoad                    reads from a TypedBuffer
+71  BufferStore                   writes to a RWTypedBuffer
+72  BufferUpdateCounter           atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
+73  CheckAccessFullyMapped        determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
+74  GetDimensions                 gets texture size information
+75  TextureGather                 gathers the four texels that would be used in a bi-linear filtering operation
+76  TextureGatherCmp              same as TextureGather, except this instruction performs comparison on texels, similar to SampleCmp
+77  Texture2DMSGetSamplePosition  gets the position of the specified sample
+78  RenderTargetGetSamplePosition gets the position of the specified sample
+79  RenderTargetGetSampleCount    gets the number of samples for a render target
+80  AtomicBinOp                   performs an atomic operation on two operands
+81  AtomicCompareExchange         atomic compare and exchange to memory
+82  Barrier                       inserts a memory barrier in the shader
+83  CalculateLOD                  calculates the level of detail
+84  Discard                       discard the current pixel
+85  DerivCoarseX                  computes the rate of change of components per stamp
+86  DerivCoarseY                  computes the rate of change of components per stamp
+87  DerivFineX                    computes the rate of change of components per pixel
+88  DerivFineY                    computes the rate of change of components per pixel
+89  EvalSnapped                   evaluates an input attribute at pixel center with an offset
+90  EvalSampleIndex               evaluates an input attribute at a sample location
+91  EvalCentroid                  evaluates an input attribute at pixel center
+92  SampleIndex                   returns the sample index in a sample-frequency pixel shader
+93  Coverage                      returns the coverage mask input in a pixel shader
+94  InnerCoverage                 returns underestimated coverage input from conservative rasterization in a pixel shader
+95  ThreadId                      reads the thread ID
+96  GroupId                       reads the group ID (SV_GroupID)
+97  ThreadIdInGroup               reads the thread ID within the group (SV_GroupThreadID)
+98  FlattenedThreadIdInGroup      provides a flattened index for a given thread within a given group (SV_GroupIndex)
+99  EmitStream                    emits a vertex to a given stream
+100 CutStream                     completes the current primitive topology at the specified stream
+101 EmitThenCutStream             equivalent to an EmitStream followed by a CutStream
+102 GSInstanceID                  GSInstanceID
+103 MakeDouble                    creates a double value
+104 SplitDouble                   splits a double into low and high parts
+105 LoadOutputControlPoint        LoadOutputControlPoint
+106 LoadPatchConstant             LoadPatchConstant
+107 DomainLocation                DomainLocation
+108 StorePatchConstant            StorePatchConstant
+109 OutputControlPointID          OutputControlPointID
+110 PrimitiveID                   PrimitiveID
+111 CycleCounterLegacy            CycleCounterLegacy
+112 WaveIsFirstLane               returns 1 for the first lane in the wave
+113 WaveGetLaneIndex              returns the index of the current lane in the wave
+114 WaveGetLaneCount              returns the number of lanes in the wave
+115 WaveAnyTrue                   returns 1 if any of the lane evaluates the value to true
+116 WaveAllTrue                   returns 1 if all the lanes evaluate the value to true
+117 WaveActiveAllEqual            returns 1 if all the lanes have the same value
+118 WaveActiveBallot              returns a struct with a bit set for each lane where the condition is true
+119 WaveReadLaneAt                returns the value from the specified lane
+120 WaveReadLaneFirst             returns the value from the first lane
+121 WaveActiveOp                  returns the result of the operation across waves
+122 WaveActiveBit                 returns the result of the operation across all lanes
+123 WavePrefixOp                  returns the result of the operation on prior lanes
+124 QuadReadLaneAt                reads from a lane in the quad
+125 QuadOp                        returns the result of a quad-level operation
+126 BitcastI16toF16               bitcast between different sizes
+127 BitcastF16toI16               bitcast between different sizes
+128 BitcastI32toF32               bitcast between different sizes
+129 BitcastF32toI32               bitcast between different sizes
+130 BitcastI64toF64               bitcast between different sizes
+131 BitcastF64toI64               bitcast between different sizes
+132 LegacyF32ToF16                legacy function to convert float (f32) to half (f16) (this is not related to min-precision)
+133 LegacyF16ToF32                legacy function to convert half (f16) to float (f32) (this is not related to min-precision)
+134 LegacyDoubleToFloat           legacy function to convert double to float
+135 LegacyDoubleToSInt32          legacy function to convert double to int32
+136 LegacyDoubleToUInt32          legacy function to convert double to uint32
+137 WaveAllBitCount               returns the count of bits set to 1 across the wave
+138 WavePrefixBitCount            returns the count of bits set to 1 on prior lanes
 === ============================= ================================================================================================================


@ -2284,6 +2274,7 @@ SM.TRIOUTPUTPRIMITIVEMISMATCH         Hull Shader declared with Tri Domain must
 SM.UNDEFINEDOUTPUT                    Not all elements of output %0 were written
 SM.VALIDDOMAIN                        Invalid Tessellator Domain specified. Must be isoline, tri or quad
 TYPES.DEFINED                         Type must be defined based on DXIL primitives
+TYPES.I8                              I8 can only be used as immediate value for intrinsic
 TYPES.INTWIDTH                        Int type must be of valid width
 TYPES.NOMULTIDIM                      Only one dimension allowed for array type
 TYPES.NOVECTOR                        Vector types must not be present
--- a/include/dxc/HLSL/DxilConstants.h
+++ b/include/dxc/HLSL/DxilConstants.h
@ -26,8 +26,8 @@ import hctdb_instrhelp

 namespace DXIL {
  // DXIL version.
-  const unsigned kDxilMajor = 0;
-  const unsigned kDxilMinor = 7;
+  const unsigned kDxilMajor = 1;
+  const unsigned kDxilMinor = 0;

  inline unsigned MakeDxilVersion(unsigned DxilMajor, unsigned DxilMinor) {
    return 0 | (DxilMajor << 8) | (DxilMinor);
@ -246,137 +246,126 @@ namespace DXIL {
  // OPCODE-ENUM:BEGIN
  // Enumeration for operations specified by DXIL
  enum class OpCode : unsigned {
-    // 
-    GlobalOrderedCountIncReserved = 129, // reserved
-    ToDelete1 = 101, // reserved
-    ToDelete2 = 102, // reserved
-    ToDelete3 = 104, // reserved
-    ToDelete4 = 105, // reserved
-    ToDelete5 = 76, // reserved
-    ToDelete6 = 77, // reserved
-  
    // Binary float
-    FMax = 34, // returns the FMax of the input values
-    FMin = 35, // returns the FMin of the input values
+    FMax = 35, // returns the FMax of the input values
+    FMin = 36, // returns the FMin of the input values
  
    // Binary int with carry
-    IAddc = 43, // returns the IAddc of the input values
-    ISubc = 45, // returns the ISubc of the input values
-    UAddc = 44, // returns the UAddc of the input values
-    USubc = 46, // returns the USubc of the input values
+    IAddc = 44, // returns the IAddc of the input values
+    ISubc = 46, // returns the ISubc of the input values
+    UAddc = 45, // returns the UAddc of the input values
+    USubc = 47, // returns the USubc of the input values
  
    // Binary int with two outputs
-    IMul = 40, // returns the IMul of the input values
-    UDiv = 42, // returns the UDiv of the input values
-    UMul = 41, // returns the UMul of the input values
+    IMul = 41, // returns the IMul of the input values
+    UDiv = 43, // returns the UDiv of the input values
+    UMul = 42, // returns the UMul of the input values
  
    // Binary int
-    IMax = 36, // returns the IMax of the input values
-    IMin = 37, // returns the IMin of the input values
-    UMax = 38, // returns the UMax of the input values
-    UMin = 39, // returns the UMin of the input values
+    IMax = 37, // returns the IMax of the input values
+    IMin = 38, // returns the IMin of the input values
+    UMax = 39, // returns the UMax of the input values
+    UMin = 40, // returns the UMin of the input values
  
    // Bitcasts with different sizes
-    BitcastF16toI16 = 133, // bitcast between different sizes
-    BitcastF32toI32 = 135, // bitcast between different sizes
-    BitcastF64toI64 = 137, // bitcast between different sizes
-    BitcastI16toF16 = 132, // bitcast between different sizes
-    BitcastI32toF32 = 134, // bitcast between different sizes
-    BitcastI64toF64 = 136, // bitcast between different sizes
+    BitcastF16toI16 = 127, // bitcast between different sizes
+    BitcastF32toI32 = 129, // bitcast between different sizes
+    BitcastF64toI64 = 131, // bitcast between different sizes
+    BitcastI16toF16 = 126, // bitcast between different sizes
+    BitcastI32toF32 = 128, // bitcast between different sizes
+    BitcastI64toF64 = 130, // bitcast between different sizes
  
    // Compute shader
-    FlattenedThreadIdInGroup = 96, // provides a flattened index for a given thread within a given group (SV_GroupIndex)
-    GroupId = 94, // reads the group ID (SV_GroupID)
-    ThreadId = 93, // reads the thread ID
-    ThreadIdInGroup = 95, // reads the thread ID within the group (SV_GroupThreadID)
+    FlattenedThreadIdInGroup = 98, // provides a flattened index for a given thread within a given group (SV_GroupIndex)
+    GroupId = 96, // reads the group ID (SV_GroupID)
+    ThreadId = 95, // reads the thread ID
+    ThreadIdInGroup = 97, // reads the thread ID within the group (SV_GroupThreadID)
  
    // Domain and hull shader
-    LoadOutputControlPoint = 106, // LoadOutputControlPoint
-    LoadPatchConstant = 107, // LoadPatchConstant
+    LoadOutputControlPoint = 105, // LoadOutputControlPoint
+    LoadPatchConstant = 106, // LoadPatchConstant
  
    // Domain shader
-    DomainLocation = 108, // DomainLocation
+    DomainLocation = 107, // DomainLocation
  
    // Dot
-    Dot2 = 55, // two-dimensional vector dot-product
-    Dot3 = 56, // three-dimensional vector dot-product
-    Dot4 = 57, // four-dimensional vector dot-product
+    Dot2 = 56, // two-dimensional vector dot-product
+    Dot3 = 57, // three-dimensional vector dot-product
+    Dot4 = 58, // four-dimensional vector dot-product
  
    // Double precision
-    LegacyDoubleToFloat = 141, // legacy fuction to convert double to float
-    LegacyDoubleToSInt32 = 142, // legacy fuction to convert double to int32
-    LegacyDoubleToUInt32 = 143, // legacy fuction to convert double to uint32
-    MakeDouble = 100, // creates a double value
-    SplitDouble = 103, // splits a double into low and high parts
-  
-    // GS
-    GSInstanceID = 138, // GSInstanceID
+    LegacyDoubleToFloat = 134, // legacy function to convert double to float
+    LegacyDoubleToSInt32 = 135, // legacy function to convert double to int32
+    LegacyDoubleToUInt32 = 136, // legacy function to convert double to uint32
+    MakeDouble = 103, // creates a double value
+    SplitDouble = 104, // splits a double into low and high parts
  
    // Geometry shader
-    CutStream = 98, // completes the current primitive topology at the specified stream
-    EmitStream = 97, // emits a vertex to a given stream
-    EmitThenCutStream = 99, // equivalent to an EmitStream followed by a CutStream
+    CutStream = 100, // completes the current primitive topology at the specified stream
+    EmitStream = 99, // emits a vertex to a given stream
+    EmitThenCutStream = 101, // equivalent to an EmitStream followed by a CutStream
+    GSInstanceID = 102, // GSInstanceID
  
    // Hull shader
-    OutputControlPointID = 110, // OutputControlPointID
-    PrimitiveID = 111, // PrimitiveID
-    StorePatchConstant = 109, // StorePatchConstant
+    OutputControlPointID = 109, // OutputControlPointID
+    PrimitiveID = 110, // PrimitiveID
+    StorePatchConstant = 108, // StorePatchConstant
  
    // Legacy floating-point
-    LegacyF16ToF32 = 140, // legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)
-    LegacyF32ToF16 = 139, // legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)
+    LegacyF16ToF32 = 133, // legacy function to convert half (f16) to float (f32) (this is not related to min-precision)
+    LegacyF32ToF16 = 132, // legacy function to convert float (f32) to half (f16) (this is not related to min-precision)
  
    // Other
-    CycleCounterLegacy = 112, // CycleCounterLegacy
+    CycleCounterLegacy = 111, // CycleCounterLegacy
  
    // Pixel shader
-    CalculateLOD = 84, // calculates the level of detail
-    Coverage = 147, // returns the coverage mask input in a pixel shader
-    DerivCoarseX = 86, // computes the rate of change of components per stamp
-    DerivCoarseY = 87, // computes the rate of change of components per stamp
-    DerivFineX = 88, // computes the rate of change of components per pixel
-    DerivFineY = 89, // computes the rate of change of components per pixel
-    Discard = 85, // discard the current pixel
-    EvalCentroid = 92, // evaluates an input attribute at pixel center
-    EvalSampleIndex = 91, // evaluates an input attribute at a sample location
-    EvalSnapped = 90, // evaluates an input attribute at pixel center with an offset
-    InnerCoverage = 148, // returns underestimated coverage input from conservative rasterization in a pixel shader
-    SampleIndex = 146, // returns the sample index in a sample-frequency pixel shader
+    CalculateLOD = 83, // calculates the level of detail
+    Coverage = 93, // returns the coverage mask input in a pixel shader
+    DerivCoarseX = 85, // computes the rate of change of components per stamp
+    DerivCoarseY = 86, // computes the rate of change of components per stamp
+    DerivFineX = 87, // computes the rate of change of components per pixel
+    DerivFineY = 88, // computes the rate of change of components per pixel
+    Discard = 84, // discard the current pixel
+    EvalCentroid = 91, // evaluates an input attribute at pixel center
+    EvalSampleIndex = 90, // evaluates an input attribute at a sample location
+    EvalSnapped = 89, // evaluates an input attribute at pixel center with an offset
+    InnerCoverage = 94, // returns underestimated coverage input from conservative rasterization in a pixel shader
+    SampleIndex = 92, // returns the sample index in a sample-frequency pixel shader
  
    // Quaternary
-    Bfi = 54, // given a bit range from the LSB of a number, places that number of bits in another number at any offset
+    Bfi = 55, // given a bit range from the LSB of a number, places that number of bits in another number at any offset
  
    // Resources - gather
-    TextureGather = 74, // gathers the four texels that would be used in a bi-linear filtering operation
-    TextureGatherCmp = 75, // same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp
+    TextureGather = 75, // gathers the four texels that would be used in a bi-linear filtering operation
+    TextureGatherCmp = 76, // same as TextureGather, except this instruction performs comparison on texels, similar to SampleCmp
  
    // Resources - sample
-    RenderTargetGetSampleCount = 80, // gets the number of samples for a render target
-    RenderTargetGetSamplePosition = 79, // gets the position of the specified sample
-    Sample = 61, // samples a texture
-    SampleBias = 62, // samples a texture after applying the input bias to the mipmap level
-    SampleCmp = 65, // samples a texture and compares a single component against the specified comparison value
-    SampleCmpLevelZero = 66, // samples a texture and compares a single component against the specified comparison value
-    SampleGrad = 64, // samples a texture using a gradient to influence the way the sample location is calculated
-    SampleLevel = 63, // samples a texture using a mipmap-level offset
-    Texture2DMSGetSamplePosition = 78, // gets the position of the specified sample
+    RenderTargetGetSampleCount = 79, // gets the number of samples for a render target
+    RenderTargetGetSamplePosition = 78, // gets the position of the specified sample
+    Sample = 62, // samples a texture
+    SampleBias = 63, // samples a texture after applying the input bias to the mipmap level
+    SampleCmp = 66, // samples a texture and compares a single component against the specified comparison value
+    SampleCmpLevelZero = 67, // samples a texture and compares a single component against the specified comparison value
+    SampleGrad = 65, // samples a texture using a gradient to influence the way the sample location is calculated
+    SampleLevel = 64, // samples a texture using a mipmap-level offset
+    Texture2DMSGetSamplePosition = 77, // gets the position of the specified sample
  
    // Resources
-    BufferLoad = 69, // reads from a TypedBuffer
-    BufferStore = 70, // writes to a RWTypedBuffer
-    BufferUpdateCounter = 71, // atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
-    CBufferLoad = 59, // loads a value from a constant buffer resource
-    CBufferLoadLegacy = 60, // loads a value from a constant buffer resource
-    CheckAccessFullyMapped = 72, // determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
-    CreateHandle = 58, // creates the handle to a resource
-    GetDimensions = 73, // gets texture size information
-    TextureLoad = 67, // reads texel data without any filtering or sampling
-    TextureStore = 68, // reads texel data without any filtering or sampling
+    BufferLoad = 70, // reads from a TypedBuffer
+    BufferStore = 71, // writes to a RWTypedBuffer
+    BufferUpdateCounter = 72, // atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV
+    CBufferLoad = 60, // loads a value from a constant buffer resource
+    CBufferLoadLegacy = 61, // loads a value from a constant buffer resource
+    CheckAccessFullyMapped = 73, // determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource
+    CreateHandle = 59, // creates the handle to a resource
+    GetDimensions = 74, // gets texture size information
+    TextureLoad = 68, // reads texel data without any filtering or sampling
+    TextureStore = 69, // writes texel data to a texture
  
    // Synchronization
-    AtomicBinOp = 81, // performs an atomic operation on two operands
-    AtomicCompareExchange = 82, // atomic compare and exchange to memory
-    Barrier = 83, // inserts a memory barrier in the shader
+    AtomicBinOp = 80, // performs an atomic operation on two operands
+    AtomicCompareExchange = 81, // atomic compare and exchange to memory
+    Barrier = 82, // inserts a memory barrier in the shader
  
    // Temporary, indexable, input, output registers
    LoadInput = 4, // loads the value from shader input
@ -387,73 +376,70 @@ namespace DXIL {
    TempRegStore = 1, // helper store operation
  
    // Tertiary float
-    FMad = 47, // performs a fused multiply add (FMA) of the form a * b + c
-    Fma = 48, // performs a fused multiply add (FMA) of the form a * b + c
+    FMad = 48, // performs a fused multiply add (FMA) of the form a * b + c
+    Fma = 49, // performs a fused multiply add (FMA) of the form a * b + c
  
    // Tertiary int
-    IMad = 49, // performs an integral IMad
-    Ibfe = 52, // performs an integral Ibfe
-    Msad = 51, // performs an integral Msad
-    UMad = 50, // performs an integral UMad
-    Ubfe = 53, // performs an integral Ubfe
+    IMad = 50, // performs an integral IMad
+    Ibfe = 53, // performs an integral Ibfe
+    Msad = 52, // performs an integral Msad
+    UMad = 51, // performs an integral UMad
+    Ubfe = 54, // performs an integral Ubfe
  
    // Unary float - rounding
-    Round_ne = 25, // returns the Round_ne
-    Round_ni = 26, // returns the Round_ni
-    Round_pi = 27, // returns the Round_pi
-    Round_z = 28, // returns the Round_z
+    Round_ne = 26, // returns the Round_ne
+    Round_ni = 27, // returns the Round_ni
+    Round_pi = 28, // returns the Round_pi
+    Round_z = 29, // returns the Round_z
  
    // Unary float
    Acos = 15, // returns the Acos
    Asin = 16, // returns the Asin
    Atan = 17, // returns the Atan
    Cos = 12, // returns cosine(theta) for theta in radians.
-    Exp = 20, // returns the Exp
+    Exp = 21, // returns the Exp
    FAbs = 6, // returns the absolute value of the input value.
-    Frc = 21, // returns the Frc
+    Frc = 22, // returns the Frc
    Hcos = 18, // returns the Hcos
    Hsin = 19, // returns the Hsin
-    Htan = 113, // returns the hyperbolic tangent of the specified value
+    Htan = 20, // returns the Htan
    IsFinite = 10, // returns the IsFinite
    IsInf = 9, // returns the IsInf
    IsNaN = 8, // returns the IsNaN
    IsNormal = 11, // returns the IsNormal
-    Log = 22, // returns the Log
-    Rsqrt = 24, // returns the Rsqrt
+    Log = 23, // returns the Log
+    Rsqrt = 25, // returns the Rsqrt
    Saturate = 7, // clamps the result of a single or double precision floating point value to [0.0f...1.0f]
    Sin = 13, // returns the Sin
-    Sqrt = 23, // returns the Sqrt
+    Sqrt = 24, // returns the Sqrt
    Tan = 14, // returns the Tan
  
    // Unary int
-    Bfrev = 29, // returns the reverse bit pattern of the input value
-    Countbits = 30, // returns the Countbits
-    FirstbitHi = 32, // returns src != 0? (BitWidth-1 - FirstbitHi) : -1
-    FirstbitLo = 31, // returns the FirstbitLo
-    FirstbitSHi = 33, // returns src != 0? (BitWidth-1 - FirstbitSHi) : -1
+    Bfrev = 30, // returns the reverse bit pattern of the input value
+    Countbits = 31, // returns the Countbits
+    FirstbitHi = 33, // returns src != 0? (BitWidth-1 - FirstbitHi) : -1
+    FirstbitLo = 32, // returns the FirstbitLo
+    FirstbitSHi = 34, // returns src != 0? (BitWidth-1 - FirstbitSHi) : -1
  
    // Wave
-    QuadOp = 131, // returns the result of a quad-level operation
-    QuadReadLaneAt = 130, // reads from a lane in the quad
-    WaveActiveAllEqual = 121, // returns 1 if all the lanes have the same value
-    WaveActiveBallot = 122, // returns a struct with a bit set for each lane where the condition is true
-    WaveActiveBit = 126, // returns the result of the operation across all lanes
-    WaveActiveOp = 125, // returns the result the operation across waves
-    WaveAllBitCount = 144, // returns the count of bits set to 1 across the wave
-    WaveAllTrue = 120, // returns 1 if all the lanes evaluate the value to true
-    WaveAnyTrue = 119, // returns 1 if any of the lane evaluates the value to true
-    WaveCaptureReserved = 114, // reserved
-    WaveGetLaneCount = 117, // returns the number of lanes in the wave
-    WaveGetLaneIndex = 116, // returns the index of the current lane in the wave
-    WaveGetOrderedIndex = 128, // reserved
-    WaveIsFirstLane = 115, // returns 1 for the first lane in the wave
-    WaveIsHelperLaneReserved = 118, // reserved
-    WavePrefixBitCount = 145, // returns the count of bits set to 1 on prior lanes
-    WavePrefixOp = 127, // returns the result of the operation on prior lanes
-    WaveReadLaneAt = 123, // returns the value from the specified lane
-    WaveReadLaneFirst = 124, // returns the value from the first lane
+    QuadOp = 125, // returns the result of a quad-level operation
+    QuadReadLaneAt = 124, // reads from a lane in the quad
+    WaveActiveAllEqual = 117, // returns 1 if all the lanes have the same value
+    WaveActiveBallot = 118, // returns a struct with a bit set for each lane where the condition is true
+    WaveActiveBit = 122, // returns the result of the operation across all lanes
+    WaveActiveOp = 121, // returns the result of the operation across waves
+    WaveAllBitCount = 137, // returns the count of bits set to 1 across the wave
+    WaveAllTrue = 116, // returns 1 if all the lanes evaluate the value to true
+    WaveAnyTrue = 115, // returns 1 if any of the lanes evaluates the value to true
+    WaveGetLaneCount = 114, // returns the number of lanes in the wave
+    WaveGetLaneIndex = 113, // returns the index of the current lane in the wave
+    WaveIsFirstLane = 112, // returns 1 for the first lane in the wave
+    WavePrefixBitCount = 138, // returns the count of bits set to 1 on prior lanes
+    WavePrefixOp = 123, // returns the result of the operation on prior lanes
+    WaveReadLaneAt = 119, // returns the value from the specified lane
+    WaveReadLaneFirst = 120, // returns the value from the first lane
  
-    NumOpCodes = 149 // exclusive last value of enumeration
+    NumOpCodes = 139 // exclusive last value of enumeration
  };
  // OPCODE-ENUM:END

@ -461,9 +447,6 @@ namespace DXIL {
  // OPCODECLASS-ENUM:BEGIN
  // Groups for DXIL operations with equivalent function templates
  enum class OpCodeClass : unsigned {
-    // 
-    Reserved,
-  
    // Binary int with carry
    BinaryWithCarry,
  
@ -506,13 +489,11 @@ namespace DXIL {
    MakeDouble,
    SplitDouble,
  
-    // GS
-    GSInstanceID,
-  
    // Geometry shader
    CutStream,
    EmitStream,
    EmitThenCutStream,
+    GSInstanceID,
  
    // Hull shader
    OutputControlPointID,
@ -538,6 +519,7 @@ namespace DXIL {
    EvalSnapped,
    InnerCoverage,
    SampleIndex,
+    Unary,
  
    // Quaternary
    Quaternary,
@ -587,7 +569,6 @@ namespace DXIL {
  
    // Unary float
    IsSpecialFloat,
-    Unary,
  
    // Unary int
    UnaryBits,
@ -609,7 +590,7 @@ namespace DXIL {
    WaveReadLaneAt,
    WaveReadLaneFirst,
  
-    NumOpClasses = 94 // exclusive last value of enumeration
+    NumOpClasses = 93 // exclusive last value of enumeration
  };
  // OPCODECLASS-ENUM:END

--- a/include/dxc/HLSL/DxilInstructions.h
+++ b/include/dxc/HLSL/DxilInstructions.h
@ -1058,6 +1058,24 @@ struct DxilInst_Hsin {
  llvm::Value *get_value() const { return Instr->getOperand(1); }
 };

+/// This instruction returns the Htan
+struct DxilInst_Htan {
+  const llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_Htan(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::Htan);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+  // Accessors
+  llvm::Value *get_value() const { return Instr->getOperand(1); }
+};
+
 /// This instruction returns the Exp
 struct DxilInst_Exp {
  const llvm::Instruction *Instr;
@ -2519,6 +2537,54 @@ struct DxilInst_EvalCentroid {
  llvm::Value *get_inputColIndex() const { return Instr->getOperand(3); }
 };

+/// This instruction returns the sample index in a sample-frequency pixel shader
+struct DxilInst_SampleIndex {
+  const llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_SampleIndex(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::SampleIndex);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
+/// This instruction returns the coverage mask input in a pixel shader
+struct DxilInst_Coverage {
+  const llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_Coverage(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::Coverage);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
+/// This instruction returns underestimated coverage input from conservative rasterization in a pixel shader
+struct DxilInst_InnerCoverage {
+  const llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_InnerCoverage(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::InnerCoverage);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
 /// This instruction reads the thread ID
 struct DxilInst_ThreadId {
  const llvm::Instruction *Instr;
@ -2643,6 +2709,22 @@ struct DxilInst_EmitThenCutStream {
  llvm::Value *get_streamId() const { return Instr->getOperand(1); }
 };

+/// This instruction returns the GSInstanceID
+struct DxilInst_GSInstanceID {
+  const llvm::Instruction *Instr;
+  // Construction and identification
+  DxilInst_GSInstanceID(llvm::Instruction *pInstr) : Instr(pInstr) {}
+  operator bool() const {
+    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::GSInstanceID);
+  }
+  // Validation support
+  bool isAllowed() const { return true; }
+  bool isArgumentListValid() const {
+    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
+    return true;
+  }
+};
+
 /// This instruction creates a double value
 struct DxilInst_MakeDouble {
  const llvm::Instruction *Instr;
@ -2809,24 +2891,6 @@ struct DxilInst_CycleCounterLegacy {
  }
 };

-/// This instruction returns the hyperbolic tangent of the specified value
-struct DxilInst_Htan {
-  const llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_Htan(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::Htan);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (2 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
-    return true;
-  }
-  // Accessors
-  llvm::Value *get_value() const { return Instr->getOperand(1); }
-};
-
 /// This instruction returns 1 for the first lane in the wave
 struct DxilInst_WaveIsFirstLane {
  const llvm::Instruction *Instr;
@ -3196,22 +3260,6 @@ struct DxilInst_BitcastF64toI64 {
  llvm::Value *get_value() const { return Instr->getOperand(1); }
 };

-/// This instruction GSInstanceID
-struct DxilInst_GSInstanceID {
-  const llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_GSInstanceID(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::GSInstanceID);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
-    return true;
-  }
-};
-
 /// This instruction legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)
 struct DxilInst_LegacyF32ToF16 {
  const llvm::Instruction *Instr;
@ -3337,53 +3385,5 @@ struct DxilInst_WavePrefixBitCount {
  // Accessors
  llvm::Value *get_value() const { return Instr->getOperand(1); }
 };
-
-/// This instruction returns the sample index in a sample-frequency pixel shader
-struct DxilInst_SampleIndex {
-  const llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_SampleIndex(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::SampleIndex);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
-    return true;
-  }
-};
-
-/// This instruction returns the coverage mask input in a pixel shader
-struct DxilInst_Coverage {
-  const llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_Coverage(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::Coverage);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
-    return true;
-  }
-};
-
-/// This instruction returns underestimated coverage input from conservative rasterization in a pixel shader
-struct DxilInst_InnerCoverage {
-  const llvm::Instruction *Instr;
-  // Construction and identification
-  DxilInst_InnerCoverage(llvm::Instruction *pInstr) : Instr(pInstr) {}
-  operator bool() const {
-    return hlsl::OP::IsDxilOpFuncCallInst(Instr, hlsl::OP::OpCode::InnerCoverage);
-  }
-  // Validation support
-  bool isAllowed() const { return true; }
-  bool isArgumentListValid() const {
-    if (1 != llvm::dyn_cast<llvm::CallInst>(Instr)->getNumArgOperands()) return false;
-    return true;
-  }
-};
 // INSTR-HELPER:END
 } // namespace hlsl
--- a/include/dxc/HLSL/DxilValidation.h
+++ b/include/dxc/HLSL/DxilValidation.h
@ -205,6 +205,7 @@ enum class ValidationRule : unsigned {

  // Type system
  TypesDefined, // Type must be defined based on DXIL primitives
+  TypesI8, // I8 can only be used as an immediate value for an intrinsic
  TypesIntWidth, // Int type must be of valid width
  TypesNoMultiDim, // Only one dimension allowed for array type
  TypesNoVector, // Vector types must not be present
--- a/include/dxc/HLSL/HLOperationLowerExtension.h
+++ b/include/dxc/HLSL/HLOperationLowerExtension.h
@ -14,15 +14,19 @@
 #include "dxc/HLSL/HLSLExtensionsCodegenHelper.h"
 #include "llvm/ADT/StringRef.h"
 #include <string>
+#include <unordered_map>

 namespace llvm {
  class Value;
  class CallInst;
  class Function;
  class StringRef;
+  class Instruction;
 }

 namespace hlsl {
+  class OP;
+
  // Lowers HLSL extensions from HL operation to DXIL operation.
  class ExtensionLowering {
  public:
@ -32,11 +36,14 @@ namespace hlsl {
      NoTranslation,  // Propagate the call arguments as is down to dxil.
      Replicate,      // Scalarize the vector arguments and replicate the call.
      Pack,           // Convert the vector arguments into structs.
+      Resource,       // Convert return value to resource return and explode vectors.
    };

+    typedef std::unordered_map<llvm::Instruction *, llvm::Value *> HandleMap;
+
    // Create the lowering using the given strategy and custom codegen helper.
-    ExtensionLowering(llvm::StringRef strategy, HLSLExtensionsCodegenHelper *helper);
-    ExtensionLowering(Strategy strategy, HLSLExtensionsCodegenHelper *helper);
+    ExtensionLowering(llvm::StringRef strategy, HLSLExtensionsCodegenHelper *helper, const HandleMap &handleMap, OP& hlslOp);
+    ExtensionLowering(Strategy strategy, HLSLExtensionsCodegenHelper *helper, const HandleMap &handleMap, OP& hlslOp);

    // Translate the HL op call to a DXIL op call.
    // Returns a new value if translation was successful.
@ -62,11 +69,14 @@ namespace hlsl {
  private:
    Strategy m_strategy;
    HLSLExtensionsCodegenHelper *m_helper;
+    const HandleMap &m_handleMap;
+    OP &m_hlslOp;

    llvm::Value *Unknown(llvm::CallInst *CI);
    llvm::Value *NoTranslation(llvm::CallInst *CI);
    llvm::Value *Replicate(llvm::CallInst *CI);
    llvm::Value *Pack(llvm::CallInst *CI);
+    llvm::Value *Resource(llvm::CallInst *CI);

    // Translate the HL call by replicating the call for each vector element.
    //
--- a/lib/HLSL/DxilGenerationPass.cpp
+++ b/lib/HLSL/DxilGenerationPass.cpp
@ -154,8 +154,8 @@ void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, bool HasDebugInfo) {
    b->SetSize(C->GetSize());
    if (HasDebugInfo)
      LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
-    else
-      b->SetGlobalSymbol(UndefValue::get(b->GetGlobalSymbol()->getType()));
+
+    b->SetGlobalSymbol(UndefValue::get(b->GetGlobalSymbol()->getType()));
    M.AddCBuffer(std::move(b));
  }
  for (auto && C : H.GetUAVs()) {
@ -163,8 +163,8 @@ void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, bool HasDebugInfo) {
    InitResource(C.get(), b.get());
    if (HasDebugInfo)
      LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
-    else
-      b->SetGlobalSymbol(UndefValue::get(b->GetGlobalSymbol()->getType()));
+
+    b->SetGlobalSymbol(UndefValue::get(b->GetGlobalSymbol()->getType()));
    M.AddUAV(std::move(b));
  }
  for (auto && C : H.GetSRVs()) {
@ -172,8 +172,8 @@ void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, bool HasDebugInfo) {
    InitResource(C.get(), b.get());
    if (HasDebugInfo)
      LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
-    else
-      b->SetGlobalSymbol(UndefValue::get(b->GetGlobalSymbol()->getType()));
+
+    b->SetGlobalSymbol(UndefValue::get(b->GetGlobalSymbol()->getType()));
    M.AddSRV(std::move(b));
  }
  for (auto && C : H.GetSamplers()) {
@ -182,8 +182,8 @@ void InitDxilModuleFromHLModule(HLModule &H, DxilModule &M, bool HasDebugInfo) {
    b->SetSamplerKind(C->GetSamplerKind());
    if (HasDebugInfo)
      LLVMUsed.emplace_back(cast<GlobalVariable>(b->GetGlobalSymbol()));
-    else
-      b->SetGlobalSymbol(UndefValue::get(b->GetGlobalSymbol()->getType()));
+
+    b->SetGlobalSymbol(UndefValue::get(b->GetGlobalSymbol()->getType()));
    M.AddSampler(std::move(b));
  }

--- a/lib/HLSL/DxilMetadataHelper.cpp
+++ b/lib/HLSL/DxilMetadataHelper.cpp
@ -17,7 +17,6 @@
 #include "dxc/HLSL/DxilSignature.h"
 #include "dxc/HLSL/DxilTypeSystem.h"
 #include "dxc/HLSL/DxilRootSignature.h"
-#include "dxc/HLSL/DxilValidation.h"

 #include "llvm/IR/Constants.h"
 #include "llvm/IR/Function.h"
@ -27,6 +26,8 @@
 #include "llvm/Support/raw_ostream.h"
 #include <array>

+#include "dxc/Support/WinIncludes.h"
+
 using namespace llvm;
 using std::string;
 using std::vector;
@ -131,10 +132,10 @@ void DxilMDHelper::LoadDxilShaderModel(const ShaderModel *&pSM) {
  ShaderModelName += "_" + std::to_string(Major) + "_" + std::to_string(Minor);
  pSM = ShaderModel::GetByName(ShaderModelName.c_str());
  if (!pSM->IsValid()) {
-    string ErrorMsg = hlsl::GetValidationRuleText(hlsl::ValidationRule::SmName);
-    size_t offset = ErrorMsg.find("%0");
-    if (offset != string::npos)
-      ErrorMsg.replace(offset, 2, ShaderModelName);
+    char ErrorMsgTxt[40];
+    StringCchPrintfA(ErrorMsgTxt, _countof(ErrorMsgTxt),
+                     "Unknown shader model '%s'", ShaderModelName.c_str());
+    string ErrorMsg(ErrorMsgTxt);
    throw hlsl::Exception(DXC_E_INCORRECT_DXIL_METADATA, ErrorMsg);
  }
 }
@ -547,15 +548,7 @@ void DxilMDHelper::EmitDxilTypeSystem(DxilTypeSystem &TypeSystem, vector<GlobalV
    // Emit struct type field annotations.
    Metadata *pMD = EmitDxilStructAnnotation(*pA);

-    // Declare a global dummy variable.
-    string GVName = string(kDxilTypeSystemHelperVariablePrefix) + std::to_string(GVIdx);
-    GlobalVariable *pGV = new GlobalVariable(*m_pModule, pStructType, true, GlobalValue::ExternalLinkage, 
-                                             nullptr, GVName, nullptr,
-                                             GlobalVariable::NotThreadLocal, DXIL::kDeviceMemoryAddrSpace);
-    // Mark GV as being used for LLVM.
-    LLVMUsed.emplace_back(pGV);
-
-    MDVals.push_back(ValueAsMetadata::get(pGV));
+    MDVals.push_back(ValueAsMetadata::get(UndefValue::get(pStructType)));
    MDVals.push_back(pMD);
  }

@ -596,11 +589,11 @@ void DxilMDHelper::LoadDxilTypeSystemNode(const llvm::MDTuple &MDT,
    IFTBOOL((MDT.getNumOperands() & 0x1) == 1, DXC_E_INCORRECT_DXIL_METADATA);

    for (unsigned i = 1; i < MDT.getNumOperands(); i += 2) {
-      GlobalVariable *pGV =
-          dyn_cast<GlobalVariable>(ValueMDToValue(MDT.getOperand(i)));
+      Constant *pGV =
+          dyn_cast<Constant>(ValueMDToValue(MDT.getOperand(i)));
      IFTBOOL(pGV != nullptr, DXC_E_INCORRECT_DXIL_METADATA);
      StructType *pGVType =
-          dyn_cast<StructType>(pGV->getType()->getPointerElementType());
+          dyn_cast<StructType>(pGV->getType());
      IFTBOOL(pGVType != nullptr, DXC_E_INCORRECT_DXIL_METADATA);

      DxilStructAnnotation *pSA = TypeSystem.AddStructAnnotation(pGVType);
--- a/lib/HLSL/DxilModule.cpp
+++ b/lib/HLSL/DxilModule.cpp
@ -66,7 +66,7 @@ DxilModule::DxilModule(Module *pModule)

  m_NumThreads[0] = m_NumThreads[1] = m_NumThreads[2] = 0;

-#ifdef _DEBUG
+#if defined(_DEBUG) || defined(DBG)
  // Pin LLVM dump methods.
  void (__thiscall Module::*pfnModuleDump)() const = &Module::dump;
  void (__thiscall Type::*pfnTypeDump)() const = &Type::dump;
--- a/lib/HLSL/DxilOperations.cpp
+++ b/lib/HLSL/DxilOperations.cpp
@ -63,6 +63,7 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
  {  OC::Atan,                    "Atan",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
  {  OC::Hcos,                    "Hcos",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
  {  OC::Hsin,                    "Hsin",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::Htan,                    "Htan",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
  {  OC::Exp,                     "Exp",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
  {  OC::Frc,                     "Frc",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
  {  OC::Log,                     "Log",                      OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
@ -148,10 +149,6 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
  {  OC::TextureGather,           "TextureGather",            OCC::TextureGather,            "textureGather",              false, false,  true, false, false, false, false,  true, false, Attribute::ReadOnly, },
  {  OC::TextureGatherCmp,        "TextureGatherCmp",         OCC::TextureGatherCmp,         "textureGatherCmp",           false, false,  true, false, false, false, false,  true, false, Attribute::ReadOnly, },

-  //                                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::ToDelete5,               "ToDelete5",                OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::ToDelete6,               "ToDelete6",                OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
-
  // Resources - sample                                                                                                     void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
  {  OC::Texture2DMSGetSamplePosition, "Texture2DMSGetSamplePosition", OCC::Texture2DMSGetSamplePosition, "texture2DMSGetSamplePosition",   true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
  {  OC::RenderTargetGetSamplePosition, "RenderTargetGetSamplePosition", OCC::RenderTargetGetSamplePosition, "renderTargetGetSamplePosition",   true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
@ -172,6 +169,9 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
  {  OC::EvalSnapped,             "EvalSnapped",              OCC::EvalSnapped,              "evalSnapped",                false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
  {  OC::EvalSampleIndex,         "EvalSampleIndex",          OCC::EvalSampleIndex,          "evalSampleIndex",            false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
  {  OC::EvalCentroid,            "EvalCentroid",             OCC::EvalCentroid,             "evalCentroid",               false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
+  {  OC::SampleIndex,             "SampleIndex",              OCC::SampleIndex,              "sampleIndex",                false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
+  {  OC::Coverage,                "Coverage",                 OCC::Coverage,                 "coverage",                   false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
+  {  OC::InnerCoverage,           "InnerCoverage",            OCC::InnerCoverage,            "innerCoverage",              false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },

  // Compute shader                                                                                                         void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
  {  OC::ThreadId,                "ThreadId",                 OCC::ThreadId,                 "threadId",                   false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
@ -183,21 +183,12 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
  {  OC::EmitStream,              "EmitStream",               OCC::EmitStream,               "emitStream",                  true, false, false, false, false, false, false, false, false, Attribute::None,     },
  {  OC::CutStream,               "CutStream",                OCC::CutStream,                "cutStream",                   true, false, false, false, false, false, false, false, false, Attribute::None,     },
  {  OC::EmitThenCutStream,       "EmitThenCutStream",        OCC::EmitThenCutStream,        "emitThenCutStream",           true, false, false, false, false, false, false, false, false, Attribute::None,     },
+  {  OC::GSInstanceID,            "GSInstanceID",             OCC::GSInstanceID,             "gsInstanceID",               false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },

  // Double precision                                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
  {  OC::MakeDouble,              "MakeDouble",               OCC::MakeDouble,               "makeDouble",                 false, false, false,  true, false, false, false, false, false, Attribute::ReadNone, },
-
-  //                                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::ToDelete1,               "ToDelete1",                OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::ToDelete2,               "ToDelete2",                OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
-
-  // Double precision                                                                                                       void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
  {  OC::SplitDouble,             "SplitDouble",              OCC::SplitDouble,              "splitDouble",                false, false, false,  true, false, false, false, false, false, Attribute::ReadNone, },

-  //                                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::ToDelete3,               "ToDelete3",                OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
-  {  OC::ToDelete4,               "ToDelete4",                OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
-
  // Domain and hull shader                                                                                                 void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
  {  OC::LoadOutputControlPoint,  "LoadOutputControlPoint",   OCC::LoadOutputControlPoint,   "loadOutputControlPoint",     false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadNone, },
  {  OC::LoadPatchConstant,       "LoadPatchConstant",        OCC::LoadPatchConstant,        "loadPatchConstant",          false,  true,  true, false, false, false,  true,  true, false, Attribute::ReadNone, },
@ -213,15 +204,10 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
  // Other                                                                                                                  void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
  {  OC::CycleCounterLegacy,      "CycleCounterLegacy",       OCC::CycleCounterLegacy,       "cycleCounterLegacy",          true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },

-  // Unary float                                                                                                            void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::Htan,                    "Htan",                     OCC::Unary,                    "unary",                      false,  true,  true, false, false, false, false, false, false, Attribute::ReadNone, },
-
  // Wave                                                                                                                   void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::WaveCaptureReserved,     "WaveCaptureReserved",      OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
  {  OC::WaveIsFirstLane,         "WaveIsFirstLane",          OCC::WaveIsFirstLane,          "waveIsFirstLane",             true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
  {  OC::WaveGetLaneIndex,        "WaveGetLaneIndex",         OCC::WaveGetLaneIndex,         "waveGetLaneIndex",            true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
  {  OC::WaveGetLaneCount,        "WaveGetLaneCount",         OCC::WaveGetLaneCount,         "waveGetLaneCount",            true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
-  {  OC::WaveIsHelperLaneReserved, "WaveIsHelperLaneReserved", OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
  {  OC::WaveAnyTrue,             "WaveAnyTrue",              OCC::WaveAnyTrue,              "waveAnyTrue",                 true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
  {  OC::WaveAllTrue,             "WaveAllTrue",              OCC::WaveAllTrue,              "waveAllTrue",                 true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
  {  OC::WaveActiveAllEqual,      "WaveActiveAllEqual",       OCC::WaveActiveAllEqual,       "waveActiveAllEqual",         false,  true,  true,  true,  true,  true,  true,  true,  true, Attribute::ReadOnly, },
@ -231,12 +217,6 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
  {  OC::WaveActiveOp,            "WaveActiveOp",             OCC::WaveActiveOp,             "waveActiveOp",               false,  true,  true,  true,  true,  true,  true,  true,  true, Attribute::ReadOnly, },
  {  OC::WaveActiveBit,           "WaveActiveBit",            OCC::WaveActiveBit,            "waveActiveBit",              false, false, false, false, false,  true,  true,  true,  true, Attribute::ReadOnly, },
  {  OC::WavePrefixOp,            "WavePrefixOp",             OCC::WavePrefixOp,             "wavePrefixOp",               false,  true,  true,  true, false,  true,  true,  true,  true, Attribute::ReadOnly, },
-  {  OC::WaveGetOrderedIndex,     "WaveGetOrderedIndex",      OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
-
-  //                                                                                                                        void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::GlobalOrderedCountIncReserved, "GlobalOrderedCountIncReserved", OCC::Reserved,                 "reserved",                    true, false, false, false, false, false, false, false, false, Attribute::None,     },
-
-  // Wave                                                                                                                   void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
  {  OC::QuadReadLaneAt,          "QuadReadLaneAt",           OCC::QuadReadLaneAt,           "quadReadLaneAt",             false,  true,  true,  true,  true,  true,  true,  true,  true, Attribute::ReadOnly, },
  {  OC::QuadOp,                  "QuadOp",                   OCC::QuadOp,                   "quadOp",                     false,  true,  true,  true, false,  true,  true,  true,  true, Attribute::ReadOnly, },

@ -248,9 +228,6 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
  {  OC::BitcastI64toF64,         "BitcastI64toF64",          OCC::BitcastI64toF64,          "bitcastI64toF64",             true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
  {  OC::BitcastF64toI64,         "BitcastF64toI64",          OCC::BitcastF64toI64,          "bitcastF64toI64",             true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },

-  // GS                                                                                                                     void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::GSInstanceID,            "GSInstanceID",             OCC::GSInstanceID,             "gsInstanceID",               false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-
  // Legacy floating-point                                                                                                  void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
  {  OC::LegacyF32ToF16,          "LegacyF32ToF16",           OCC::LegacyF32ToF16,           "legacyF32ToF16",              true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
  {  OC::LegacyF16ToF32,          "LegacyF16ToF32",           OCC::LegacyF16ToF32,           "legacyF16ToF32",              true, false, false, false, false, false, false, false, false, Attribute::ReadNone, },
@ -263,11 +240,6 @@ const OP::OpCodeProperty OP::m_OpCodeProps[(unsigned)OP::OpCode::NumOpCodes] = {
  // Wave                                                                                                                   void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
  {  OC::WaveAllBitCount,         "WaveAllBitCount",          OCC::WaveAllOp,                "waveAllOp",                   true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
  {  OC::WavePrefixBitCount,      "WavePrefixBitCount",       OCC::WavePrefixOp,             "wavePrefixOp",                true, false, false, false, false, false, false, false, false, Attribute::ReadOnly, },
-
-  // Pixel shader                                                                                                           void,     h,     f,     d,    i1,    i8,   i16,   i32,   i64  function attribute
-  {  OC::SampleIndex,             "SampleIndex",              OCC::SampleIndex,              "sampleIndex",                false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::Coverage,                "Coverage",                 OCC::Coverage,                 "coverage",                   false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
-  {  OC::InnerCoverage,           "InnerCoverage",            OCC::InnerCoverage,            "innerCoverage",              false, false, false, false, false, false, false,  true, false, Attribute::ReadNone, },
 };
 // OPCODE-OLOADS:END

@ -379,14 +351,13 @@ bool OP::IsDxilOpWave(OpCode C) {
  unsigned op = (unsigned)C;
  /* <py::lines('OPCODE-WAVE')>hctdb_instrhelp.get_instrs_pred("op", "is_wave")</py>*/
  // OPCODE-WAVE:BEGIN
-  // Instructions: WaveCaptureReserved=114, WaveIsFirstLane=115,
-  // WaveGetLaneIndex=116, WaveGetLaneCount=117, WaveIsHelperLaneReserved=118,
-  // WaveAnyTrue=119, WaveAllTrue=120, WaveActiveAllEqual=121,
-  // WaveActiveBallot=122, WaveReadLaneAt=123, WaveReadLaneFirst=124,
-  // WaveActiveOp=125, WaveActiveBit=126, WavePrefixOp=127,
-  // WaveGetOrderedIndex=128, QuadReadLaneAt=130, QuadOp=131,
-  // WaveAllBitCount=144, WavePrefixBitCount=145
-  return 114 <= op && op <= 128 || 130 <= op && op <= 131 || 144 <= op && op <= 145;
+  // Instructions: WaveIsFirstLane=112, WaveGetLaneIndex=113,
+  // WaveGetLaneCount=114, WaveAnyTrue=115, WaveAllTrue=116,
+  // WaveActiveAllEqual=117, WaveActiveBallot=118, WaveReadLaneAt=119,
+  // WaveReadLaneFirst=120, WaveActiveOp=121, WaveActiveBit=122,
+  // WavePrefixOp=123, QuadReadLaneAt=124, QuadOp=125, WaveAllBitCount=137,
+  // WavePrefixBitCount=138
+  return 112 <= op && op <= 125 || 137 <= op && op <= 138;
  // OPCODE-WAVE:END
 }

@ -394,10 +365,10 @@ bool OP::IsDxilOpGradient(OpCode C) {
  unsigned op = (unsigned)C;
  /* <py::lines('OPCODE-GRADIENT')>hctdb_instrhelp.get_instrs_pred("op", "is_gradient")</py>*/
  // OPCODE-GRADIENT:BEGIN
-  // Instructions: Sample=61, SampleBias=62, SampleCmp=65, TextureGather=74,
-  // TextureGatherCmp=75, CalculateLOD=84, DerivCoarseX=86, DerivCoarseY=87,
-  // DerivFineX=88, DerivFineY=89
-  return 61 <= op && op <= 62 || op == 65 || 74 <= op && op <= 75 || op == 84 || 86 <= op && op <= 89;
+  // Instructions: Sample=62, SampleBias=63, SampleCmp=66, TextureGather=75,
+  // TextureGatherCmp=76, CalculateLOD=83, DerivCoarseX=85, DerivCoarseY=86,
+  // DerivFineX=87, DerivFineY=88
+  return 62 <= op && op <= 63 || op == 66 || 75 <= op && op <= 76 || op == 83 || 85 <= op && op <= 88;
  // OPCODE-GRADIENT:END
 }

@ -514,6 +485,7 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
  case OpCode::Atan:                   A(pETy);     A(pI32); A(pETy); break;
  case OpCode::Hcos:                   A(pETy);     A(pI32); A(pETy); break;
  case OpCode::Hsin:                   A(pETy);     A(pI32); A(pETy); break;
+  case OpCode::Htan:                   A(pETy);     A(pI32); A(pETy); break;
  case OpCode::Exp:                    A(pETy);     A(pI32); A(pETy); break;
  case OpCode::Frc:                    A(pETy);     A(pI32); A(pETy); break;
  case OpCode::Log:                    A(pETy);     A(pI32); A(pETy); break;
@ -599,10 +571,6 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
  case OpCode::TextureGather:          RRT(pETy);   A(pI32); A(pRes); A(pRes); A(pF32); A(pF32); A(pF32); A(pF32); A(pI32); A(pI32); A(pI32); break;
  case OpCode::TextureGatherCmp:       RRT(pETy);   A(pI32); A(pRes); A(pRes); A(pF32); A(pF32); A(pF32); A(pF32); A(pI32); A(pI32); A(pI32); A(pF32); break;

-    // 
-  case OpCode::ToDelete5:              A(pV);       A(pI32); break;
-  case OpCode::ToDelete6:              A(pV);       A(pI32); break;
-
    // Resources - sample
  case OpCode::Texture2DMSGetSamplePosition:A(pPos);     A(pI32); A(pRes); A(pI32); break;
  case OpCode::RenderTargetGetSamplePosition:A(pPos);     A(pI32); A(pI32); break;
@ -623,6 +591,9 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
  case OpCode::EvalSnapped:            A(pETy);     A(pI32); A(pI32); A(pI32); A(pI8);  A(pI32); A(pI32); break;
  case OpCode::EvalSampleIndex:        A(pETy);     A(pI32); A(pI32); A(pI32); A(pI8);  A(pI32); break;
  case OpCode::EvalCentroid:           A(pETy);     A(pI32); A(pI32); A(pI32); A(pI8);  break;
+  case OpCode::SampleIndex:            A(pI32);     A(pI32); break;
+  case OpCode::Coverage:               A(pI32);     A(pI32); break;
+  case OpCode::InnerCoverage:          A(pI32);     A(pI32); break;

    // Compute shader
  case OpCode::ThreadId:               A(pI32);     A(pI32); A(pI32); break;
@ -634,21 +605,12 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
  case OpCode::EmitStream:             A(pV);       A(pI32); A(pI8);  break;
  case OpCode::CutStream:              A(pV);       A(pI32); A(pI8);  break;
  case OpCode::EmitThenCutStream:      A(pV);       A(pI32); A(pI8);  break;
+  case OpCode::GSInstanceID:           A(pI32);     A(pI32); break;

    // Double precision
  case OpCode::MakeDouble:             A(pF64);     A(pI32); A(pI32); A(pI32); break;
-
-    // 
-  case OpCode::ToDelete1:              A(pV);       A(pI32); break;
-  case OpCode::ToDelete2:              A(pV);       A(pI32); break;
-
-    // Double precision
  case OpCode::SplitDouble:            A(pSDT);     A(pI32); A(pF64); break;

-    // 
-  case OpCode::ToDelete3:              A(pV);       A(pI32); break;
-  case OpCode::ToDelete4:              A(pV);       A(pI32); break;
-
    // Domain and hull shader
  case OpCode::LoadOutputControlPoint: A(pETy);     A(pI32); A(pI32); A(pI32); A(pI8);  A(pI32); break;
  case OpCode::LoadPatchConstant:      A(pETy);     A(pI32); A(pI32); A(pI32); A(pI8);  break;
@ -664,15 +626,10 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
    // Other
  case OpCode::CycleCounterLegacy:     A(p2I32);    A(pI32); break;

-    // Unary float
-  case OpCode::Htan:                   A(pETy);     A(pI32); A(pETy); break;
-
    // Wave
-  case OpCode::WaveCaptureReserved:    A(pV);       A(pI32); break;
  case OpCode::WaveIsFirstLane:        A(pI1);      A(pI32); break;
  case OpCode::WaveGetLaneIndex:       A(pI32);     A(pI32); break;
  case OpCode::WaveGetLaneCount:       A(pI32);     A(pI32); break;
-  case OpCode::WaveIsHelperLaneReserved:A(pV);       A(pI32); break;
  case OpCode::WaveAnyTrue:            A(pI1);      A(pI32); A(pI1);  break;
  case OpCode::WaveAllTrue:            A(pI1);      A(pI32); A(pI1);  break;
  case OpCode::WaveActiveAllEqual:     A(pI1);      A(pI32); A(pETy); break;
@ -682,12 +639,6 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
  case OpCode::WaveActiveOp:           A(pETy);     A(pI32); A(pETy); A(pI8);  A(pI8);  break;
  case OpCode::WaveActiveBit:          A(pETy);     A(pI32); A(pETy); A(pI8);  break;
  case OpCode::WavePrefixOp:           A(pETy);     A(pI32); A(pETy); A(pI8);  A(pI8);  break;
-  case OpCode::WaveGetOrderedIndex:    A(pV);       A(pI32); break;
-
-    // 
-  case OpCode::GlobalOrderedCountIncReserved:A(pV);       A(pI32); break;
-
-    // Wave
  case OpCode::QuadReadLaneAt:         A(pETy);     A(pI32); A(pETy); A(pI32); break;
  case OpCode::QuadOp:                 A(pETy);     A(pI32); A(pETy); A(pI8);  break;

@ -699,9 +650,6 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
  case OpCode::BitcastI64toF64:        A(pF64);     A(pI32); A(pI64); break;
  case OpCode::BitcastF64toI64:        A(pI64);     A(pI32); A(pF64); break;

-    // GS
-  case OpCode::GSInstanceID:           A(pI32);     A(pI32); break;
-
    // Legacy floating-point
  case OpCode::LegacyF32ToF16:         A(pI32);     A(pI32); A(pF32); break;
  case OpCode::LegacyF16ToF32:         A(pF32);     A(pI32); A(pI32); break;
@ -714,11 +662,6 @@ Function *OP::GetOpFunc(OpCode OpCode, Type *pOverloadType) {
    // Wave
  case OpCode::WaveAllBitCount:        A(pI32);     A(pI32); A(pI1);  break;
  case OpCode::WavePrefixBitCount:     A(pI32);     A(pI32); A(pI1);  break;
-
-    // Pixel shader
-  case OpCode::SampleIndex:            A(pI32);     A(pI32); break;
-  case OpCode::Coverage:               A(pI32);     A(pI32); break;
-  case OpCode::InnerCoverage:          A(pI32);     A(pI32); break;
  // OPCODE-OLOAD-FUNCS:END
  default: DXASSERT(false, "otherwise unhandled case"); break;
  }
--- a/lib/HLSL/DxilValidation.cpp
+++ b/lib/HLSL/DxilValidation.cpp
@ -162,6 +162,7 @@ const char *hlsl::GetValidationRuleText(ValidationRule value) {
    case hlsl::ValidationRule::TypesDefined: return "Type '%0' is not defined on DXIL primitives";
    case hlsl::ValidationRule::TypesIntWidth: return "Int type '%0' has an invalid width";
    case hlsl::ValidationRule::TypesNoMultiDim: return "Only one dimension allowed for array type";
+    case hlsl::ValidationRule::TypesI8: return "I8 can only used as immediate value for intrinsic";
    case hlsl::ValidationRule::SmName: return "Unknown shader model '%0'";
    case hlsl::ValidationRule::SmOpcode: return "Opcode must be defined in target shader model";
    case hlsl::ValidationRule::SmOperand: return "Operand must be defined in target shader model";
@ -486,32 +487,32 @@ static bool ValidateOpcodeInProfile(DXIL::OpCode opcode,
  unsigned op = (unsigned)opcode;
  /* <py::lines('VALOPCODESM-TEXT')>hctdb_instrhelp.get_valopcode_sm_text()</py>*/
  // VALOPCODESM-TEXT:BEGIN
-  // Instructions: ThreadId=93, GroupId=94, ThreadIdInGroup=95,
-  // FlattenedThreadIdInGroup=96
-  if (93 <= op && op <= 96)
+  // Instructions: ThreadId=95, GroupId=96, ThreadIdInGroup=97,
+  // FlattenedThreadIdInGroup=98
+  if (95 <= op && op <= 98)
    return pSM->IsCS();
-  // Instructions: DomainLocation=108
-  if (op == 108)
+  // Instructions: DomainLocation=107
+  if (op == 107)
    return pSM->IsDS();
-  // Instructions: LoadOutputControlPoint=106, LoadPatchConstant=107
-  if (106 <= op && op <= 107)
+  // Instructions: LoadOutputControlPoint=105, LoadPatchConstant=106
+  if (105 <= op && op <= 106)
    return pSM->IsDS() || pSM->IsHS();
-  // Instructions: EmitStream=97, CutStream=98, EmitThenCutStream=99,
-  // GSInstanceID=138
-  if (97 <= op && op <= 99 || op == 138)
+  // Instructions: EmitStream=99, CutStream=100, EmitThenCutStream=101,
+  // GSInstanceID=102
+  if (99 <= op && op <= 102)
    return pSM->IsGS();
-  // Instructions: PrimitiveID=111
-  if (op == 111)
+  // Instructions: PrimitiveID=110
+  if (op == 110)
    return pSM->IsGS() || pSM->IsDS() || pSM->IsHS() || pSM->IsPS();
-  // Instructions: StorePatchConstant=109, OutputControlPointID=110
-  if (109 <= op && op <= 110)
+  // Instructions: StorePatchConstant=108, OutputControlPointID=109
+  if (108 <= op && op <= 109)
    return pSM->IsHS();
-  // Instructions: Sample=61, SampleBias=62, SampleCmp=65, SampleCmpLevelZero=66,
-  // RenderTargetGetSamplePosition=79, RenderTargetGetSampleCount=80,
-  // CalculateLOD=84, Discard=85, DerivCoarseX=86, DerivCoarseY=87,
-  // DerivFineX=88, DerivFineY=89, EvalSnapped=90, EvalSampleIndex=91,
-  // EvalCentroid=92, SampleIndex=146, Coverage=147, InnerCoverage=148
-  if (61 <= op && op <= 62 || 65 <= op && op <= 66 || 79 <= op && op <= 80 || 84 <= op && op <= 92 || 146 <= op && op <= 148)
+  // Instructions: Sample=62, SampleBias=63, SampleCmp=66, SampleCmpLevelZero=67,
+  // RenderTargetGetSamplePosition=78, RenderTargetGetSampleCount=79,
+  // CalculateLOD=83, Discard=84, DerivCoarseX=85, DerivCoarseY=86,
+  // DerivFineX=87, DerivFineY=88, EvalSnapped=89, EvalSampleIndex=90,
+  // EvalCentroid=91, SampleIndex=92, Coverage=93, InnerCoverage=94
+  if (62 <= op && op <= 63 || 66 <= op && op <= 67 || 78 <= op && op <= 79 || 83 <= op && op <= 94)
    return pSM->IsPS();
  return true;
  // VALOPCODESM-TEXT:END
@ -572,7 +573,8 @@ static DxilSignatureElement *ValidateSignatureAccess(Instruction *I, DxilSignatu
  if (isOutput && SE.GetSemantic()->GetKind() == DXIL::SemanticKind::Position) {
    unsigned mask = ValCtx.OutputPositionMask[SE.GetOutputStream()];
    mask |= 1<<col;
-    ValCtx.OutputPositionMask[SE.GetOutputStream()] = mask;
+    if (SE.GetOutputStream() < DXIL::kNumOutputStreams)
+      ValCtx.OutputPositionMask[SE.GetOutputStream()] = mask;
  }
  return &SE;
 }
@ -923,6 +925,7 @@ static void ValidateGather(CallInst *CI, Value *srvHandle, Value *samplerHandle,

  if (resClass != DXIL::ResourceClass::SRV) {
    ValCtx.EmitInstrError(CI, ValidationRule::InstrResourceClassForSamplerGather);
+    return;
  }

  // Coord match resource kind.
@ -1316,6 +1319,7 @@ static void ValidateDxilOperationCallInProfile(CallInst *CI,
    if (resClass != DXIL::ResourceClass::SRV) {
      ValCtx.EmitInstrError(CI,
                            ValidationRule::InstrResourceClassForSamplerGather);
+      return;
    }
    // Coord match resource.
    ValidateCalcLODResourceDimensionCoord(
@ -1886,11 +1890,6 @@ static void ValidateExternalFunction(Function *F, ValidationContext &ValCtx) {

    DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode;

-    if (OP::GetOpCodeClass(dxilOpcode) == DXIL::OpCodeClass::Reserved) {
-      // Diagnosed in body validation.
-      continue;
-    }
-
    // In some cases, no overloads are provided (void is exclusive to others)
    Function *dxilFunc;
    if (hlslOP->IsOverloadLegal(dxilOpcode, voidTy)) {
@ -2336,10 +2335,6 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) {

          unsigned opcode = OpcodeConst->getLimitedValue();
          DXIL::OpCode dxilOpcode = (DXIL::OpCode)opcode;
-          if (OP::GetOpCodeClass(dxilOpcode) == DXIL::OpCodeClass::Reserved) {
-            ValCtx.EmitInstrError(&I, ValidationRule::InstrOpCodeReserved);
-            continue;
-          }

          if (OP::IsDxilOpGradient(dxilOpcode)) {
            gradientOps.push_back(CI);
@ -2355,20 +2350,34 @@ static void ValidateFunctionBody(Function *F, ValidationContext &ValCtx) {
        continue;
      }

-      if (!isa<PHINode>(&I)) {
-        for (Value *op : I.operands()) {
-          if (isa<UndefValue>(op)) {
-            ValCtx.EmitInstrError(&I,
-                                  ValidationRule::InstrNoReadingUninitialized);
-          } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(op)) {
-            for (Value *opCE : CE->operands()) {
-              if (isa<UndefValue>(opCE)) {
-                ValCtx.EmitInstrError(
-                    &I, ValidationRule::InstrNoReadingUninitialized);
-              }
+      for (Value *op : I.operands()) {
+        if (!isa<PHINode>(&I) && isa<UndefValue>(op)) {
+          ValCtx.EmitInstrError(&I,
+                                ValidationRule::InstrNoReadingUninitialized);
+        } else if (ConstantExpr *CE = dyn_cast<ConstantExpr>(op)) {
+          for (Value *opCE : CE->operands()) {
+            if (isa<UndefValue>(opCE)) {
+              ValCtx.EmitInstrError(
+                  &I, ValidationRule::InstrNoReadingUninitialized);
            }
          }
        }
+        if (IntegerType *IT = dyn_cast<IntegerType>(op->getType())) {
+          if (IT->getBitWidth() == 8) {
+            ValCtx.EmitInstrError(&I, ValidationRule::TypesI8);
+          }
+        }
+      }
+
+      Type *Ty = I.getType();
+      if (isa<PointerType>(Ty))
+        Ty = Ty->getPointerElementType();
+      while (isa<ArrayType>(Ty))
+        Ty = Ty->getArrayElementType();
+      if (IntegerType *IT = dyn_cast<IntegerType>(Ty)) {
+        if (IT->getBitWidth() == 8) {
+          ValCtx.EmitInstrError(&I, ValidationRule::TypesI8);
+        }
      }

      unsigned opcode = I.getOpcode();
--- a/lib/HLSL/HLOperationLower.cpp
+++ b/lib/HLSL/HLOperationLower.cpp
@ -4265,10 +4265,9 @@ Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
  Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);

  Type *i1Ty = Type::getInt1Ty(EltTy->getContext());
-  Type *i8Ty = Type::getInt8Ty(EltTy->getContext());
  Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
  Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
-  bool isBool = EltTy == i1Ty || EltTy == i8Ty;
+  bool isBool = EltTy == i1Ty;
  bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
  bool isNormal = !isBool && !is64;
  if (isNormal) {
@ -4283,14 +4282,12 @@ Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
    Value *Result = Builder.CreateExtractValue(loadLegacy, eltIdx);
    return Result;
  } else {
+    DXASSERT(isBool, "bool should be i1");
    Type *i32Ty = Type::getInt32Ty(EltTy->getContext());
    Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, i32Ty);
    Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
    Value *Result = Builder.CreateExtractValue(loadLegacy, channelOffset);
-    if (EltTy == i1Ty)
-      return Builder.CreateICmpEQ(Result, hlslOP->GetU32Const(0));
-    else
-      return Builder.CreateTrunc(Result, i8Ty);
+    return Builder.CreateICmpEQ(Result, hlslOP->GetU32Const(0));
  }
 }

@ -4302,10 +4299,9 @@ Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
  Constant *OpArg = hlslOP->GetU32Const((unsigned)OP::OpCode::CBufferLoadLegacy);

  Type *i1Ty = Type::getInt1Ty(EltTy->getContext());
-  Type *i8Ty = Type::getInt8Ty(EltTy->getContext());
  Type *doubleTy = Type::getDoubleTy(EltTy->getContext());
  Type *i64Ty = Type::getInt64Ty(EltTy->getContext());
-  bool isBool = EltTy == i1Ty || EltTy == i8Ty;
+  bool isBool = EltTy == i1Ty;
  bool is64 = (EltTy == doubleTy) | (EltTy == i64Ty);
  bool isNormal = !isBool && !is64;
  if (isNormal) {
@ -4340,6 +4336,7 @@ Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
    }
    return Result;
  } else {
+    DXASSERT(isBool, "bool should be i1");
    Type *i32Ty = Type::getInt32Ty(EltTy->getContext());
    Function *CBLoad = hlslOP->GetOpFunc(OP::OpCode::CBufferLoadLegacy, i32Ty);
    Value *loadLegacy = Builder.CreateCall(CBLoad, {OpArg, handle, legacyIdx});
@ -4348,10 +4345,7 @@ Value *GenerateCBLoadLegacy(Value *handle, Value *legacyIdx,
      Value *NewElt = Builder.CreateExtractValue(loadLegacy, channelOffset+i);
      Result = Builder.CreateInsertElement(Result, NewElt, i);
    }
-    if (EltTy == i1Ty)
-      return Builder.CreateICmpEQ(Result, ConstantAggregateZero::get(Result->getType()));
-    else
-      return Builder.CreateTrunc(Result, VectorType::get(i8Ty, vecSize));
+    return Builder.CreateICmpEQ(Result, ConstantAggregateZero::get(Result->getType()));
  }
 }

@ -5723,7 +5717,11 @@ void TranslateHLBuiltinOperation(Function *F, HLOperationLowerHelper &helper,
  }
 }

-static void TranslateHLExtension(Function *F, HLSLExtensionsCodegenHelper *helper) {
+typedef std::unordered_map<llvm::Instruction *, llvm::Value *> HandleMap;
+static void TranslateHLExtension(Function *F,
+                                 HLSLExtensionsCodegenHelper *helper,
+                                 const HandleMap &handleMap,
+                                 OP& hlslOp) {
  // Find all calls to the function F.
  // Store the calls in a vector for now to be replaced the loop below.
  // We use a two step "find then replace" to avoid removing uses while
@ -5737,7 +5735,7 @@ static void TranslateHLExtension(Function *F, HLSLExtensionsCodegenHelper *helpe

  // Get the lowering strategy to use for this intrinsic.
  llvm::StringRef LowerStrategy = GetHLLowerStrategy(F);
-  ExtensionLowering lower(LowerStrategy, helper);
+  ExtensionLowering lower(LowerStrategy, helper, handleMap, hlslOp);

  // Replace all calls that were successfully translated.
  for (CallInst *CI : CallsToReplace) {
@ -5773,8 +5771,7 @@ void TranslateBuiltinOperations(
      continue;
    }
    if (group == HLOpcodeGroup::HLExtIntrinsic) {
-      // TODO: consider handling extensions to object methods
-      TranslateHLExtension(F, extCodegenHelper);
+      TranslateHLExtension(F, extCodegenHelper, handleMap, helper.hlslOP);
      continue;
    }
    TranslateHLBuiltinOperation(F, helper, group, &objHelper);
--- a/lib/HLSL/HLOperationLowerExtension.cpp
+++ b/lib/HLSL/HLOperationLowerExtension.cpp
@ -34,6 +34,7 @@ ExtensionLowering::Strategy ExtensionLowering::GetStrategy(StringRef strategy) {
    case 'n': return Strategy::NoTranslation;
    case 'r': return Strategy::Replicate;
    case 'p': return Strategy::Pack;
+    case 'm': return Strategy::Resource;
    default: break;
  }
  return Strategy::Unknown;
@ -44,17 +45,18 @@ llvm::StringRef ExtensionLowering::GetStrategyName(Strategy strategy) {
    case Strategy::NoTranslation: return "n";
    case Strategy::Replicate:     return "r";
    case Strategy::Pack:          return "p";
+    case Strategy::Resource:      return "m"; // m for resource method
    default: break;
  }
  return "?";
 }

-ExtensionLowering::ExtensionLowering(Strategy strategy, HLSLExtensionsCodegenHelper *helper) 
-  : m_strategy(strategy), m_helper(helper)
+ExtensionLowering::ExtensionLowering(Strategy strategy, HLSLExtensionsCodegenHelper *helper, const HandleMap &handleMap, OP& hlslOp)
+  : m_strategy(strategy), m_helper(helper), m_handleMap(handleMap), m_hlslOp(hlslOp)
  {}

-ExtensionLowering::ExtensionLowering(StringRef strategy, HLSLExtensionsCodegenHelper *helper) 
-  : ExtensionLowering(GetStrategy(strategy), helper)
+ExtensionLowering::ExtensionLowering(StringRef strategy, HLSLExtensionsCodegenHelper *helper, const HandleMap &handleMap, OP& hlslOp)
+  : ExtensionLowering(GetStrategy(strategy), helper, handleMap, hlslOp)
  {}

 llvm::Value *ExtensionLowering::Translate(llvm::CallInst *CI) {
@ -62,6 +64,7 @@ llvm::Value *ExtensionLowering::Translate(llvm::CallInst *CI) {
  case Strategy::NoTranslation: return NoTranslation(CI);
  case Strategy::Replicate:     return Replicate(CI);
  case Strategy::Pack:          return Pack(CI);
+  case Strategy::Resource:      return Resource(CI);
  default: break;
  }
  return Unknown(CI);
@ -75,8 +78,17 @@ llvm::Value *ExtensionLowering::Unknown(CallInst *CI) {
 // Interface to describe how to translate types from HL-dxil to dxil.
 class FunctionTypeTranslator {
 public:
+  // Arguments can be exploded into multiple copies of the same type.
+  // For example a <2 x i32> could become { i32, 2 } if the vector
+  // is expanded in place or { i32, 1 } if the call is replicated.
+  struct ArgumentType {
+    Type *type;  // Type pushed for each lowered parameter produced from the original argument.
+    int  count;  // How many consecutive parameters of `type` the original argument becomes.
+
+    ArgumentType(Type *ty, int cnt = 1) : type(ty), count(cnt) {}  // count defaults to 1 (one-to-one mapping).
+  };
  virtual Type *TranslateReturnType(CallInst *CI) = 0;
-  virtual Type *TranslateArgumentType(Type *OrigArgType) = 0;
+  virtual ArgumentType TranslateArgumentType(Value *OrigArg) = 0;
 };

 // Class to create the new function with the translated types for low-level dxil.
@ -85,6 +97,10 @@ public:
  template <typename TypeTranslator>
  static Function *GetLoweredFunction(CallInst *CI, ExtensionLowering &lower) {
    TypeTranslator typeTranslator;
+    return GetLoweredFunction(typeTranslator, CI, lower);
+  }
+  
+  static Function *GetLoweredFunction(FunctionTypeTranslator &typeTranslator, CallInst *CI, ExtensionLowering &lower) {
    FunctionTranslator translator(typeTranslator, lower);
    return translator.GetLoweredFunction(CI);
  }
@ -120,9 +136,11 @@ private:
    SmallVector<Type *, 10> ParamTypes;
    ParamTypes.reserve(CI->getNumArgOperands());
    for (unsigned i = 0; i < CI->getNumArgOperands(); ++i) {
-      Type *OrigTy = CI->getArgOperand(i)->getType();
-      Type *TranslatedTy = m_typeTranslator.TranslateArgumentType(OrigTy);
-      ParamTypes.push_back(TranslatedTy);
+      Value *OrigArg = CI->getArgOperand(i);
+      FunctionTypeTranslator::ArgumentType newArgType = m_typeTranslator.TranslateArgumentType(OrigArg);
+      for (int i = 0; i < newArgType.count; ++i) {
+        ParamTypes.push_back(newArgType.type);
+      }
    }

    const bool IsVarArg = false;
@ -151,8 +169,8 @@ class NoTranslationTypeTranslator : public FunctionTypeTranslator {
  virtual Type *TranslateReturnType(CallInst *CI) override {
    return CI->getType();
  }
-  virtual Type *TranslateArgumentType(Type *OrigArgType) override {
-    return OrigArgType;
+  virtual ArgumentType TranslateArgumentType(Value *OrigArg) override {
+    return ArgumentType(OrigArg->getType());
  }
 };

@ -212,13 +230,13 @@ class ReplicatedFunctionTypeTranslator : public FunctionTypeTranslator {
    return RetTy;
  }

-  virtual Type *TranslateArgumentType(Type *OrigArgType) override {
-    Type *Ty = OrigArgType;
+  virtual ArgumentType TranslateArgumentType(Value *OrigArg) override {
+    Type *Ty = OrigArg->getType();
    if (Ty->isVectorTy()) {
      Ty = Ty->getVectorElementType();
    }

-    return Ty;
+    return ArgumentType(Ty);
  }

 };
@ -404,8 +422,8 @@ class PackedFunctionTypeTranslator : public FunctionTypeTranslator {
  virtual Type *TranslateReturnType(CallInst *CI) override {
    return TranslateIfVector(CI->getType());
  }
-  virtual Type *TranslateArgumentType(Type *OrigArgType) override {
-    return TranslateIfVector(OrigArgType);
+  virtual ArgumentType TranslateArgumentType(Value *OrigArg) override {
+    return ArgumentType(TranslateIfVector(OrigArg->getType()));
  }

  Type *TranslateIfVector(Type *ty) {
@ -425,6 +443,191 @@ Value *ExtensionLowering::Pack(CallInst *CI) {
  return result;
 }

+///////////////////////////////////////////////////////////////////////////////
+// Resource Lowering.
+
+// Modify a call to a resource method. Makes the following transformations:
+//
+// 1. Convert non-void return value to dx.types.ResRet.
+// 2. Convert resource parameters to the corresponding dx.types.Handle value.
+// 3. Expand vectors in place as separate arguments.
+//
+// Example
+// -----------------------------------------------------------------------------
+//
+//  %0 = call <2 x float> MyBufferOp(i32 138, %class.Buffer %3, <2 x i32> <1 , 2> )
+//  %r = call %dx.types.ResRet.f32 MyBufferOp(i32 138, %dx.types.Handle %buf, i32 1, i32 2 )
+//  %x = extractvalue %r, 0
+//  %y = extractvalue %r, 1
+//  %v = <2 x float> undef
+//  %v.1 = insertelement %v,   %x, 0
+//  %v.2 = insertelement %v.1, %y, 1
+class ResourceMethodCall {
+public:
+  ResourceMethodCall(CallInst *CI, Function &explodedFunction, const ExtensionLowering::HandleMap &handleMap)
+    : m_CI(CI)
+    , m_explodedFunction(explodedFunction)
+    , m_handleMap(handleMap)
+    , m_builder(CI)
+  { }
+
+  Value *Generate() {
+    SmallVector<Value *, 16> args;
+    ExplodeArgs(args);
+    Value *result = CreateCall(args);
+    result = ConvertResult(result);
+    return result;
+  }
+  
+  // Returns OrigArg as an Instruction if it is a key in handleMap, otherwise nullptr.
+  static Instruction *IsResourceHandle(Value *OrigArg, const ExtensionLowering::HandleMap &handleMap) {
+    if (Instruction *Inst = dyn_cast<Instruction>(OrigArg)) {
+      if (handleMap.count(Inst))
+        return Inst;
+    }
+    return nullptr;
+  }
+  
+private:
+  CallInst *m_CI;
+  Function &m_explodedFunction;
+  const ExtensionLowering::HandleMap &m_handleMap;
+  IRBuilder<> m_builder;
+  
+  Value *GetResourceHandle(Value *OrigArg) {
+    if (Instruction *Inst = IsResourceHandle(OrigArg, m_handleMap))
+      return m_handleMap.at(Inst);
+    return nullptr;
+    
+  }
+
+  void ExplodeArgs(SmallVectorImpl<Value*> &args) {
+    for (Value *arg : m_CI->arg_operands()) {
+      // vector arg: <N x ty> -> ty, ty, ..., ty (N times)
+      if (arg->getType()->isVectorTy()) {
+        for (unsigned i = 0; i < arg->getType()->getVectorNumElements(); i++) {
+          Value *xarg = m_builder.CreateExtractElement(arg, i);
+          args.push_back(xarg);
+        }
+      }
+      // resource arg (present in the handle map): arg -> its mapped handle value
+      else if (Value *handle = GetResourceHandle(arg)) {
+        args.push_back(handle);
+      }
+      // any other value: arg -> arg
+      else {
+        args.push_back(arg);
+      }
+    }
+  }
+
+  Value *CreateCall(const SmallVectorImpl<Value*> &args) {
+    return m_builder.CreateCall(&m_explodedFunction, args);
+  }
+
+  Value *ConvertResult(Value *result) {
+    Type *origRetTy = m_CI->getType();
+    if (origRetTy->isVoidTy())
+      return ConvertVoidResult(result);
+    else if (origRetTy->isVectorTy())
+      return ConvertVectorResult(origRetTy, result);
+    else
+      return ConvertScalarResult(origRetTy, result);
+  }
+
+  // Void result does not need any conversion.
+  Value *ConvertVoidResult(Value *result) {
+    return result;
+  }
+
+  // Vector result will be populated with the elements from the resource return.
+  Value *ConvertVectorResult(Type *origRetTy, Value *result) {
+    Type *resourceRetTy = result->getType();
+    assert(origRetTy->isVectorTy());
+    assert(resourceRetTy->isStructTy() && "expected resource return type to be a struct");
+    
+    const unsigned vectorSize = origRetTy->getVectorNumElements();
+    const unsigned structSize = resourceRetTy->getStructNumElements();
+    const unsigned size = std::min(vectorSize, structSize);
+    assert(vectorSize < structSize);
+    
+    // Copy resource struct elements to vector.
+    Value *vector = UndefValue::get(origRetTy);
+    for (unsigned i = 0; i < size; ++i) {
+      Value *element = m_builder.CreateExtractValue(result, { i });
+      vector = m_builder.CreateInsertElement(vector, element, i);
+    }
+
+    return vector;
+  }
+
+  // Scalar result will be populated with the first element of the resource return.
+  Value *ConvertScalarResult(Type *origRetTy, Value *result) {
+    assert(origRetTy->isSingleValueType());
+    return m_builder.CreateExtractValue(result, { 0 });
+  }
+
+};
+
+// Translate function return and argument types for resource method lowering.
+class ResourceFunctionTypeTranslator : public FunctionTypeTranslator {
+public:
+  ResourceFunctionTypeTranslator(const ExtensionLowering::HandleMap &handleMap, OP& hlslOp)
+    : m_handleMap(handleMap)
+    , m_hlslOp(hlslOp)
+  { }
+
+  // Translate return type as follows:
+  //
+  // void     -> void
+  // <N x ty> -> dx.types.ResRet.ty
+  //  ty      -> dx.types.ResRet.ty
+  virtual Type *TranslateReturnType(CallInst *CI) override {
+    Type *RetTy = CI->getType();
+    if (RetTy->isVoidTy())
+      return RetTy;
+    else if (RetTy->isVectorTy())
+      RetTy = RetTy->getVectorElementType();
+
+    return m_hlslOp.GetResRetType(RetTy);
+  }
+  
+  // Translate argument type to a { type, count } pair as follows:
+  //
+  // resource -> { dx.types.Handle, 1 }
+  // <N x ty> -> { ty, N }
+  //  ty      -> { ty, 1 }
+  virtual ArgumentType TranslateArgumentType(Value *OrigArg) override {
+    int count = 1;
+    Type *ty = OrigArg->getType();
+
+    if (ty->isVectorTy()) {
+      count = ty->getVectorNumElements();
+      ty = ty->getVectorElementType();
+    }
+    else if (ResourceMethodCall::IsResourceHandle(OrigArg, m_handleMap)) {
+      ty = m_hlslOp.GetHandleType();
+    }
+
+    return ArgumentType(ty, count);
+  }
+
+private:
+  const ExtensionLowering::HandleMap &m_handleMap;
+  OP& m_hlslOp;
+};
+
+Value *ExtensionLowering::Resource(CallInst *CI) {
+  ResourceFunctionTypeTranslator resourceTypeTranslator(m_handleMap, m_hlslOp);
+  Function *resourceFunction = FunctionTranslator::GetLoweredFunction(resourceTypeTranslator, CI, *this);
+  if (!resourceFunction)
+    return nullptr;
+
+  ResourceMethodCall explode(CI, *resourceFunction, m_handleMap);
+  Value *result = explode.Generate();
+  return result;
+}
+
 ///////////////////////////////////////////////////////////////////////////////
 // Computing Extension Names.

--- a/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
+++ b/lib/Transforms/Scalar/ScalarReplAggregatesHLSL.cpp
@ -4069,8 +4069,11 @@ void SROA_Parameter_HLSL::flattenArgument(
                DXASSERT(data->getType()->isPointerTy(),
                         "Append value must be pointer.");
                IRBuilder<> Builder(CI);
-                Value *ldInst = Builder.CreateLoad(data);
-                Builder.CreateStore(ldInst, outputVal);
+
+                llvm::SmallVector<llvm::Value *, 16> idxList;
+                SplitCpy(data->getType(), outputVal, data, idxList,
+                         /*bAllowReplace*/ false, Builder);
+
                CI->setArgOperand(HLOperandIndex::kStreamAppendDataOpIndex, outputVal);
              }
              else {
@ -4089,9 +4092,13 @@ void SROA_Parameter_HLSL::flattenArgument(
                DXASSERT_LOCALVAR(eltCount, eltCount == EltPtrList.size(), "invalid element count");

                for (unsigned i = HLOperandIndex::kStreamAppendDataOpIndex; i < CI->getNumArgOperands(); i++) {
-                  Value *Elt = Builder.CreateLoad(CI->getArgOperand(i));
-                  Value *EltPtr = EltPtrList[i-HLOperandIndex::kStreamAppendDataOpIndex];
-                  Builder.CreateStore(Elt, EltPtr);
+                  Value *DataPtr = CI->getArgOperand(i);
+                  Value *EltPtr =
+                      EltPtrList[i - HLOperandIndex::kStreamAppendDataOpIndex];
+
+                  llvm::SmallVector<llvm::Value *, 16> idxList;
+                  SplitCpy(DataPtr->getType(), EltPtr, DataPtr, idxList,
+                           /*bAllowReplace*/ false, Builder);
                  CI->setArgOperand(i, EltPtr);
                }
              }
@ -4255,6 +4262,17 @@ static void LegalizeDxilInputOutputs(Function *F, DxilFunctionAnnotation *EntryA
      bNeedTemp = true;
      bLoadOutputFromTemp = true;
      bStoreInputToTemp = true;
+    } else if (bLoad && bStore) {
+      bNeedTemp = true;
+      switch (qual) {
+      case DxilParamInputQual::InputPrimitive:
+      case DxilParamInputQual::InputPatch:
+      case DxilParamInputQual::OutputPatch:
+        bStoreInputToTemp = true;
+        break;
+      default:
+        DXASSERT(0, "invalid input qual here");
+      }
    }

    if (HLMatrixLower::IsMatrixType(Ty)) {
--- a/lib/Transforms/Scalar/Scalarizer.cpp
+++ b/lib/Transforms/Scalar/Scalarizer.cpp
@ -344,8 +344,8 @@ void Scalarizer::transferMetadata(Instruction *Op, const ValueVector &CV) {
      // HLSL Change Begins
      // Transfer FPMath flag.
      if (FPMathOperator *FPMath = dyn_cast<FPMathOperator>(New)) {
-        FPMathOperator *FPMathOp = dyn_cast<FPMathOperator>(Op);
-        New->copyFastMathFlags(FPMathOp->getFastMathFlags());
+        if (FPMathOperator *FPMathOp = dyn_cast<FPMathOperator>(Op))
+          New->copyFastMathFlags(FPMathOp->getFastMathFlags());
      }
      // HLSL Change Ends
    }
--- a/tools/clang/lib/Basic/Targets.cpp
+++ b/tools/clang/lib/Basic/Targets.cpp
@ -6987,6 +6987,9 @@ public:
    BigEndian = false;
    TLSSupported = false;
    LongWidth = LongAlign = 64;
+    BoolWidth = 32;
+    // Keep bool 8-bit aligned; presumably to avoid padding members added for alignment.
+    BoolAlign = 8;

    // using the Microsoft ABI.
    TheCXXABI.set(TargetCXXABI::Microsoft);
@ -7031,6 +7034,9 @@ public:
  DXIL_32TargetInfo(const llvm::Triple &Triple) : DXILTargetInfo(Triple) {
    LongDoubleWidth = LongDoubleAlign = 64;
    LongDoubleFormat = &llvm::APFloat::IEEEdouble;
+    BoolWidth = 32;
+    // Keep bool 8-bit aligned; presumably to avoid padding members added for alignment.
+    BoolAlign = 8;
    // TODO: Update Description for DXIL
    DescriptionString = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32";
  }
--- a/tools/clang/lib/CodeGen/CGExpr.cpp
+++ b/tools/clang/lib/CodeGen/CGExpr.cpp
@ -1251,7 +1251,11 @@ llvm::Value *CodeGenFunction::EmitFromMemory(llvm::Value *Value, QualType Ty) {
  if (hasBooleanRepresentation(Ty)) {
    assert(Value->getType()->isIntegerTy(getContext().getTypeSize(Ty)) &&
           "wrong value rep of bool");
-    return Builder.CreateTrunc(Value, Builder.getInt1Ty(), "tobool");
+    // HLSL Change Begin.
+    // Use 'icmp ne v, 0' to convert to i1 instead of trunc.
+    return Builder.CreateICmpNE(
+        Value, llvm::ConstantInt::get(Value->getType(), 0), "tobool");
+    // HLSL Change End.
  }

  return Value;
--- a/tools/clang/lib/CodeGen/CGHLSLMS.cpp
+++ b/tools/clang/lib/CodeGen/CGHLSLMS.cpp
@ -2607,7 +2607,6 @@ static void ReplaceBoolVectorSubscript(CallInst *CI) {
  Value *Ptr = CI->getArgOperand(0);
  Value *Idx = CI->getArgOperand(1);
  Value *IdxList[] = {ConstantInt::get(Idx->getType(), 0), Idx};
-  llvm::Type *i1Ty = llvm::Type::getInt1Ty(Idx->getContext());

  for (auto It = CI->user_begin(), E = CI->user_end(); It != E;) {
    Instruction *user = cast<Instruction>(*(It++));
@ -2624,7 +2623,8 @@ static void ReplaceBoolVectorSubscript(CallInst *CI) {
      // Must be a store inst here.
      StoreInst *SI = cast<StoreInst>(user);
      Value *V = SI->getValueOperand();
-      Value *cast = Builder.CreateTrunc(V, i1Ty);
+      Value *cast =
+          Builder.CreateICmpNE(V, llvm::ConstantInt::get(V->getType(), 0));
      Builder.CreateStore(cast, GEP);
      SI->eraseFromParent();
    }
@ -3141,9 +3141,9 @@ static void SimplifyArrayToVector(BitCastInst *BCI, std::vector<Instruction *> &

 static void SimplifyBoolCast(BitCastInst *BCI, llvm::Type *i1Ty, std::vector<Instruction *> &deadInsts) {
  // Transform
-  //%22 = bitcast i1* %21 to i8*
-  //%23 = load i8, i8* %22, !tbaa !3, !range !7
-  //%tobool5 = trunc i8 %23 to i1
+  //%22 = bitcast i1* %21 to i32*
+  //%23 = load i32, i32* %22, !tbaa !3, !range !7
+  //%tobool5 = icmp ne i32 %23, 0
  // To
  //%tobool5 = load i1, i1* %21, !tbaa !3, !range !7
  Value *i1Ptr = BCI->getOperand(0);
@ -3152,17 +3152,21 @@ static void SimplifyBoolCast(BitCastInst *BCI, llvm::Type *i1Ty, std::vector<Ins
      if (!LI->hasOneUse()) {
        continue;
      }
-      if (TruncInst *TI = dyn_cast<TruncInst>(*LI->user_begin())) {
-        if (TI->getType() == i1Ty) {
-          IRBuilder<> Builder(LI);
-          Value *i1Val = Builder.CreateLoad(i1Ptr);
-          TI->replaceAllUsesWith(i1Val);
-          deadInsts.emplace_back(LI);
-          deadInsts.emplace_back(TI);
+      if (ICmpInst *II = dyn_cast<ICmpInst>(*LI->user_begin())) {
+        if (ConstantInt *CI = dyn_cast<ConstantInt>(II->getOperand(1))) {
+          if (CI->getLimitedValue() == 0 &&
+              II->getPredicate() == CmpInst::ICMP_NE) {
+            IRBuilder<> Builder(LI);
+            Value *i1Val = Builder.CreateLoad(i1Ptr);
+            II->replaceAllUsesWith(i1Val);
+            deadInsts.emplace_back(LI);
+            deadInsts.emplace_back(II);
+          }
        }
      }
    }
  }
+  deadInsts.emplace_back(BCI);
 }

 typedef float(__cdecl *FloatUnaryEvalFuncType)(float);
@ -5116,8 +5120,8 @@ void CGMSHLSLRuntime::EmitHLSLOutParamConversionInit(
    BasicBlock *EntryBlock = &F->getEntryBlock();

    if (ParamTy->isBooleanType()) {
-      // Create i8 for bool.
-      ParamTy = CGM.getContext().CharTy;
+      // Create i32 for bool.
+      ParamTy = CGM.getContext().IntTy;
    }
    // Make sure the alloca is in entry block to stop inline create stacksave.
    IRBuilder<> Builder(EntryBlock->getFirstInsertionPt());
--- a/tools/clang/lib/Sema/SemaHLSL.cpp
+++ b/tools/clang/lib/Sema/SemaHLSL.cpp
@ -3073,15 +3073,15 @@ public:
      const HLSL_INTRINSIC *pPrior = nullptr;
      UINT64 lookupCookie = 0;
      CA2W wideTypeName(typeName);
-      table->LookupIntrinsic(wideTypeName, L"*", &pIntrinsic, &lookupCookie);
-      while (pIntrinsic != nullptr) {
+      HRESULT found = table->LookupIntrinsic(wideTypeName, L"*", &pIntrinsic, &lookupCookie);
+      while (pIntrinsic != nullptr && SUCCEEDED(found)) {
        if (!AreIntrinsicTemplatesEquivalent(pIntrinsic, pPrior)) {
          AddObjectIntrinsicTemplate(recordDecl, startDepth, pIntrinsic);
          // NOTE: this only works with the current implementation because
          // intrinsics are alive as long as the table is alive.
          pPrior = pIntrinsic;
        }
-        table->LookupIntrinsic(wideTypeName, L"*", &pIntrinsic, &lookupCookie);
+        found = table->LookupIntrinsic(wideTypeName, L"*", &pIntrinsic, &lookupCookie);
      }
    }
  }
@ -3868,6 +3868,7 @@ public:

  FunctionDecl* AddHLSLIntrinsicMethod(
    LPCSTR tableName,
+    LPCSTR lowering,
    _In_ const HLSL_INTRINSIC* intrinsic,
    _In_ FunctionTemplateDecl *FunctionTemplate,
    ArrayRef<Expr *> Args,
@ -3956,7 +3957,7 @@ public:
      SC_Extern, InlineSpecifiedFalse, IsConstexprFalse, NoLoc);

    // Add intrinsic attr
-    AddHLSLIntrinsicAttr(method, *m_context, tableName, "", intrinsic);
+    AddHLSLIntrinsicAttr(method, *m_context, tableName, lowering, intrinsic);

    // Record this function template specialization.
    TemplateArgumentList *argListCopy = TemplateArgumentList::CreateCopy(
@ -7791,7 +7792,7 @@ Sema::TemplateDeductionResult HLSLExternalSource::DeduceTemplateArgumentsForHLSL
      continue;
    }

-    Specialization = AddHLSLIntrinsicMethod(cursor.GetTableName(), *cursor, FunctionTemplate, Args, argTypes, argCount);
+    Specialization = AddHLSLIntrinsicMethod(cursor.GetTableName(), cursor.GetLoweringStrategy(), *cursor, FunctionTemplate, Args, argTypes, argCount);
    DXASSERT_NOMSG(Specialization->getPrimaryTemplate()->getCanonicalDecl() ==
      FunctionTemplate->getCanonicalDecl());

--- a/tools/clang/test/CodeGenHLSL/BasicHLSL11_VS.hlsl
+++ b/tools/clang/test/CodeGenHLSL/BasicHLSL11_VS.hlsl
@ -20,7 +20,7 @@
 // CHECK: xy

 // CHECK: OutputPositionPresent=1
-// CHECK: dx.op.createHandle(i32 58, i8 2, i32 0, i32 5, i1 false)
+// CHECK: dx.op.createHandle(i32 59, i8 2, i32 0, i32 5, i1 false)

 //--------------------------------------------------------------------------------------
 // File: BasicHLSL11_VS.hlsl
--- a/tools/clang/test/HLSL/dxil_validation/GetDimCalcLOD.hlsl
+++ b/tools/clang/test/HLSL/dxil_validation/GetDimCalcLOD.hlsl
--- a/tools/clang/test/HLSL/dxil_validation/InnerCoverage.hlsl
+++ b/tools/clang/test/HLSL/dxil_validation/InnerCoverage.hlsl
--- a/tools/clang/test/CodeGenHLSL/InnerCoverage2.hlsl
+++ b/tools/clang/test/CodeGenHLSL/InnerCoverage2.hlsl
@ -0,0 +1,17 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// note: define GENLL in order to generate the basis for InnerCoverage.ll
+
+// CHECK: error: Parameter with semantic SV_InnerCoverage has overlapping semantic index at 0
+// CHECK: error: Pixel shader inputs SV_Coverage and SV_InnerCoverage are mutually exclusive
+
+void main(snorm float b : B, uint c:C,
+	in uint inner : InnerCoverage,
+	inout uint cover: SV_Coverage)
+{
+#ifndef GENLL
+  cover = cover & c;
+#else
+  cover = cover & inner;
+#endif
+}
--- a/tools/clang/test/HLSL/dxil_validation/IntegerDepth.hlsl
+++ b/tools/clang/test/HLSL/dxil_validation/IntegerDepth.hlsl
--- a/tools/clang/test/CodeGenHLSL/IntegerDepth2.hlsl
+++ b/tools/clang/test/CodeGenHLSL/IntegerDepth2.hlsl
@ -0,0 +1,8 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// CHECK: @main
+
+float main(snorm float b : B, float c:C) : SV_DEPTH
+{
+  return b;
+}
--- a/tools/clang/test/HLSL/dxil_validation/SamplerKind.hlsl
+++ b/tools/clang/test/HLSL/dxil_validation/SamplerKind.hlsl
--- a/tools/clang/test/CodeGenHLSL/Samples/d12_multithreading_vs.hlsl
+++ b/tools/clang/test/CodeGenHLSL/Samples/d12_multithreading_vs.hlsl
@ -3,7 +3,7 @@
 // The constant buffer should be allocated with ID zero and referenced as such.

 // CHECK: cb0
-// CHECK: dx.op.createHandle(i32 58, i8 2, i32 0, i32 0
+// CHECK: dx.op.createHandle(i32 59, i8 2, i32 0, i32 0

 //*********************************************************
 //
--- a/tools/clang/test/CodeGenHLSL/SimpleGS5.hlsl
+++ b/tools/clang/test/CodeGenHLSL/SimpleGS5.hlsl
@ -0,0 +1,29 @@
+// RUN: %dxc -E main -T gs_6_0 %s | FileCheck %s
+
+// CHECK: InputPrimitive=patch2
+// CHECK: emitStream
+// CHECK: cutStream
+// CHECK: i32 24}
+
+struct GSOut {
+  float2 uv : TEXCOORD0;
+  float4 clr : COLOR;
+  float4 pos : SV_Position;
+  float3 norm[2] : NORMAL;
+};
+
+cbuffer b : register(b0) {
+  float2 invViewportSize;
+};
+
+// geometry shader that reads a 2-point input patch and appends one vertex to a point stream
+[maxvertexcount(3)]
+[instance(24)]
+void main(InputPatch<GSOut, 2>points, inout PointStream<GSOut> stream) {
+
+  points[0].norm[0] = 1;
+  points[0].norm[1] = 2;
+  stream.Append(points[0]);
+
+  stream.RestartStrip();
+}
--- a/tools/clang/test/HLSL/dxil_validation/UndefValue.hlsl
+++ b/tools/clang/test/HLSL/dxil_validation/UndefValue.hlsl
--- a/tools/clang/test/CodeGenHLSL/UndefValue2.hlsl
+++ b/tools/clang/test/CodeGenHLSL/UndefValue2.hlsl
@ -0,0 +1,9 @@
+// RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s
+
+// CHECK: @main
+
+float a;
+float main(snorm float b : B) : SV_DEPTH
+{
+  return b + a;
+}
--- a/tools/clang/test/HLSL/dxil_validation/barrier.hlsl
+++ b/tools/clang/test/HLSL/dxil_validation/barrier.hlsl
@ -73,7 +73,7 @@ void main( uint2 tid : SV_DispatchThreadID, uint2 gid : SV_GroupID, uint2 gtid :
    GroupMemoryBarrierWithGroupSync();
    float2x2 f2x2 = dataC[8*8-1-tid.y%(8*8)];
  AllMemoryBarrier();
-       fA[gidx+2] = f2x2; 
+       fA[gidx+2] = f2x2;
  AllMemoryBarrierWithGroupSync();
      fA[gidx+1] = f2x2;
  DeviceMemoryBarrier();
--- a/tools/clang/test/CodeGenHLSL/bindings1.hlsl
+++ b/tools/clang/test/CodeGenHLSL/bindings1.hlsl
@ -95,35 +95,35 @@
 // CHECK: %struct.Resources = type { %class.Texture2D, %class.Texture2D.0, %class.Texture2D, %class.Texture2D.0, %class.RWTexture2D, %class.RWTexture2D, %class.RWTexture2D, %class.RWTexture2D, %struct.SamplerComparisonState, %struct.SamplerState, %struct.SamplerComparisonState, %struct.SamplerState, <4 x float> }

 //                                                CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-// CHECK: %RWTex2_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 7, i1 false)
-// CHECK: %Tex1_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 0, i32 0, i1 false)
-// CHECK: %Samp2_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 0, i32 0, i1 false)
-// CHECK: %tbuf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 8, i32 4, i1 false)
-// CHECK: %tbuf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 7, i32 2, i1 false)
-// CHECK: %tbuf3_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 6, i32 6, i1 false)
-// CHECK: %tbuf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 5, i32 35, i1 false)
-// CHECK: %buf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 4, i32 55, i1 false)
-// CHECK: %buf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 3, i32 104, i1 false)
-// CHECK: %buf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 2, i32 1, i1 false)
-// CHECK: %MyCB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 11, i1 false)
-// CHECK: %MyTB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 4, i32 11, i1 false)
-// CHECK: %Tex2_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 1, i32 30, i1 false)
-// CHECK: %Tex3_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 2, i32 94, i1 false)
-// CHECK: %Tex4_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 3, i32 10, i1 false)
-// CHECK: %RWTex1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 3, i32 2, i1 false)
-// CHECK: %RWTex3_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 1, i32 14, i1 false)
-// CHECK: %RWTex4_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 2, i32 22, i1 false)
-// CHECK: %Samp1_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 3, i32 3, i1 false)
-// CHECK: %Samp3_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 1, i32 29, i1 false)
-// CHECK: %Samp4_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 2, i32 23, i1 false)
+// CHECK: %RWTex2_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 0, i32 7, i1 false)
+// CHECK: %Tex1_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 0, i32 0, i1 false)
+// CHECK: %Samp2_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 0, i32 0, i1 false)
+// CHECK: %tbuf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 8, i32 4, i1 false)
+// CHECK: %tbuf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 7, i32 2, i1 false)
+// CHECK: %tbuf3_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 6, i32 6, i1 false)
+// CHECK: %tbuf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 5, i32 35, i1 false)
+// CHECK: %buf2_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 4, i32 55, i1 false)
+// CHECK: %buf1_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 3, i32 104, i1 false)
+// CHECK: %buf4_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 2, i32 1, i1 false)
+// CHECK: %MyCB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 2, i32 0, i32 11, i1 false)
+// CHECK: %MyTB_buffer = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 4, i32 11, i1 false)
+// CHECK: %Tex2_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 1, i32 30, i1 false)
+// CHECK: %Tex3_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 2, i32 94, i1 false)
+// CHECK: %Tex4_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0, i32 3, i32 10, i1 false)
+// CHECK: %RWTex1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 3, i32 2, i1 false)
+// CHECK: %RWTex3_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 1, i32 14, i1 false)
+// CHECK: %RWTex4_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1, i32 2, i32 22, i1 false)
+// CHECK: %Samp1_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 3, i32 3, i1 false)
+// CHECK: %Samp3_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 1, i32 29, i1 false)
+// CHECK: %Samp4_sampler = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 3, i32 2, i32 23, i1 false)

 // check packoffset:
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %MyCB_buffer, i32 4)
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %MyCB_buffer, i32 7)
-// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %MyCB_buffer, i32 21)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %MyCB_buffer, i32 4)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %MyCB_buffer, i32 7)
+// CHECK: @dx.op.cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %MyCB_buffer, i32 21)

 // check element index:
-// CHECK: @dx.op.bufferLoad.i32(i32 69, %dx.types.Handle %tbuf1_buffer, i32 1, i32 undef)
+// CHECK: @dx.op.bufferLoad.i32(i32 70, %dx.types.Handle %tbuf1_buffer, i32 1, i32 undef)



--- a/tools/clang/test/CodeGenHLSL/firstbitHi.hlsl
+++ b/tools/clang/test/CodeGenHLSL/firstbitHi.hlsl
@ -12,10 +12,10 @@
 // CHECK: select
 // CHECK: i32 -1

-// CHECK: op.bufferStore.i32(i32 70, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 2, i32 undef, i32 26
-// CHECK: op.bufferStore.i32(i32 70, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 3, i32 undef, i32 23
+// CHECK: op.bufferStore.i32(i32 71, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 2, i32 undef, i32 26
+// CHECK: op.bufferStore.i32(i32 71, %dx.types.Handle %outputUAV_UAV_rawbuf, i32 3, i32 undef, i32 23

-// CHECK: dx.op.unaryBits.i64(i32 32, i64
+// CHECK: dx.op.unaryBits.i64(i32 33, i64
 // CHECK: sub i32 63
 // CHECK: icmp ne i32
 // CHECK: select
--- a/tools/clang/test/CodeGenHLSL/gatherOffset.hlsl
+++ b/tools/clang/test/CodeGenHLSL/gatherOffset.hlsl
@ -1,17 +1,17 @@
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s

-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
-// CHECK: dx.op.textureGather.f32(i32 74
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75
+// CHECK: dx.op.textureGather.f32(i32 75


 SamplerState samp1;
--- a/tools/clang/test/HLSL/dxil_validation/hsAttribute.hlsl
+++ b/tools/clang/test/HLSL/dxil_validation/hsAttribute.hlsl
--- a/tools/clang/test/HLSL/dxil_validation/interpChange.hlsl
+++ b/tools/clang/test/HLSL/dxil_validation/interpChange.hlsl
--- a/tools/clang/test/HLSL/dxil_validation/interpOnInt.hlsl
+++ b/tools/clang/test/HLSL/dxil_validation/interpOnInt.hlsl
--- a/tools/clang/test/CodeGenHLSL/interpOnInt2.hlsl
+++ b/tools/clang/test/CodeGenHLSL/interpOnInt2.hlsl
@ -0,0 +1,9 @@
+// RUN: %dxc -E main -T ps_6_0 -fcgl %s | FileCheck %s
+
+// CHECK: main
+// After lowering, these would turn into multiple abs calls rather than a 4 x float
+// CHECK: call <4 x float> @"dx.hl.op..<4 x float> (i32, <4 x float>)"(i32 62,
+
+float4 main(float4 a : A, uint4 b : A1) : SV_TARGET {
+  return abs(a*b.yxxx);
+}
--- a/tools/clang/test/CodeGenHLSL/legacy_struct.hlsl
+++ b/tools/clang/test/CodeGenHLSL/legacy_struct.hlsl
@ -1,7 +1,7 @@
 // RUN: %dxc -E main -T ps_6_0 %s | FileCheck %s

-// CHECK: %dx.alignment.legacy.struct.S = type { i32, i32, i32, <2 x i32>, i32, i32, i32 }
 // CHECK: %"dx.alignment.legacy.$Globals" = type { float, %dx.alignment.legacy.struct.S, [1 x <4 x i32>] }
+// CHECK: %dx.alignment.legacy.struct.S = type { i32, i32, i32, <2 x i32>, i32, i32, i32 }

 RasterizerOrderedBuffer<float4> r;

--- a/tools/clang/test/CodeGenHLSL/multiStreamGS.hlsl
+++ b/tools/clang/test/CodeGenHLSL/multiStreamGS.hlsl
@ -17,14 +17,14 @@

 // CHECK: OutputStreamMask=7

-// CHECK: emitStream(i32 97, i8 0)
-// CHECK: cutStream(i32 98, i8 0)
-// CHECK: emitStream(i32 97, i8 1)
-// CHECK: cutStream(i32 98, i8 1)
-// CHECK: emitStream(i32 97, i8 1)
-// CHECK: cutStream(i32 98, i8 1)
-// CHECK: emitStream(i32 97, i8 2)
-// CHECK: cutStream(i32 98, i8 2)
+// CHECK: emitStream(i32 99, i8 0)
+// CHECK: cutStream(i32 100, i8 0)
+// CHECK: emitStream(i32 99, i8 1)
+// CHECK: cutStream(i32 100, i8 1)
+// CHECK: emitStream(i32 99, i8 1)
+// CHECK: cutStream(i32 100, i8 1)
+// CHECK: emitStream(i32 99, i8 2)
+// CHECK: cutStream(i32 100, i8 2)

 struct MyStruct
 {
--- a/tools/clang/test/HLSL/dxil_validation/phiTGSM.hlsl
+++ b/tools/clang/test/HLSL/dxil_validation/phiTGSM.hlsl
--- a/tools/clang/test/CodeGenHLSL/reducible.hlsl
+++ b/tools/clang/test/CodeGenHLSL/reducible.hlsl
@ -0,0 +1,22 @@
+// RUN: %dxc -E main -T ps_6_0 %s  | FileCheck %s
+
+// CHECK: !"llvm.loop.unroll.disable"
+uint u;
+float main(float2 a : A, int3 b : B) : SV_Target
+{
+  float s = 0;
+  /*
+  [loop]
+  for(int i = 0; i < b.x; i++) {
+    s += a.x;
+    if (s == 5)
+      break;
+  }
+  */
+  if (s > a)
+    s -= u+b.x;
+  else
+    s += b.x+b.y;
+
+  return s;
+}
--- a/tools/clang/test/CodeGenHLSL/rovs.hlsl
+++ b/tools/clang/test/CodeGenHLSL/rovs.hlsl
@ -34,21 +34,21 @@ float4 main() : SV_TARGET {
 // CHECK: rob_UAV_buf_ROV

  float4 result = 0;
-// CHECK: dx.op.bufferLoad.f32(i32 69,
+// CHECK: dx.op.bufferLoad.f32(i32 70,
  result += rob[0];
-// CHECK: dx.op.bufferLoad.i32(i32 69
+// CHECK: dx.op.bufferLoad.i32(i32 70
  result += rba.Load(0);
-// CHECK: dx.op.bufferLoad.f32(i32 69,
+// CHECK: dx.op.bufferLoad.f32(i32 70,
  result += rsb[0].f4;
-// CHECK: dx.op.textureLoad.f32(i32 67,
+// CHECK: dx.op.textureLoad.f32(i32 68,
  result += rt1[0];
-// CHECK: dx.op.textureLoad.f32(i32 67,
+// CHECK: dx.op.textureLoad.f32(i32 68,
  result += rt1a[uint2(0, 0)];
-// CHECK: dx.op.textureLoad.f32(i32 67,
+// CHECK: dx.op.textureLoad.f32(i32 68,
  result += rt2[uint2(0, 1)];
-// CHECK: dx.op.textureLoad.f32(i32 67,
+// CHECK: dx.op.textureLoad.f32(i32 68,
  result += rt2a[uint3(0, 0, 0)];
-// CHECK: dx.op.textureLoad.f32(i32 67,
+// CHECK: dx.op.textureLoad.f32(i32 68,
  result += rt3[uint3(1, 2, 3)];

  result += rt4[uint3(1, 2, 3)];
--- a/tools/clang/test/HLSL/dxil_validation/semaOverlap.hlsl
+++ b/tools/clang/test/HLSL/dxil_validation/semaOverlap.hlsl
--- a/tools/clang/test/CodeGenHLSL/semaOverlap1.hlsl
+++ b/tools/clang/test/CodeGenHLSL/semaOverlap1.hlsl
@ -0,0 +1,9 @@
+// RUN: %dxc -E main -T ps_6_0 -fcgl %s | FileCheck %s
+
+// CHECK: main
+// After lowering, these would turn into multiple abs calls rather than a 4 x float
+// CHECK: call <4 x float> @"dx.hl.op..<4 x float> (i32, <4 x float>)"(i32 62,
+
+float4 main(float4 a : A, float4 b : A1) : SV_TARGET {
+  return abs(a*b.yxxx);
+}
--- a/tools/clang/test/HLSL/dxil_validation/uavBarrier.hlsl
+++ b/tools/clang/test/HLSL/dxil_validation/uavBarrier.hlsl
--- a/tools/clang/test/CodeGenHLSL/updateCounter2.hlsl
+++ b/tools/clang/test/CodeGenHLSL/updateCounter2.hlsl
@ -0,0 +1,26 @@
+// RUN: %dxc -E main -T ps_6_0 %s  | FileCheck %s
+
+// CHECK: RWStructuredBuffers may increment or decrement their counters, but not both.
+
+struct Foo
+{
+  float2 a;
+  float3 b;
+  int2 c[4];
+};
+
+Buffer<float4> buf1;
+RWStructuredBuffer<Foo> buf2;
+
+float4 main(float idx1 : Idx1, float idx2 : Idx2) : SV_Target
+{
+  uint status;
+  float4 r = 0;
+  int id = buf2.IncrementCounter();
+  buf2[id].a = float2(idx1, idx2);
+
+  id = buf2.IncrementCounter();
+  r.xy += buf1[id].a;  
+  
+  return r;
+}
--- a/tools/clang/test/HLSL/abs2_m.ll
+++ b/tools/clang/test/HLSL/abs2_m.ll
@ -1,6 +1,8 @@
 ; RUN: %dxv %s | FileCheck %s

 ; CHECK: DXIL intrinsic overload must be valid
+; Change dx.op.loadInput.i32(i32 4 to dx.op.loadInput.i32(i32 3
+
 ;
 ; Input signature:
 ;
@ -82,7 +84,7 @@ attributes #1 = { nounwind readnone }
 !dx.entryPoints = !{!12}

 !0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 4}
+!1 = !{i32 1, i32 0}
 !2 = !{!"ps", i32 6, i32 0}
 !3 = !{i32 1, void (<4 x i32>, <4 x i32>*)* @"\01?main@@YA?AV?$vector@H$03@@V1@@Z.flat", !4}
 !4 = !{!5, !7, !10}
--- a/tools/clang/test/HLSL/dxil_validation/Eval.ll
+++ b/tools/clang/test/HLSL/dxil_validation/Eval.ll
@ -1,94 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Interpolation mode on A used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample
-; CHECK: Interpolation mode on A used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample
-; CHECK: Interpolation mode on A used with eval_* instruction must be linear, linear_centroid, linear_noperspective, linear_noperspective_centroid, linear_sample or linear_noperspective_sample
-
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-; Function Attrs: nounwind
-define void @main.flat(<4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>* nocapture readnone) #0 {
-entry:
-  %RenderTargetGetSampleCount = tail call i32 @dx.op.renderTargetGetSampleCount(i32 80)
-  %sub = add i32 %RenderTargetGetSampleCount, -1
-  %5 = tail call float @dx.op.evalCentroid.f32(i32 92, i32 0, i32 0, i8 0)
-  %6 = tail call float @dx.op.evalCentroid.f32(i32 92, i32 0, i32 0, i8 1)
-  %7 = tail call float @dx.op.evalCentroid.f32(i32 92, i32 0, i32 0, i8 2)
-  %8 = tail call float @dx.op.evalCentroid.f32(i32 92, i32 0, i32 0, i8 3)
-  %9 = tail call float @dx.op.evalSampleIndex.f32(i32 91, i32 0, i32 0, i8 0, i32 %sub)
-  %10 = tail call float @dx.op.evalSampleIndex.f32(i32 91, i32 0, i32 0, i8 1, i32 %sub)
-  %11 = tail call float @dx.op.evalSampleIndex.f32(i32 91, i32 0, i32 0, i8 2, i32 %sub)
-  %12 = tail call float @dx.op.evalSampleIndex.f32(i32 91, i32 0, i32 0, i8 3, i32 %sub)
-  %add.i0 = fadd fast float %9, %5
-  %add.i1 = fadd fast float %10, %6
-  %add.i2 = fadd fast float %11, %7
-  %add.i3 = fadd fast float %12, %8
-  %13 = tail call float @dx.op.evalSnapped.f32(i32 90, i32 0, i32 0, i8 0, i32 1, i32 2)
-  %14 = tail call float @dx.op.evalSnapped.f32(i32 90, i32 0, i32 0, i8 1, i32 1, i32 2)
-  %15 = tail call float @dx.op.evalSnapped.f32(i32 90, i32 0, i32 0, i8 2, i32 1, i32 2)
-  %16 = tail call float @dx.op.evalSnapped.f32(i32 90, i32 0, i32 0, i8 3, i32 1, i32 2)
-  %add5.i0 = fadd fast float %add.i0, %13
-  %add5.i1 = fadd fast float %add.i1, %14
-  %add5.i2 = fadd fast float %add.i2, %15
-  %add5.i3 = fadd fast float %add.i3, %16
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %add5.i0)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %add5.i1)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %add5.i2)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %add5.i3)
-  ret void
-}
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.evalSampleIndex.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind readonly
-declare i32 @dx.op.renderTargetGetSampleCount(i32) #2
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.evalCentroid.f32(i32, i32, i32, i8) #1
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.evalSnapped.f32(i32, i32, i32, i8, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!18}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{i32 1, void (<4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>*)* @main.flat, !4}
-!4 = !{!5, !7, !10, !12, !14, !16}
-!5 = !{i32 0, !6, !6}
-!6 = !{}
-!7 = !{i32 0, !8, !9}
-!8 = !{i32 4, !"A", i32 7, i32 9}
-!9 = !{i32 0}
-!10 = !{i32 0, !11, !9}
-!11 = !{i32 4, !"B", i32 5, i32 4, i32 7, i32 9}
-!12 = !{i32 0, !13, !9}
-!13 = !{i32 4, !"C", i32 5, i32 3, i32 7, i32 9}
-!14 = !{i32 0, !15, !9}
-!15 = !{i32 4, !"D", i32 5, i32 6, i32 7, i32 9}
-!16 = !{i32 1, !17, !9}
-!17 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!18 = !{void (<4 x float>, <4 x float>, <4 x float>, <4 x float>, <4 x float>*)* @main.flat, !"", !19, null, null}
-!19 = !{!20, !25, null}
-!20 = !{!21, !22, !23, !24}
-!21 = !{i32 0, !"A", i8 9, i8 0, !9, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-!22 = !{i32 1, !"B", i8 9, i8 0, !9, i8 4, i32 1, i8 4, i32 1, i8 0, null}
-!23 = !{i32 2, !"C", i8 9, i8 0, !9, i8 3, i32 1, i8 4, i32 2, i8 0, null}
-!24 = !{i32 3, !"D", i8 9, i8 0, !9, i8 6, i32 1, i8 4, i32 3, i8 0, null}
-!25 = !{!26}
-!26 = !{i32 0, !"SV_Target", i8 9, i8 16, !9, i8 0, i32 1, i8 4, i32 0, i8 0, null}
--- a/tools/clang/test/HLSL/dxil_validation/GetDimCalcLOD.ll
+++ b/tools/clang/test/HLSL/dxil_validation/GetDimCalcLOD.ll
@ -1,136 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: GetDimensions used undef dimension z on TextureCube
-; CHECK: coord uninitialized
-
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; UV                       0   xy          0     NONE   float
-;
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; SV_Target                0   xyzw        0   TARGET   float   xyzw
-;
-;
-; Pipeline Runtime Information:
-;
-; Pixel Shader
-; DepthOutput=0
-; SampleFrequency=0
-;
-;
-; Input signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; UV                       0                 linear
-;
-; Output signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; SV_Target                0
-;
-; Buffer Definitions:
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-; g_sam                             sampler      NA          NA      S0             s0     1
-; cube                              texture     f32        cube      T0             t0     1
-;
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%class.TextureCube = type { <4 x float> }
-%struct.SamplerState = type { i32 }
-%dx.types.Handle = type { i8* }
-%dx.types.Dimensions = type { i32, i32, i32, i32 }
-
-@"\01?cube@@3V?$TextureCube@V?$vector@M$03@@@@A" = available_externally global %class.TextureCube zeroinitializer, align 4
-@"\01?g_sam@@3USamplerState@@A" = available_externally global %struct.SamplerState zeroinitializer, align 4
-@dx.typevar.0 = external addrspace(1) constant %class.TextureCube
-@llvm.used = appending global [5 x i8*] [i8* bitcast (%class.TextureCube* @"\01?cube@@3V?$TextureCube@V?$vector@M$03@@@@A" to i8*), i8* bitcast (%struct.SamplerState* @"\01?g_sam@@3USamplerState@@A" to i8*), i8* bitcast (%class.TextureCube* @"\01?cube@@3V?$TextureCube@V?$vector@M$03@@@@A" to i8*), i8* bitcast (%struct.SamplerState* @"\01?g_sam@@3USamplerState@@A" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.TextureCube addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat(<2 x float>, <4 x float>* nocapture readnone) #0 {
-entry:
-  %cube_texture_cube = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %g_sam_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %2 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %3 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %4 = call %dx.types.Dimensions @dx.op.getDimensions(i32 73, %dx.types.Handle %cube_texture_cube, i32 0)  ; GetDimensions(handle,mipLevel)
-  %5 = extractvalue %dx.types.Dimensions %4, 0
-  %6 = extractvalue %dx.types.Dimensions %4, 2
-  %7 = call float @dx.op.calculateLOD.f32(i32 84, %dx.types.Handle %cube_texture_cube, %dx.types.Handle %g_sam_sampler, float %2, float %3, float undef, i1 true)  ; CalculateLOD(handle,sampler,coord0,coord1,coord2,clamped)
-  %conv = uitofp i32 %5 to float
-  %conv1 = uitofp i32 %6 to float
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %conv)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %conv1)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %7)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 1.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.Dimensions @dx.op.getDimensions(i32, %dx.types.Handle, i32) #2
-
-; Function Attrs: nounwind readonly
-declare float @dx.op.calculateLOD.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, i1) #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!9, !12}
-!dx.entryPoints = !{!21}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{!4, null, null, !7}
-!4 = !{!5}
-!5 = !{i32 0, %class.TextureCube* @"\01?cube@@3V?$TextureCube@V?$vector@M$03@@@@A", !"cube", i32 0, i32 0, i32 1, i32 5, i32 0, !6}
-!6 = !{i32 0, i32 9}
-!7 = !{!8}
-!8 = !{i32 0, %struct.SamplerState* @"\01?g_sam@@3USamplerState@@A", !"g_sam", i32 0, i32 0, i32 1, i32 0, null}
-!9 = !{i32 0, %class.TextureCube addrspace(1)* @dx.typevar.0, !10}
-!10 = !{i32 16, !11}
-!11 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!12 = !{i32 1, void (<2 x float>, <4 x float>*)* @main.flat, !13}
-!13 = !{!14, !16, !19}
-!14 = !{i32 0, !15, !15}
-!15 = !{}
-!16 = !{i32 0, !17, !18}
-!17 = !{i32 4, !"UV", i32 7, i32 9}
-!18 = !{i32 0}
-!19 = !{i32 1, !20, !18}
-!20 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!21 = !{void (<2 x float>, <4 x float>*)* @main.flat, !"", !22, !3, null}
-!22 = !{!23, !25, null}
-!23 = !{!24}
-!24 = !{i32 0, !"UV", i8 9, i8 0, !18, i8 2, i32 1, i8 2, i32 0, i8 0, null}
-!25 = !{!26}
-!26 = !{i32 0, !"SV_Target", i8 9, i8 16, !18, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-
--- a/tools/clang/test/HLSL/dxil_validation/InnerCoverage.ll
+++ b/tools/clang/test/HLSL/dxil_validation/InnerCoverage.ll
@ -1,62 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: InnerCoverage and Coverage are mutually exclusive.
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-; Function Attrs: alwaysinline nounwind
-define void @main(float %b, i32 %c, i32* nocapture readnone dereferenceable(4) %cover) #0 {
-entry:
-  %0 = call i32 @dx.op.coverage.i32(i32 147)  ; Coverage()
-  %1 = call i32 @dx.op.innercoverage.i32(i32 148)  ; InnerCoverage()
-  %and = and i32 %1, %0
-  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 %and)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.coverage.i32(i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.innercoverage.i32(i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #2
-
-attributes #0 = { alwaysinline nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind }
-
-!llvm.ident = !{!0}
-!dx.valver = !{!1}
-!dx.version = !{!2}
-!dx.shaderModel = !{!3}
-!dx.typeAnnotations = !{!4}
-!dx.entryPoints = !{!15}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 1, i32 0}
-!2 = !{i32 0, i32 7}
-!3 = !{!"ps", i32 6, i32 0}
-!4 = !{i32 1, void (float, i32, i32*)* @main, !5}
-!5 = !{!6, !8, !11, !13}
-!6 = !{i32 1, !7, !7}
-!7 = !{}
-!8 = !{i32 0, !9, !10}
-!9 = !{i32 4, !"B", i32 7, i32 13}
-!10 = !{i32 0}
-!11 = !{i32 0, !12, !10}
-!12 = !{i32 4, !"C", i32 7, i32 5}
-!13 = !{i32 2, !14, !10}
-!14 = !{i32 4, !"SV_Coverage", i32 7, i32 5}
-!15 = !{void (float, i32, i32*)* @main, !"main", !16, null, null}
-!16 = !{!17, !20, null}
-!17 = !{!18, !19}
-!18 = !{i32 0, !"B", i8 13, i8 0, !10, i8 1, i32 1, i8 1, i32 0, i8 0, null}
-!19 = !{i32 1, !"C", i8 5, i8 0, !10, i8 1, i32 1, i8 1, i32 0, i8 1, null}
-!20 = !{!21}
-!21 = !{i32 0, !"SV_Coverage", i8 5, i8 14, !10, i8 0, i32 1, i8 1, i32 -1, i8 -1, null}
--- a/tools/clang/test/HLSL/dxil_validation/IntegerDepth.ll
+++ b/tools/clang/test/HLSL/dxil_validation/IntegerDepth.ll
@ -1,58 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Invalid interpolation mode for 'C'
-; CHECK: SV_Depth must be float
-; CHECK: External function 'dxil.op.loadInput.f32' is not a DXIL function
-; CHECK: External function 'dx.op.loadInput.f32' is unused
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-; Function Attrs: nounwind
-define void @main.flat(float, float, i32* nocapture readnone) #0 {
-entry:
-  %3 = call float @dxil.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
-  %conv = fptosi float %3 to i32
-  call void @dx.op.storeOutput.i32(i32 5, i32 0, i32 0, i8 0, i32 %conv)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dxil.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!14}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{i32 1, void (float, float, i32*)* @main.flat, !4}
-!4 = !{!5, !7, !10, !12}
-!5 = !{i32 0, !6, !6}
-!6 = !{}
-!7 = !{i32 0, !8, !9}
-!8 = !{i32 4, !"B", i32 7, i32 13}
-!9 = !{i32 0}
-!10 = !{i32 0, !11, !9}
-!11 = !{i32 4, !"C", i32 7, i32 9}
-!12 = !{i32 1, !13, !9}
-!13 = !{i32 4, !"SV_DEPTH", i32 7, i32 4}
-!14 = !{void (float, float, i32*)* @main.flat, !"", !15, null, null}
-!15 = !{!16, !19, null}
-!16 = !{!17, !18}
-!17 = !{i32 0, !"B", i8 13, i8 0, !9, i8 1, i32 1, i8 1, i32 0, i8 0, null}
-!18 = !{i32 1, !"C", i8 9, i8 0, !9, i8 8, i32 1, i8 1, i32 1, i8 0, null}
-!19 = !{!20}
-!20 = !{i32 0, !"SV_Depth", i8 4, i8 17, !9, i8 0, i32 1, i8 1, i32 -1, i8 -1, null}
--- a/tools/clang/test/HLSL/dxil_validation/MultiStream.ll
+++ b/tools/clang/test/HLSL/dxil_validation/MultiStream.ll
@ -1,306 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-;
-; Note: shader requires additional functionality:
-;       SV_RenderTargetArrayIndex or SV_ViewportArrayIndex from any shader feeding rasterizer
-;
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; POSSIZE                  0   xyz         0     NONE   float
-; COLOR                    0   xyzw        1     NONE   float
-;
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; TEXCOORD                 0   xy          0     NONE   float   xyzw
-; COLOR                    0   xyzw        1     NONE   float   xyzw
-; SV_Position              0   xyzw        2      POS   float   xyzw
-; TEXCOORD                 0   xy          3     NONE   float   xyzw
-; COLOR                    0   xyzw        4     NONE   float   xyzw
-; SV_Position              0   xyzw        5      POS   float   xyzw
-; SV_ViewportArrayIndex     0   x           6  VPINDEX    uint   xyzw
-;
-;
-; Pipeline Runtime Information:
-;
-; Geometry Shader
-; InputPrimitive=point
-; OutputTopology=point
-; OutputStreamMask=3
-; OutputPositionPresent=1
-;
-;
-; Input signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; POSSIZE                  0                 linear
-; COLOR                    0                 linear
-; SV_GSInstanceID          0        nointerpolation
-;
-; Output signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; TEXCOORD                 0                 linear
-; COLOR                    0                 linear
-; SV_Position              0          noperspective
-; TEXCOORD                 0                 linear
-; COLOR                    0                 linear
-; SV_Position              0          noperspective
-; SV_ViewportArrayIndex     0        nointerpolation
-;
-; Buffer Definitions:
-;
-; cbuffer b
-; {
-;
-;   struct b
-;   {
-;
-;       float2 invViewportSize;                       ; Offset:    0
-;
-;   } b                                               ; Offset:    0 Size:     8
-;
-; }
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-; b                                 cbuffer      NA          NA     CB0            cb0     1
-;
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%b = type { <2 x float> }
-%struct.VSOutGSIn = type { <3 x float>, <4 x float> }
-%class.PointStream = type { %struct.VSOut }
-%struct.VSOut = type { <2 x float>, <4 x float>, <4 x float> }
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.f32 = type { float, float, float, float }
-
-@b = external constant %b
-@dx.typevar.0 = external addrspace(1) constant %struct.VSOutGSIn
-@dx.typevar.1 = external addrspace(1) constant %class.PointStream
-@dx.typevar.2 = external addrspace(1) constant %struct.VSOut
-@dx.typevar.3 = external addrspace(1) constant %b
-@llvm.used = appending global [5 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.VSOutGSIn addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.PointStream addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.VSOut addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%b addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*), i8* bitcast (%b* @b to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @"\01?main@@YAXY00UVSOutGSIn@@V?$PointStream@UVSOut@@@@1IAAI@Z.flat"([1 x <3 x float>]* nocapture readnone, [1 x <4 x float>]* nocapture readnone, %class.PointStream* nocapture readnone, <2 x float>* nocapture readnone, <4 x float>* nocapture readnone, <4 x float>* nocapture readnone, %class.PointStream* nocapture readnone, <2 x float>* nocapture readnone, <4 x float>* nocapture readnone, <4 x float>* nocapture readnone, i32, i32* nocapture readnone) #0 {
-entry:
-  %12 = tail call i32 @dx.op.gsInstanceID.i32(i32 138)  ; GSInstanceID()
-  %verts.0 = alloca [3 x float], align 4
-  %verts.1 = alloca [3 x float], align 4
-  %13 = getelementptr [3 x float], [3 x float]* %verts.0, i32 0, i32 0
-  %14 = getelementptr [3 x float], [3 x float]* %verts.1, i32 0, i32 0
-  store float -5.000000e-01, float* %13, align 4
-  store float -5.000000e-01, float* %14, align 4
-  %15 = getelementptr [3 x float], [3 x float]* %verts.0, i32 0, i32 1
-  %16 = getelementptr [3 x float], [3 x float]* %verts.1, i32 0, i32 1
-  store float 1.500000e+00, float* %15, align 4
-  store float -5.000000e-01, float* %16, align 4
-  %17 = getelementptr [3 x float], [3 x float]* %verts.0, i32 0, i32 2
-  %18 = getelementptr [3 x float], [3 x float]* %verts.1, i32 0, i32 2
-  store float -5.000000e-01, float* %17, align 4
-  store float 1.500000e+00, float* %18, align 4
-  %19 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 0)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %20 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 0)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %21 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 0)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %22 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 0)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %23 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 0)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %24 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 2, i32 0)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %25 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 3, i32 0)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %rem = urem i32 %12, 3
-  %26 = getelementptr [3 x float], [3 x float]* %verts.0, i32 0, i32 %rem
-  %27 = getelementptr [3 x float], [3 x float]* %verts.1, i32 0, i32 %rem
-  %load30 = load float, float* %26, align 4
-  %load32 = load float, float* %27, align 4
-  %mul.i0 = fmul fast float %load30, %19
-  %mul.i1 = fmul fast float %load32, %19
-  %add.i0 = fadd fast float %mul.i0, %20
-  %add.i1 = fadd fast float %mul.i1, %21
-  %28 = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %29 = tail call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %28, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %30 = extractvalue %dx.types.CBufRet.f32 %29, 0
-  %31 = extractvalue %dx.types.CBufRet.f32 %29, 1
-  %mul.i.i0 = fmul fast float %30, 2.000000e+00
-  %mul.i.i1 = fmul fast float %31, 2.000000e+00
-  %mul1.i.i0 = fmul fast float %mul.i.i0, %add.i0
-  %mul1.i.i1 = fmul fast float %mul.i.i1, %add.i1
-  %sub.i = fadd fast float %mul1.i.i0, -1.000000e+00
-  %sub2.i = fsub fast float 1.000000e+00, %mul1.i.i1
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 0.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 0.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %22)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %23)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 2, float %24)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 3, float %25)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float %sub.i)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float %sub2.i)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 2, float 5.000000e-01)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 3, float 1.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.emitStream(i32 97, i8 0)  ; EmitStream(streamId)
-  %add10 = add nuw nsw i32 %rem, 1
-  %32 = getelementptr [3 x float], [3 x float]* %verts.0, i32 0, i32 %add10
-  %33 = getelementptr [3 x float], [3 x float]* %verts.1, i32 0, i32 %add10
-  %load26 = load float, float* %32, align 4
-  %load28 = load float, float* %33, align 4
-  %mul14.i0 = fmul fast float %load26, %19
-  %mul14.i1 = fmul fast float %load28, %19
-  %add15.i0 = fadd fast float %mul14.i0, %20
-  %add15.i1 = fadd fast float %mul14.i1, %21
-  %mul1.i.31.i0 = fmul fast float %add15.i0, %mul.i.i0
-  %mul1.i.31.i1 = fmul fast float %add15.i1, %mul.i.i1
-  %sub.i.32 = fadd fast float %mul1.i.31.i0, -1.000000e+00
-  %sub2.i.33 = fsub fast float 1.000000e+00, %mul1.i.31.i1
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 2.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 0.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %22)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %23)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 2, float %24)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 3, float %25)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float %sub.i.32)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float %sub2.i.33)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 2, float 5.000000e-01)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 3, float 1.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.emitStream(i32 97, i8 0)  ; EmitStream(streamId)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 0, i8 0, float 2.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 0, i8 1, float 0.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 4, i32 0, i8 0, float %22)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 4, i32 0, i8 1, float %23)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 4, i32 0, i8 2, float %24)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 4, i32 0, i8 3, float %25)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 5, i32 0, i8 0, float %sub.i.32)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 5, i32 0, i8 1, float %sub2.i.33)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 5, i32 0, i8 2, float 5.000000e-01)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 5, i32 0, i8 3, float 1.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.emitStream(i32 97, i8 1)  ; EmitStream(streamId)
-  %add21 = add nuw nsw i32 %rem, 2
-  %34 = getelementptr [3 x float], [3 x float]* %verts.0, i32 0, i32 %add21
-  %35 = getelementptr [3 x float], [3 x float]* %verts.1, i32 0, i32 %add21
-  %load23 = load float, float* %34, align 4
-  %load24 = load float, float* %35, align 4
-  %mul25.i0 = fmul fast float %load23, %19
-  %mul25.i1 = fmul fast float %load24, %19
-  %add26.i0 = fadd fast float %mul25.i0, %20
-  %add26.i1 = fadd fast float %mul25.i1, %21
-  %mul1.i.36.i0 = fmul fast float %add26.i0, %mul.i.i0
-  %mul1.i.36.i1 = fmul fast float %add26.i1, %mul.i.i1
-  %sub.i.37 = fadd fast float %mul1.i.36.i0, -1.000000e+00
-  %sub2.i.38 = fsub fast float 1.000000e+00, %mul1.i.36.i1
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 0.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 2.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %22)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %23)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 2, float %24)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 3, float %25)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float %sub.i.37)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float %sub2.i.38)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 2, float 5.000000e-01)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 3, float 1.000000e+00)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.emitStream(i32 97, i8 0)  ; EmitStream(streamId)
-  tail call void @dx.op.storeOutput.i32(i32 5, i32 6, i32 0, i8 0, i32 2)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  tail call void @dx.op.cutStream(i32 98, i8 0)  ; CutStream(streamId)
-  tail call void @dx.op.cutStream(i32 98, i8 1)  ; CutStream(streamId)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.gsInstanceID.i32(i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.cutStream(i32, i8) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.emitStream(i32, i8) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!6, !17}
-!dx.entryPoints = !{!40}
-
-!0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 4}
-!2 = !{!"gs", i32 6, i32 0}
-!3 = !{null, null, !4, null}
-!4 = !{!5}
-!5 = !{i32 0, %b* @b, !"b", i32 0, i32 0, i32 1, i32 8, null}
-!6 = !{i32 0, %struct.VSOutGSIn addrspace(1)* @dx.typevar.0, !7, %class.PointStream addrspace(1)* @dx.typevar.1, !10, %struct.VSOut addrspace(1)* @dx.typevar.2, !12, %b addrspace(1)* @dx.typevar.3, !15}
-!7 = !{i32 32, !8, !9}
-!8 = !{i32 3, i32 0, i32 4, !"POSSIZE", i32 6, !"posSize", i32 7, i32 9}
-!9 = !{i32 3, i32 16, i32 4, !"COLOR", i32 6, !"clr", i32 7, i32 9}
-!10 = !{i32 48, !11}
-!11 = !{i32 3, i32 0, i32 6, !"h"}
-!12 = !{i32 48, !13, !9, !14}
-!13 = !{i32 3, i32 0, i32 4, !"TEXCOORD0", i32 6, !"uv", i32 7, i32 9}
-!14 = !{i32 3, i32 32, i32 4, !"SV_Position", i32 6, !"pos", i32 7, i32 9}
-!15 = !{i32 0, !16}
-!16 = !{i32 3, i32 0, i32 6, !"invViewportSize", i32 7, i32 9}
-!17 = !{i32 1, void ([1 x <3 x float>]*, [1 x <4 x float>]*, %class.PointStream*, <2 x float>*, <4 x float>*, <4 x float>*, %class.PointStream*, <2 x float>*, <4 x float>*, <4 x float>*, i32, i32*)* @"\01?main@@YAXY00UVSOutGSIn@@V?$PointStream@UVSOut@@@@1IAAI@Z.flat", !18}
-!18 = !{!19, !21, !24, !26, !27, !29, !30, !32, !33, !34, !35, !36, !38}
-!19 = !{i32 0, !20, !20}
-!20 = !{}
-!21 = !{i32 0, !22, !23}
-!22 = !{i32 4, !"POSSIZE", i32 7, i32 9}
-!23 = !{i32 0}
-!24 = !{i32 0, !25, !23}
-!25 = !{i32 4, !"COLOR", i32 7, i32 9}
-!26 = !{i32 5, !20, !20}
-!27 = !{i32 5, !28, !23}
-!28 = !{i32 4, !"TEXCOORD0", i32 7, i32 9}
-!29 = !{i32 5, !25, !23}
-!30 = !{i32 5, !31, !23}
-!31 = !{i32 4, !"SV_Position", i32 7, i32 9}
-!32 = !{i32 6, !20, !20}
-!33 = !{i32 6, !28, !23}
-!34 = !{i32 6, !25, !23}
-!35 = !{i32 6, !31, !23}
-!36 = !{i32 0, !37, !23}
-!37 = !{i32 4, !"SV_GSInstanceID", i32 7, i32 5}
-!38 = !{i32 1, !39, !23}
-!39 = !{i32 4, !"SV_ViewportArrayIndex", i32 7, i32 5}
-!40 = !{void ([1 x <3 x float>]*, [1 x <4 x float>]*, %class.PointStream*, <2 x float>*, <4 x float>*, <4 x float>*, %class.PointStream*, <2 x float>*, <4 x float>*, <4 x float>*, i32, i32*)* @"\01?main@@YAXY00UVSOutGSIn@@V?$PointStream@UVSOut@@@@1IAAI@Z.flat", !"", !41, !3, !54}
-!41 = !{!42, !46, null}
-!42 = !{!43, !44, !45}
-!43 = !{i32 0, !"POSSIZE", i8 9, !23, i8 2, i32 1, i8 3, i32 0, i8 0, null}
-!44 = !{i32 1, !"COLOR", i8 9, !23, i8 2, i32 1, i8 4, i32 1, i8 0, null}
-!45 = !{i32 2, !"SV_GSInstanceID", i8 5, !23, i8 1, i32 1, i8 1, i32 2, i8 0, null}
-!46 = !{!47, !44, !48, !49, !51, !52, !53}
-!47 = !{i32 0, !"TEXCOORD", i8 9, !23, i8 2, i32 1, i8 2, i32 0, i8 0, null}
-!48 = !{i32 2, !"SV_Position", i8 9, !23, i8 4, i32 1, i8 4, i32 2, i8 0, null}
-!49 = !{i32 3, !"TEXCOORD", i8 9, !23, i8 2, i32 1, i8 2, i32 3, i8 0, !50}
-!50 = !{i32 0, i32 1}
-!51 = !{i32 4, !"COLOR", i8 9, !23, i8 2, i32 1, i8 4, i32 4, i8 0, !50}
-!52 = !{i32 5, !"SV_Position", i8 9, !23, i8 4, i32 1, i8 4, i32 5, i8 0, !50}
-!53 = !{i32 6, !"SV_ViewportArrayIndex", i8 5, !23, i8 1, i32 1, i8 1, i32 6, i8 0, null}
-!54 = !{i32 0, i64 512, i32 1, !55}
-!55 = !{i32 1, i32 3, i32 3, i32 1, i32 24}
-
--- a/tools/clang/test/HLSL/dxil_validation/SimpleDs1.ll
+++ b/tools/clang/test/HLSL/dxil_validation/SimpleDs1.ll
@ -1,196 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: DS input control point count must be [0..32].  36 specified
-; CHECK: Invalid Tessellator Domain specified. Must be isoline, tri or quad
-; CHECK: DomainLocation component index out of bounds for the domain.
-; CHECK: DomainLocation component index out of bounds for the domain.
-; CHECK: DomainLocation component index out of bounds for the domain.
-
-
-
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%struct.PSSceneIn = type { <4 x float>, <2 x float>, <3 x float> }
-%struct.HSPerVertexData = type { %struct.PSSceneIn }
-%struct.HSPerPatchData = type { [3 x float], float }
-%class.OutputPatch = type { [3 x %struct.HSPerVertexData] }
-
-@dx.typevar.0 = external addrspace(1) constant %struct.PSSceneIn
-@dx.typevar.1 = external addrspace(1) constant %struct.HSPerVertexData
-@dx.typevar.2 = external addrspace(1) constant %struct.HSPerPatchData
-@dx.typevar.3 = external addrspace(1) constant %class.OutputPatch
-@llvm.used = appending global [4 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.HSPerVertexData addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.HSPerPatchData addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.OutputPatch addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.PSSceneIn addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat(<3 x float>, [3 x <4 x float>]* nocapture readnone, [3 x <2 x float>]* nocapture readnone, [3 x <3 x float>]* nocapture readnone, [3 x float]* nocapture readnone, float* nocapture readnone, <4 x float>* nocapture readnone, <2 x float>* nocapture readnone, <3 x float>* nocapture readnone) #0 {
-entry:
-  %9 = tail call float @dx.op.domainLocation.f32(i32 108, i8 0)
-  %10 = tail call float @dx.op.domainLocation.f32(i32 108, i8 1)
-  %11 = tail call float @dx.op.domainLocation.f32(i32 108, i8 2)
-  %12 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 0)
-  %13 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 0)
-  %14 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 0)
-  %15 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 0)
-  %mul.i0 = fmul fast float %12, %9
-  %mul.i1 = fmul fast float %13, %9
-  %mul.i2 = fmul fast float %14, %9
-  %mul.i3 = fmul fast float %15, %9
-  %16 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 1)
-  %17 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 1)
-  %18 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 1)
-  %19 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 1)
-  %mul5.i0 = fmul fast float %16, %10
-  %mul5.i1 = fmul fast float %17, %10
-  %mul5.i2 = fmul fast float %18, %10
-  %mul5.i3 = fmul fast float %19, %10
-  %add.i0 = fadd fast float %mul5.i0, %mul.i0
-  %add.i1 = fadd fast float %mul5.i1, %mul.i1
-  %add.i2 = fadd fast float %mul5.i2, %mul.i2
-  %add.i3 = fadd fast float %mul5.i3, %mul.i3
-  %20 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 2)
-  %21 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 2)
-  %22 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 2)
-  %23 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 2)
-  %mul10.i0 = fmul fast float %20, %11
-  %mul10.i1 = fmul fast float %21, %11
-  %mul10.i2 = fmul fast float %22, %11
-  %mul10.i3 = fmul fast float %23, %11
-  %add11.i0 = fadd fast float %add.i0, %mul10.i0
-  %add11.i1 = fadd fast float %add.i1, %mul10.i1
-  %add11.i2 = fadd fast float %add.i2, %mul10.i2
-  %add11.i3 = fadd fast float %add.i3, %mul10.i3
-  %24 = tail call float @dx.op.loadPatchConstant.f32(i32 107, i32 0, i32 1, i8 0)
-  %add14.i0 = fadd fast float %add11.i0, %24
-  %add14.i1 = fadd fast float %add11.i1, %24
-  %add14.i2 = fadd fast float %add11.i2, %24
-  %add14.i3 = fadd fast float %add11.i3, %24
-  %25 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 0)
-  %26 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 0)
-  %mul19.i0 = fmul fast float %25, %9
-  %mul19.i1 = fmul fast float %26, %9
-  %27 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 1)
-  %28 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 1)
-  %mul24.i0 = fmul fast float %27, %10
-  %mul24.i1 = fmul fast float %28, %10
-  %add25.i0 = fadd fast float %mul24.i0, %mul19.i0
-  %add25.i1 = fadd fast float %mul24.i1, %mul19.i1
-  %29 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 2)
-  %30 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 2)
-  %mul30.i0 = fmul fast float %29, %11
-  %mul30.i1 = fmul fast float %30, %11
-  %add31.i0 = fadd fast float %add25.i0, %mul30.i0
-  %add31.i1 = fadd fast float %add25.i1, %mul30.i1
-  %31 = tail call float @dx.op.loadPatchConstant.f32(i32 107, i32 0, i32 0, i8 0)
-  %add36.i0 = fadd fast float %add31.i0, %31
-  %add36.i1 = fadd fast float %add31.i1, %31
-  %32 = tail call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 0)
-  %33 = tail call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 1, i32 0)
-  %34 = tail call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 2, i32 0)
-  %mul41.i0 = fmul fast float %32, %9
-  %mul41.i1 = fmul fast float %33, %9
-  %mul41.i2 = fmul fast float %34, %9
-  %35 = tail call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 1)
-  %36 = tail call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 1, i32 1)
-  %37 = tail call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 2, i32 1)
-  %mul46.i0 = fmul fast float %35, %10
-  %mul46.i1 = fmul fast float %36, %10
-  %mul46.i2 = fmul fast float %37, %10
-  %add47.i0 = fadd fast float %mul46.i0, %mul41.i0
-  %add47.i1 = fadd fast float %mul46.i1, %mul41.i1
-  %add47.i2 = fadd fast float %mul46.i2, %mul41.i2
-  %38 = tail call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 2)
-  %39 = tail call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 1, i32 2)
-  %40 = tail call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 2, i32 2)
-  %mul52.i0 = fmul fast float %38, %11
-  %mul52.i1 = fmul fast float %39, %11
-  %mul52.i2 = fmul fast float %40, %11
-  %add53.i0 = fadd fast float %add47.i0, %mul52.i0
-  %add53.i1 = fadd fast float %add47.i1, %mul52.i1
-  %add53.i2 = fadd fast float %add47.i2, %mul52.i2
-  %41 = tail call float @dx.op.loadPatchConstant.f32(i32 107, i32 1, i32 0, i8 0)
-  %add56.i0 = fadd fast float %add53.i0, %41
-  %add56.i1 = fadd fast float %add53.i1, %41
-  %add56.i2 = fadd fast float %add53.i2, %41
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %add14.i0)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %add14.i1)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %add14.i2)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %add14.i3)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %add36.i0)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %add36.i1)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float %add56.i0)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float %add56.i1)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 2, float %add56.i2)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.domainLocation.f32(i32, i8) #1
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadPatchConstant.f32(i32, i32, i32, i8) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3, !15}
-!dx.entryPoints = !{!36}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ds", i32 6, i32 0}
-!3 = !{i32 0, %struct.PSSceneIn addrspace(1)* @dx.typevar.0, !4, %struct.HSPerVertexData addrspace(1)* @dx.typevar.1, !8, %struct.HSPerPatchData addrspace(1)* @dx.typevar.2, !10, %class.OutputPatch addrspace(1)* @dx.typevar.3, !13}
-!4 = !{i32 44, !5, !6, !7}
-!5 = !{i32 3, i32 0, i32 4, !"SV_Position", i32 6, !"pos", i32 7, i32 9}
-!6 = !{i32 3, i32 16, i32 4, !"TEXCOORD0", i32 6, !"tex", i32 7, i32 9}
-!7 = !{i32 3, i32 32, i32 4, !"NORMAL", i32 6, !"norm", i32 7, i32 9}
-!8 = !{i32 44, !9}
-!9 = !{i32 3, i32 0, i32 6, !"v"}
-!10 = !{i32 40, !11, !12}
-!11 = !{i32 3, i32 0, i32 4, !"SV_TessFactor", i32 6, !"edges", i32 7, i32 9}
-!12 = !{i32 3, i32 36, i32 4, !"SV_InsideTessFactor", i32 6, !"inside", i32 7, i32 9}
-!13 = !{i32 140, !14}
-!14 = !{i32 3, i32 0, i32 6, !"h"}
-!15 = !{i32 1, void (<3 x float>, [3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, [3 x float]*, float*, <4 x float>*, <2 x float>*, <3 x float>*)* @main.flat, !16}
-!16 = !{!17, !19, !22, !24, !26, !28, !31, !33, !34, !35}
-!17 = !{i32 0, !18, !18}
-!18 = !{}
-!19 = !{i32 0, !20, !21}
-!20 = !{i32 4, !"SV_DomainLocation", i32 7, i32 9}
-!21 = !{i32 0}
-!22 = !{i32 4, !23, !21}
-!23 = !{i32 4, !"SV_Position", i32 7, i32 9}
-!24 = !{i32 4, !25, !21}
-!25 = !{i32 4, !"TEXCOORD0", i32 7, i32 9}
-!26 = !{i32 4, !27, !21}
-!27 = !{i32 4, !"NORMAL", i32 7, i32 9}
-!28 = !{i32 0, !29, !30}
-!29 = !{i32 4, !"SV_TessFactor", i32 7, i32 9}
-!30 = !{i32 0, i32 1, i32 2}
-!31 = !{i32 0, !32, !21}
-!32 = !{i32 4, !"SV_InsideTessFactor", i32 7, i32 9}
-!33 = !{i32 1, !23, !21}
-!34 = !{i32 1, !25, !21}
-!35 = !{i32 1, !27, !21}
-!36 = !{void (<3 x float>, [3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, [3 x float]*, float*, <4 x float>*, <2 x float>*, <3 x float>*)* @main.flat, !"", !37, null, !46}
-!37 = !{!38, !38, !42}
-!38 = !{!39, !40, !41}
-!39 = !{i32 0, !"SV_Position", i8 9, i8 3, !21, i8 4, i32 1, i8 4, i32 0, i8 0, null}
-!40 = !{i32 1, !"TEXCOORD", i8 9, i8 0, !21, i8 2, i32 1, i8 2, i32 1, i8 0, null}
-!41 = !{i32 2, !"NORMAL", i8 9, i8 0, !21, i8 2, i32 1, i8 3, i32 2, i8 0, null}
-!42 = !{!44, !45}
-!44 = !{i32 0, !"SV_TessFactor", i8 9, i8 25, !30, i8 0, i32 3, i8 1, i32 0, i8 0, null}
-!45 = !{i32 1, !"SV_InsideTessFactor", i8 9, i8 26, !21, i8 0, i32 1, i8 1, i32 3, i8 0, null}
-!46 = !{i32 2, !47}
-!47 = !{i32 4, i32 36}
-
--- a/tools/clang/test/HLSL/dxil_validation/SimpleGs1.ll
+++ b/tools/clang/test/HLSL/dxil_validation/SimpleGs1.ll
@ -1,205 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: GS output vertex count must be [0..1024].  1025 specified
-; CHECK: GS instance count must be [1..32].  33 specified
-; CHECK: GS output primitive topology unrecognized
-; CHECK: GS input primitive unrecognized
-; CHECK: Stream index (5) must between 0 and 3
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%b = type { <2 x float> }
-%struct.VSOutGSIn = type { <3 x float>, <4 x float> }
-%class.TriangleStream = type { %struct.VSOut }
-%struct.VSOut = type { <2 x float>, <4 x float>, <4 x float>, i32 }
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.f32 = type { float, float, float, float }
-
-@b = external constant %b
-@dx.typevar.0 = external addrspace(1) constant %struct.VSOutGSIn
-@dx.typevar.1 = external addrspace(1) constant %class.TriangleStream
-@dx.typevar.2 = external addrspace(1) constant %struct.VSOut
-@dx.typevar.3 = external addrspace(1) constant %b
-@llvm.used = appending global [6 x i8*] [i8* bitcast (%b* @b to i8*), i8* bitcast (%b* @b to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.VSOutGSIn addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.TriangleStream addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.VSOut addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%b addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat([1 x <3 x float>]* nocapture readnone, [1 x <4 x float>]* nocapture readnone, %class.TriangleStream* nocapture readnone, <2 x float>* nocapture readnone, <4 x float>* nocapture readnone, <4 x float>* nocapture readnone, i32* nocapture readnone) #0 {
-entry:
-  %verts.0 = alloca [3 x float], align 4
-  %verts.1 = alloca [3 x float], align 4
-  %7 = getelementptr [3 x float], [3 x float]* %verts.0, i32 0, i32 0
-  %8 = getelementptr [3 x float], [3 x float]* %verts.1, i32 0, i32 0
-  store float -5.000000e-01, float* %7, align 4
-  store float -5.000000e-01, float* %8, align 4
-  %9 = getelementptr [3 x float], [3 x float]* %verts.0, i32 0, i32 1
-  %10 = getelementptr [3 x float], [3 x float]* %verts.1, i32 0, i32 1
-  store float 1.500000e+00, float* %9, align 4
-  store float -5.000000e-01, float* %10, align 4
-  %11 = getelementptr [3 x float], [3 x float]* %verts.0, i32 0, i32 2
-  %12 = getelementptr [3 x float], [3 x float]* %verts.1, i32 0, i32 2
-  store float -5.000000e-01, float* %11, align 4
-  store float 1.500000e+00, float* %12, align 4
-  %13 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 0)
-  %14 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 0)
-  %15 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 0)
-  %16 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 0)
-  %17 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 0)
-  %18 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 2, i32 0)
-  %19 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 3, i32 0)
-  %load30 = load float, float* %7, align 4
-  %load32 = load float, float* %8, align 4
-  %mul.i0 = fmul fast float %load30, %13
-  %mul.i1 = fmul fast float %load32, %13
-  %add.i0 = fadd fast float %mul.i0, %14
-  %add.i1 = fadd fast float %mul.i1, %15
-  %20 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 0, i1 false)
-  %21 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %20, i32 0)
-  %22 = extractvalue %dx.types.CBufRet.f32 %21, 0
-  %23 = extractvalue %dx.types.CBufRet.f32 %21, 1
-  %mul.i.i0 = fmul fast float %22, 2.000000e+00
-  %mul.i.i1 = fmul fast float %23, 2.000000e+00
-  %mul1.i.i0 = fmul fast float %mul.i.i0, %add.i0
-  %mul1.i.i1 = fmul fast float %mul.i.i1, %add.i1
-  %sub.i = fadd fast float %mul1.i.i0, -1.000000e+00
-  %sub2.i = fsub fast float 1.000000e+00, %mul1.i.i1
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 0.000000e+00)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 0.000000e+00)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %16)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %17)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 2, float %18)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 3, float %19)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float %sub.i)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float %sub2.i)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 2, float 5.000000e-01)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 3, float 1.000000e+00)
-  call void @dx.op.storeOutput.i32(i32 5, i32 3, i32 0, i8 0, i32 0)
-  call void @dx.op.emitStream(i32 97, i8 0)
-  %load26 = load float, float* %9, align 4
-  %load28 = load float, float* %10, align 4
-  %mul12.i0 = fmul fast float %load26, %13
-  %mul12.i1 = fmul fast float %load28, %13
-  %add13.i0 = fadd fast float %mul12.i0, %14
-  %add13.i1 = fadd fast float %mul12.i1, %15
-  %mul1.i.29.i0 = fmul fast float %add13.i0, %mul.i.i0
-  %mul1.i.29.i1 = fmul fast float %add13.i1, %mul.i.i1
-  %sub.i.30 = fadd fast float %mul1.i.29.i0, -1.000000e+00
-  %sub2.i.31 = fsub fast float 1.000000e+00, %mul1.i.29.i1
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 2.000000e+00)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 0.000000e+00)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %16)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %17)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 2, float %18)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 3, float %19)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float %sub.i.30)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float %sub2.i.31)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 2, float 5.000000e-01)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 3, float 1.000000e+00)
-  call void @dx.op.storeOutput.i32(i32 5, i32 3, i32 0, i8 0, i32 2)
-  call void @dx.op.emitStream(i32 97, i8 0)
-  %load23 = load float, float* %11, align 4
-  %load24 = load float, float* %12, align 4
-  %mul22.i0 = fmul fast float %load23, %13
-  %mul22.i1 = fmul fast float %load24, %13
-  %add23.i0 = fadd fast float %mul22.i0, %14
-  %add23.i1 = fadd fast float %mul22.i1, %15
-  %mul1.i.34.i0 = fmul fast float %add23.i0, %mul.i.i0
-  %mul1.i.34.i1 = fmul fast float %add23.i1, %mul.i.i1
-  %sub.i.35 = fadd fast float %mul1.i.34.i0, -1.000000e+00
-  %sub2.i.36 = fsub fast float 1.000000e+00, %mul1.i.34.i1
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 0.000000e+00)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 2.000000e+00)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %16)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %17)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 2, float %18)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 3, float %19)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float %sub.i.35)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float %sub2.i.36)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 2, float 5.000000e-01)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 3, float 1.000000e+00)
-  call void @dx.op.storeOutput.i32(i32 5, i32 3, i32 0, i8 0, i32 1)
-  call void @dx.op.emitStream(i32 97, i8 0)
-  call void @dx.op.cutStream(i32 98, i8 0)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.cutStream(i32, i8) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.emitStream(i32, i8) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!6, !18}
-!dx.entryPoints = !{!35}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"gs", i32 6, i32 0}
-!3 = !{null, null, !4, null}
-!4 = !{!5}
-!5 = !{i32 0, %b* @b, !"b", i32 0, i32 0, i32 1, i32 8, null}
-!6 = !{i32 0, %struct.VSOutGSIn addrspace(1)* @dx.typevar.0, !7, %class.TriangleStream addrspace(1)* @dx.typevar.1, !10, %struct.VSOut addrspace(1)* @dx.typevar.2, !12, %b addrspace(1)* @dx.typevar.3, !16}
-!7 = !{i32 32, !8, !9}
-!8 = !{i32 3, i32 0, i32 4, !"POSSIZE", i32 6, !"posSize", i32 7, i32 9}
-!9 = !{i32 3, i32 16, i32 4, !"COLOR", i32 6, !"clr", i32 7, i32 9}
-!10 = !{i32 52, !11}
-!11 = !{i32 3, i32 0, i32 6, !"h"}
-!12 = !{i32 52, !13, !9, !14, !15}
-!13 = !{i32 3, i32 0, i32 4, !"TEXCOORD0", i32 6, !"uv", i32 7, i32 9}
-!14 = !{i32 3, i32 32, i32 4, !"SV_Position", i32 6, !"pos", i32 7, i32 9}
-!15 = !{i32 3, i32 48, i32 4, !"SV_RenderTargetArrayIndex", i32 6, !"index", i32 7, i32 5}
-!16 = !{i32 0, !17}
-!17 = !{i32 3, i32 0, i32 6, !"invViewportSize", i32 7, i32 9}
-!18 = !{i32 1, void ([1 x <3 x float>]*, [1 x <4 x float>]*, %class.TriangleStream*, <2 x float>*, <4 x float>*, <4 x float>*, i32*)* @main.flat, !19}
-!19 = !{!20, !22, !25, !27, !28, !30, !31, !33}
-!20 = !{i32 0, !21, !21}
-!21 = !{}
-!22 = !{i32 0, !23, !24}
-!23 = !{i32 4, !"POSSIZE", i32 7, i32 9}
-!24 = !{i32 0}
-!25 = !{i32 0, !26, !24}
-!26 = !{i32 4, !"COLOR", i32 7, i32 9}
-!27 = !{i32 5, !21, !21}
-!28 = !{i32 5, !29, !24}
-!29 = !{i32 4, !"TEXCOORD0", i32 7, i32 9}
-!30 = !{i32 5, !26, !24}
-!31 = !{i32 5, !32, !24}
-!32 = !{i32 4, !"SV_Position", i32 7, i32 9}
-!33 = !{i32 5, !34, !24}
-!34 = !{i32 4, !"SV_RenderTargetArrayIndex", i32 7, i32 5}
-!35 = !{void ([1 x <3 x float>]*, [1 x <4 x float>]*, %class.TriangleStream*, <2 x float>*, <4 x float>*, <4 x float>*, i32*)* @main.flat, !"", !36, !3, !44}
-!36 = !{!37, !40, null}
-!37 = !{!38, !39}
-!38 = !{i32 0, !"POSSIZE", i8 9, i8 0, !24, i8 2, i32 1, i8 3, i32 0, i8 0, null}
-!39 = !{i32 1, !"COLOR", i8 9, i8 0, !24, i8 2, i32 1, i8 4, i32 1, i8 0, null}
-!40 = !{!41, !39, !42, !43}
-!41 = !{i32 0, !"TEXCOORD", i8 9, i8 0, !24, i8 2, i32 1, i8 2, i32 0, i8 0, !50}
-!42 = !{i32 2, !"SV_Position", i8 9, i8 3, !24, i8 4, i32 1, i8 4, i32 2, i8 0, null}
-!43 = !{i32 3, !"SV_RenderTargetArrayIndex", i8 5, i8 4, !24, i8 1, i32 1, i8 1, i32 3, i8 0, null}
-!44 = !{i32 0, i64 512, i32 1, !45}
-!45 = !{i32 5, i32 1025, i32 1, i32 0, i32 33}
-;!45 = !{i32 1, i32 3, i32 1, i32 4, i32 1}
-
-!50 = !{i32 0, i32 5}
--- a/tools/clang/test/HLSL/dxil_validation/SimpleHs1.ll
+++ b/tools/clang/test/HLSL/dxil_validation/SimpleHs1.ll
@ -1,130 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: For pass thru hull shader, input control point count must match output control point count
-; CHECK: Total number of scalars across all HS output control points must not exceed
-; CHECK: Required TessFactor for domain not found declared anywhere in Patch Constant data
-; CHECK: Required TessFactor for domain not found declared anywhere in Patch Constant data
-
-
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%class.Texture2D = type { <4 x float>, %"class.Texture2D<vector<float, 4> >::mips_type" }
-%"class.Texture2D<vector<float, 4> >::mips_type" = type { i32 }
-%struct.PSSceneIn = type { <4 x float>, <2 x float>, <3 x float> }
-%struct.VSSceneIn = type { <3 x float>, <3 x float>, <2 x float> }
-%struct.HSPerPatchData = type { [3 x float], float }
-%class.InputPatch = type { [3 x %struct.PSSceneIn] }
-%struct.HSPerVertexData = type { %struct.PSSceneIn }
-
-@dx.typevar.0 = external addrspace(1) constant %class.Texture2D
-@dx.typevar.1 = external addrspace(1) constant %"class.Texture2D<vector<float, 4> >::mips_type"
-@dx.typevar.2 = external addrspace(1) constant %struct.PSSceneIn
-@dx.typevar.3 = external addrspace(1) constant %struct.VSSceneIn
-@dx.typevar.4 = external addrspace(1) constant %struct.HSPerPatchData
-@dx.typevar.5 = external addrspace(1) constant %class.InputPatch
-@dx.typevar.6 = external addrspace(1) constant %struct.HSPerVertexData
-@llvm.used = appending global [7 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%class.Texture2D addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.PSSceneIn addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.VSSceneIn addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.HSPerPatchData addrspace(1)* @dx.typevar.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.InputPatch addrspace(1)* @dx.typevar.5 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.HSPerVertexData addrspace(1)* @dx.typevar.6 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$02@@@Z.flat"([3 x <4 x float>]* nocapture readnone, [3 x <2 x float>]* nocapture readnone, [3 x <3 x float>]* nocapture readnone, [3 x float]* nocapture readnone, float* nocapture readnone) #0 {
-entry:
-  %retval.0 = alloca [3 x float], align 4
-  %arrayidx3 = getelementptr inbounds [3 x float], [3 x float]* %retval.0, i32 0, i32 0
-  store float 1.000000e+00, float* %arrayidx3, align 4, !tbaa !62
-  %arrayidx22 = getelementptr inbounds [3 x float], [3 x float]* %retval.0, i32 0, i32 1
-  store float 1.000000e+00, float* %arrayidx22, align 4, !tbaa !62
-  %arrayidx41 = getelementptr inbounds [3 x float], [3 x float]* %retval.0, i32 0, i32 2
-  store float 1.000000e+00, float* %arrayidx41, align 4, !tbaa !62
-  %load = load [3 x float], [3 x float]* %retval.0, align 4
-  %5 = extractvalue [3 x float] %load, 0
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 0, i8 0, float %5)
-  %6 = extractvalue [3 x float] %load, 1
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 1, i8 0, float %6)
-  %7 = extractvalue [3 x float] %load, 2
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 2, i8 0, float %7)
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 1, i32 0, i8 0, float 1.000000e+00)
-  ret void
-}
-
-
-; Function Attrs: nounwind
-declare void @dx.op.storePatchConstant.f32(i32, i32, i32, i8, float) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3, !24}
-!dx.entryPoints = !{!46}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"hs", i32 6, i32 0}
-!3 = !{i32 0, %class.Texture2D addrspace(1)* @dx.typevar.0, !4, %"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1, !7, %struct.PSSceneIn addrspace(1)* @dx.typevar.2, !9, %struct.VSSceneIn addrspace(1)* @dx.typevar.3, !13, %struct.HSPerPatchData addrspace(1)* @dx.typevar.4, !17, %class.InputPatch addrspace(1)* @dx.typevar.5, !20, %struct.HSPerVertexData addrspace(1)* @dx.typevar.6, !22}
-!4 = !{i32 20, !5, !6}
-!5 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!6 = !{i32 3, i32 16, i32 6, !"mips"}
-!7 = !{i32 4, !8}
-!8 = !{i32 3, i32 0, i32 6, !"handle", i32 7, i32 5}
-!9 = !{i32 44, !10, !11, !12}
-!10 = !{i32 3, i32 0, i32 4, !"SV_Position", i32 6, !"pos", i32 7, i32 9}
-!11 = !{i32 3, i32 16, i32 4, !"TEXCOORD0", i32 6, !"tex", i32 7, i32 9}
-!12 = !{i32 3, i32 32, i32 4, !"NORMAL", i32 6, !"norm", i32 7, i32 9}
-!13 = !{i32 40, !14, !15, !16}
-!14 = !{i32 3, i32 0, i32 4, !"POSITION", i32 6, !"pos", i32 7, i32 9}
-!15 = !{i32 3, i32 16, i32 4, !"NORMAL", i32 6, !"norm", i32 7, i32 9}
-!16 = !{i32 3, i32 32, i32 4, !"TEXCOORD0", i32 6, !"tex", i32 7, i32 9}
-!17 = !{i32 40, !18, !19}
-!18 = !{i32 3, i32 0, i32 4, !"SV_TessFactor", i32 6, !"edges", i32 7, i32 9}
-!19 = !{i32 3, i32 36, i32 4, !"SV_InsideTessFactor", i32 6, !"inside", i32 7, i32 9}
-!20 = !{i32 140, !21}
-!21 = !{i32 3, i32 0, i32 6, !"h"}
-!22 = !{i32 44, !23}
-!23 = !{i32 3, i32 0, i32 6, !"v"}
-!24 = !{i32 1, void ([3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, [3 x float]*, float*)* @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$02@@@Z.flat", !40}
-!25 = !{!26, !28, !31, !33, !35, !37, !38, !39}
-!26 = !{i32 0, !27, !27}
-!27 = !{}
-!28 = !{i32 0, !29, !30}
-!29 = !{i32 4, !"SV_OutputControlPointID", i32 7, i32 5}
-!30 = !{i32 0}
-!31 = !{i32 3, !32, !30}
-!32 = !{i32 4, !"SV_Position", i32 7, i32 9}
-!33 = !{i32 3, !34, !30}
-!34 = !{i32 4, !"TEXCOORD0", i32 7, i32 9}
-!35 = !{i32 3, !36, !30}
-!36 = !{i32 4, !"NORMAL", i32 7, i32 9}
-!37 = !{i32 1, !32, !30}
-!38 = !{i32 1, !34, !30}
-!39 = !{i32 1, !36, !30}
-!40 = !{!26, !31, !33, !35, !41, !44}
-!41 = !{i32 1, !42, !43}
-!42 = !{i32 4, !"SV_TessFactor", i32 7, i32 9}
-!43 = !{i32 0, i32 1, i32 2}
-!44 = !{i32 1, !45, !30}
-!45 = !{i32 4, !"SV_InsideTessFactor", i32 7, i32 9}
-!46 = !{null, !"", !47, null, !60}
-!47 = !{!48, !53, !57}
-!48 = !{!50, !51, !52}
-!50 = !{i32 0, !"SV_Position", i8 9, i8 3, !30, i8 4, i32 1, i8 4, i32 0, i8 0, null}
-!51 = !{i32 1, !"TEXCOORD", i8 9, i8 0, !30, i8 2, i32 1, i8 2, i32 1, i8 0, null}
-!52 = !{i32 2, !"NORMAL", i8 9, i8 0, !30, i8 2, i32 1, i8 3, i32 2, i8 0, null}
-!53 = !{!54, !55, !56, !66}
-!54 = !{i32 0, !"SV_Position", i8 9, i8 3, !30, i8 4, i32 1, i8 4, i32 0, i8 0, null}
-!55 = !{i32 1, !"TEXCOORD", i8 9, i8 0, !30, i8 2, i32 1, i8 2, i32 1, i8 0, null}
-!56 = !{i32 2, !"NORMAL", i8 9, i8 0, !30, i8 2, i32 1, i8 3, i32 2, i8 0, null}
-!57 = !{!58, !59}
-!58 = !{i32 0, !"TessFactor", i8 9, i8 0, !43, i8 0, i32 3, i8 1, i32 0, i8 0, null}
-!59 = !{i32 1, !"InsideTessFactor", i8 9, i8 0, !30, i8 0, i32 1, i8 1, i32 3, i8 0, null}
-!60 = !{i32 3, !61}
-!61 = !{void ([3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, [3 x float]*, float*)* @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$02@@@Z.flat", i32 3, i32 2000, i32 2, i32 3, i32 3, float 6.400000e+01}
-!62 = !{!63, !63, i64 0}
-!63 = !{!"float", !64, i64 0}
-!64 = !{!"omnipotent char", !65, i64 0}
-!65 = !{!"Simple C/C++ TBAA"}
-!66 = !{i32 3, !"COLOR", i8 9, i8 0, !30, i8 2, i32 1, i8 3, i32 3, i8 0, null}
-
--- a/tools/clang/test/HLSL/dxil_validation/SimpleHs3.ll
+++ b/tools/clang/test/HLSL/dxil_validation/SimpleHs3.ll
@ -1,167 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Hull Shader declared with Tri Domain must specify output primitive point, triangle_cw or triangle_ccw. Line output is not compatible with the Tri domain
-
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%class.Texture2D = type { <4 x float>, %"class.Texture2D<vector<float, 4> >::mips_type" }
-%"class.Texture2D<vector<float, 4> >::mips_type" = type { i32 }
-%struct.PSSceneIn = type { <4 x float>, <2 x float>, <3 x float> }
-%struct.VSSceneIn = type { <3 x float>, <3 x float>, <2 x float> }
-%struct.HSPerPatchData = type { [3 x float], float }
-%class.InputPatch = type { [3 x %struct.PSSceneIn] }
-%class.OutputPatch = type { [3 x %struct.PSSceneIn] }
-%struct.HSPerVertexData = type { %struct.PSSceneIn }
-
-@dx.typevar.0 = external addrspace(1) constant %class.Texture2D
-@dx.typevar.1 = external addrspace(1) constant %"class.Texture2D<vector<float, 4> >::mips_type"
-@dx.typevar.2 = external addrspace(1) constant %struct.PSSceneIn
-@dx.typevar.3 = external addrspace(1) constant %struct.VSSceneIn
-@dx.typevar.4 = external addrspace(1) constant %struct.HSPerPatchData
-@dx.typevar.5 = external addrspace(1) constant %class.InputPatch
-@dx.typevar.6 = external addrspace(1) constant %class.OutputPatch
-@dx.typevar.7 = external addrspace(1) constant %struct.HSPerVertexData
-@llvm.used = appending global [8 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%class.Texture2D addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.PSSceneIn addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.VSSceneIn addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.HSPerPatchData addrspace(1)* @dx.typevar.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.InputPatch addrspace(1)* @dx.typevar.5 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.OutputPatch addrspace(1)* @dx.typevar.6 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.HSPerVertexData addrspace(1)* @dx.typevar.7 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$02@@V?$OutputPatch@UPSSceneIn@@$02@@@Z.flat"([3 x <4 x float>]* nocapture readnone, [3 x <2 x float>]* nocapture readnone, [3 x <3 x float>]* nocapture readnone, [3 x <4 x float>]* nocapture readnone, [3 x <2 x float>]* nocapture readnone, [3 x <3 x float>]* nocapture readnone, [3 x float]* nocapture readnone, float* nocapture readnone) #0 {
-entry:
-  %retval.0 = alloca [3 x float], align 4
-  %8 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 0)
-  %arrayidx3 = getelementptr inbounds [3 x float], [3 x float]* %retval.0, i32 0, i32 0
-  store float %8, float* %arrayidx3, align 4, !tbaa !65
-  %9 = call float @dx.op.loadOutputControlPoint.f32(i32 106, i32 1, i32 0, i8 1, i32 1)
-  %arrayidx32 = getelementptr inbounds [3 x float], [3 x float]* %retval.0, i32 0, i32 1
-  store float %9, float* %arrayidx32, align 4, !tbaa !65
-  %arrayidx51 = getelementptr inbounds [3 x float], [3 x float]* %retval.0, i32 0, i32 2
-  store float 1.000000e+00, float* %arrayidx51, align 4, !tbaa !65
-  %load = load [3 x float], [3 x float]* %retval.0, align 4
-  %10 = extractvalue [3 x float] %load, 0
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 0, i8 0, float %10)
-  %11 = extractvalue [3 x float] %load, 1
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 1, i8 0, float %11)
-  %12 = extractvalue [3 x float] %load, 2
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 2, i8 0, float %12)
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 1, i32 0, i8 0, float 1.000000e+00)
-  ret void
-}
-
-; Function Attrs: nounwind
-define void @main.flat(i32, [3 x <4 x float>]* nocapture readnone, [3 x <2 x float>]* nocapture readnone, [3 x <3 x float>]* nocapture readnone, <4 x float>* nocapture readnone, <2 x float>* nocapture readnone, <3 x float>* nocapture readnone) #0 {
-entry:
-  %7 = call i32 @dx.op.outputControlPointID.i32(i32 110)
-  %8 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 %7)
-  %9 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 %7)
-  %10 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 %7)
-  %11 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 %7)
-  %12 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 %7)
-  %13 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 %7)
-  %14 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 %7)
-  %15 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 1, i32 %7)
-  %16 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 2, i32 %7)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %8)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %9)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %10)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %11)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %12)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %13)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float %14)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float %15)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 2, float %16)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.outputControlPointID.i32(i32) #1
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.storePatchConstant.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadOutputControlPoint.f32(i32, i32, i32, i8, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3, !24}
-!dx.entryPoints = !{!49}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"hs", i32 6, i32 0}
-!3 = !{i32 0, %class.Texture2D addrspace(1)* @dx.typevar.0, !4, %"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1, !7, %struct.PSSceneIn addrspace(1)* @dx.typevar.2, !9, %struct.VSSceneIn addrspace(1)* @dx.typevar.3, !13, %struct.HSPerPatchData addrspace(1)* @dx.typevar.4, !17, %class.InputPatch addrspace(1)* @dx.typevar.5, !20, %class.OutputPatch addrspace(1)* @dx.typevar.6, !20, %struct.HSPerVertexData addrspace(1)* @dx.typevar.7, !22}
-!4 = !{i32 20, !5, !6}
-!5 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!6 = !{i32 3, i32 16, i32 6, !"mips"}
-!7 = !{i32 4, !8}
-!8 = !{i32 3, i32 0, i32 6, !"handle", i32 7, i32 5}
-!9 = !{i32 44, !10, !11, !12}
-!10 = !{i32 3, i32 0, i32 4, !"SV_Position", i32 6, !"pos", i32 7, i32 9}
-!11 = !{i32 3, i32 16, i32 4, !"TEXCOORD0", i32 6, !"tex", i32 7, i32 9}
-!12 = !{i32 3, i32 32, i32 4, !"NORMAL", i32 6, !"norm", i32 7, i32 9}
-!13 = !{i32 40, !14, !15, !16}
-!14 = !{i32 3, i32 0, i32 4, !"POSITION", i32 6, !"pos", i32 7, i32 9}
-!15 = !{i32 3, i32 16, i32 4, !"NORMAL", i32 6, !"norm", i32 7, i32 9}
-!16 = !{i32 3, i32 32, i32 4, !"TEXCOORD0", i32 6, !"tex", i32 7, i32 9}
-!17 = !{i32 40, !18, !19}
-!18 = !{i32 3, i32 0, i32 4, !"SV_TessFactor", i32 6, !"edges", i32 7, i32 9}
-!19 = !{i32 3, i32 36, i32 4, !"SV_InsideTessFactor", i32 6, !"inside", i32 7, i32 9}
-!20 = !{i32 140, !21}
-!21 = !{i32 3, i32 0, i32 6, !"h"}
-!22 = !{i32 44, !23}
-!23 = !{i32 3, i32 0, i32 6, !"v"}
-!24 = !{i32 1, void (i32, [3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, <4 x float>*, <2 x float>*, <3 x float>*)* @main.flat, !25, void ([3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, [3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, [3 x float]*, float*)* @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$02@@V?$OutputPatch@UPSSceneIn@@$02@@@Z.flat", !40}
-!25 = !{!26, !28, !31, !33, !35, !37, !38, !39}
-!26 = !{i32 0, !27, !27}
-!27 = !{}
-!28 = !{i32 0, !29, !30}
-!29 = !{i32 4, !"SV_OutputControlPointID", i32 7, i32 5}
-!30 = !{i32 0}
-!31 = !{i32 3, !32, !30}
-!32 = !{i32 4, !"SV_Position", i32 7, i32 9}
-!33 = !{i32 3, !34, !30}
-!34 = !{i32 4, !"TEXCOORD0", i32 7, i32 9}
-!35 = !{i32 3, !36, !30}
-!36 = !{i32 4, !"NORMAL", i32 7, i32 9}
-!37 = !{i32 1, !32, !30}
-!38 = !{i32 1, !34, !30}
-!39 = !{i32 1, !36, !30}
-!40 = !{!26, !31, !33, !35, !41, !42, !43, !44, !47}
-!41 = !{i32 4, !32, !30}
-!42 = !{i32 4, !34, !30}
-!43 = !{i32 4, !36, !30}
-!44 = !{i32 1, !45, !46}
-!45 = !{i32 4, !"SV_TessFactor", i32 7, i32 9}
-!46 = !{i32 0, i32 1, i32 2}
-!47 = !{i32 1, !48, !30}
-!48 = !{i32 4, !"SV_InsideTessFactor", i32 7, i32 9}
-!49 = !{void (i32, [3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, <4 x float>*, <2 x float>*, <3 x float>*)* @main.flat, !"", !50, null, !63}
-!50 = !{!51, !56, !60}
-!51 = !{!53, !54, !55}
-!53 = !{i32 0, !"SV_Position", i8 9, i8 3, !30, i8 4, i32 1, i8 4, i32 0, i8 0, null}
-!54 = !{i32 1, !"TEXCOORD", i8 9, i8 0, !30, i8 2, i32 1, i8 2, i32 1, i8 0, null}
-!55 = !{i32 2, !"NORMAL", i8 9, i8 0, !30, i8 2, i32 1, i8 3, i32 2, i8 0, null}
-!56 = !{!57, !58, !59}
-!57 = !{i32 0, !"SV_Position", i8 9, i8 3, !30, i8 4, i32 1, i8 4, i32 0, i8 0, null}
-!58 = !{i32 1, !"TEXCOORD", i8 9, i8 0, !30, i8 2, i32 1, i8 2, i32 1, i8 0, null}
-!59 = !{i32 2, !"NORMAL", i8 9, i8 0, !30, i8 2, i32 1, i8 3, i32 2, i8 0, null}
-!60 = !{!61, !62}
-!61 = !{i32 0, !"SV_TessFactor", i8 9, i8 25, !46, i8 0, i32 3, i8 1, i32 0, i8 0, null}
-!62 = !{i32 1, !"SV_InsideTessFactor", i8 9, i8 26, !30, i8 0, i32 1, i8 1, i32 3, i8 0, null}
-!63 = !{i32 3, !64}
-!64 = !{void ([3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, [3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, [3 x float]*, float*)* @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$02@@V?$OutputPatch@UPSSceneIn@@$02@@@Z.flat", i32 3, i32 3, i32 2, i32 3, i32 2, float 6.400000e+01}
-!65 = !{!66, !66, i64 0}
-!66 = !{!"float", !67, i64 0}
-!67 = !{!"omnipotent char", !68, i64 0}
-!68 = !{!"Simple C/C++ TBAA"}
--- a/tools/clang/test/HLSL/dxil_validation/SimpleHs4.ll
+++ b/tools/clang/test/HLSL/dxil_validation/SimpleHs4.ll
@ -1,225 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Hull Shader declared with IsoLine Domain must specify output primitive point or line. Triangle_cw or triangle_ccw output are not compatible with the IsoLine Domain.
-
-
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; SV_Position              0   xyzw        0      POS   float
-; TEXCOORD                 0   xy          1     NONE   float
-; NORMAL                   0   xyz         2     NONE   float
-;
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; SV_Position              0   xyzw        0      POS   float   xyzw
-; TEXCOORD                 0   xy          1     NONE   float   xyzw
-; NORMAL                   0   xyz         2     NONE   float   xyzw
-;
-;
-; Patch Constant signature signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; SV_TessFactor            0   x           0  LINEDEN   float   xyzw
-; SV_TessFactor            1   x           1  LINEDET   float   xyzw
-;
-;
-; Pipeline Runtime Information:
-;
-; Hull Shader
-; InputControlPointCount=2
-; OutputControlPointCount=2
-; Domain=isoline
-; OutputPrimitive=line
-;
-;
-; Input signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; SV_Position              0          noperspective
-; TEXCOORD                 0                 linear
-; NORMAL                   0                 linear
-;
-; Output signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; SV_Position              0          noperspective
-; TEXCOORD                 0                 linear
-; NORMAL                   0                 linear
-;
-; Patch Constant signature signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; SV_TessFactor            0
-;
-; Buffer Definitions:
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-;
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%class.Texture2D = type { <4 x float>, %"class.Texture2D<vector<float, 4> >::mips_type" }
-%"class.Texture2D<vector<float, 4> >::mips_type" = type { i32 }
-%struct.PSSceneIn = type { <4 x float>, <2 x float>, <3 x float> }
-%struct.VSSceneIn = type { <3 x float>, <3 x float>, <2 x float> }
-%struct.HSPerPatchData = type { [2 x float] }
-%struct.HSPerVertexData = type { %struct.PSSceneIn }
-%class.InputPatch = type { [2 x %struct.PSSceneIn] }
-%class.OutputPatch = type { [2 x %struct.HSPerVertexData] }
-
-@dx.typevar.0 = external addrspace(1) constant %class.Texture2D
-@dx.typevar.1 = external addrspace(1) constant %"class.Texture2D<vector<float, 4> >::mips_type"
-@dx.typevar.2 = external addrspace(1) constant %struct.PSSceneIn
-@dx.typevar.3 = external addrspace(1) constant %struct.VSSceneIn
-@dx.typevar.4 = external addrspace(1) constant %struct.HSPerPatchData
-@dx.typevar.5 = external addrspace(1) constant %struct.HSPerVertexData
-@dx.typevar.6 = external addrspace(1) constant %class.InputPatch
-@dx.typevar.7 = external addrspace(1) constant %class.OutputPatch
-@llvm.used = appending global [8 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%class.Texture2D addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.PSSceneIn addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.VSSceneIn addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.HSPerPatchData addrspace(1)* @dx.typevar.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.HSPerVertexData addrspace(1)* @dx.typevar.5 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.InputPatch addrspace(1)* @dx.typevar.6 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.OutputPatch addrspace(1)* @dx.typevar.7 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$01@@V?$OutputPatch@UHSPerVertexData@@$01@@@Z.flat"([2 x <4 x float>]* nocapture readnone, [2 x <2 x float>]* nocapture readnone, [2 x <3 x float>]* nocapture readnone, [2 x <4 x float>]* nocapture readnone, [2 x <2 x float>]* nocapture readnone, [2 x <3 x float>]* nocapture readnone, [2 x float]* nocapture readnone) #0 {
-entry:
-  %retval.0 = alloca [2 x float], align 4
-  %7 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 0)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %arrayidx2 = getelementptr inbounds [2 x float], [2 x float]* %retval.0, i32 0, i32 0
-  store float %7, float* %arrayidx2, align 4, !tbaa !61
-  %8 = call float @dx.op.loadOutputControlPoint.f32(i32 106, i32 1, i32 0, i8 0, i32 1)  ; LoadOutputControlPoint(inputSigId,row,col,index)
-  %arrayidx31 = getelementptr inbounds [2 x float], [2 x float]* %retval.0, i32 0, i32 1
-  store float %8, float* %arrayidx31, align 4, !tbaa !61
-  %load = load [2 x float], [2 x float]* %retval.0, align 4
-  %9 = extractvalue [2 x float] %load, 0
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 0, i8 0, float %9)  ; StorePatchConstant(outputSigID,row,col,value)
-  %10 = extractvalue [2 x float] %load, 1
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 1, i8 0, float %10)  ; StorePatchConstant(outputSigID,row,col,value)
-  ret void
-}
-
-; Function Attrs: nounwind
-define void @main.flat(i32, [2 x <4 x float>]* nocapture readnone, [2 x <2 x float>]* nocapture readnone, [2 x <3 x float>]* nocapture readnone, <4 x float>* nocapture readnone, <2 x float>* nocapture readnone, <3 x float>* nocapture readnone) #0 {
-entry:
-  %7 = call i32 @dx.op.outputControlPointID.i32(i32 110)  ; OutputControlPointID()
-  %8 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 %7)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %9 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 %7)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %10 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 %7)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %11 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 %7)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %12 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 %7)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %13 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 %7)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %14 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 %7)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %15 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 1, i32 %7)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %16 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 2, i32 %7)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %8)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %9)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %10)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %11)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %12)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %13)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float %14)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float %15)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 2, float %16)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.outputControlPointID.i32(i32) #1
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.storePatchConstant.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadOutputControlPoint.f32(i32, i32, i32, i8, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3, !23}
-!dx.entryPoints = !{!46}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"hs", i32 6, i32 0}
-!3 = !{i32 0, %class.Texture2D addrspace(1)* @dx.typevar.0, !4, %"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1, !7, %struct.PSSceneIn addrspace(1)* @dx.typevar.2, !9, %struct.VSSceneIn addrspace(1)* @dx.typevar.3, !13, %struct.HSPerPatchData addrspace(1)* @dx.typevar.4, !17, %struct.HSPerVertexData addrspace(1)* @dx.typevar.5, !19, %class.InputPatch addrspace(1)* @dx.typevar.6, !21, %class.OutputPatch addrspace(1)* @dx.typevar.7, !21}
-!4 = !{i32 20, !5, !6}
-!5 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!6 = !{i32 3, i32 16, i32 6, !"mips"}
-!7 = !{i32 4, !8}
-!8 = !{i32 3, i32 0, i32 6, !"handle", i32 7, i32 5}
-!9 = !{i32 44, !10, !11, !12}
-!10 = !{i32 3, i32 0, i32 4, !"SV_Position", i32 6, !"pos", i32 7, i32 9}
-!11 = !{i32 3, i32 16, i32 4, !"TEXCOORD0", i32 6, !"tex", i32 7, i32 9}
-!12 = !{i32 3, i32 32, i32 4, !"NORMAL", i32 6, !"norm", i32 7, i32 9}
-!13 = !{i32 40, !14, !15, !16}
-!14 = !{i32 3, i32 0, i32 4, !"POSITION", i32 6, !"pos", i32 7, i32 9}
-!15 = !{i32 3, i32 16, i32 4, !"NORMAL", i32 6, !"norm", i32 7, i32 9}
-!16 = !{i32 3, i32 32, i32 4, !"TEXCOORD0", i32 6, !"tex", i32 7, i32 9}
-!17 = !{i32 20, !18}
-!18 = !{i32 3, i32 0, i32 4, !"SV_TessFactor", i32 6, !"edges", i32 7, i32 9}
-!19 = !{i32 44, !20}
-!20 = !{i32 3, i32 0, i32 6, !"v"}
-!21 = !{i32 92, !22}
-!22 = !{i32 3, i32 0, i32 6, !"h"}
-!23 = !{i32 1, void ([2 x <4 x float>]*, [2 x <2 x float>]*, [2 x <3 x float>]*, [2 x <4 x float>]*, [2 x <2 x float>]*, [2 x <3 x float>]*, [2 x float]*)* @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$01@@V?$OutputPatch@UHSPerVertexData@@$01@@@Z.flat", !24, void (i32, [2 x <4 x float>]*, [2 x <2 x float>]*, [2 x <3 x float>]*, <4 x float>*, <2 x float>*, <3 x float>*)* @main.flat, !40}
-!24 = !{!25, !27, !30, !32, !34, !35, !36, !37}
-!25 = !{i32 0, !26, !26}
-!26 = !{}
-!27 = !{i32 3, !28, !29}
-!28 = !{i32 4, !"SV_Position", i32 7, i32 9}
-!29 = !{i32 0}
-!30 = !{i32 3, !31, !29}
-!31 = !{i32 4, !"TEXCOORD0", i32 7, i32 9}
-!32 = !{i32 3, !33, !29}
-!33 = !{i32 4, !"NORMAL", i32 7, i32 9}
-!34 = !{i32 4, !28, !29}
-!35 = !{i32 4, !31, !29}
-!36 = !{i32 4, !33, !29}
-!37 = !{i32 1, !38, !39}
-!38 = !{i32 4, !"SV_TessFactor", i32 7, i32 9}
-!39 = !{i32 0, i32 1}
-!40 = !{!25, !41, !27, !30, !32, !43, !44, !45}
-!41 = !{i32 0, !42, !29}
-!42 = !{i32 4, !"SV_OutputControlPointID", i32 7, i32 5}
-!43 = !{i32 1, !28, !29}
-!44 = !{i32 1, !31, !29}
-!45 = !{i32 1, !33, !29}
-!46 = !{void (i32, [2 x <4 x float>]*, [2 x <2 x float>]*, [2 x <3 x float>]*, <4 x float>*, <2 x float>*, <3 x float>*)* @main.flat, !"", !47, null, !59}
-!47 = !{!48, !53, !57}
-!48 = !{!50, !51, !52}
-!50 = !{i32 0, !"SV_Position", i8 9, i8 3, !29, i8 4, i32 1, i8 4, i32 0, i8 0, null}
-!51 = !{i32 1, !"TEXCOORD", i8 9, i8 0, !29, i8 2, i32 1, i8 2, i32 1, i8 0, null}
-!52 = !{i32 2, !"NORMAL", i8 9, i8 0, !29, i8 2, i32 1, i8 3, i32 2, i8 0, null}
-!53 = !{!54, !55, !56}
-!54 = !{i32 0, !"SV_Position", i8 9, i8 3, !29, i8 4, i32 1, i8 4, i32 0, i8 0, null}
-!55 = !{i32 1, !"TEXCOORD", i8 9, i8 0, !29, i8 2, i32 1, i8 2, i32 1, i8 0, null}
-!56 = !{i32 2, !"NORMAL", i8 9, i8 0, !29, i8 2, i32 1, i8 3, i32 2, i8 0, null}
-!57 = !{!58}
-!58 = !{i32 0, !"SV_TessFactor", i8 9, i8 25, !39, i8 0, i32 2, i8 1, i32 0, i8 0, null}
-!59 = !{i32 3, !60}
-!60 = !{void ([2 x <4 x float>]*, [2 x <2 x float>]*, [2 x <3 x float>]*, [2 x <4 x float>]*, [2 x <2 x float>]*, [2 x <3 x float>]*, [2 x float]*)* @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$01@@V?$OutputPatch@UHSPerVertexData@@$01@@@Z.flat", i32 2, i32 2, i32 1, i32 3, i32 3, float 6.400000e+01}
-!61 = !{!62, !62, i64 0}
-!62 = !{!"float", !63, i64 0}
-!63 = !{!"omnipotent char", !64, i64 0}
-!64 = !{!"Simple C/C++ TBAA"}
-
--- a/tools/clang/test/HLSL/dxil_validation/UndefValue.ll
+++ b/tools/clang/test/HLSL/dxil_validation/UndefValue.ll
@ -1,92 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Instructions should not read uninitialized value
-
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; B                        0   x           0     NONE unknown
-;
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; SV_Depth                 0   x           0    DEPTH   float   xyzw
-;
-;
-; Pipeline Runtime Information:
-;
-; Pixel Shader
-; DepthOutput=1
-; SampleFrequency=0
-;
-;
-; Input signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; B                        0        nointerpolation
-;
-; Output signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; SV_Depth                 0
-;
-; Buffer Definitions:
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-;
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-; Function Attrs: nounwind
-define void @"\01?main@@YAMM@Z.flat"(float, float* nocapture readnone) #0 {
-entry:
-  %2 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %add = fadd fast float %2, undef
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %add)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!12}
-
-!0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{i32 1, void (float, float*)* @"\01?main@@YAMM@Z.flat", !4}
-!4 = !{!5, !7, !10}
-!5 = !{i32 0, !6, !6}
-!6 = !{}
-!7 = !{i32 0, !8, !9}
-!8 = !{i32 4, !"B", i32 7, i32 13}
-!9 = !{i32 0}
-!10 = !{i32 1, !11, !9}
-!11 = !{i32 4, !"SV_DEPTH", i32 7, i32 9}
-!12 = !{void (float, float*)* @"\01?main@@YAMM@Z.flat", !"", !13, null, null}
-!13 = !{!14, !16, null}
-!14 = !{!15}
-!15 = !{i32 0, !"B", i8 13, i8 0, !9, i8 1, i32 1, i8 1, i32 0, i8 0, null}
-!16 = !{!17}
-!17 = !{i32 0, !"SV_Depth", i8 9, i8 17, !9, i8 0, i32 1, i8 1, i32 0, i8 0, null}
-
--- a/tools/clang/test/HLSL/dxil_validation/UpdateCounter.ll
+++ b/tools/clang/test/HLSL/dxil_validation/UpdateCounter.ll
@ -1,119 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: BufferUpdateCounter valid only on UAV
-; CHECK: BufferUpdateCounter valid only on structured buffers
-; CHECK: inc of BufferUpdateCounter must be an immediate constant
-; CHECK: RWStructuredBuffers may increment or decrement their counters, but not both.
-
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%class.Buffer = type { <2 x float> }
-%class.RWStructuredBuffer = type { %struct.Foo }
-%struct.Foo = type { <2 x float>, <3 x float>, [4 x <2 x i32>] }
-%dx.types.Handle = type { i8* }
-%dx.types.ResRet.f32 = type { float, float, float, float, i32 }
-
-@"\01?buf1@@3V?$Buffer@V?$vector@M$01@@@@A" = available_externally global %class.Buffer zeroinitializer, align 4
-@"\01?buf2@@3V?$RWStructuredBuffer@UFoo@@@@A" = available_externally global %class.RWStructuredBuffer zeroinitializer, align 4
-@dx.typevar.0 = external addrspace(1) constant %class.Buffer
-@dx.typevar.1 = external addrspace(1) constant %class.RWStructuredBuffer
-@dx.typevar.2 = external addrspace(1) constant %struct.Foo
-@llvm.used = appending global [7 x i8*] [i8* bitcast (%class.RWStructuredBuffer* @"\01?buf2@@3V?$RWStructuredBuffer@UFoo@@@@A" to i8*), i8* bitcast (%class.Buffer* @"\01?buf1@@3V?$Buffer@V?$vector@M$01@@@@A" to i8*), i8* bitcast (%class.Buffer* @"\01?buf1@@3V?$Buffer@V?$vector@M$01@@@@A" to i8*), i8* bitcast (%class.RWStructuredBuffer* @"\01?buf2@@3V?$RWStructuredBuffer@UFoo@@@@A" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.Buffer addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.RWStructuredBuffer addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.Foo addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat(float, float, <4 x float>* nocapture readnone) #0 {
-entry:
-  %buf2_UAV_structbuf = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 0, i1 false)
-  %buf1_texture_buf = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 0, i32 0, i1 false)
-  %3 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)
-  %4 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
-  %5 = call i32 @dx.op.bufferUpdateCounter(i32 71, %dx.types.Handle %buf2_UAV_structbuf, i8 1)
-  call void @dx.op.bufferStore.f32(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i32 %5, i32 0, float %4, float %3, float undef, float undef, i8 3)
-  %6 = call i32 @dx.op.bufferUpdateCounter(i32 71, %dx.types.Handle %buf2_UAV_structbuf, i8 -1)
-  %BufferLoad1 = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %buf2_UAV_structbuf, i32 %6, i32 0)
-  %7 = extractvalue %dx.types.ResRet.f32 %BufferLoad1, 0
-  %8 = extractvalue %dx.types.ResRet.f32 %BufferLoad1, 1
-  %9 = call i32 @dx.op.bufferUpdateCounter(i32 71, %dx.types.Handle %buf1_texture_buf, i8 undef)
-  %BufferLoad = call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %buf1_texture_buf, i32 %6, i32 undef)
-  %10 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 0
-  %11 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 1
-  %add.i0 = fadd fast float %10, %7
-  %add.i1 = fadd fast float %11, %8
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %add.i0)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %add.i1)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 0.000000e+00)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 0.000000e+00)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind
-declare i32 @dx.op.bufferUpdateCounter(i32, %dx.types.Handle, i8) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32, %dx.types.Handle, i32, i32) #2
-
-; Function Attrs: nounwind
-declare void @dx.op.bufferStore.f32(i32, %dx.types.Handle, i32, i32, float, float, float, float, i8) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!10, !19}
-!dx.entryPoints = !{!32}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{!4, !7, null, null}
-!4 = !{!5}
-!5 = !{i32 0, %class.Buffer* @"\01?buf1@@3V?$Buffer@V?$vector@M$01@@@@A", !"buf1", i32 0, i32 0, i32 1, i32 10, i32 0, !6}
-!6 = !{i32 0, i32 9}
-!7 = !{!8}
-!8 = !{i32 0, %class.RWStructuredBuffer* @"\01?buf2@@3V?$RWStructuredBuffer@UFoo@@@@A", !"buf2", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !9}
-!9 = !{i32 1, i32 52}
-!10 = !{i32 0, %class.Buffer addrspace(1)* @dx.typevar.0, !11, %class.RWStructuredBuffer addrspace(1)* @dx.typevar.1, !13, %struct.Foo addrspace(1)* @dx.typevar.2, !15}
-!11 = !{i32 8, !12}
-!12 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!13 = !{i32 88, !14}
-!14 = !{i32 3, i32 0, i32 6, !"h"}
-!15 = !{i32 88, !16, !17, !18}
-!16 = !{i32 3, i32 0, i32 6, !"a", i32 7, i32 9}
-!17 = !{i32 3, i32 16, i32 6, !"b", i32 7, i32 9}
-!18 = !{i32 3, i32 32, i32 6, !"c", i32 7, i32 4}
-!19 = !{i32 1, void (float, float, <4 x float>*)* @main.flat, !20}
-!20 = !{!21, !23, !26, !29}
-!21 = !{i32 0, !22, !22}
-!22 = !{}
-!23 = !{i32 0, !24, !25}
-!24 = !{i32 4, !"Idx1", i32 7, i32 9}
-!25 = !{i32 1}
-!26 = !{i32 0, !27, !28}
-!27 = !{i32 4, !"Idx2", i32 7, i32 9}
-!28 = !{i32 2}
-!29 = !{i32 1, !30, !31}
-!30 = !{i32 4, !"SV_Target", i32 7, i32 9}
-!31 = !{i32 0}
-!32 = !{void (float, float, <4 x float>*)* @main.flat, !"", !33, !3, !39}
-!33 = !{!34, !37, null}
-!34 = !{!35, !36}
-!35 = !{i32 0, !"Idx", i8 9, i8 0, !25, i8 2, i32 1, i8 1, i32 0, i8 0, null}
-!36 = !{i32 1, !"Idx", i8 9, i8 0, !28, i8 2, i32 1, i8 1, i32 1, i8 0, null}
-!37 = !{!38}
-!38 = !{i32 0, !"SV_Target", i8 9, i8 16, !31, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-!39 = !{i32 0, i64 8208}
--- a/tools/clang/test/HLSL/dxil_validation/barrier.ll
+++ b/tools/clang/test/HLSL/dxil_validation/barrier.ll
@ -1,171 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Internal declaration 'internalGV' is unused
-; CHECK: Vector type '<4 x float>' is not allowed
-; CHECK: External declaration 'dx.typevar.2' is unused
-; CHECK: Mode of Barrier must be an immediate constant
-; CHECK: sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory).  Only _t (thread group sync) is optional.
-; CHECK: sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal.
-
-
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%dx.alignment.legacy.class.RWStructuredBuffer = type { [2 x <2 x float>] }
-%dx.alignment.legacy.class.StructuredBuffer = type { %dx.alignment.legacy.struct.mat }
-%dx.alignment.legacy.struct.mat = type { [2 x <2 x float>] }
-%dx.alignment.legacy.class.StructuredBuffer.0 = type { [2 x <2 x float>] }
-%class.RWStructuredBuffer = type { %class.matrix.float.2.2 }
-%class.matrix.float.2.2 = type { [2 x <2 x float>] }
-%class.StructuredBuffer = type { %struct.mat }
-%struct.mat = type { %class.matrix.float.2.2 }
-%class.StructuredBuffer.0 = type { %class.matrix.float.2.2 }
-%dx.types.Handle = type { i8* }
-%dx.types.ResRet.f32 = type { float, float, float, float, i32 }
-
-@"internalGV" = internal global [64 x <4 x float>] undef
-@"\01?dataC@@3PAV?$matrix@M$01$01@@A.v" = addrspace(3) global [64 x <4 x float>] undef
-@"\01?fA@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A_legacy" = external global %dx.alignment.legacy.class.RWStructuredBuffer
-@"\01?mats@@3V?$StructuredBuffer@Umat@@@@A_legacy" = external global %dx.alignment.legacy.class.StructuredBuffer
-@"\01?mats2@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A_legacy" = external global %dx.alignment.legacy.class.StructuredBuffer.0
-@dx.typevar.0 = external addrspace(1) constant %class.RWStructuredBuffer
-@dx.typevar.1 = external addrspace(1) constant %class.StructuredBuffer
-@dx.typevar.2 = external addrspace(1) constant %struct.mat
-@dx.typevar.3 = external addrspace(1) constant %class.StructuredBuffer.0
-@dx.typevar.4 = external addrspace(1) constant %dx.alignment.legacy.class.RWStructuredBuffer
-@dx.typevar.5 = external addrspace(1) constant %dx.alignment.legacy.struct.mat
-@dx.typevar.6 = external addrspace(1) constant %dx.alignment.legacy.class.StructuredBuffer
-@dx.typevar.7 = external addrspace(1) constant %dx.alignment.legacy.class.StructuredBuffer.0
-@llvm.used = appending global [11 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%dx.alignment.legacy.struct.mat addrspace(1)* @dx.typevar.5 to i8 addrspace(1)*) to i8*), i8* bitcast (%dx.alignment.legacy.class.StructuredBuffer.0* @"\01?mats2@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A_legacy" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.RWStructuredBuffer addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%dx.alignment.legacy.class.StructuredBuffer addrspace(1)* @dx.typevar.6 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.StructuredBuffer addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.StructuredBuffer.0 addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*), i8* bitcast (%dx.alignment.legacy.class.RWStructuredBuffer* @"\01?fA@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A_legacy" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.mat addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* bitcast (%dx.alignment.legacy.class.StructuredBuffer* @"\01?mats@@3V?$StructuredBuffer@Umat@@@@A_legacy" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%dx.alignment.legacy.class.RWStructuredBuffer addrspace(1)* @dx.typevar.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%dx.alignment.legacy.class.StructuredBuffer.0 addrspace(1)* @dx.typevar.7 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: alwaysinline nounwind
-define void @main(<2 x i32> %tid, <2 x i32> %gid, <2 x i32> %gtid, i32 %gidx) #0 {
-entry:
-  %fA_UAV_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 0, i1 false)
-  %mats2_texture_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 1, i32 1, i1 false)
-  %mats_texture_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 0, i32 0, i1 false)
-  %0 = tail call i32 @dx.op.threadId.i32(i32 93, i32 0)
-  %1 = tail call i32 @dx.op.threadId.i32(i32 93, i32 1)
-  %2 = tail call i32 @dx.op.groupId.i32(i32 94, i32 0)
-  %3 = tail call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 1)
-  %4 = tail call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96)
-  %rem = and i32 %0, 63
-  %5 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %rem, i32 0
-  %6 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %rem, i32 1
-  %7 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %rem, i32 2
-  %8 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %rem, i32 3
-  %BufferLoad = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %mats_texture_structbuf, i32 %2, i32 0)
-  %9 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 0
-  %10 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 1
-  %11 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 2
-  %12 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 3
-  %BufferLoad10 = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %mats2_texture_structbuf, i32 %3, i32 0)
-  %13 = extractvalue %dx.types.ResRet.f32 %BufferLoad10, 0
-  %14 = extractvalue %dx.types.ResRet.f32 %BufferLoad10, 1
-  %15 = extractvalue %dx.types.ResRet.f32 %BufferLoad10, 2
-  %16 = extractvalue %dx.types.ResRet.f32 %BufferLoad10, 3
-  %.i0 = fadd fast float %13, %9
-  %.i1 = fadd fast float %14, %11
-  %.i2 = fadd fast float %15, %10
-  %.i3 = fadd fast float %16, %12
-  store float %.i0, float addrspace(3)* %5, align 16
-  store float %.i1, float addrspace(3)* %6, align 4
-  store float %.i2, float addrspace(3)* %7, align 8
-  store float %.i3, float addrspace(3)* %8, align 4
-  tail call void @dx.op.barrier(i32 83, i32 15)
-  tail call void @dx.op.barrier(i32 83, i32 0)
-  %rem3 = and i32 %1, 63
-  %sub = xor i32 %rem3, 63
-  %17 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %sub, i32 0
-  %18 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %sub, i32 1
-  %19 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %sub, i32 2
-  %20 = getelementptr inbounds [64 x <4 x float>], [64 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %sub, i32 3
-  %21 = load float, float addrspace(3)* %17, align 16
-  %22 = load float, float addrspace(3)* %18, align 4
-  %23 = load float, float addrspace(3)* %19, align 8
-  %24 = load float, float addrspace(3)* %20, align 4
-  tail call void @dx.op.barrier(i32 83, i32 10)
-  %add = add i32 %4, 2
-  tail call void @dx.op.bufferStore.f32(i32 70, %dx.types.Handle %fA_UAV_structbuf, i32 %add, i32 0, float %21, float %22, float %23, float %24, i8 15)
-  tail call void @dx.op.barrier(i32 83, i32 %rem)
-  %add6 = add i32 %4, 1
-  %25 = load %struct.mat, %struct.mat addrspace(1)* @dx.typevar.2, align 4
-  tail call void @dx.op.bufferStore.f32(i32 70, %dx.types.Handle %fA_UAV_structbuf, i32 %add6, i32 0, float %21, float %22, float %23, float %24, i8 15)
-  tail call void @dx.op.barrier(i32 83, i32 2)
-  tail call void @dx.op.barrier(i32 83, i32 4)
-  tail call void @dx.op.bufferStore.f32(i32 70, %dx.types.Handle %fA_UAV_structbuf, i32 %4, i32 0, float %21, float %22, float %23, float %24, i8 15)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadId.i32(i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.groupId.i32(i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadIdInGroup.i32(i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.flattenedThreadIdInGroup.i32(i32) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32, %dx.types.Handle, i32, i32) #2
-
-; Function Attrs: nounwind
-declare void @dx.op.barrier(i32, i32) #3
-
-; Function Attrs: nounwind
-declare void @dx.op.bufferStore.f32(i32, %dx.types.Handle, i32, i32, float, float, float, float, i8) #3
-
-attributes #0 = { alwaysinline nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
-attributes #3 = { nounwind }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!10, !19}
-!dx.entryPoints = !{!32}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 5}
-!2 = !{!"cs", i32 6, i32 0}
-!3 = !{!4, !8, null, null}
-!4 = !{!5, !7}
-!5 = !{i32 0, %dx.alignment.legacy.class.StructuredBuffer* @"\01?mats@@3V?$StructuredBuffer@Umat@@@@A_legacy", !"mats", i32 0, i32 0, i32 1, i32 12, i32 0, !6}
-!6 = !{i32 1, i32 16}
-!7 = !{i32 1, %dx.alignment.legacy.class.StructuredBuffer.0* @"\01?mats2@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A_legacy", !"mats2", i32 0, i32 1, i32 1, i32 12, i32 0, !6}
-!8 = !{!9}
-!9 = !{i32 0, %dx.alignment.legacy.class.RWStructuredBuffer* @"\01?fA@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A_legacy", !"fA", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !6}
-!10 = !{i32 0, %class.RWStructuredBuffer addrspace(1)* @dx.typevar.0, !11, %class.StructuredBuffer addrspace(1)* @dx.typevar.1, !14, %struct.mat addrspace(1)* @dx.typevar.2, !16, %class.StructuredBuffer.0 addrspace(1)* @dx.typevar.3, !11, %dx.alignment.legacy.class.RWStructuredBuffer addrspace(1)* @dx.typevar.4, !11, %dx.alignment.legacy.struct.mat addrspace(1)* @dx.typevar.5, !16, %dx.alignment.legacy.class.StructuredBuffer addrspace(1)* @dx.typevar.6, !14, %dx.alignment.legacy.class.StructuredBuffer.0 addrspace(1)* @dx.typevar.7, !11}
-!11 = !{i32 24, !12}
-!12 = !{i32 2, !13, i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!13 = !{i32 2, i32 2, i32 2}
-!14 = !{i32 24, !15}
-!15 = !{i32 3, i32 0, i32 6, !"h"}
-!16 = !{i32 24, !17}
-!17 = !{i32 2, !18, i32 3, i32 0, i32 6, !"f2x2", i32 7, i32 9}
-!18 = !{i32 2, i32 2, i32 1}
-!19 = !{i32 1, void (<2 x i32>, <2 x i32>, <2 x i32>, i32)* @main, !20}
-!20 = !{!21, !23, !26, !28, !30}
-!21 = !{i32 1, !22, !22}
-!22 = !{}
-!23 = !{i32 0, !24, !25}
-!24 = !{i32 4, !"SV_DispatchThreadID", i32 7, i32 5}
-!25 = !{i32 0}
-!26 = !{i32 0, !27, !25}
-!27 = !{i32 4, !"SV_GroupID", i32 7, i32 5}
-!28 = !{i32 0, !29, !25}
-!29 = !{i32 4, !"SV_GroupThreadID", i32 7, i32 5}
-!30 = !{i32 0, !31, !25}
-!31 = !{i32 4, !"SV_GroupIndex", i32 7, i32 5}
-!32 = !{void (<2 x i32>, <2 x i32>, <2 x i32>, i32)* @main, !"", null, !3, !33}
-!33 = !{i32 0, i64 16, i32 4, !34}
-!34 = !{i32 8, i32 8, i32 1}
--- a/tools/clang/test/HLSL/dxil_validation/cbuffer1.50.ll
+++ b/tools/clang/test/HLSL/dxil_validation/cbuffer1.50.ll
@ -1,117 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK:Cbuffer access out of bound
-
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no %s
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; SV_Target                0   xyzw        0   TARGET   float   xyzw
-;
-;
-; Pipeline Runtime Information:
-;
-; Pixel Shader
-; DepthOutput=0
-; SampleFrequency=0
-;
-;
-; Output signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; SV_Target                0
-;
-; Buffer Definitions:
-;
-; cbuffer Foo2
-; {
-;
-;   struct Foo2
-;   {
-;
-;       float4 g2;                                    ; Offset:    0
-;
-;   } Foo2                                            ; Offset:    0 Size:    16
-;
-; }
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-; Foo2                              cbuffer      NA          NA     CB0            cb5     1
-;
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%Foo2 = type { <4 x float> }
-%dx.types.Handle = type { i8* }
-
-@Foo2 = external constant %Foo2
-@dx.typevar.0 = external addrspace(1) constant %Foo2
-@llvm.used = appending global [3 x i8*] [i8* bitcast (%Foo2* @Foo2 to i8*), i8* bitcast (%Foo2* @Foo2 to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%Foo2 addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat(<4 x float>* nocapture readnone) #0 {
-entry:
-  %1 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 5, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %2 = call float @dx.op.cbufferLoad.f32(i32 59, %dx.types.Handle %1, i32 0, i32 8)  ; CBufferLoad(handle,byteOffset,alignment)
-  %3 = call float @dx.op.cbufferLoad.f32(i32 59, %dx.types.Handle %1, i32 4, i32 8)  ; CBufferLoad(handle,byteOffset,alignment)
-  %4 = call float @dx.op.cbufferLoad.f32(i32 59, %dx.types.Handle %1, i32 8, i32 8)  ; CBufferLoad(handle,byteOffset,alignment)
-  %5 = call float @dx.op.cbufferLoad.f32(i32 59, %dx.types.Handle %1, i32 16, i32 8)  ; CBufferLoad(handle,byteOffset,alignment)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %2)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %3)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %4)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %5)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  ret void
-}
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.cbufferLoad.f32(i32, %dx.types.Handle, i32, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!6, !9}
-!dx.entryPoints = !{!16}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{null, null, !4, null}
-!4 = !{!5}
-!5 = !{i32 0, %Foo2* @Foo2, !"Foo2", i32 0, i32 5, i32 1, i32 16, null}
-!6 = !{i32 0, %Foo2 addrspace(1)* @dx.typevar.0, !7}
-!7 = !{i32 0, !8}
-!8 = !{i32 3, i32 0, i32 6, !"g2", i32 7, i32 9}
-!9 = !{i32 1, void (<4 x float>*)* @main.flat, !10}
-!10 = !{!11, !13}
-!11 = !{i32 0, !12, !12}
-!12 = !{}
-!13 = !{i32 1, !14, !15}
-!14 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!15 = !{i32 0}
-!16 = !{void (<4 x float>*)* @main.flat, !"", !17, !3, null}
-!17 = !{null, !18, null}
-!18 = !{!19}
-!19 = !{i32 0, !"SV_Target", i8 9, i8 16, !15, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-
--- a/tools/clang/test/HLSL/dxil_validation/cbuffer1.50_legacy.ll
+++ b/tools/clang/test/HLSL/dxil_validation/cbuffer1.50_legacy.ll
@ -1,120 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK:Cbuffer access out of bound
-
-
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no %s
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; SV_Target                0   xyzw        0   TARGET   float   xyzw
-;
-;
-; Pipeline Runtime Information:
-;
-; Pixel Shader
-; DepthOutput=0
-; SampleFrequency=0
-;
-;
-; Output signature:
-;
-; Name                 Index             InterpMode
-; -------------------- ----- ----------------------
-; SV_Target                0
-;
-; Buffer Definitions:
-;
-; cbuffer Foo2
-; {
-;
-;   struct Foo2
-;   {
-;
-;       float4 g2;                                    ; Offset:    0
-;
-;   } Foo2                                            ; Offset:    0 Size:    16
-;
-; }
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-; Foo2                              cbuffer      NA          NA     CB0            cb5     1
-;
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%Foo2 = type { <4 x float> }
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.f32 = type { float, float, float, float }
-
-@Foo2 = external constant %Foo2
-@dx.typevar.0 = external addrspace(1) constant %Foo2
-@llvm.used = appending global [3 x i8*] [i8* bitcast (%Foo2* @Foo2 to i8*), i8* bitcast (%Foo2* @Foo2 to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%Foo2 addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat(<4 x float>* nocapture readnone) #0 {
-entry:
-  %1 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 5, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %2 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %1, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
-  %3 = extractvalue %dx.types.CBufRet.f32 %2, 0
-  %4 = extractvalue %dx.types.CBufRet.f32 %2, 1
-  %5 = extractvalue %dx.types.CBufRet.f32 %2, 2
-  %6 = extractvalue %dx.types.CBufRet.f32 %2, 3
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %3)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %4)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %5)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %6)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  ret void
-}
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!6, !9}
-!dx.entryPoints = !{!16}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{null, null, !4, null}
-!4 = !{!5}
-!5 = !{i32 0, %Foo2* @Foo2, !"Foo2", i32 0, i32 5, i32 1, i32 16, null}
-!6 = !{i32 0, %Foo2 addrspace(1)* @dx.typevar.0, !7}
-!7 = !{i32 0, !8}
-!8 = !{i32 3, i32 0, i32 6, !"g2", i32 7, i32 9}
-!9 = !{i32 1, void (<4 x float>*)* @main.flat, !10}
-!10 = !{!11, !13}
-!11 = !{i32 0, !12, !12}
-!12 = !{}
-!13 = !{i32 1, !14, !15}
-!14 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!15 = !{i32 0}
-!16 = !{void (<4 x float>*)* @main.flat, !"", !17, !3, null}
-!17 = !{null, !18, null}
-!18 = !{!19}
-!19 = !{i32 0, !"SV_Target", i8 9, i8 16, !15, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-
--- a/tools/clang/test/HLSL/dxil_validation/csThreadSize.ll
+++ b/tools/clang/test/HLSL/dxil_validation/csThreadSize.ll
@ -1,159 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Declared Thread Group X size 1025 outside valid range [1..1024]
-; CHECK: Declared Thread Group Y size 1025 outside valid range [1..1024]
-; CHECK: Declared Thread Group Z size 1025 outside valid range [1..64]
-; CHECK: Declared Thread Group Count 1076890625 (X*Y*Z) is beyond the valid maximum of 1024
-; CHECK: Total Thread Group Shared Memory storage is 1024000000, exceeded 32768
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%dx.alignment.legacy.class.RWStructuredBuffer = type { [2 x <2 x float>] }
-%dx.alignment.legacy.class.StructuredBuffer = type { %dx.alignment.legacy.struct.mat }
-%dx.alignment.legacy.struct.mat = type { [2 x <2 x float>] }
-%dx.alignment.legacy.class.StructuredBuffer.0 = type { [2 x <2 x float>] }
-%class.RWStructuredBuffer = type { %class.matrix.float.2.2 }
-%class.matrix.float.2.2 = type { [2 x <2 x float>] }
-%class.StructuredBuffer = type { %struct.mat }
-%struct.mat = type { %class.matrix.float.2.2 }
-%class.StructuredBuffer.0 = type { %class.matrix.float.2.2 }
-%dx.types.Handle = type { i8* }
-%dx.types.ResRet.f32 = type { float, float, float, float, i32 }
-
-@"\01?dataC@@3PAV?$matrix@M$01$01@@A.v" = addrspace(3) global [64000000 x <4 x float>] undef
-@"\01?fA@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A_legacy" = external global %dx.alignment.legacy.class.RWStructuredBuffer
-@"\01?mats@@3V?$StructuredBuffer@Umat@@@@A_legacy" = external global %dx.alignment.legacy.class.StructuredBuffer
-@"\01?mats2@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A_legacy" = external global %dx.alignment.legacy.class.StructuredBuffer.0
-@dx.typevar.0 = external addrspace(1) constant %class.RWStructuredBuffer
-@dx.typevar.1 = external addrspace(1) constant %class.StructuredBuffer
-@dx.typevar.2 = external addrspace(1) constant %struct.mat
-@dx.typevar.3 = external addrspace(1) constant %class.StructuredBuffer.0
-@dx.typevar.4 = external addrspace(1) constant %dx.alignment.legacy.class.RWStructuredBuffer
-@dx.typevar.5 = external addrspace(1) constant %dx.alignment.legacy.struct.mat
-@dx.typevar.6 = external addrspace(1) constant %dx.alignment.legacy.class.StructuredBuffer
-@dx.typevar.7 = external addrspace(1) constant %dx.alignment.legacy.class.StructuredBuffer.0
-@llvm.used = appending global [11 x i8*] [i8* bitcast (%dx.alignment.legacy.class.RWStructuredBuffer* @"\01?fA@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A_legacy" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.StructuredBuffer.0 addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*), i8* bitcast (%dx.alignment.legacy.class.StructuredBuffer* @"\01?mats@@3V?$StructuredBuffer@Umat@@@@A_legacy" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%dx.alignment.legacy.class.StructuredBuffer addrspace(1)* @dx.typevar.6 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.RWStructuredBuffer addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%dx.alignment.legacy.class.RWStructuredBuffer addrspace(1)* @dx.typevar.4 to i8 addrspace(1)*) to i8*), i8* bitcast (%dx.alignment.legacy.class.StructuredBuffer.0* @"\01?mats2@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A_legacy" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast 
-(%struct.mat addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.StructuredBuffer addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%dx.alignment.legacy.class.StructuredBuffer.0 addrspace(1)* @dx.typevar.7 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%dx.alignment.legacy.struct.mat addrspace(1)* @dx.typevar.5 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: alwaysinline nounwind
-define void @main(<2 x i32> %tid, <2 x i32> %gid, <2 x i32> %gtid, i32 %gidx) #0 {
-entry:
-  %fA_UAV_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 0, i1 false)
-  %mats2_texture_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 1, i32 0, i1 false)
-  %mats_texture_structbuf = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 0, i32 0, i1 false)
-  %0 = tail call i32 @dx.op.threadId.i32(i32 93, i32 0)
-  %1 = tail call i32 @dx.op.threadId.i32(i32 93, i32 1)
-  %2 = tail call i32 @dx.op.groupId.i32(i32 94, i32 0)
-  %3 = tail call i32 @dx.op.threadIdInGroup.i32(i32 95, i32 1)
-  %4 = tail call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 96)
-  %rem = and i32 %0, 63
-  %5 = getelementptr inbounds [64000000 x <4 x float>], [64000000 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %rem, i32 0
-  %6 = getelementptr inbounds [64000000 x <4 x float>], [64000000 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %rem, i32 1
-  %7 = getelementptr inbounds [64000000 x <4 x float>], [64000000 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %rem, i32 2
-  %8 = getelementptr inbounds [64000000 x <4 x float>], [64000000 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %rem, i32 3
-  %BufferLoad = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %mats_texture_structbuf, i32 %2, i32 0)
-  %9 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 0
-  %10 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 1
-  %11 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 2
-  %12 = extractvalue %dx.types.ResRet.f32 %BufferLoad, 3
-  %BufferLoad7 = tail call %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32 69, %dx.types.Handle %mats2_texture_structbuf, i32 %3, i32 0)
-  %13 = extractvalue %dx.types.ResRet.f32 %BufferLoad7, 0
-  %14 = extractvalue %dx.types.ResRet.f32 %BufferLoad7, 1
-  %15 = extractvalue %dx.types.ResRet.f32 %BufferLoad7, 2
-  %16 = extractvalue %dx.types.ResRet.f32 %BufferLoad7, 3
-  %.i0 = fadd fast float %13, %9
-  %.i1 = fadd fast float %14, %11
-  %.i2 = fadd fast float %15, %10
-  %.i3 = fadd fast float %16, %12
-  store float %.i0, float addrspace(3)* %5, align 16
-  store float %.i1, float addrspace(3)* %6, align 4
-  store float %.i2, float addrspace(3)* %7, align 8
-  store float %.i3, float addrspace(3)* %8, align 4
-  tail call void @dx.op.barrier(i32 83, i32 9)
-  %rem3 = and i32 %1, 63
-  %sub = xor i32 %rem3, 63
-  %17 = getelementptr inbounds [64000000 x <4 x float>], [64000000 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %sub, i32 0
-  %18 = getelementptr inbounds [64000000 x <4 x float>], [64000000 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %sub, i32 1
-  %19 = getelementptr inbounds [64000000 x <4 x float>], [64000000 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %sub, i32 2
-  %20 = getelementptr inbounds [64000000 x <4 x float>], [64000000 x <4 x float>] addrspace(3)* @"\01?dataC@@3PAV?$matrix@M$01$01@@A.v", i32 0, i32 %sub, i32 3
-  %21 = load float, float addrspace(3)* %17, align 16
-  %22 = load float, float addrspace(3)* %18, align 4
-  %23 = load float, float addrspace(3)* %19, align 8
-  %24 = load float, float addrspace(3)* %20, align 4
-  tail call void @dx.op.bufferStore.f32(i32 70, %dx.types.Handle %fA_UAV_structbuf, i32 %4, i32 0, float %21, float %22, float %23, float %24, i8 15)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadId.i32(i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.groupId.i32(i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadIdInGroup.i32(i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.flattenedThreadIdInGroup.i32(i32) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.bufferStore.f32(i32, %dx.types.Handle, i32, i32, float, float, float, float, i8) #2
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.bufferLoad.f32(i32, %dx.types.Handle, i32, i32) #3
-
-; Function Attrs: nounwind
-declare void @dx.op.barrier(i32, i32) #2
-
-attributes #0 = { alwaysinline nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind }
-attributes #3 = { nounwind readonly }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!10, !19}
-!dx.entryPoints = !{!32}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 5}
-!2 = !{!"cs", i32 6, i32 0}
-!3 = !{!4, !8, null, null}
-!4 = !{!5, !7}
-!5 = !{i32 0, %dx.alignment.legacy.class.StructuredBuffer* @"\01?mats@@3V?$StructuredBuffer@Umat@@@@A_legacy", !"mats", i32 0, i32 0, i32 1, i32 12, i32 0, !6}
-!6 = !{i32 1, i32 16}
-!7 = !{i32 1, %dx.alignment.legacy.class.StructuredBuffer.0* @"\01?mats2@@3V?$StructuredBuffer@V?$matrix@M$01$01@@@@A_legacy", !"mats2", i32 0, i32 1, i32 1, i32 12, i32 0, !6}
-!8 = !{!9}
-!9 = !{i32 0, %dx.alignment.legacy.class.RWStructuredBuffer* @"\01?fA@@3V?$RWStructuredBuffer@V?$matrix@M$01$01@@@@A_legacy", !"fA", i32 0, i32 0, i32 1, i32 12, i1 false, i1 false, i1 false, !6}
-!10 = !{i32 0, %class.RWStructuredBuffer addrspace(1)* @dx.typevar.0, !11, %class.StructuredBuffer addrspace(1)* @dx.typevar.1, !14, %struct.mat addrspace(1)* @dx.typevar.2, !16, %class.StructuredBuffer.0 addrspace(1)* @dx.typevar.3, !11, %dx.alignment.legacy.class.RWStructuredBuffer addrspace(1)* @dx.typevar.4, !11, %dx.alignment.legacy.struct.mat addrspace(1)* @dx.typevar.5, !16, %dx.alignment.legacy.class.StructuredBuffer addrspace(1)* @dx.typevar.6, !14, %dx.alignment.legacy.class.StructuredBuffer.0 addrspace(1)* @dx.typevar.7, !11}
-!11 = !{i32 24, !12}
-!12 = !{i32 2, !13, i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!13 = !{i32 2, i32 2, i32 2}
-!14 = !{i32 24, !15}
-!15 = !{i32 3, i32 0, i32 6, !"h"}
-!16 = !{i32 24, !17}
-!17 = !{i32 2, !18, i32 3, i32 0, i32 6, !"f2x2", i32 7, i32 9}
-!18 = !{i32 2, i32 2, i32 1}
-!19 = !{i32 1, void (<2 x i32>, <2 x i32>, <2 x i32>, i32)* @main, !20}
-!20 = !{!21, !23, !26, !28, !30}
-!21 = !{i32 1, !22, !22}
-!22 = !{}
-!23 = !{i32 0, !24, !25}
-!24 = !{i32 4, !"SV_DispatchThreadID", i32 7, i32 5}
-!25 = !{i32 0}
-!26 = !{i32 0, !27, !25}
-!27 = !{i32 4, !"SV_GroupID", i32 7, i32 5}
-!28 = !{i32 0, !29, !25}
-!29 = !{i32 4, !"SV_GroupThreadID", i32 7, i32 5}
-!30 = !{i32 0, !31, !25}
-!31 = !{i32 4, !"SV_GroupIndex", i32 7, i32 5}
-!32 = !{void (<2 x i32>, <2 x i32>, <2 x i32>, i32)* @main, !"", null, !3, !33}
-!33 = !{i32 0, i64 16, i32 4, !34}
-!34 = !{i32 1025, i32 1025, i32 1025}
-
--- a/tools/clang/test/HLSL/dxil_validation/deadloop.ll
+++ b/tools/clang/test/HLSL/dxil_validation/deadloop.ll
@ -1,92 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Named metadata 'dx.unused' is unknown
-; CHECK: Loop must have break
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%"$Globals" = type { i32 }
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
-
-@"\01?i@@3HA" = global i32 0, align 4
-@"$Globals" = external constant %"$Globals"
-@dx.typevar.0 = external addrspace(1) constant %"$Globals"
-@llvm.used = appending global [3 x i8*] [i8* bitcast (%"$Globals"* @"$Globals" to i8*), i8* bitcast (%"$Globals"* @"$Globals" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"$Globals" addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat(<2 x float>, <3 x i32>, float* nocapture readnone) #0 {
-entry:
-  %3 = call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0, i32 undef)
-  %4 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
-  %5 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 0, i1 false)
-  %6 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 60, %dx.types.Handle %5, i32 0)
-  %7 = extractvalue %dx.types.CBufRet.i32 %6, 0
-  %cmp = icmp slt i32 %7, %3
-  br i1 %cmp, label %while.body, label %while.end
-
-while.body:                                       ; preds = %while.body, %entry
-  %s.01 = phi float [ %add, %while.body ], [ 0.000000e+00, %entry ]
-  %add = fadd fast float %s.01, %4
-  br label %while.body
-
-while.end:                                        ; preds = %while.body, %entry
-  %s.0.lcssa = phi float [ 0.000000e+00, %entry ], [ %add, %while.body ]
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %s.0.lcssa)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!6, !9}
-!dx.entryPoints = !{!20}
-!dx.unused = !{!20}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{null, null, !4, null}
-!4 = !{!5}
-!5 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 0, i32 1, i32 4, null}
-!6 = !{i32 0, %"$Globals" addrspace(1)* @dx.typevar.0, !7}
-!7 = !{i32 0, !8}
-!8 = !{i32 3, i32 0, i32 6, !"i", i32 7, i32 4}
-!9 = !{i32 1, void (<2 x float>, <3 x i32>, float*)* @main.flat, !10}
-!10 = !{!11, !13, !16, !18}
-!11 = !{i32 0, !12, !12}
-!12 = !{}
-!13 = !{i32 0, !14, !15}
-!14 = !{i32 4, !"A", i32 7, i32 9}
-!15 = !{i32 0}
-!16 = !{i32 0, !17, !15}
-!17 = !{i32 4, !"B", i32 7, i32 4}
-!18 = !{i32 1, !19, !15}
-!19 = !{i32 4, !"SV_Target", i32 7, i32 9}
-!20 = !{void (<2 x float>, <3 x i32>, float*)* @main.flat, !"", !21, !3, null}
-!21 = !{!22, !25, null}
-!22 = !{!23, !24}
-!23 = !{i32 0, !"A", i8 9, i8 0, !15, i8 2, i32 1, i8 2, i32 0, i8 0, null}
-!24 = !{i32 1, !"B", i8 4, i8 0, !15, i8 1, i32 1, i8 3, i32 1, i8 0, null}
-!25 = !{!26}
-!26 = !{i32 0, !"SV_Target", i8 9, i8 16, !15, i8 0, i32 1, i8 1, i32 0, i8 0, null}
--- a/tools/clang/test/HLSL/dxil_validation/hsAttribute.ll
+++ b/tools/clang/test/HLSL/dxil_validation/hsAttribute.ll
@ -1,159 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: HS input control point count must be [1..32].  36 specified
-; CHECK: Invalid Tessellator Domain specified. Must be isoline, tri or quad
-; CHECK: Invalid Tessellator Partitioning specified. Must be integer, pow2, fractional_odd or fractional_even.
-; CHECK: Invalid Tessellator Output Primitive specified. Must be point, line, triangleCW or triangleCCW.
-; CHECK: Hull Shader MaxTessFactor must be [1.000000..64.000000].  65.000000 specified
-; CHECK: Invalid Tessellator Domain specified. Must be isoline, tri or quad
-; CHECK: output control point count must be [0..32].  36 specified
-
-
-
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%class.Texture2D = type { <4 x float>, %"class.Texture2D<vector<float, 4> >::mips_type" }
-%"class.Texture2D<vector<float, 4> >::mips_type" = type { i32 }
-%struct.PSSceneIn = type { <4 x float>, <2 x float>, <3 x float> }
-%struct.VSSceneIn = type { <3 x float>, <3 x float>, <2 x float> }
-%struct.HSPerPatchData = type { [3 x float], float }
-%struct.HSPerVertexData = type { %struct.PSSceneIn }
-
-@dx.typevar.0 = external addrspace(1) constant %class.Texture2D
-@dx.typevar.1 = external addrspace(1) constant %"class.Texture2D<vector<float, 4> >::mips_type"
-@dx.typevar.2 = external addrspace(1) constant %struct.PSSceneIn
-@dx.typevar.3 = external addrspace(1) constant %struct.VSSceneIn
-@dx.typevar.4 = external addrspace(1) constant %struct.HSPerPatchData
-@dx.typevar.5 = external addrspace(1) constant %struct.HSPerVertexData
-@llvm.used = appending global [6 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%class.Texture2D addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.PSSceneIn addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.VSSceneIn addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.HSPerPatchData addrspace(1)* @dx.typevar.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.HSPerVertexData addrspace(1)* @dx.typevar.5 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$02@@@Z.flat"([3 x <4 x float>]* nocapture readnone, [3 x <2 x float>]* nocapture readnone, [3 x <3 x float>]* nocapture readnone, [3 x float]* nocapture readnone, float* nocapture readnone) #0 {
-entry:
-  %retval.0 = alloca [3 x float], align 4
-  %arrayidx3 = getelementptr inbounds [3 x float], [3 x float]* %retval.0, i32 0, i32 0
-  store float 1.000000e+00, float* %arrayidx3, align 4, !tbaa !56
-  %arrayidx22 = getelementptr inbounds [3 x float], [3 x float]* %retval.0, i32 0, i32 1
-  store float 1.000000e+00, float* %arrayidx22, align 4, !tbaa !56
-  %arrayidx41 = getelementptr inbounds [3 x float], [3 x float]* %retval.0, i32 0, i32 2
-  store float 1.000000e+00, float* %arrayidx41, align 4, !tbaa !56
-  %load = load [3 x float], [3 x float]* %retval.0, align 4
-  %5 = extractvalue [3 x float] %load, 0
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 0, i8 0, float %5)
-  %6 = extractvalue [3 x float] %load, 1
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 1, i8 0, float %6)
-  %7 = extractvalue [3 x float] %load, 2
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 0, i32 2, i8 0, float %7)
-  call void @dx.op.storePatchConstant.f32(i32 109, i32 1, i32 0, i8 0, float 1.000000e+00)
-  ret void
-}
-
-; Function Attrs: nounwind
-define void @main.flat(i32, [3 x <4 x float>]* nocapture readnone, [3 x <2 x float>]* nocapture readnone, [3 x <3 x float>]* nocapture readnone, <4 x float>* nocapture readnone, <2 x float>* nocapture readnone, <3 x float>* nocapture readnone) #0 {
-entry:
-  %7 = call i32 @dx.op.outputControlPointID.i32(i32 110)
-  %8 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 %7)
-  %9 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 %7)
-  %10 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 %7)
-  %11 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 %7)
-  %12 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 %7)
-  %13 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 %7)
-  %14 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 0, i32 %7)
-  %15 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 1, i32 %7)
-  %16 = call float @dx.op.loadInput.f32(i32 4, i32 2, i32 0, i8 2, i32 %7)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %8)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %9)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %10)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %11)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %12)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %13)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 0, float %14)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 1, float %15)
-  call void @dx.op.storeOutput.f32(i32 5, i32 2, i32 0, i8 2, float %16)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.outputControlPointID.i32(i32) #1
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.storePatchConstant.f32(i32, i32, i32, i8, float) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.valver = !{!1}
-!dx.version = !{!2}
-!dx.shaderModel = !{!3}
-!dx.typeAnnotations = !{!4, !23}
-!dx.entryPoints = !{!45}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 1, i32 0}
-!2 = !{i32 0, i32 7}
-!3 = !{!"hs", i32 6, i32 0}
-!4 = !{i32 0, %class.Texture2D addrspace(1)* @dx.typevar.0, !5, %"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1, !8, %struct.PSSceneIn addrspace(1)* @dx.typevar.2, !10, %struct.VSSceneIn addrspace(1)* @dx.typevar.3, !14, %struct.HSPerPatchData addrspace(1)* @dx.typevar.4, !18, %struct.HSPerVertexData addrspace(1)* @dx.typevar.5, !21}
-!5 = !{i32 20, !6, !7}
-!6 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!7 = !{i32 3, i32 16, i32 6, !"mips"}
-!8 = !{i32 4, !9}
-!9 = !{i32 3, i32 0, i32 6, !"handle", i32 7, i32 5}
-!10 = !{i32 44, !11, !12, !13}
-!11 = !{i32 3, i32 0, i32 4, !"SV_Position", i32 6, !"pos", i32 7, i32 9}
-!12 = !{i32 3, i32 16, i32 4, !"TEXCOORD0", i32 6, !"tex", i32 7, i32 9}
-!13 = !{i32 3, i32 32, i32 4, !"NORMAL", i32 6, !"norm", i32 7, i32 9}
-!14 = !{i32 40, !15, !16, !17}
-!15 = !{i32 3, i32 0, i32 4, !"POSITION", i32 6, !"pos", i32 7, i32 9}
-!16 = !{i32 3, i32 16, i32 4, !"NORMAL", i32 6, !"norm", i32 7, i32 9}
-!17 = !{i32 3, i32 32, i32 4, !"TEXCOORD0", i32 6, !"tex", i32 7, i32 9}
-!18 = !{i32 40, !19, !20}
-!19 = !{i32 3, i32 0, i32 4, !"SV_TessFactor", i32 6, !"edges", i32 7, i32 9}
-!20 = !{i32 3, i32 36, i32 4, !"SV_InsideTessFactor", i32 6, !"inside", i32 7, i32 9}
-!21 = !{i32 44, !22}
-!22 = !{i32 3, i32 0, i32 6, !"v"}
-!23 = !{i32 1, void ([3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, [3 x float]*, float*)* @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$02@@@Z.flat", !24, void (i32, [3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, <4 x float>*, <2 x float>*, <3 x float>*)* @main.flat, !39}
-!24 = !{!25, !27, !30, !32, !34, !37}
-!25 = !{i32 0, !26, !26}
-!26 = !{}
-!27 = !{i32 3, !28, !29}
-!28 = !{i32 4, !"SV_Position", i32 7, i32 9}
-!29 = !{i32 0}
-!30 = !{i32 3, !31, !29}
-!31 = !{i32 4, !"TEXCOORD0", i32 7, i32 9}
-!32 = !{i32 3, !33, !29}
-!33 = !{i32 4, !"NORMAL", i32 7, i32 9}
-!34 = !{i32 1, !35, !36}
-!35 = !{i32 4, !"SV_TessFactor", i32 7, i32 9}
-!36 = !{i32 0, i32 1, i32 2}
-!37 = !{i32 1, !38, !29}
-!38 = !{i32 4, !"SV_InsideTessFactor", i32 7, i32 9}
-!39 = !{!25, !40, !27, !30, !32, !42, !43, !44}
-!40 = !{i32 0, !41, !29}
-!41 = !{i32 4, !"SV_OutputControlPointID", i32 7, i32 5}
-!42 = !{i32 1, !28, !29}
-!43 = !{i32 1, !31, !29}
-!44 = !{i32 1, !33, !29}
-!45 = !{void (i32, [3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, <4 x float>*, <2 x float>*, <3 x float>*)* @main.flat, !"", !46, null, !54}
-!46 = !{!47, !47, !51}
-!47 = !{!48, !49, !50}
-!48 = !{i32 0, !"SV_Position", i8 9, i8 3, !29, i8 4, i32 1, i8 4, i32 0, i8 0, null}
-!49 = !{i32 1, !"TEXCOORD", i8 9, i8 0, !29, i8 2, i32 1, i8 2, i32 1, i8 0, null}
-!50 = !{i32 2, !"NORMAL", i8 9, i8 0, !29, i8 2, i32 1, i8 3, i32 2, i8 0, null}
-!51 = !{!52, !53}
-!52 = !{i32 0, !"SV_TessFactor", i8 9, i8 25, !36, i8 0, i32 3, i8 1, i32 0, i8 0, null}
-!53 = !{i32 1, !"SV_InsideTessFactor", i8 9, i8 26, !29, i8 0, i32 1, i8 1, i32 3, i8 0, null}
-!54 = !{i32 3, !55}
-!55 = !{void ([3 x <4 x float>]*, [3 x <2 x float>]*, [3 x <3 x float>]*, [3 x float]*, float*)* @"\01?HSPerPatchFunc@@YA?AUHSPerPatchData@@V?$InputPatch@UPSSceneIn@@$02@@@Z.flat", i32 36, i32 36, i32 0, i32 0, i32 0, float 6.500000e+01}
-!56 = !{!57, !57, i64 0}
-!57 = !{!"float", !58, i64 0}
-!58 = !{!"omnipotent char", !59, i64 0}
-!59 = !{!"Simple C/C++ TBAA"}
--- a/tools/clang/test/HLSL/dxil_validation/interpChange.ll
+++ b/tools/clang/test/HLSL/dxil_validation/interpChange.ll
@ -1,58 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: signature element A at location (0,2) size (1,2) has interpolation mode that differs from another element packed into the same row.
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-; Function Attrs: nounwind
-define void @main.flat(<2 x float>, <2 x float>, <4 x float>* nocapture readnone) #0 {
-entry:
-  %3 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)
-  %4 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 undef)
-  %5 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
-  %6 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %5)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %6)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %3)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %4)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!15}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{i32 1, void (<2 x float>, <2 x float>, <4 x float>*)* @main.flat, !4}
-!4 = !{!5, !7, !10, !13}
-!5 = !{i32 0, !6, !6}
-!6 = !{}
-!7 = !{i32 0, !8, !9}
-!8 = !{i32 4, !"A", i32 7, i32 9}
-!9 = !{i32 0}
-!10 = !{i32 0, !11, !12}
-!11 = !{i32 4, !"A1", i32 5, i32 3, i32 7, i32 9}
-!12 = !{i32 1}
-!13 = !{i32 1, !14, !9}
-!14 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!15 = !{void (<2 x float>, <2 x float>, <4 x float>*)* @main.flat, !"", !16, null, null}
-!16 = !{!17, !20, null}
-!17 = !{!18, !19}
-!18 = !{i32 0, !"A", i8 9, i8 0, !9, i8 2, i32 1, i8 2, i32 0, i8 0, null}
-!19 = !{i32 1, !"A", i8 9, i8 0, !12, i8 3, i32 1, i8 2, i32 0, i8 2, null}
-!20 = !{!21}
-!21 = !{i32 0, !"SV_Target", i8 9, i8 16, !9, i8 0, i32 1, i8 4, i32 0, i8 0, null}
--- a/tools/clang/test/HLSL/dxil_validation/interpOnInt.ll
+++ b/tools/clang/test/HLSL/dxil_validation/interpOnInt.ll
@ -1,76 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK:signature element A specifies invalid interpolation mode for integer component type.
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-; Function Attrs: nounwind
-define void @main.flat(<4 x float>, <4 x i32>, <4 x float>* nocapture readnone) #0 {
-entry:
-  %3 = tail call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0, i32 undef)
-  %4 = tail call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 1, i32 undef)
-  %5 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
-  %6 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
-  %7 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef)
-  %8 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 undef)
-  %conv.i0 = uitofp i32 %4 to float
-  %conv.i1 = uitofp i32 %3 to float
-  %mul.i0 = fmul fast float %5, %conv.i0
-  %mul.i1 = fmul fast float %6, %conv.i1
-  %mul.i2 = fmul fast float %7, %conv.i1
-  %mul.i3 = fmul fast float %8, %conv.i1
-  %FAbs = tail call float @dx.op.unary.f32(i32 6, float %mul.i0)
-  %FAbs1 = tail call float @dx.op.unary.f32(i32 6, float %mul.i1)
-  %FAbs2 = tail call float @dx.op.unary.f32(i32 6, float %mul.i2)
-  %FAbs3 = tail call float @dx.op.unary.f32(i32 6, float %mul.i3)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %FAbs)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %FAbs1)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %FAbs2)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %FAbs3)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!15}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{i32 1, void (<4 x float>, <4 x i32>, <4 x float>*)* @main.flat, !4}
-!4 = !{!5, !7, !10, !13}
-!5 = !{i32 0, !6, !6}
-!6 = !{}
-!7 = !{i32 0, !8, !9}
-!8 = !{i32 4, !"A", i32 7, i32 9}
-!9 = !{i32 0}
-!10 = !{i32 0, !11, !12}
-!11 = !{i32 4, !"A1", i32 5, i32 3, i32 7, i32 5}
-!12 = !{i32 1}
-!13 = !{i32 1, !14, !9}
-!14 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!15 = !{void (<4 x float>, <4 x i32>, <4 x float>*)* @main.flat, !"", !16, null, null}
-!16 = !{!17, !20, null}
-!17 = !{!18, !19}
-!18 = !{i32 0, !"A", i8 9, i8 0, !9, i8 2, i32 1, i8 4, i32 0, i8 0, null}
-!19 = !{i32 1, !"A", i8 5, i8 0, !12, i8 3, i32 1, i8 4, i32 1, i8 0, null}
-!20 = !{!21}
-!21 = !{i32 0, !"SV_Target", i8 9, i8 16, !9, i8 0, i32 1, i8 4, i32 0, i8 0, null}
--- a/tools/clang/test/HLSL/dxil_validation/invalidSigCompTy.ll
+++ b/tools/clang/test/HLSL/dxil_validation/invalidSigCompTy.ll
@ -1,64 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: signature A specifies unrecognized or invalid component type
-
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-; Function Attrs: nounwind
-define void @main.flat(<4 x float>, <4 x float>* nocapture readnone) #0 {
-entry:
-  %2 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
-  %3 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
-  %4 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef)
-  %5 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 undef)
-  %mul.i0 = fmul fast float %3, %2
-  %mul.i2 = fmul fast float %4, %2
-  %mul.i3 = fmul fast float %5, %2
-  %FAbs = tail call float @dx.op.unary.f32(i32 6, float %mul.i0)
-  %FAbs2 = tail call float @dx.op.unary.f32(i32 6, float %mul.i2)
-  %FAbs3 = tail call float @dx.op.unary.f32(i32 6, float %mul.i3)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %FAbs)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %FAbs)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %FAbs2)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %FAbs3)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!12}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{i32 1, void (<4 x float>, <4 x float>*)* @main.flat, !4}
-!4 = !{!5, !7, !10}
-!5 = !{i32 0, !6, !6}
-!6 = !{}
-!7 = !{i32 0, !8, !9}
-!8 = !{i32 4, !"A", i32 7, i32 9}
-!9 = !{i32 0}
-!10 = !{i32 1, !11, !9}
-!11 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!12 = !{void (<4 x float>, <4 x float>*)* @main.flat, !"", !13, null, null}
-!13 = !{!14, !16, null}
-!14 = !{!15}
-!15 = !{i32 0, !"A", i8 0, i8 0, !9, i8 2, i32 1, i8 4, i32 0, i8 0, null}
-!16 = !{!17}
-!17 = !{i32 0, !"SV_Target", i8 9, i8 16, !9, i8 0, i32 1, i8 4, i32 0, i8 0, null}
--- a/tools/clang/test/HLSL/dxil_validation/multiStream2.ll
+++ b/tools/clang/test/HLSL/dxil_validation/multiStream2.ll
@ -1,280 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Multiple GS output streams are used but 'XXX' is not pointlist
-
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%"$Globals" = type { i32 }
-%struct.MyStruct = type { <4 x float>, <2 x float> }
-%struct.MyStruct2 = type { <3 x i32>, [3 x <4 x float>], <3 x i32> }
-%class.PointStream = type { %struct.MyStruct2 }
-%class.TriangleStream = type { %struct.MyStruct }
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
-
-@"\01?g1@@3HA" = global i32 0, align 4
-@"$Globals" = external constant %"$Globals"
-@dx.typevar.0 = external addrspace(1) constant %struct.MyStruct
-@dx.typevar.1 = external addrspace(1) constant %struct.MyStruct2
-@dx.typevar.2 = external addrspace(1) constant %"$Globals"
-@llvm.used = appending global [5 x i8*] [i8* bitcast (%"$Globals"* @"$Globals" to i8*), i8* bitcast (%"$Globals"* @"$Globals" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.MyStruct addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.MyStruct2 addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"$Globals" addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat([1 x <4 x float>]* nocapture readnone, %class.TriangleStream* nocapture readnone, <4 x float>* nocapture readnone, <2 x float>* nocapture readnone, %class.PointStream* nocapture readnone, <3 x i32>* nocapture readnone, [3 x <4 x float>]* nocapture readnone, <3 x i32>* nocapture readnone, %class.TriangleStream* nocapture readnone, <4 x float>* nocapture readnone, <2 x float>* nocapture readnone) #0 {
-entry:
-  %11 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 0, i1 false)
-  %b.1.0 = alloca [3 x float], align 4
-  %b.1.1 = alloca [3 x float], align 4
-  %b.1.2 = alloca [3 x float], align 4
-  %b.1.3 = alloca [3 x float], align 4
-  %12 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 0)
-  %13 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 0)
-  %14 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 0)
-  %15 = getelementptr [3 x float], [3 x float]* %b.1.0, i32 0, i32 0
-  %16 = getelementptr [3 x float], [3 x float]* %b.1.1, i32 0, i32 0
-  %17 = getelementptr [3 x float], [3 x float]* %b.1.2, i32 0, i32 0
-  %18 = getelementptr [3 x float], [3 x float]* %b.1.3, i32 0, i32 0
-  store float 0.000000e+00, float* %15, align 4
-  store float 0.000000e+00, float* %16, align 4
-  store float 0.000000e+00, float* %17, align 4
-  store float 0.000000e+00, float* %18, align 4
-  %19 = getelementptr [3 x float], [3 x float]* %b.1.0, i32 0, i32 1
-  %20 = getelementptr [3 x float], [3 x float]* %b.1.1, i32 0, i32 1
-  %21 = getelementptr [3 x float], [3 x float]* %b.1.2, i32 0, i32 1
-  %22 = getelementptr [3 x float], [3 x float]* %b.1.3, i32 0, i32 1
-  store float 0.000000e+00, float* %19, align 4
-  store float 0.000000e+00, float* %20, align 4
-  store float 0.000000e+00, float* %21, align 4
-  store float 0.000000e+00, float* %22, align 4
-  %23 = getelementptr [3 x float], [3 x float]* %b.1.0, i32 0, i32 2
-  %24 = getelementptr [3 x float], [3 x float]* %b.1.1, i32 0, i32 2
-  %25 = getelementptr [3 x float], [3 x float]* %b.1.2, i32 0, i32 2
-  %26 = getelementptr [3 x float], [3 x float]* %b.1.3, i32 0, i32 2
-  %conv = fptoui float %12 to i32
-  %27 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 %conv)
-  %28 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 %conv)
-  %29 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 %conv)
-  %30 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 %conv)
-  %conv3.i1 = fptoui float %13 to i32
-  %conv3.i2 = fptoui float %14 to i32
-  %conv5.i0 = fptoui float %27 to i32
-  %conv5.i1 = fptoui float %28 to i32
-  %conv5.i2 = fptoui float %29 to i32
-  %mul.i0 = fmul fast float %27, 4.400000e+01
-  %mul.i1 = fmul fast float %28, 4.400000e+01
-  %mul.i2 = fmul fast float %29, 4.400000e+01
-  %mul.i3 = fmul fast float %30, 4.400000e+01
-  store float %mul.i0, float* %23, align 4
-  store float %mul.i1, float* %24, align 4
-  store float %mul.i2, float* %25, align 4
-  store float %mul.i3, float* %26, align 4
-  %31 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 60, %dx.types.Handle %11, i32 0)
-  %32 = extractvalue %dx.types.CBufRet.i32 %31, 0
-  %tobool = icmp eq i32 %32, 0
-  br i1 %tobool, label %if.else, label %if.then
-
-if.then:                                          ; preds = %entry
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %27)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %28)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %29)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %30)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %12)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %13)
-  call void @dx.op.emitStream(i32 97, i8 0)
-  call void @dx.op.cutStream(i32 98, i8 0)
-  br label %if.end
-
-if.else:                                          ; preds = %entry
-  %33 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 0)
-  %conv8 = fptoui float %33 to i32
-  %34 = getelementptr inbounds [3 x float], [3 x float]* %b.1.0, i32 0, i32 0
-  %35 = load float, float* %34, align 4
-  %36 = getelementptr inbounds [3 x float], [3 x float]* %b.1.1, i32 0, i32 0
-  %37 = load float, float* %36, align 4
-  %38 = getelementptr inbounds [3 x float], [3 x float]* %b.1.2, i32 0, i32 0
-  %39 = load float, float* %38, align 4
-  %40 = getelementptr inbounds [3 x float], [3 x float]* %b.1.3, i32 0, i32 0
-  %41 = load float, float* %40, align 4
-  %42 = getelementptr inbounds [3 x float], [3 x float]* %b.1.0, i32 0, i32 1
-  %43 = load float, float* %42, align 4
-  %44 = getelementptr inbounds [3 x float], [3 x float]* %b.1.1, i32 0, i32 1
-  %45 = load float, float* %44, align 4
-  %46 = getelementptr inbounds [3 x float], [3 x float]* %b.1.2, i32 0, i32 1
-  %47 = load float, float* %46, align 4
-  %48 = getelementptr inbounds [3 x float], [3 x float]* %b.1.3, i32 0, i32 1
-  %49 = load float, float* %48, align 4
-  %50 = getelementptr inbounds [3 x float], [3 x float]* %b.1.0, i32 0, i32 2
-  %51 = load float, float* %50, align 4
-  %52 = getelementptr inbounds [3 x float], [3 x float]* %b.1.1, i32 0, i32 2
-  %53 = load float, float* %52, align 4
-  %54 = getelementptr inbounds [3 x float], [3 x float]* %b.1.2, i32 0, i32 2
-  %55 = load float, float* %54, align 4
-  %56 = getelementptr inbounds [3 x float], [3 x float]* %b.1.3, i32 0, i32 2
-  %57 = load float, float* %56, align 4
-  call void @dx.op.storeOutput.i32(i32 5, i32 2, i32 0, i8 0, i32 %conv8)
-  call void @dx.op.storeOutput.i32(i32 5, i32 2, i32 0, i8 1, i32 %conv3.i1)
-  call void @dx.op.storeOutput.i32(i32 5, i32 2, i32 0, i8 2, i32 %conv3.i2)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 0, i8 0, float %35)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 0, i8 1, float %37)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 0, i8 2, float %39)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 0, i8 3, float %41)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 1, i8 0, float %43)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 1, i8 1, float %45)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 1, i8 2, float %47)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 1, i8 3, float %49)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 2, i8 0, float %51)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 2, i8 1, float %53)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 2, i8 2, float %55)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 2, i8 3, float %57)
-  call void @dx.op.storeOutput.i32(i32 5, i32 4, i32 0, i8 0, i32 %conv5.i0)
-  call void @dx.op.storeOutput.i32(i32 5, i32 4, i32 0, i8 1, i32 %conv5.i1)
-  call void @dx.op.storeOutput.i32(i32 5, i32 4, i32 0, i8 2, i32 %conv5.i2)
-  call void @dx.op.emitStream(i32 97, i8 1)
-  call void @dx.op.cutStream(i32 98, i8 1)
-  br label %if.end
-
-if.end:                                           ; preds = %if.else, %if.then
-  %b.0.0.i0 = phi i32 [ %conv, %if.then ], [ %conv8, %if.else ]
-  %58 = getelementptr inbounds [3 x float], [3 x float]* %b.1.0, i32 0, i32 0
-  %59 = load float, float* %58, align 4
-  %60 = getelementptr inbounds [3 x float], [3 x float]* %b.1.1, i32 0, i32 0
-  %61 = load float, float* %60, align 4
-  %62 = getelementptr inbounds [3 x float], [3 x float]* %b.1.2, i32 0, i32 0
-  %63 = load float, float* %62, align 4
-  %64 = getelementptr inbounds [3 x float], [3 x float]* %b.1.3, i32 0, i32 0
-  %65 = load float, float* %64, align 4
-  %66 = getelementptr inbounds [3 x float], [3 x float]* %b.1.0, i32 0, i32 1
-  %67 = load float, float* %66, align 4
-  %68 = getelementptr inbounds [3 x float], [3 x float]* %b.1.1, i32 0, i32 1
-  %69 = load float, float* %68, align 4
-  %70 = getelementptr inbounds [3 x float], [3 x float]* %b.1.2, i32 0, i32 1
-  %71 = load float, float* %70, align 4
-  %72 = getelementptr inbounds [3 x float], [3 x float]* %b.1.3, i32 0, i32 1
-  %73 = load float, float* %72, align 4
-  %74 = getelementptr inbounds [3 x float], [3 x float]* %b.1.0, i32 0, i32 2
-  %75 = load float, float* %74, align 4
-  %76 = getelementptr inbounds [3 x float], [3 x float]* %b.1.1, i32 0, i32 2
-  %77 = load float, float* %76, align 4
-  %78 = getelementptr inbounds [3 x float], [3 x float]* %b.1.2, i32 0, i32 2
-  %79 = load float, float* %78, align 4
-  %80 = getelementptr inbounds [3 x float], [3 x float]* %b.1.3, i32 0, i32 2
-  %81 = load float, float* %80, align 4
-  call void @dx.op.storeOutput.i32(i32 5, i32 2, i32 0, i8 0, i32 %b.0.0.i0)
-  call void @dx.op.storeOutput.i32(i32 5, i32 2, i32 0, i8 1, i32 %conv3.i1)
-  call void @dx.op.storeOutput.i32(i32 5, i32 2, i32 0, i8 2, i32 %conv3.i2)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 0, i8 0, float %59)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 0, i8 1, float %61)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 0, i8 2, float %63)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 0, i8 3, float %65)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 1, i8 0, float %67)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 1, i8 1, float %69)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 1, i8 2, float %71)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 1, i8 3, float %73)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 2, i8 0, float %75)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 2, i8 1, float %77)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 2, i8 2, float %79)
-  call void @dx.op.storeOutput.f32(i32 5, i32 3, i32 2, i8 3, float %81)
-  call void @dx.op.storeOutput.i32(i32 5, i32 4, i32 0, i8 0, i32 %conv5.i0)
-  call void @dx.op.storeOutput.i32(i32 5, i32 4, i32 0, i8 1, i32 %conv5.i1)
-  call void @dx.op.storeOutput.i32(i32 5, i32 4, i32 0, i8 2, i32 %conv5.i2)
-  call void @dx.op.emitStream(i32 97, i8 1)
-  call void @dx.op.cutStream(i32 98, i8 1)
-  call void @dx.op.storeOutput.f32(i32 5, i32 5, i32 0, i8 0, float %27)
-  call void @dx.op.storeOutput.f32(i32 5, i32 5, i32 0, i8 1, float %28)
-  call void @dx.op.storeOutput.f32(i32 5, i32 5, i32 0, i8 2, float %29)
-  call void @dx.op.storeOutput.f32(i32 5, i32 5, i32 0, i8 3, float %30)
-  call void @dx.op.storeOutput.f32(i32 5, i32 6, i32 0, i8 0, float %12)
-  call void @dx.op.storeOutput.f32(i32 5, i32 6, i32 0, i8 1, float %13)
-  call void @dx.op.emitStream(i32 97, i8 2)
-  call void @dx.op.cutStream(i32 98, i8 2)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.i32(i32, i32, i32, i8, i32) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.cutStream(i32, i8) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.emitStream(i32, i8) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!6, !16}
-!dx.entryPoints = !{!39}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"gs", i32 6, i32 0}
-!3 = !{null, null, !4, null}
-!4 = !{!5}
-!5 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 0, i32 1, i32 4, null}
-!6 = !{i32 0, %struct.MyStruct addrspace(1)* @dx.typevar.0, !7, %struct.MyStruct2 addrspace(1)* @dx.typevar.1, !10, %"$Globals" addrspace(1)* @dx.typevar.2, !14}
-!7 = !{i32 24, !8, !9}
-!8 = !{i32 3, i32 0, i32 4, !"SV_Position", i32 6, !"pos", i32 7, i32 9}
-!9 = !{i32 3, i32 16, i32 4, !"AAA", i32 6, !"a", i32 7, i32 9}
-!10 = !{i32 76, !11, !12, !13}
-!11 = !{i32 3, i32 0, i32 4, !"XXX", i32 6, !"X", i32 7, i32 5}
-!12 = !{i32 3, i32 16, i32 4, !"PPP", i32 6, !"p", i32 7, i32 9}
-!13 = !{i32 3, i32 64, i32 4, !"YYY", i32 6, !"Y", i32 7, i32 5}
-!14 = !{i32 0, !15}
-!15 = !{i32 3, i32 0, i32 6, !"g1", i32 7, i32 4}
-!16 = !{i32 1, void ([1 x <4 x float>]*, %class.TriangleStream*, <4 x float>*, <2 x float>*, %class.PointStream*, <3 x i32>*, [3 x <4 x float>]*, <3 x i32>*, %class.TriangleStream*, <4 x float>*, <2 x float>*)* @main.flat, !17}
-!17 = !{!18, !20, !23, !24, !26, !28, !29, !31, !34, !36, !37, !38}
-!18 = !{i32 0, !19, !19}
-!19 = !{}
-!20 = !{i32 0, !21, !22}
-!21 = !{i32 4, !"COORD", i32 7, i32 9}
-!22 = !{i32 0}
-!23 = !{i32 5, !19, !19}
-!24 = !{i32 5, !25, !22}
-!25 = !{i32 4, !"SV_Position", i32 7, i32 9}
-!26 = !{i32 5, !27, !22}
-!27 = !{i32 4, !"AAA", i32 7, i32 9}
-!28 = !{i32 6, !19, !19}
-!29 = !{i32 6, !30, !22}
-!30 = !{i32 4, !"XXX", i32 7, i32 5}
-!31 = !{i32 6, !32, !33}
-!32 = !{i32 4, !"PPP", i32 7, i32 9}
-!33 = !{i32 0, i32 1, i32 2}
-!34 = !{i32 6, !35, !22}
-!35 = !{i32 4, !"YYY", i32 7, i32 5}
-!36 = !{i32 7, !19, !19}
-!37 = !{i32 7, !25, !22}
-!38 = !{i32 7, !27, !22}
-!39 = !{void ([1 x <4 x float>]*, %class.TriangleStream*, <4 x float>*, <2 x float>*, %class.PointStream*, <3 x i32>*, [3 x <4 x float>]*, <3 x i32>*, %class.TriangleStream*, <4 x float>*, <2 x float>*)* @main.flat, !"", !40, !3, !53}
-!40 = !{!41, !43, null}
-!41 = !{!42}
-!42 = !{i32 0, !"COORD", i8 9, i8 0, !22, i8 2, i32 1, i8 4, i32 0, i8 0, null}
-!43 = !{!44, !45, !46, !48, !49, !50, !52}
-!44 = !{i32 0, !"SV_Position", i8 9, i8 3, !22, i8 4, i32 1, i8 4, i32 0, i8 0, null}
-!45 = !{i32 1, !"AAA", i8 9, i8 0, !22, i8 2, i32 1, i8 2, i32 1, i8 0, null}
-!46 = !{i32 2, !"XXX", i8 5, i8 0, !22, i8 1, i32 1, i8 3, i32 0, i8 0, !47}
-!47 = !{i32 0, i32 1}
-!48 = !{i32 3, !"PPP", i8 9, i8 0, !33, i8 2, i32 3, i8 4, i32 1, i8 0, !47}
-!49 = !{i32 4, !"YYY", i8 5, i8 0, !22, i8 1, i32 1, i8 3, i32 4, i8 0, !47}
-!50 = !{i32 5, !"SV_Position", i8 9, i8 3, !22, i8 4, i32 1, i8 4, i32 0, i8 0, !51}
-!51 = !{i32 0, i32 2}
-!52 = !{i32 6, !"AAA", i8 9, i8 0, !22, i8 2, i32 1, i8 2, i32 1, i8 0, !51}
-!53 = !{i32 1, !54}
-!54 = !{i32 1, i32 12, i32 7, i32 4, i32 1}
--- a/tools/clang/test/HLSL/dxil_validation/phiTGSM.ll
+++ b/tools/clang/test/HLSL/dxil_validation/phiTGSM.ll
@ -1,117 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-; CHECK: TGSM pointers must originate from an unambiguous TGSM global variable
-
-;
-; Input signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no %s
-;
-; Output signature:
-;
-; Name                 Index   Mask Register SysValue  Format   Used
-; -------------------- ----- ------ -------- -------- ------- ------
-; no %s
-;
-; Pipeline Runtime Information:
-;
-;
-;
-; Buffer Definitions:
-;
-; cbuffer $Globals
-; {
-;
-;   struct $Globals
-;   {
-;
-;       float t;                                      ; Offset:    0
-;
-;   } $Globals                                        ; Offset:    0 Size:     4
-;
-; }
-;
-;
-; Resource Bindings:
-;
-; Name                                 Type  Format         Dim      ID      HLSL Bind  Count
-; ------------------------------ ---------- ------- ----------- ------- -------------- ------
-; $Globals                          cbuffer      NA          NA     CB0            cb0     1
-;
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%"$Globals" = type { float }
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.f32 = type { float, float, float, float }
-
-@"\01?g_Data@@3PAIA" = addrspace(3) global [32 x i32] zeroinitializer, align 4
-@"\01?g_Data2@@3PAIA" = addrspace(3) global [32 x i32] zeroinitializer, align 4
-@"\01?t@@3MA" = global float 0.000000e+00, align 4
-@dx.typevar.0 = external addrspace(1) constant %"$Globals"
-@llvm.used = appending global [1 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%"$Globals" addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: alwaysinline nounwind
-define void @main(i32 %idx) #0 {
-entry:
-  %0 = call i32 @dx.op.threadId.i32(i32 93, i32 0)  ; ThreadId(component)
-  %1 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %2 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %1, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %3 = extractvalue %dx.types.CBufRet.f32 %2, 0
-  %cmp = fcmp fast ogt float %3, 1.000000e+00
-  br i1 %cmp, label %if.then, label %if.else
-
-if.then:                                          ; preds = %entry
-  %arrayidx = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @"\01?g_Data@@3PAIA", i32 0, i32 %0
-  br label %if.end
-
-if.else:                                          ; preds = %entry
-  %arrayidx2 = getelementptr inbounds [32 x i32], [32 x i32] addrspace(3)* @"\01?g_Data2@@3PAIA", i32 0, i32 %0
-  br label %if.end
-
-if.end:                                           ; preds = %if.else, %if.then
-  %arrayPhi = phi i32 addrspace(3)* [ %arrayidx, %if.then ], [ %arrayidx2, %if.else ]
-  %4 = atomicrmw add i32 addrspace(3)* %arrayPhi, i32 1 seq_cst
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.threadId.i32(i32, i32) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #1
-
-attributes #0 = { alwaysinline nounwind "disable-tail-calls"="false" "less-precise-fpmad"="false" "no-frame-pointer-elim"="false" "no-infs-fp-math"="false" "no-nans-fp-math"="false" "no-realign-stack" "stack-protector-buffer-size"="0" "unsafe-fp-math"="false" "use-soft-float"="false" }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!6, !9}
-!dx.entryPoints = !{!16}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 6}
-!2 = !{!"cs", i32 6, i32 0}
-!3 = !{null, null, !4, null}
-!4 = !{!5}
-!5 = !{i32 0, %"$Globals"* undef, !"$Globals", i32 0, i32 0, i32 1, i32 4, null}
-!6 = !{i32 0, %"$Globals" addrspace(1)* @dx.typevar.0, !7}
-!7 = !{i32 0, !8}
-!8 = !{i32 3, i32 0, i32 6, !"t", i32 7, i32 9}
-!9 = !{i32 1, void (i32)* @main, !10}
-!10 = !{!11, !13}
-!11 = !{i32 1, !12, !12}
-!12 = !{}
-!13 = !{i32 0, !14, !15}
-!14 = !{i32 4, !"SV_DispatchThreadId", i32 7, i32 5}
-!15 = !{i32 0}
-!16 = !{void (i32)* @main, !"", null, !3, !17}
-!17 = !{i32 4, !18}
-!18 = !{i32 64, i32 1, i32 1}
-
--- a/tools/clang/test/HLSL/dxil_validation/reducible.ll
+++ b/tools/clang/test/HLSL/dxil_validation/reducible.ll
@ -1,176 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-; CHECK: Execution flow must be reducible
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%struct.Interpolants2 = type { <4 x float>, <4 x float>, <4 x float> }
-%struct.Inh = type { %struct.Interpolants, float }
-%struct.Interpolants = type { <4 x float>, <4 x float> }
-%"$Globals" = type { %struct.Interpolants2, %struct.Inh, i32, <4 x i32> }
-%struct.Vertex = type { <4 x float>, <4 x float> }
-%dx.types.Handle = type { i8* }
-%dx.types.CBufRet.i32 = type { i32, i32, i32, i32 }
-%dx.types.CBufRet.f32 = type { float, float, float, float }
-
-@"\01?c2@@3UInterpolants2@@A" = global %struct.Interpolants2 zeroinitializer, align 4
-@"\01?c@@3UInh@@A" = global %struct.Inh zeroinitializer, align 4
-@"\01?i@@3HA" = global i32 0, align 4
-@"\01?i4@@3V?$vector@I$03@@A" = global <4 x i32> zeroinitializer, align 4
-@"$Globals" = external constant %"$Globals"
-@dx.typevar.0 = external addrspace(1) constant %struct.Interpolants2
-@dx.typevar.1 = external addrspace(1) constant %struct.Inh
-@dx.typevar.2 = external addrspace(1) constant %struct.Interpolants
-@dx.typevar.3 = external addrspace(1) constant %struct.Vertex
-@dx.typevar.4 = external addrspace(1) constant %"$Globals"
-@llvm.used = appending global [7 x i8*] [i8* bitcast (%"$Globals"* @"$Globals" to i8*), i8* bitcast (%"$Globals"* @"$Globals" to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.Interpolants2 addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.Inh addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.Interpolants addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.Vertex addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"$Globals" addrspace(1)* @dx.typevar.4 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat(<4 x float>* nocapture readnone, <4 x float>* nocapture readnone, <4 x float>* nocapture readnone, <4 x float>* nocapture readnone) #0 {
-entry:
-  %4 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %5 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 60, %dx.types.Handle %4, i32 5)  ; CBufferLoadLegacy(handle,regIndex)
-  %6 = extractvalue %dx.types.CBufRet.i32 %5, 1
-  %cmp = icmp sgt i32 %6, 1
-  br i1 %cmp, label %if.then, label %if.else
-
-if.then:                                          ; preds = %entry
-  %7 = call %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32 60, %dx.types.Handle %4, i32 6)  ; CBufferLoadLegacy(handle,regIndex)
-  %8 = extractvalue %dx.types.CBufRet.i32 %7, 2
-  %9 = uitofp i32 %8 to float
-  br label %if.then.5
-
-if.else:                                          ; preds = %entry
-  %cmp2 = icmp sgt i32 %6, 0
-  br i1 %cmp2, label %if.then.5, label %if.else.6
-
-if.then.5:                                        ; preds = %if.else
-  %10 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %4, i32 3)  ; CBufferLoadLegacy(handle,regIndex)
-  %11 = extractvalue %dx.types.CBufRet.f32 %10, 0
-  %12 = extractvalue %dx.types.CBufRet.f32 %10, 1
-  %13 = extractvalue %dx.types.CBufRet.f32 %10, 2
-  %14 = extractvalue %dx.types.CBufRet.f32 %10, 3
-  %15 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %4, i32 4)  ; CBufferLoadLegacy(handle,regIndex)
-  %16 = extractvalue %dx.types.CBufRet.f32 %15, 0
-  %17 = extractvalue %dx.types.CBufRet.f32 %15, 1
-  %18 = extractvalue %dx.types.CBufRet.f32 %15, 2
-  %19 = extractvalue %dx.types.CBufRet.f32 %15, 3
-  %cmp12 = icmp sgt i32 %6, 1
-  br i1 %cmp2, label %if.then, label %if.else.6  
-
-if.else.6:                                        ; preds = %if.else
-  %cmp7 = icmp sgt i32 %6, -1
-  br i1 %cmp7, label %if.then.10, label %if.end.13
-
-if.then.10:                                       ; preds = %if.else.6
-  %20 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %4, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %21 = extractvalue %dx.types.CBufRet.f32 %20, 0
-  %22 = extractvalue %dx.types.CBufRet.f32 %20, 1
-  %23 = extractvalue %dx.types.CBufRet.f32 %20, 2
-  %24 = extractvalue %dx.types.CBufRet.f32 %20, 3
-  %25 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %4, i32 1)  ; CBufferLoadLegacy(handle,regIndex)
-  %26 = extractvalue %dx.types.CBufRet.f32 %25, 0
-  %27 = extractvalue %dx.types.CBufRet.f32 %25, 1
-  %28 = extractvalue %dx.types.CBufRet.f32 %25, 2
-  %29 = extractvalue %dx.types.CBufRet.f32 %25, 3
-  br label %return
-
-if.end.13:                                        ; preds = %if.else.6
-  %30 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %31 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %32 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %33 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %34 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %35 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %36 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 2, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %37 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 3, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  br label %return
-
-return:                                           ; preds = %if.end.13, %if.then.10, %if.then.5, %if.then
-  %retval.1.0.i0 = phi float [ %9, %if.then ], [ %16, %if.then.5 ], [ %26, %if.then.10 ], [ %34, %if.end.13 ]
-  %retval.1.0.i1 = phi float [ %9, %if.then ], [ %17, %if.then.5 ], [ %27, %if.then.10 ], [ %35, %if.end.13 ]
-  %retval.1.0.i2 = phi float [ %9, %if.then ], [ %18, %if.then.5 ], [ %28, %if.then.10 ], [ %36, %if.end.13 ]
-  %retval.1.0.i3 = phi float [ %9, %if.then ], [ %19, %if.then.5 ], [ %29, %if.then.10 ], [ %37, %if.end.13 ]
-  %retval.0.0.i0 = phi float [ %9, %if.then ], [ %11, %if.then.5 ], [ %21, %if.then.10 ], [ %30, %if.end.13 ]
-  %retval.0.0.i1 = phi float [ %9, %if.then ], [ %12, %if.then.5 ], [ %22, %if.then.10 ], [ %31, %if.end.13 ]
-  %retval.0.0.i2 = phi float [ %9, %if.then ], [ %13, %if.then.5 ], [ %23, %if.then.10 ], [ %32, %if.end.13 ]
-  %retval.0.0.i3 = phi float [ %9, %if.then ], [ %14, %if.then.5 ], [ %24, %if.then.10 ], [ %33, %if.end.13 ]
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %retval.0.0.i0)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %retval.0.0.i1)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %retval.0.0.i2)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %retval.0.0.i3)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 0, float %retval.1.0.i0)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 1, float %retval.1.0.i1)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 2, float %retval.1.0.i2)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 1, i32 0, i8 3, float %retval.1.0.i3)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.CBufRet.i32 @dx.op.cbufferLoadLegacy.i32(i32, %dx.types.Handle, i32) #1
-
-; Function Attrs: nounwind readnone
-declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!6, !22}
-!dx.entryPoints = !{!34}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"vs", i32 6, i32 0}
-!3 = !{null, null, !4, null}
-!4 = !{!5}
-!5 = !{i32 0, %"$Globals"* @"$Globals", !"$Globals", i32 0, i32 0, i32 1, i32 112, null}
-!6 = !{i32 0, %struct.Interpolants2 addrspace(1)* @dx.typevar.0, !7, %struct.Inh addrspace(1)* @dx.typevar.1, !11, %struct.Interpolants addrspace(1)* @dx.typevar.2, !14, %struct.Vertex addrspace(1)* @dx.typevar.3, !15, %"$Globals" addrspace(1)* @dx.typevar.4, !17}
-!7 = !{i32 48, !8, !9, !10}
-!8 = !{i32 3, i32 0, i32 4, !"SV_POSITION0", i32 6, !"position", i32 7, i32 9}
-!9 = !{i32 3, i32 16, i32 4, !"COLOR0", i32 6, !"color", i32 7, i32 9}
-!10 = !{i32 3, i32 32, i32 4, !"COLOR2", i32 6, !"color2", i32 7, i32 9}
-!11 = !{i32 36, !12, !13}
-!12 = !{i32 3, i32 0, i32 6, !"Interpolants"}
-!13 = !{i32 3, i32 32, i32 6, !"a", i32 7, i32 9}
-!14 = !{i32 32, !8, !9}
-!15 = !{i32 32, !16, !9}
-!16 = !{i32 3, i32 0, i32 4, !"POSITION0", i32 6, !"position", i32 7, i32 9}
-!17 = !{i32 0, !18, !19, !20, !21}
-!18 = !{i32 3, i32 0, i32 6, !"c2"}
-!19 = !{i32 3, i32 48, i32 6, !"c"}
-!20 = !{i32 3, i32 84, i32 6, !"i", i32 7, i32 4}
-!21 = !{i32 3, i32 96, i32 6, !"i4", i32 7, i32 5}
-!22 = !{i32 1, void (<4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*)* @main.flat, !23}
-!23 = !{!24, !26, !29, !31, !33}
-!24 = !{i32 0, !25, !25}
-!25 = !{}
-!26 = !{i32 0, !27, !28}
-!27 = !{i32 4, !"POSITION0", i32 7, i32 9}
-!28 = !{i32 0}
-!29 = !{i32 0, !30, !28}
-!30 = !{i32 4, !"COLOR0", i32 7, i32 9}
-!31 = !{i32 1, !32, !28}
-!32 = !{i32 4, !"SV_POSITION0", i32 7, i32 9}
-!33 = !{i32 1, !30, !28}
-!34 = !{void (<4 x float>*, <4 x float>*, <4 x float>*, <4 x float>*)* @main.flat, !"", !35, !3, null}
-!35 = !{!36, !39, null}
-!36 = !{!37, !38}
-!37 = !{i32 0, !"POSITION", i8 9, i8 3, !28, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-!38 = !{i32 1, !"COLOR", i8 9, i8 0, !28, i8 0, i32 1, i8 4, i32 1, i8 0, null}
-!39 = !{!40, !41}
-!40 = !{i32 0, !"SV_Position", i8 9, i8 3, !28, i8 4, i32 1, i8 4, i32 0, i8 0, null}
-!41 = !{i32 1, !"COLOR", i8 9, i8 0, !28, i8 2, i32 1, i8 4, i32 1, i8 0, null}
-
--- a/tools/clang/test/HLSL/dxil_validation/sampleBias.ll
+++ b/tools/clang/test/HLSL/dxil_validation/sampleBias.ll
@ -1,153 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: bias amount for sample_b must be in the range [-16.000000,15.990000], but 18.000000 was specified as an immediate
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%class.Texture2D = type { <4 x float>, %"class.Texture2D<vector<float, 4> >::mips_type" }
-%"class.Texture2D<vector<float, 4> >::mips_type" = type { i32 }
-%"$Globals" = type { float }
-%dx.types.Handle = type { i8* }
-%dx.types.ResRet.f32 = type { float, float, float, float, i32 }
-%dx.types.CBufRet.f32 = type { float, float, float, float }
-%struct.SamplerState = type { i32 }
-
-@"\01?bias@@3MA" = global float 0.000000e+00, align 4
-@dx.typevar.0 = external addrspace(1) constant %class.Texture2D
-@dx.typevar.1 = external addrspace(1) constant %"class.Texture2D<vector<float, 4> >::mips_type"
-@dx.typevar.2 = external addrspace(1) constant %"$Globals"
-@llvm.used = appending global [3 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%class.Texture2D addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"$Globals" addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat(<2 x float>, <4 x float>* nocapture readnone) #0 {
-  %text1_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 0, i32 0, i32 3, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %samp1_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 0, i32 5, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %3 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %4 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %5 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %6 = call %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32 62, %dx.types.Handle %text1_texture_2d, %dx.types.Handle %samp1_sampler, float %3, float %4, float undef, float undef, i32 undef, i32 undef, i32 undef, float 1.8000000e01, float undef)  ; SampleBias(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,bias,clamp)
-  %7 = extractvalue %dx.types.ResRet.f32 %6, 0
-  %8 = extractvalue %dx.types.ResRet.f32 %6, 1
-  %9 = extractvalue %dx.types.ResRet.f32 %6, 2
-  %10 = extractvalue %dx.types.ResRet.f32 %6, 3
-  %11 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %5, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %12 = extractvalue %dx.types.CBufRet.f32 %11, 0
-  %13 = call %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32 62, %dx.types.Handle %text1_texture_2d, %dx.types.Handle %samp1_sampler, float %3, float %4, float undef, float undef, i32 -5, i32 7, i32 undef, float %12, float undef)  ; SampleBias(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,bias,clamp)
-  %14 = extractvalue %dx.types.ResRet.f32 %13, 0
-  %15 = extractvalue %dx.types.ResRet.f32 %13, 1
-  %16 = extractvalue %dx.types.ResRet.f32 %13, 2
-  %17 = extractvalue %dx.types.ResRet.f32 %13, 3
-  %.i0 = fadd fast float %14, %7
-  %.i1 = fadd fast float %15, %8
-  %.i2 = fadd fast float %16, %9
-  %.i3 = fadd fast float %17, %10
-  %18 = call %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32 62, %dx.types.Handle %text1_texture_2d, %dx.types.Handle %samp1_sampler, float %3, float %4, float undef, float undef, i32 -4, i32 1, i32 undef, float %12, float 1.8000000e01)  ; SampleBias(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,bias,clamp)
-  %19 = extractvalue %dx.types.ResRet.f32 %18, 0
-  %20 = extractvalue %dx.types.ResRet.f32 %18, 1
-  %21 = extractvalue %dx.types.ResRet.f32 %18, 2
-  %22 = extractvalue %dx.types.ResRet.f32 %18, 3
-  %.i01 = fadd fast float %.i0, %19
-  %.i12 = fadd fast float %.i1, %20
-  %.i23 = fadd fast float %.i2, %21
-  %.i34 = fadd fast float %.i3, %22
-  %23 = call %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32 62, %dx.types.Handle %text1_texture_2d, %dx.types.Handle %samp1_sampler, float %3, float %4, float undef, float undef, i32 -3, i32 2, i32 undef, float %12, float 0.000000e+00)  ; SampleBias(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,bias,clamp)
-  %24 = extractvalue %dx.types.ResRet.f32 %23, 0
-  %25 = extractvalue %dx.types.ResRet.f32 %23, 1
-  %26 = extractvalue %dx.types.ResRet.f32 %23, 2
-  %27 = extractvalue %dx.types.ResRet.f32 %23, 3
-  %28 = extractvalue %dx.types.ResRet.f32 %23, 4
-  %.i05 = fadd fast float %.i01, %24
-  %.i16 = fadd fast float %.i12, %25
-  %.i27 = fadd fast float %.i23, %26
-  %.i38 = fadd fast float %.i34, %27
-  %29 = uitofp i32 %28 to float
-  %.i09 = fadd fast float %.i05, %29
-  %.i110 = fadd fast float %.i16, %29
-  %.i211 = fadd fast float %.i27, %29
-  %.i312 = fadd fast float %.i38, %29
-  %30 = call %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32 62, %dx.types.Handle %text1_texture_2d, %dx.types.Handle %samp1_sampler, float %3, float %4, float undef, float undef, i32 -3, i32 2, i32 undef, float %12, float %3)  ; SampleBias(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,bias,clamp)
-  %31 = extractvalue %dx.types.ResRet.f32 %30, 0
-  %32 = extractvalue %dx.types.ResRet.f32 %30, 1
-  %33 = extractvalue %dx.types.ResRet.f32 %30, 2
-  %34 = extractvalue %dx.types.ResRet.f32 %30, 3
-  %35 = extractvalue %dx.types.ResRet.f32 %30, 4
-  %.i013 = fadd fast float %.i09, %31
-  %.i114 = fadd fast float %.i110, %32
-  %.i215 = fadd fast float %.i211, %33
-  %.i316 = fadd fast float %.i312, %34
-  %36 = uitofp i32 %35 to float
-  %.i017 = fadd fast float %.i013, %36
-  %.i118 = fadd fast float %.i114, %36
-  %.i219 = fadd fast float %.i215, %36
-  %.i320 = fadd fast float %.i316, %36
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %.i017)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %.i118)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %.i219)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %.i320)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #2
-
-; Function Attrs: nounwind readonly
-declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #2
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.sampleBias.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, float, i32, i32, i32, float, float) #2
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
-
-!llvm.ident = !{!0}
-!dx.valver = !{!1}
-!dx.version = !{!2}
-!dx.shaderModel = !{!3}
-!dx.resources = !{!4}
-!dx.typeAnnotations = !{!12, !20}
-!dx.entryPoints = !{!29}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 1, i32 0}
-!2 = !{i32 0, i32 7}
-!3 = !{!"ps", i32 6, i32 0}
-!4 = !{!5, null, !8, !10}
-!5 = !{!6}
-!6 = !{i32 0, %class.Texture2D* undef, !"text1", i32 0, i32 3, i32 1, i32 2, i32 0, !7}
-!7 = !{i32 0, i32 9}
-!8 = !{!9}
-!9 = !{i32 0, %"$Globals"* undef, !"$Globals", i32 0, i32 0, i32 1, i32 4, null}
-!10 = !{!11}
-!11 = !{i32 0, %struct.SamplerState* undef, !"samp1", i32 0, i32 5, i32 1, i32 0, null}
-!12 = !{i32 0, %class.Texture2D addrspace(1)* @dx.typevar.0, !13, %"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1, !16, %"$Globals" addrspace(1)* @dx.typevar.2, !18}
-!13 = !{i32 20, !14, !15}
-!14 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!15 = !{i32 3, i32 16, i32 6, !"mips"}
-!16 = !{i32 4, !17}
-!17 = !{i32 3, i32 0, i32 6, !"handle", i32 7, i32 5}
-!18 = !{i32 0, !19}
-!19 = !{i32 3, i32 0, i32 6, !"bias", i32 7, i32 9}
-!20 = !{i32 1, void (<2 x float>, <4 x float>*)* @main.flat, !21}
-!21 = !{!22, !24, !27}
-!22 = !{i32 0, !23, !23}
-!23 = !{}
-!24 = !{i32 0, !25, !26}
-!25 = !{i32 4, !"A", i32 7, i32 9}
-!26 = !{i32 0}
-!27 = !{i32 1, !28, !26}
-!28 = !{i32 4, !"SV_Target", i32 7, i32 9}
-!29 = !{void (<2 x float>, <4 x float>*)* @main.flat, !"main", !30, !4, null}
-!30 = !{!31, !33, null}
-!31 = !{!32}
-!32 = !{i32 0, !"A", i8 9, i8 0, !26, i8 2, i32 1, i8 2, i32 0, i8 0, null}
-!33 = !{!34}
-!34 = !{i32 0, !"SV_Target", i8 9, i8 16, !26, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-
--- a/tools/clang/test/HLSL/dxil_validation/samplerKind.ll
+++ b/tools/clang/test/HLSL/dxil_validation/samplerKind.ll
@ -1,219 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Invalid sampler mode on sampler 'g_samLinear'
-; CHECK: Invalid sampler mode on sampler 'g_samLinearC'
-; CHECK: Type 'st' is a struct type but is used as a parameter in function 'main.flat'
-; CHECK: sample_c_*/gather_c instructions require sampler declared in comparison mode
-; CHECK: sample, lod and gather should on srv resource
-; CHECK: lod instruction requires sampler declared in default mode
-; CHECK: sample, lod and gather should on srv resource
-; CHECK: sample/_l/_d/_cl_s/gather instruction requires sampler declared in default mode
-; CHECK: sample, lod and gather should on srv resource
-; CHECK: sample/_l/_d/_cl_s/gather instruction requires sampler declared in default mode
-; CHECK: sample, lod and gather should on srv resource
-; CHECK: sample_c_*/gather_c instructions require sampler declared in comparison mode
-; CHECK: sample, lod and gather should on srv resource
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%class.Texture2D = type { <4 x float>, %"class.Texture2D<vector<float, 4> >::mips_type" }
-%"class.Texture2D<vector<float, 4> >::mips_type" = type { i32 }
-%class.RWTexture2D = type { <4 x float> }
-%struct.PS_INPUT = type { <3 x float>, <2 x float> }
-%"$Globals" = type { float }
-%cbPerFrame = type { <3 x float>, float }
-%dx.types.Handle = type { i8* }
-%dx.types.ResRet.f32 = type { float, float, float, float, i32 }
-%dx.types.CBufRet.f32 = type { float, float, float, float }
-%struct.SamplerState = type { i32 }
-%struct.SamplerComparisonState = type { i32 }
-
-@"\01?cmpVal@@3MA" = global float 0.000000e+00, align 4
-@dx.typevar.0 = external addrspace(1) constant %class.Texture2D
-@dx.typevar.1 = external addrspace(1) constant %"class.Texture2D<vector<float, 4> >::mips_type"
-@dx.typevar.2 = external addrspace(1) constant %class.RWTexture2D
-@dx.typevar.3 = external addrspace(1) constant %struct.PS_INPUT
-@dx.typevar.4 = external addrspace(1) constant %"$Globals"
-@dx.typevar.5 = external addrspace(1) constant %cbPerFrame
-@llvm.used = appending global [6 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%class.Texture2D addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%class.RWTexture2D addrspace(1)* @dx.typevar.2 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%struct.PS_INPUT addrspace(1)* @dx.typevar.3 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%"$Globals" addrspace(1)* @dx.typevar.4 to i8 addrspace(1)*) to i8*), i8* addrspacecast (i8 addrspace(1)* bitcast (%cbPerFrame addrspace(1)* @dx.typevar.5 to i8 addrspace(1)*) to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat(<3 x float>* nocapture readnone, <2 x float>* nocapture readnone, <4 x float>* nocapture readnone, %struct.PS_INPUT * %st) #0 {
-entry:
-  %uav1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 3, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %g_txDiffuse_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 3, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %g_samLinearC_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 1, i32 1, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %g_samLinear_sampler = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 3, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %3 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 1, i32 1, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %4 = call %dx.types.Handle @dx.op.createHandle(i32 58, i8 2, i32 0, i32 0, i1 false)  ; CreateHandle(resourceClass,rangeId,index,nonUniformIndex)
-  %5 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %6 = call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %7 = call %dx.types.ResRet.f32 @dx.op.sample.f32(i32 61, %dx.types.Handle %g_txDiffuse_texture_2d, %dx.types.Handle %g_samLinear_sampler, float %5, float %6, float undef, float undef, i32 undef, i32 undef, i32 undef, float undef)  ; Sample(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,clamp)
-  %8 = extractvalue %dx.types.ResRet.f32 %7, 0
-  %9 = extractvalue %dx.types.ResRet.f32 %7, 1
-  %10 = extractvalue %dx.types.ResRet.f32 %7, 2
-  %11 = extractvalue %dx.types.ResRet.f32 %7, 3
-  %12 = call float @dx.op.calculateLOD.f32(i32 84, %dx.types.Handle %g_txDiffuse_texture_2d, %dx.types.Handle %g_samLinear_sampler, float %5, float %6, float undef, i1 true)  ; CalculateLOD(handle,sampler,coord0,coord1,coord2,clamped)
-  %add.i0 = fadd fast float %8, %12
-  %add.i1 = fadd fast float %9, %12
-  %add.i2 = fadd fast float %10, %12
-  %add.i3 = fadd fast float %11, %12
-  %13 = call %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32 74, %dx.types.Handle %g_txDiffuse_texture_2d, %dx.types.Handle %g_samLinear_sampler, float %5, float %6, float undef, float undef, i32 undef, i32 undef, i32 0)  ; TextureGather(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,channel)
-  %14 = extractvalue %dx.types.ResRet.f32 %13, 0
-  %15 = extractvalue %dx.types.ResRet.f32 %13, 1
-  %16 = extractvalue %dx.types.ResRet.f32 %13, 2
-  %17 = extractvalue %dx.types.ResRet.f32 %13, 3
-  %add5.i0 = fadd fast float %add.i0, %14
-  %add5.i1 = fadd fast float %add.i1, %15
-  %add5.i2 = fadd fast float %add.i2, %16
-  %add5.i3 = fadd fast float %add.i3, %17
-  %18 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %4, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %19 = extractvalue %dx.types.CBufRet.f32 %18, 0
-  %20 = call %dx.types.ResRet.f32 @dx.op.sampleCmp.f32(i32 65, %dx.types.Handle %g_txDiffuse_texture_2d, %dx.types.Handle %g_samLinearC_sampler, float %5, float %6, float undef, float undef, i32 undef, i32 undef, i32 undef, float %19, float undef)  ; SampleCmp(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,offset2,compareValue,clamp)
-  %21 = extractvalue %dx.types.ResRet.f32 %20, 0
-  %add10.i0 = fadd fast float %add5.i0, %21
-  %add10.i1 = fadd fast float %add5.i1, %21
-  %add10.i2 = fadd fast float %add5.i2, %21
-  %add10.i3 = fadd fast float %add5.i3, %21
-  %22 = call %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32 75, %dx.types.Handle %g_txDiffuse_texture_2d, %dx.types.Handle %g_samLinearC_sampler, float %5, float %6, float undef, float undef, i32 undef, i32 undef, i32 0, float %19)  ; TextureGatherCmp(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,channel,compareVale)
-  %23 = extractvalue %dx.types.ResRet.f32 %22, 0
-  %24 = extractvalue %dx.types.ResRet.f32 %22, 1
-  %25 = extractvalue %dx.types.ResRet.f32 %22, 2
-  %26 = extractvalue %dx.types.ResRet.f32 %22, 3
-  %add13.i0 = fadd fast float %add10.i0, %23
-  %add13.i1 = fadd fast float %add10.i1, %24
-  %add13.i2 = fadd fast float %add10.i2, %25
-  %add13.i3 = fadd fast float %add10.i3, %26
-  %27 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %28 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %29 = call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef)  ; LoadInput(inputSigId,rowIndex,colIndex,gsVertexAxis)
-  %30 = call %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32 60, %dx.types.Handle %3, i32 0)  ; CBufferLoadLegacy(handle,regIndex)
-  %31 = extractvalue %dx.types.CBufRet.f32 %30, 0
-  %32 = extractvalue %dx.types.CBufRet.f32 %30, 1
-  %33 = extractvalue %dx.types.CBufRet.f32 %30, 2
-  %34 = call float @dx.op.dot3.f32(i32 56, float %31, float %32, float %33, float %27, float %28, float %29)  ; Dot3(ax,ay,az,bx,by,bz)
-  %Saturate = call float @dx.op.unary.f32(i32 7, float %34)  ; Saturate(value)
-  %35 = extractvalue %dx.types.CBufRet.f32 %30, 3
-  %FMax = call float @dx.op.binary.f32(i32 34, float %Saturate, float %35)  ; FMax(a,b)
-  %mul.i0 = fmul fast float %FMax, %add13.i0
-  %mul.i1 = fmul fast float %FMax, %add13.i1
-  %mul.i2 = fmul fast float %FMax, %add13.i2
-  %mul.i3 = fmul fast float %FMax, %add13.i3
-  %TextureLoad = call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 67, %dx.types.Handle %uav1_UAV_2d, i32 undef, i32 0, i32 0, i32 undef, i32 undef, i32 undef, i32 undef)  ; TextureLoad(srv,mipLevelOrSampleCount,coord0,coord1,coord2,offset0,offset1,offset2)
-  %36 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 0
-  %37 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 1
-  %38 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 2
-  %39 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 3
-  %mul20.i0 = fmul fast float %mul.i0, %36
-  %mul20.i1 = fmul fast float %mul.i1, %37
-  %mul20.i2 = fmul fast float %mul.i2, %38
-  %mul20.i3 = fmul fast float %mul.i3, %39
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %mul20.i0)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %mul20.i1)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %mul20.i2)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %mul20.i3)  ; StoreOutput(outputtSigId,rowIndex,colIndex,value)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readonly
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #2
-
-; Function Attrs: nounwind readonly
-declare %dx.types.CBufRet.f32 @dx.op.cbufferLoadLegacy.f32(i32, %dx.types.Handle, i32) #2
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.textureGatherCmp.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, float, i32, i32, i32, float) #2
-
-; Function Attrs: nounwind readonly
-declare float @dx.op.calculateLOD.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, i1) #2
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.textureGather.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, float, i32, i32, i32) #2
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.sample.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, float, i32, i32, i32, float) #2
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.sampleCmp.f32(i32, %dx.types.Handle, %dx.types.Handle, float, float, float, float, i32, i32, i32, float, float) #2
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32, %dx.types.Handle, i32, i32, i32, i32, i32, i32, i32) #2
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.dot3.f32(i32, float, float, float, float, float, float) #1
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #1
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.binary.f32(i32, float, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
-
-!llvm.ident = !{!0}
-!dx.valver = !{!1}
-!dx.version = !{!2}
-!dx.shaderModel = !{!3}
-!dx.resources = !{!4}
-!dx.typeAnnotations = !{!16, !31}
-!dx.entryPoints = !{!42}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 1, i32 0}
-!2 = !{i32 0, i32 7}
-!3 = !{!"ps", i32 6, i32 0}
-!4 = !{!5, !8, !10, !13}
-!5 = !{!6}
-!6 = !{i32 0, %class.Texture2D* undef, !"g_txDiffuse", i32 0, i32 0, i32 1, i32 2, i32 0, !7}
-!7 = !{i32 0, i32 9}
-!8 = !{!9}
-!9 = !{i32 0, %class.RWTexture2D* undef, !"uav1", i32 0, i32 3, i32 1, i32 2, i1 false, i1 false, i1 false, !7}
-!10 = !{!11, !12}
-!11 = !{i32 0, %"$Globals"* undef, !"$Globals", i32 0, i32 0, i32 1, i32 4, null}
-!12 = !{i32 1, %cbPerFrame* undef, !"cbPerFrame", i32 0, i32 1, i32 1, i32 16, null}
-!13 = !{!14, !15}
-!14 = !{i32 0, %struct.SamplerState* undef, !"g_samLinear", i32 0, i32 0, i32 1, i32 3, null}
-!15 = !{i32 1, %struct.SamplerComparisonState* undef, !"g_samLinearC", i32 0, i32 1, i32 1, i32 3, null}
-!16 = !{i32 0, %class.Texture2D addrspace(1)* @dx.typevar.0, !17, %"class.Texture2D<vector<float, 4> >::mips_type" addrspace(1)* @dx.typevar.1, !20, %class.RWTexture2D addrspace(1)* @dx.typevar.2, !22, %struct.PS_INPUT addrspace(1)* @dx.typevar.3, !23, %"$Globals" addrspace(1)* @dx.typevar.4, !26, %cbPerFrame addrspace(1)* @dx.typevar.5, !28}
-!17 = !{i32 20, !18, !19}
-!18 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!19 = !{i32 3, i32 16, i32 6, !"mips"}
-!20 = !{i32 4, !21}
-!21 = !{i32 3, i32 0, i32 6, !"handle", i32 7, i32 5}
-!22 = !{i32 16, !18}
-!23 = !{i32 24, !24, !25}
-!24 = !{i32 3, i32 0, i32 4, !"NORMAL", i32 5, i32 6, i32 6, !"vNormal", i32 7, i32 9}
-!25 = !{i32 3, i32 16, i32 4, !"TEXCOORD0", i32 5, i32 4, i32 6, !"vTexcoord", i32 7, i32 9}
-!26 = !{i32 0, !27}
-!27 = !{i32 3, i32 0, i32 6, !"cmpVal", i32 7, i32 9}
-!28 = !{i32 0, !29, !30}
-!29 = !{i32 3, i32 0, i32 6, !"g_vLightDir", i32 7, i32 9}
-!30 = !{i32 3, i32 12, i32 6, !"g_fAmbient", i32 7, i32 9}
-!31 = !{i32 1, void (<3 x float>*, <2 x float>*, <4 x float>*, %struct.PS_INPUT * )* @main.flat, !32}
-!32 = !{!33, !35, !38, !40, !40}
-!33 = !{i32 0, !34, !34}
-!34 = !{}
-!35 = !{i32 0, !36, !37}
-!36 = !{i32 4, !"NORMAL", i32 5, i32 6, i32 7, i32 9}
-!37 = !{i32 0}
-!38 = !{i32 0, !39, !37}
-!39 = !{i32 4, !"TEXCOORD0", i32 5, i32 4, i32 7, i32 9}
-!40 = !{i32 1, !41, !37}
-!41 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!42 = !{void (<3 x float>*, <2 x float>*, <4 x float>*, %struct.PS_INPUT * )* @main.flat, !"main", !43, !4, !49}
-!43 = !{!44, !47, null}
-!44 = !{!45, !46}
-!45 = !{i32 0, !"NORMAL", i8 9, i8 0, !37, i8 6, i32 1, i8 3, i32 0, i8 0, null}
-!46 = !{i32 1, !"TEXCOORD", i8 9, i8 0, !37, i8 4, i32 1, i8 2, i32 1, i8 0, null}
-!47 = !{!48}
-!48 = !{i32 0, !"SV_Target", i8 9, i8 16, !37, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-!49 = !{i32 0, i64 8192}
--- a/tools/clang/test/HLSL/dxil_validation/semaOverlap.ll
+++ b/tools/clang/test/HLSL/dxil_validation/semaOverlap.ll
@ -1,68 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Semantic 'A' overlap at 0
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-; Function Attrs: nounwind
-define void @main.flat(<4 x float>, <4 x float>, <4 x float>* nocapture readnone) #0 {
-entry:
-  %3 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)
-  %4 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 undef)
-  %5 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
-  %6 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
-  %7 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef)
-  %8 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 undef)
-  %mul.i0 = fmul fast float %5, %4
-  %mul.i1 = fmul fast float %6, %3
-  %mul.i2 = fmul fast float %7, %3
-  %mul.i3 = fmul fast float %8, %3
-  %FAbs = tail call float @dx.op.unary.f32(i32 6, float %mul.i0)
-  %FAbs1 = tail call float @dx.op.unary.f32(i32 6, float %mul.i1)
-  %FAbs2 = tail call float @dx.op.unary.f32(i32 6, float %mul.i2)
-  %FAbs3 = tail call float @dx.op.unary.f32(i32 6, float %mul.i3)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %FAbs)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %FAbs1)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %FAbs2)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %FAbs3)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!12}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{i32 1, void (<4 x float>, <4 x float>, <4 x float>*)* @main.flat, !4}
-!4 = !{!5, !7, !7, !10}
-!5 = !{i32 0, !6, !6}
-!6 = !{}
-!7 = !{i32 0, !8, !9}
-!8 = !{i32 4, !"A", i32 7, i32 9}
-!9 = !{i32 0}
-!10 = !{i32 1, !11, !9}
-!11 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!12 = !{void (<4 x float>, <4 x float>, <4 x float>*)* @main.flat, !"", !13, null, null}
-!13 = !{!14, !17, null}
-!14 = !{!15, !16}
-!15 = !{i32 0, !"A", i8 9, i8 0, !9, i8 2, i32 1, i8 4, i32 0, i8 0, null}
-!16 = !{i32 1, !"A", i8 9, i8 0, !9, i8 2, i32 1, i8 4, i32 1, i8 0, null}
-!17 = !{!18}
-!18 = !{i32 0, !"SV_Target", i8 9, i8 16, !9, i8 0, i32 1, i8 4, i32 0, i8 0, null}
--- a/tools/clang/test/HLSL/dxil_validation/sigOutOfRange.ll
+++ b/tools/clang/test/HLSL/dxil_validation/sigOutOfRange.ll
@ -1,69 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: signature element A at location (8000,0) size (1,4) is out of range.
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-; Function Attrs: nounwind
-define void @main.flat(<4 x float>, <4 x float>, <4 x float>* nocapture readnone) #0 {
-entry:
-  %3 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)
-  %4 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 undef)
-  %5 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
-  %6 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
-  %7 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef)
-  %8 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 undef)
-  %mul.i0 = fmul fast float %5, %4
-  %mul.i1 = fmul fast float %6, %3
-  %mul.i2 = fmul fast float %7, %3
-  %mul.i3 = fmul fast float %8, %3
-  %FAbs = tail call float @dx.op.unary.f32(i32 6, float %mul.i0)
-  %FAbs1 = tail call float @dx.op.unary.f32(i32 6, float %mul.i1)
-  %FAbs2 = tail call float @dx.op.unary.f32(i32 6, float %mul.i2)
-  %FAbs3 = tail call float @dx.op.unary.f32(i32 6, float %mul.i3)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %FAbs)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %FAbs1)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %FAbs2)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %FAbs3)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!12}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{i32 1, void (<4 x float>, <4 x float>, <4 x float>*)* @main.flat, !4}
-!4 = !{!5, !7, !7, !10}
-!5 = !{i32 0, !6, !6}
-!6 = !{}
-!7 = !{i32 0, !8, !9}
-!8 = !{i32 4, !"A", i32 7, i32 9}
-!9 = !{i32 0}
-!10 = !{i32 1, !11, !9}
-!11 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!12 = !{void (<4 x float>, <4 x float>, <4 x float>*)* @main.flat, !"", !13, null, null}
-!13 = !{!14, !17, null}
-!14 = !{!15, !16}
-!15 = !{i32 0, !"A", i8 9, i8 0, !9, i8 2, i32 1, i8 4, i32 0, i8 0, null}
-!16 = !{i32 1, !"A", i8 9, i8 0, !19, i8 2, i32 1, i8 4, i32 8000, i8 0, null}
-!17 = !{!18}
-!18 = !{i32 0, !"SV_Target", i8 9, i8 16, !9, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-!19 = !{i32 1}
--- a/tools/clang/test/HLSL/dxil_validation/sigOverlap.ll
+++ b/tools/clang/test/HLSL/dxil_validation/sigOverlap.ll
@ -1,69 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: signature element A at location (0,0) size (1,4) overlaps another signature element.
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-; Function Attrs: nounwind
-define void @main.flat(<4 x float>, <4 x float>, <4 x float>* nocapture readnone) #0 {
-entry:
-  %3 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 0, i32 undef)
-  %4 = tail call float @dx.op.loadInput.f32(i32 4, i32 1, i32 0, i8 1, i32 undef)
-  %5 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 0, i32 undef)
-  %6 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 1, i32 undef)
-  %7 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 2, i32 undef)
-  %8 = tail call float @dx.op.loadInput.f32(i32 4, i32 0, i32 0, i8 3, i32 undef)
-  %mul.i0 = fmul fast float %5, %4
-  %mul.i1 = fmul fast float %6, %3
-  %mul.i2 = fmul fast float %7, %3
-  %mul.i3 = fmul fast float %8, %3
-  %FAbs = tail call float @dx.op.unary.f32(i32 6, float %mul.i0)
-  %FAbs1 = tail call float @dx.op.unary.f32(i32 6, float %mul.i1)
-  %FAbs2 = tail call float @dx.op.unary.f32(i32 6, float %mul.i2)
-  %FAbs3 = tail call float @dx.op.unary.f32(i32 6, float %mul.i3)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %FAbs)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %FAbs1)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %FAbs2)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %FAbs3)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.loadInput.f32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare float @dx.op.unary.f32(i32, float) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!12}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{i32 1, void (<4 x float>, <4 x float>, <4 x float>*)* @main.flat, !4}
-!4 = !{!5, !7, !7, !10}
-!5 = !{i32 0, !6, !6}
-!6 = !{}
-!7 = !{i32 0, !8, !9}
-!8 = !{i32 4, !"A", i32 7, i32 9}
-!9 = !{i32 0}
-!10 = !{i32 1, !11, !9}
-!11 = !{i32 4, !"SV_TARGET", i32 7, i32 9}
-!12 = !{void (<4 x float>, <4 x float>, <4 x float>*)* @main.flat, !"", !13, null, null}
-!13 = !{!14, !17, null}
-!14 = !{!15, !16}
-!15 = !{i32 0, !"A", i8 9, i8 0, !9, i8 2, i32 1, i8 4, i32 0, i8 0, null}
-!16 = !{i32 1, !"A", i8 9, i8 0, !19, i8 2, i32 1, i8 4, i32 0, i8 0, null}
-!17 = !{!18}
-!18 = !{i32 0, !"SV_Target", i8 9, i8 16, !9, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-!19 = !{i32 1}
--- a/tools/clang/test/HLSL/dxil_validation/uavBarrier.ll
+++ b/tools/clang/test/HLSL/dxil_validation/uavBarrier.ll
@ -1,119 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: uav load don't support offset
-; CHECK: uav load don't support mipLevel/sampleIndex
-; CHECK: store on typed uav must write to all four components of the UAV
-; CHECK: sync in a non-Compute Shader must only sync UAV (sync_uglobal)
-
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%class.RWTexture2D = type { <4 x float> }
-%dx.types.Handle = type { i8* }
-%dx.types.ResRet.f32 = type { float, float, float, float, i32 }
-
-@"\01?uav1@@3V?$RWTexture2D@V?$vector@M$03@@@@A" = available_externally global %class.RWTexture2D zeroinitializer, align 4
-@dx.typevar.0 = external addrspace(1) constant %class.RWTexture2D
-@llvm.used = appending global [2 x i8*] [i8* addrspacecast (i8 addrspace(1)* bitcast (%class.RWTexture2D addrspace(1)* @dx.typevar.0 to i8 addrspace(1)*) to i8*), i8* bitcast (%class.RWTexture2D* @"\01?uav1@@3V?$RWTexture2D@V?$vector@M$03@@@@A" to i8*)], section "llvm.metadata"
-
-; Function Attrs: nounwind
-define void @main.flat(<2 x i32>, <2 x i32>, <4 x float>* nocapture readnone) #0 {
-entry:
-  %uav1_UAV_2d = tail call %dx.types.Handle @dx.op.createHandle(i32 58, i8 1, i32 0, i32 0, i1 false)
-  %3 = tail call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 0, i32 undef)
-  %4 = tail call i32 @dx.op.loadInput.i32(i32 4, i32 1, i32 0, i8 1, i32 undef)
-  %5 = tail call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 0, i32 undef)
-  %6 = tail call i32 @dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 1, i32 undef)
-  %TextureLoad = tail call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 67, %dx.types.Handle %uav1_UAV_2d, i32 %3, i32 %3, i32 %4, i32 %3, i32 undef, i32 %3, i32 undef)
-  %7 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 0
-  %8 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 1
-  %9 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 2
-  %10 = extractvalue %dx.types.ResRet.f32 %TextureLoad, 3
-  tail call void @dx.op.barrier(i32 83, i32 9)
-  %TextureLoad1 = tail call %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32 67, %dx.types.Handle %uav1_UAV_2d, i32 undef, i32 %5, i32 %6, i32 undef, i32 undef, i32 undef, i32 undef)
-  %11 = extractvalue %dx.types.ResRet.f32 %TextureLoad1, 0
-  %12 = extractvalue %dx.types.ResRet.f32 %TextureLoad1, 1
-  %13 = extractvalue %dx.types.ResRet.f32 %TextureLoad1, 2
-  %14 = extractvalue %dx.types.ResRet.f32 %TextureLoad1, 3
-  %15 = extractvalue %dx.types.ResRet.f32 %TextureLoad1, 4
-  %conv = uitofp i32 %15 to float
-  %factor = fmul fast float %conv, 2.000000e+00
-  %add4.i0 = fadd fast float %11, %7
-  %add9.i0 = fadd fast float %add4.i0, %factor
-  %factor4 = fmul fast float %conv, 2.000000e+00
-  %add4.i1 = fadd fast float %12, %8
-  %add9.i1 = fadd fast float %add4.i1, %factor4
-  %factor5 = fmul fast float %conv, 2.000000e+00
-  %add4.i2 = fadd fast float %13, %9
-  %add9.i2 = fadd fast float %add4.i2, %factor5
-  %factor6 = fmul fast float %conv, 2.000000e+00
-  %add4.i3 = fadd fast float %14, %10
-  %add9.i3 = fadd fast float %add4.i3, %factor6
-  tail call void @dx.op.barrier(i32 83, i32 2)
-  tail call void @dx.op.textureStore.f32(i32 68, %dx.types.Handle %uav1_UAV_2d, i32 %3, i32 %4, i32 undef, float %add9.i0, float %add9.i1, float %add9.i2, float undef, i8 7)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float %add9.i0)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float %add9.i1)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float %add9.i2)
-  tail call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float %add9.i3)
-  ret void
-}
-
-; Function Attrs: nounwind readnone
-declare i32 @dx.op.loadInput.i32(i32, i32, i32, i8, i32) #1
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-
-; Function Attrs: nounwind readnone
-declare %dx.types.Handle @dx.op.createHandle(i32, i8, i32, i32, i1) #1
-
-; Function Attrs: nounwind readonly
-declare %dx.types.ResRet.f32 @dx.op.textureLoad.f32(i32, %dx.types.Handle, i32, i32, i32, i32, i32, i32, i32) #2
-
-; Function Attrs: nounwind
-declare void @dx.op.textureStore.f32(i32, %dx.types.Handle, i32, i32, i32, float, float, float, float, i8) #0
-
-; Function Attrs: nounwind
-declare void @dx.op.barrier(i32, i32) #0
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readnone }
-attributes #2 = { nounwind readonly }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.resources = !{!3}
-!dx.typeAnnotations = !{!7, !10}
-!dx.entryPoints = !{!21}
-
-!0 = !{!"clang version 3.7 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 7}
-!2 = !{!"ps", i32 6, i32 0}
-!3 = !{null, !4, null, null}
-!4 = !{!5}
-!5 = !{i32 0, %class.RWTexture2D* @"\01?uav1@@3V?$RWTexture2D@V?$vector@M$03@@@@A", !"uav1", i32 0, i32 3, i32 1, i32 2, i1 false, i1 false, i1 false, !6}
-!6 = !{i32 0, i32 9}
-!7 = !{i32 0, %class.RWTexture2D addrspace(1)* @dx.typevar.0, !8}
-!8 = !{i32 16, !9}
-!9 = !{i32 3, i32 0, i32 6, !"h", i32 7, i32 9}
-!10 = !{i32 1, void (<2 x i32>, <2 x i32>, <4 x float>*)* @main.flat, !11}
-!11 = !{!12, !14, !17, !19}
-!12 = !{i32 0, !13, !13}
-!13 = !{}
-!14 = !{i32 0, !15, !16}
-!15 = !{i32 4, !"A", i32 7, i32 5}
-!16 = !{i32 0}
-!17 = !{i32 0, !18, !16}
-!18 = !{i32 4, !"B", i32 7, i32 5}
-!19 = !{i32 1, !20, !16}
-!20 = !{i32 4, !"SV_Target", i32 7, i32 9}
-!21 = !{void (<2 x i32>, <2 x i32>, <4 x float>*)* @main.flat, !"", !22, !3, !28}
-!22 = !{!23, !26, null}
-!23 = !{!24, !25}
-!24 = !{i32 0, !"A", i8 5, i8 0, !16, i8 1, i32 1, i8 2, i32 0, i8 0, null}
-!25 = !{i32 1, !"B", i8 5, i8 0, !16, i8 1, i32 1, i8 2, i32 1, i8 0, null}
-!26 = !{!27}
-!27 = !{i32 0, !"SV_Target", i8 9, i8 16, !16, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-!28 = !{i32 0, i64 8192}
--- a/tools/clang/test/HLSL/val-inst-disallowed.ll
+++ b/tools/clang/test/HLSL/val-inst-disallowed.ll
@ -1,56 +0,0 @@
-; RUN: %dxv %s | FileCheck %s
-
-; CHECK: Semantic 'SV_Target' is invalid as vs Output
-
-target datalayout = "e-m:e-p:32:32-i64:64-f80:32-n8:16:32-a:0:32-S32"
-target triple = "dxil-ms-dx"
-
-%dx.types.wave_t = type { i8* }
-
-define void @"\01?main@@YA?AV?$vector@M$03@@XZ.flat"(<4 x float>*) {
-entry:
-; CHECK: Instructions must not reference reserved opcodes
-  %WaveCapture = call %dx.types.wave_t @dx.op.waveCapture(i32 114, i8 0)
-
-; CHECK: Declaration '%dx.types.wave_t = type { i8* }' uses a reserved prefix
-  %wave_local = alloca %dx.types.wave_t
-
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 0, float 0.000000e+00)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 1, float 0.000000e+00)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 2, float 0.000000e+00)
-  call void @dx.op.storeOutput.f32(i32 5, i32 0, i32 0, i8 3, float 0.000000e+00)
-  ret void
-; CHECK: Instructions must be of an allowed type
-  unreachable
-}
-
-; Function Attrs: nounwind
-declare void @dx.op.storeOutput.f32(i32, i32, i32, i8, float) #0
-; Function Attrs: nounwind readonly
-declare %dx.types.wave_t @dx.op.waveCapture(i32, i8) #1
-; Function Attrs: nounwind readonly
-declare i1 @dx.op.waveAllIsTrue(i32, %dx.types.wave_t, i1) #1
-
-attributes #0 = { nounwind }
-attributes #1 = { nounwind readonly }
-
-!llvm.ident = !{!0}
-!dx.version = !{!1}
-!dx.shaderModel = !{!2}
-!dx.typeAnnotations = !{!3}
-!dx.entryPoints = !{!9}
-
-!0 = !{!"clang version 3.7.0 (tags/RELEASE_370/final)"}
-!1 = !{i32 0, i32 4}
-!2 = !{!"vs", i32 6, i32 0}
-!3 = !{i32 1, void (<4 x float>*)* @"\01?main@@YA?AV?$vector@M$03@@XZ.flat", !4}
-!4 = !{!5, !7}
-!5 = !{i32 0, !6, !13}
-!6 = !{}
-!7 = !{i32 1, !8, !13}
-!8 = !{i32 4, !"SV_Target", i32 7, i32 9}
-!9 = !{void (<4 x float>*)* @"\01?main@@YA?AV?$vector@M$03@@XZ.flat", !"", !10, null, null}
-!10 = !{null, !11, null}
-!11 = !{!12}
-!12 = !{i32 0, !"SV_Target", i8 9, i8 16, !13, i8 0, i32 1, i8 4, i32 0, i8 0, null}
-!13 = !{i32 0}
--- a/tools/clang/tools/dotnetc/EditorForm.cs
+++ b/tools/clang/tools/dotnetc/EditorForm.cs
@ -221,8 +221,8 @@ namespace MainNs
                "  <InputElement SemanticName='POSITION' Format='R32G32B32_FLOAT' AlignedByteOffset='0' />\r\n" +
                "  <InputElement SemanticName='COLOR' Format='R32G32B32A32_FLOAT' AlignedByteOffset='12' />\r\n" +
                " </InputElements>\r\n" +
-                " <Shader Name='VS' Target='vs_5_1' EntryPoint='VSMain' />\r\n" +
-                " <Shader Name='PS' Target='ps_5_1' EntryPoint='PSMain' />\r\n" +
+                " <Shader Name='VS' Target='vs_6_0' EntryPoint='VSMain' />\r\n" +
+                " <Shader Name='PS' Target='ps_6_0' EntryPoint='PSMain' />\r\n" +
                "</ShaderOp>\r\n";

            this.CodeBox.Text =
@ -670,7 +670,7 @@ namespace MainNs
                result.SetFromText = options.Count > 0;
                result.Mode = GetValueOrDefault(options, "mode", "hlsl");
                result.Entry = GetValueOrDefault(options, "hlsl-entry", "main");
-                result.Target = GetValueOrDefault(options, "hlsl-target", "ps_5_1");
+                result.Target = GetValueOrDefault(options, "hlsl-target", "ps_6_0");
                result.Arguments = GetValueOrDefault(options, "hlsl-args", "").Split(' ').Select(a => a.Trim()).ToArray();
                return result;
            }
--- a/tools/clang/tools/dxcompiler/dxcompilerobj.cpp
+++ b/tools/clang/tools/dxcompiler/dxcompilerobj.cpp
@ -1413,6 +1413,7 @@ static const char *OpCodeSignatures[] = {
  "(value)",  // Atan
  "(value)",  // Hcos
  "(value)",  // Hsin
+  "(value)",  // Htan
  "(value)",  // Exp
  "(value)",  // Frc
  "(value)",  // Log
@ -1469,8 +1470,6 @@ static const char *OpCodeSignatures[] = {
  "(handle,mipLevel)",  // GetDimensions
  "(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,channel)",  // TextureGather
  "(srv,sampler,coord0,coord1,coord2,coord3,offset0,offset1,channel,compareVale)",  // TextureGatherCmp
-  "()",  // ToDelete5
-  "()",  // ToDelete6
  "(srv,index)",  // Texture2DMSGetSamplePosition
  "(index)",  // RenderTargetGetSamplePosition
  "()",  // RenderTargetGetSampleCount
@ -1486,6 +1485,9 @@ static const char *OpCodeSignatures[] = {
  "(inputSigId,inputRowIndex,inputColIndex,offsetX,offsetY)",  // EvalSnapped
  "(inputSigId,inputRowIndex,inputColIndex,sampleIndex)",  // EvalSampleIndex
  "(inputSigId,inputRowIndex,inputColIndex)",  // EvalCentroid
+  "()",  // SampleIndex
+  "()",  // Coverage
+  "()",  // InnerCoverage
  "(component)",  // ThreadId
  "(component)",  // GroupId
  "(component)",  // ThreadIdInGroup
@ -1493,12 +1495,9 @@ static const char *OpCodeSignatures[] = {
  "(streamId)",  // EmitStream
  "(streamId)",  // CutStream
  "(streamId)",  // EmitThenCutStream
+  "()",  // GSInstanceID
  "(lo,hi)",  // MakeDouble
-  "()",  // ToDelete1
-  "()",  // ToDelete2
  "(value)",  // SplitDouble
-  "()",  // ToDelete3
-  "()",  // ToDelete4
  "(inputSigId,row,col,index)",  // LoadOutputControlPoint
  "(inputSigId,row,col)",  // LoadPatchConstant
  "(component)",  // DomainLocation
@ -1506,12 +1505,9 @@ static const char *OpCodeSignatures[] = {
  "()",  // OutputControlPointID
  "()",  // PrimitiveID
  "()",  // CycleCounterLegacy
-  "(value)",  // Htan
-  "()",  // WaveCaptureReserved
  "()",  // WaveIsFirstLane
  "()",  // WaveGetLaneIndex
  "()",  // WaveGetLaneCount
-  "()",  // WaveIsHelperLaneReserved
  "(cond)",  // WaveAnyTrue
  "(cond)",  // WaveAllTrue
  "(value)",  // WaveActiveAllEqual
@ -1521,8 +1517,6 @@ static const char *OpCodeSignatures[] = {
  "(value,op,sop)",  // WaveActiveOp
  "(value,op)",  // WaveActiveBit
  "(value,op,sop)",  // WavePrefixOp
-  "()",  // WaveGetOrderedIndex
-  "()",  // GlobalOrderedCountIncReserved
  "(value,quadLane)",  // QuadReadLaneAt
  "(value,op)",  // QuadOp
  "(value)",  // BitcastI16toF16
@ -1531,17 +1525,13 @@ static const char *OpCodeSignatures[] = {
  "(value)",  // BitcastF32toI32
  "(value)",  // BitcastI64toF64
  "(value)",  // BitcastF64toI64
-  "()",  // GSInstanceID
  "(value)",  // LegacyF32ToF16
  "(value)",  // LegacyF16ToF32
  "(value)",  // LegacyDoubleToFloat
  "(value)",  // LegacyDoubleToSInt32
  "(value)",  // LegacyDoubleToUInt32
  "(value)",  // WaveAllBitCount
-  "(value)",  // WavePrefixBitCount
-  "()",  // SampleIndex
-  "()",  // Coverage
-  "()"  // InnerCoverage
+  "(value)"  // WavePrefixBitCount
 };
 // OPCODE-SIGS:END

--- a/tools/clang/unittests/HLSL/CompilerTest.cpp
+++ b/tools/clang/unittests/HLSL/CompilerTest.cpp
@ -461,6 +461,7 @@ public:
  TEST_METHOD(CodeGenSimpleGS2)
  TEST_METHOD(CodeGenSimpleGS3)
  TEST_METHOD(CodeGenSimpleGS4)
+  TEST_METHOD(CodeGenSimpleGS5)
  TEST_METHOD(CodeGenSimpleHS1)
  TEST_METHOD(CodeGenSimpleHS2)
  TEST_METHOD(CodeGenSimpleHS3)
@ -2340,6 +2341,10 @@ TEST_F(CompilerTest, CodeGenSimpleGS4) {
  CodeGenTestCheck(L"..\\CodeGenHLSL\\SimpleGS4.hlsl");
 }

+TEST_F(CompilerTest, CodeGenSimpleGS5) {  // Same CodeGenTestCheck pattern as CodeGenSimpleGS4, applied to SimpleGS5.hlsl.
+  CodeGenTestCheck(L"..\\CodeGenHLSL\\SimpleGS5.hlsl");
+}
+
 TEST_F(CompilerTest, CodeGenSimpleHS1) {
  CodeGenTestCheck(L"..\\CodeGenHLSL\\SimpleHS1.hlsl");
 }
--- a/tools/clang/unittests/HLSL/ExtensionTest.cpp
+++ b/tools/clang/unittests/HLSL/ExtensionTest.cpp
@ -15,6 +15,7 @@
 #include "dxc/dxcapi.internal.h"
 #include "dxc/HLSL/HLOperationLowerExtension.h"
 #include "dxc/HlslIntrinsicOp.h"
+#include "llvm/Support/Regex.h"

 ///////////////////////////////////////////////////////////////////////////////
 // Support for test intrinsics.
@ -90,6 +91,12 @@ static const HLSL_INTRINSIC_ARGUMENT TestUnsigned[] = {
  { "x", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_UINT, 1, 1},
 };

+// float2 = MyBufferOp(uint2 addr) -- argument table referenced by the BufferIntrinsics entry.
+static const HLSL_INTRINSIC_ARGUMENT TestMyBufferOp[] = {
+  { "MyBufferOp", AR_QUAL_OUT, 0, LITEMPLATE_VECTOR, 0, LICOMPTYPE_FLOAT, 1, 2 },  // entry 0 carries the intrinsic's own name with AR_QUAL_OUT/float; presumably the float2 return slot
+  { "addr", AR_QUAL_IN, 1, LITEMPLATE_VECTOR, 1, LICOMPTYPE_UINT, 1, 2},  // entry 1: the 'addr' input (uint, presumably 1x2 to form uint2)
+};
+
 struct Intrinsic {
  LPCWSTR hlslName;
  const char *dxilName;
@ -119,11 +126,79 @@ Intrinsic Intrinsics[] = {
  {L"test_unsigned","test_unsigned",   "n", { static_cast<unsigned>(hlsl::IntrinsicOp::IOP_min), false, true, -1, countof(TestUnsigned), TestUnsigned}},
 };

+Intrinsic BufferIntrinsics[] = {  // Entries that TestIntrinsicTable registers under the L"Buffer" namespace.
+  {L"MyBufferOp",   "MyBufferOp",      "m", { 12, false, true, -1, countof(TestMyBufferOp), TestMyBufferOp}},  // 12 is the value ResourceExtensionIntrinsic expects as the call's leading i32 operand
+};
+
+class IntrinsicTable {  // A [begin, end) range of Intrinsic entries exposed under one namespace name.
+public:
+  IntrinsicTable(const wchar_t *ns, Intrinsic *begin, Intrinsic *end)  // ns is kept by pointer, not copied; callers pass string literals
+    :  m_namespace(ns), m_begin(begin), m_end(end)
+  { }
+  // Outcome of a search: the entry plus its index from m_begin. BuildResult reports a miss as {nullptr, uint64 max}.
+  struct SearchResult {
+    Intrinsic *intrinsic;
+    uint64_t index;
+
+    SearchResult() : SearchResult(nullptr, 0) {}
+    SearchResult(Intrinsic *i, uint64_t n) : intrinsic(i), index(n) {}
+    explicit operator bool() const { return intrinsic != nullptr; }
+  };
+  // First entry at or after startIndex whose hlslName equals name; L"*" yields the entry at startIndex itself.
+  SearchResult Search(const wchar_t *name, std::ptrdiff_t startIndex) const {
+    assert(startIndex >= 0 && startIndex <= std::distance(m_begin, m_end));  // validate before forming the pointer
+    Intrinsic *begin = m_begin + startIndex;
+    if (IsStar(name))
+      return BuildResult(begin);
+
+    Intrinsic *found = std::find_if(begin, m_end, [name](const Intrinsic &i) {
+      return wcscmp(i.hlslName, name) == 0;
+    });
+
+    return BuildResult(found);
+  }
+  // First entry in the whole table whose hlsl.Op equals opcode.
+  SearchResult Search(unsigned opcode) const {
+    Intrinsic *begin = m_begin;
+    assert(std::distance(begin, m_end) >= 0);
+
+    Intrinsic *found = std::find_if(begin, m_end, [opcode](const Intrinsic &i) {
+      return i.hlsl.Op == opcode;
+    });
+
+    return BuildResult(found);
+  }
+  // True when ns compares equal (wcscmp) to this table's namespace name.
+  bool MatchesNamespace(const wchar_t *ns) const {
+    return wcscmp(m_namespace, ns) == 0;
+  }
+
+private:
+  const wchar_t *m_namespace;
+  Intrinsic *m_begin;
+  Intrinsic *m_end;
+
+  bool IsStar(const wchar_t *name) const {
+    return wcscmp(name, L"*") == 0;
+  }
+  // Converts a position in [m_begin, m_end] into a SearchResult; m_end means "not found".
+  SearchResult BuildResult(Intrinsic *found) const {
+    if (found == m_end)
+      return SearchResult{ nullptr, std::numeric_limits<uint64_t>::max() };
+
+    return SearchResult{ found, static_cast<uint64_t>(std::distance(m_begin, found)) };
+  }
+};
+
 class TestIntrinsicTable : public IDxcIntrinsicTable {
 private:
  DXC_MICROCOM_REF_FIELD(m_dwRef);
+  std::vector<IntrinsicTable> m_tables;
 public:
-  TestIntrinsicTable() : m_dwRef(0) { }
+  TestIntrinsicTable() : m_dwRef(0) {  // Registers one IntrinsicTable per namespace name.
+    m_tables.push_back(IntrinsicTable(L"",       std::begin(Intrinsics), std::end(Intrinsics)));  // empty namespace name
+    m_tables.push_back(IntrinsicTable(L"Buffer", std::begin(BufferIntrinsics), std::end(BufferIntrinsics)));  // matched when LookupIntrinsic receives typeName L"Buffer"
+  }
  DXC_MICROCOM_ADDREF_RELEASE_IMPL(m_dwRef)
  __override HRESULT STDMETHODCALLTYPE QueryInterface(REFIID iid, void** ppvObject) {
    return DoBasicQueryInterface<IDxcIntrinsicTable>(this, iid, ppvObject);
@ -138,47 +213,61 @@ public:
  __override HRESULT STDMETHODCALLTYPE LookupIntrinsic(
      LPCWSTR typeName, LPCWSTR functionName, const HLSL_INTRINSIC **pIntrinsic,
      _Inout_ UINT64 *pLookupCookie) {
-    if (typeName != nullptr && *typeName) return E_FAIL;
-    Intrinsic *intrinsic =
-      std::find_if(std::begin(Intrinsics), std::end(Intrinsics), [functionName](const Intrinsic &i) {
-        return wcscmp(i.hlslName, functionName) == 0;
-    });
-    if (intrinsic == std::end(Intrinsics))
+    if (typeName == nullptr)  // only a null type name is rejected here; L"" is a registered namespace
      return E_FAIL;

-    *pIntrinsic = &intrinsic->hlsl;
-    *pLookupCookie = 0;
-    return S_OK;
+    // Search for matching intrinsic name in matching namespace.
+    IntrinsicTable::SearchResult result;
+    for (const IntrinsicTable &table : m_tables) {
+      if (table.MatchesNamespace(typeName)) {
+        result = table.Search(functionName, *pLookupCookie);  // the incoming cookie is the index to resume searching from
+        break;  // only the first table whose namespace matches is consulted
+      }
+    }
+    // Hit: return the entry and a cookie one past it. Miss: null the output and reset the cookie to 0.
+    if (result) {
+      *pIntrinsic = &result.intrinsic->hlsl;
+      *pLookupCookie = result.index + 1;
+    }
+    else {
+      *pIntrinsic = nullptr;
+      *pLookupCookie = 0;
+    }
+
+    return result.intrinsic ? S_OK : E_FAIL;
  }

  __override HRESULT STDMETHODCALLTYPE
  GetLoweringStrategy(UINT opcode, _Outptr_ LPCSTR *pStrategy) {
-    Intrinsic *intrinsic =
-      std::find_if(std::begin(Intrinsics), std::end(Intrinsics), [opcode](const Intrinsic &i) {
-      return i.hlsl.Op == opcode;
-    });
+    Intrinsic *intrinsic = FindByOpcode(opcode);  // searches every table in m_tables, not only Intrinsics
    
-    if (intrinsic == std::end(Intrinsics))
+    if (!intrinsic)  // FindByOpcode returns nullptr when no table holds the opcode
      return E_FAIL;

    *pStrategy = intrinsic->strategy;
-
    return S_OK;
  }

  __override HRESULT STDMETHODCALLTYPE
  GetIntrinsicName(UINT opcode, _Outptr_ LPCSTR *pName) {
-    Intrinsic *intrinsic =
-      std::find_if(std::begin(Intrinsics), std::end(Intrinsics), [opcode](const Intrinsic &i) {
-      return i.hlsl.Op == opcode;
-    });
+    Intrinsic *intrinsic = FindByOpcode(opcode);  // searches every table in m_tables, not only Intrinsics

-    if (intrinsic == std::end(Intrinsics))
+    if (!intrinsic)  // FindByOpcode returns nullptr when no table holds the opcode
      return E_FAIL;

    *pName = intrinsic->dxilName;
    return S_OK;
  }
+
+  Intrinsic *FindByOpcode(UINT opcode) {  // nullptr when no registered table holds the opcode
+    // Tables are probed in registration order; the first hit wins.
+    for (const IntrinsicTable &table : m_tables) {
+      IntrinsicTable::SearchResult hit = table.Search(opcode);
+      if (hit)
+        return hit.intrinsic;
+    }
+    return nullptr;
+  }
 };

 // A class to test semantic define validation.
@ -312,6 +401,7 @@ public:
  TEST_METHOD(PackedLowering);
  TEST_METHOD(ReplicateLoweringWhenOnlyVectorIsResult);
  TEST_METHOD(UnsignedOpcodeIsUnchanged);
+  TEST_METHOD(ResourceExtensionIntrinsic);
 };

 TEST_F(ExtensionTest, DefineWhenRegisteredThenPreserved) {
@ -591,3 +681,25 @@ TEST_F(ExtensionTest, UnsignedOpcodeIsUnchanged) {
    disassembly.npos !=
    disassembly.find("call i32 @test_unsigned(i32 113, "));
 }
+
+TEST_F(ExtensionTest, ResourceExtensionIntrinsic) {  // Calls an extension intrinsic as a method on a Buffer and checks the emitted call.
+  Compiler c(m_dllSupport);
+  c.RegisterIntrinsicTable(new TestIntrinsicTable());
+  c.Compile(
+    "Buffer<float2> buf;"
+    "float2 main(uint2 v1 : V1) : SV_Target {\n"
+    "  return buf.MyBufferOp(uint2(1, 2));\n"
+    "}\n",
+    { L"/Vd" }, {}
+  );
+  std::string disassembly = c.Disassemble();
+
+  // Things to check
+  // - return type is translated to dx.types.ResRet
+  // - buffer is translated to dx.types.Handle
+  // - vector is exploded
+  llvm::Regex regex("call %dx.types.ResRet.f32 @MyBufferOp\\(i32 12, %dx.types.Handle %.*, i32 1, i32 2\\)");  // only the parentheses are escaped; each '.' matches any character
+  std::string regexErrors;
+  VERIFY_IS_TRUE(regex.isValid(regexErrors));  // fail on a malformed pattern rather than on a silent non-match
+  VERIFY_IS_TRUE(regex.match(disassembly));
+}
--- a/tools/clang/unittests/HLSL/ValidationTest.cpp
+++ b/tools/clang/unittests/HLSL/ValidationTest.cpp
@ -122,6 +122,7 @@ public:
  TEST_METHOD(StructBitCast)
  TEST_METHOD(MultiDimArray)
  TEST_METHOD(NoFunctionParam)
+  TEST_METHOD(I8Type)

  TEST_METHOD(ClipCullMaxComponents)
  TEST_METHOD(ClipCullMaxRows)
@ -149,7 +150,6 @@ public:
  TEST_METHOD(WhenDepthNotFloatThenFail);
  TEST_METHOD(BarrierFail);
  TEST_METHOD(CBufferLegacyOutOfBoundFail);
-  TEST_METHOD(CBufferOutOfBoundFail);
  TEST_METHOD(CsThreadSizeFail);
  TEST_METHOD(DeadLoopFail);
  TEST_METHOD(EvalFail);
@ -218,6 +218,11 @@ public:
        const char *pStart = (const char *)text->GetBufferPointer();
        const char *pEnd = pStart + text->GetBufferSize();
        const char *pMatch = std::search(pStart, pEnd, pErrorMsg, pErrorMsg + strlen(pErrorMsg));
+        if (pEnd == pMatch) {  // not found: log the searched text before the VERIFY below fails
+          WEX::Logging::Log::Comment(WEX::Common::String().Format(
+              L"Unable to find '%S' in text:\r\n%.*S", pErrorMsg,
+              static_cast<int>(pEnd - pStart), pStart));  // a '*' precision consumes an int, not a ptrdiff_t
+        }
        VERIFY_ARE_NOT_EQUAL(pEnd, pMatch);
      }
    }
@ -284,7 +289,7 @@ public:

  void RewriteAssemblyCheckMsg(LPCSTR pSource, LPCSTR pShaderModel,
                               llvm::ArrayRef<LPCSTR> pLookFors, llvm::ArrayRef<LPCSTR> pReplacements,
-                               LPCSTR pErrorMsg, bool bRegex = false) {
+                               llvm::ArrayRef<LPCSTR> pErrorMsgs, bool bRegex = false) {
    CComPtr<IDxcBlob> pText;
    CComPtr<IDxcBlobEncoding> pSourceBlob;
    
@ -302,11 +307,13 @@ public:
        m_dllSupport.CreateInstance(CLSID_DxcAssembler, &pAssembler));
    VERIFY_SUCCEEDED(pAssembler->AssembleToContainer(pText, &pAssembleResult));

-    if (!CheckOperationResultMsg(pAssembleResult, pErrorMsg, true, bRegex)) {
-      // Assembly succeeded, try validation.
-      CComPtr<IDxcBlob> pBlob;
-      VERIFY_SUCCEEDED(pAssembleResult->GetResult(&pBlob));
-      CheckValidationMsg(pBlob, pErrorMsg, bRegex);
+    for (auto pErrorMsg : pErrorMsgs) {
+      if (!CheckOperationResultMsg(pAssembleResult, pErrorMsg, true, bRegex)) {
+        // Assembly succeeded, try validation.
+        CComPtr<IDxcBlob> pBlob;
+        VERIFY_SUCCEEDED(pAssembleResult->GetResult(&pBlob));
+        CheckValidationMsg(pBlob, pErrorMsg, bRegex);
+      }
    }
  }

@ -351,7 +358,7 @@ public:
  
  void RewriteAssemblyCheckMsg(LPCWSTR name, LPCSTR pShaderModel,
                               llvm::ArrayRef<LPCSTR> pLookFors, llvm::ArrayRef<LPCSTR> pReplacements,
-                               LPCSTR pErrorMsg, bool bRegex = false) {
+                               llvm::ArrayRef<LPCSTR> pErrorMsgs, bool bRegex = false) {
    std::wstring fullPath = hlsl_test::GetPathToHlslDataFile(name);
    CComPtr<IDxcLibrary> pLibrary;
    CComPtr<IDxcBlobEncoding> pSource;
@ -364,19 +371,20 @@ public:

    CComPtr<IDxcBlob> pText;

-    RewriteAssemblyToText(pSource, pShaderModel, pLookFors, pReplacements, &pText);
+    RewriteAssemblyToText(pSource, pShaderModel, pLookFors, pReplacements, &pText, bRegex);

    CComPtr<IDxcAssembler> pAssembler;
    CComPtr<IDxcOperationResult> pAssembleResult;
    VERIFY_SUCCEEDED(
        m_dllSupport.CreateInstance(CLSID_DxcAssembler, &pAssembler));
    VERIFY_SUCCEEDED(pAssembler->AssembleToContainer(pText, &pAssembleResult));
-
-    if (!CheckOperationResultMsg(pAssembleResult, pErrorMsg, true, bRegex)) {
-      // Assembly succeeded, try validation.
-      CComPtr<IDxcBlob> pBlob;
-      VERIFY_SUCCEEDED(pAssembleResult->GetResult(&pBlob));
-      CheckValidationMsg(pBlob, pErrorMsg, bRegex);
+    for (auto pErrorMsg : pErrorMsgs) {
+      if (!CheckOperationResultMsg(pAssembleResult, pErrorMsg, true, bRegex)) {
+        // Assembly succeeded, try validation.
+        CComPtr<IDxcBlob> pBlob;
+        VERIFY_SUCCEEDED(pAssembleResult->GetResult(&pBlob));
+        CheckValidationMsg(pBlob, pErrorMsg, bRegex);
+      }
    }
  }
 };
@ -457,96 +465,378 @@ TEST_F(ValidationTest, WhenUnknownBlocksThenFail) {
 }

 TEST_F(ValidationTest, WhenInstrDisallowedThenFail) {
-  TestCheck(L"val-inst-disallowed.ll");
+  RewriteAssemblyCheckMsg(  // Patches abs2.hlsl's assembly text instead of loading a hand-written .ll fixture.
+      L"..\\CodeGenHLSL\\abs2.hlsl", "ps_6_0",
+      {  // text to look for; presumably paired by position with the replacement list below
+          "target triple = \"dxil-ms-dx\"",
+          "ret void",
+          "dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 3, i32 undef)",
+          "!\"ps\", i32 6, i32 0",
+      },
+      {  // replacements: add a dx.types.wave_t type and an alloca of it, use 'unreachable', relabel as vs
+          "target triple = \"dxil-ms-dx\"\n%dx.types.wave_t = type { i8* }",
+          "unreachable",
+          "dx.op.loadInput.i32(i32 4, i32 0, i32 0, i8 3, i32 undef)\n%wave_local = alloca %dx.types.wave_t",
+          "!\"vs\", i32 6, i32 0",
+      },
+      {"Semantic 'SV_Target' is invalid as vs Output",  // every message in this list is checked
+       "Declaration '%dx.types.wave_t = type { i8* }' uses a reserved prefix",
+       "Instructions must be of an allowed type",
+      }
+  );
 }

 TEST_F(ValidationTest, WhenDepthNotFloatThenFail) {
-  TestCheck(L"dxil_validation\\IntegerDepth.ll");
+  RewriteAssemblyCheckMsg(L"..\\CodeGenHLSL\\IntegerDepth2.hlsl", "ps_6_0",  // rewrites the i8 field that follows "SV_Depth"
+                          {
+                              "!\"SV_Depth\", i8 9",
+                          },
+                          {
+                              "!\"SV_Depth\", i8 4",  // presumably a non-float component type -- TODO confirm against the signature metadata layout
+                          },
+                          {
+                              "SV_Depth must be float",
+                          });
 }

 TEST_F(ValidationTest, BarrierFail) {
-  TestCheck(L"dxil_validation\\barrier.ll");
+    RewriteAssemblyCheckMsg(  // Rewrites the three barrier mode operands and injects extra globals into barrier.hlsl's assembly.
+      L"..\\CodeGenHLSL\\barrier.hlsl", "cs_6_0",
+      {"dx.op.barrier(i32 82, i32 8)",
+        "dx.op.barrier(i32 82, i32 9)",
+        "dx.op.barrier(i32 82, i32 11)",
+        "%class.RWStructuredBuffer = type { %class.matrix.float.2.2 }\n",
+        "call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 98)",
+      },
+      {"dx.op.barrier(i32 82, i32 15)",
+        "dx.op.barrier(i32 82, i32 0)",
+        "dx.op.barrier(i32 82, i32 %rem)",
+        "%class.RWStructuredBuffer = type { %class.matrix.float.2.2 }\n"  // adjacent literals concatenate: this and the next two lines are ONE entry
+        "@dx.typevar.8 = external addrspace(1) constant %class.RWStructuredBuffer\n"
+        "@\"internalGV\" = internal global [64 x <4 x float>] undef\n",
+        "call i32 @dx.op.flattenedThreadIdInGroup.i32(i32 98)\n"  // likewise one entry spanning two lines
+        "%load = load %class.RWStructuredBuffer, %class.RWStructuredBuffer addrspace(1)* @dx.typevar.8",
+      },
+      {"Internal declaration 'internalGV' is unused",  // every message in this list is checked
+       "External declaration 'dx.typevar.8' is unused",
+       "Vector type '<4 x float>' is not allowed",
+       "Mode of Barrier must be an immediate constant",
+       "sync must include some form of memory barrier - _u (UAV) and/or _g (Thread Group Shared Memory)",
+       "sync can't specify both _ugroup and _uglobal. If both are needed, just specify _uglobal"
+      });
 }
 TEST_F(ValidationTest, CBufferLegacyOutOfBoundFail) {
-  TestCheck(L"dxil_validation\\cbuffer1.50_legacy.ll");
-}
-TEST_F(ValidationTest, CBufferOutOfBoundFail) {
-  TestCheck(L"dxil_validation\\cbuffer1.50.ll");
+  RewriteAssemblyCheckMsg(  // Bare strings bind to the ArrayRef parameters as one-element lists.
+      L"..\\CodeGenHLSL\\cbuffer1.50.hlsl", "ps_6_0",
+      "cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %Foo2_buffer, i32 0)",
+      "cbufferLoadLegacy.f32(i32 61, %dx.types.Handle %Foo2_buffer, i32 6)",  // last operand 0 -> 6 is what should trip the bounds check
+      "Cbuffer access out of bound");
 }
+
 TEST_F(ValidationTest, CsThreadSizeFail) {
-  TestCheck(L"dxil_validation\\csThreadSize.ll");
+  RewriteAssemblyCheckMsg(  // Inflates the thread-group dimensions and the shared array size.
+      L"..\\CodeGenHLSL\\share_mem1.hlsl", "cs_6_0",
+      {"!{i32 8, i32 8, i32 1",
+       "[256 x float]"},
+      {"!{i32 1025, i32 1025, i32 1025",
+       "[64000000 x float]"},
+      {"Declared Thread Group X size 1025 outside valid range",
+       "Declared Thread Group Y size 1025 outside valid range",
+       "Declared Thread Group Z size 1025 outside valid range",
+       "Declared Thread Group Count 1076890625 (X*Y*Z) is beyond the valid maximum",  // 1025^3
+       "Total Thread Group Shared Memory storage is 256000000, exceeded 32768",  // 64000000 floats * 4 bytes
+      });
 }
 TEST_F(ValidationTest, DeadLoopFail) {
-  TestCheck(L"dxil_validation\\deadloop.ll");
+  RewriteAssemblyCheckMsg(  // Regex mode: look-fors are patterns; replacements presumably expand \1 to the captured group.
+      L"..\\CodeGenHLSL\\loop1.hlsl", "ps_6_0",
+      {"br i1 %exitcond, label %for.end.loopexit, label %for.body, !llvm.loop !([0-9]+)",
+       "%add.lcssa = phi float \\[ %add, %for.body \\]",
+       "!dx.entryPoints = !\\{!([0-9]+)\\}",
+       "\\[ %add.lcssa, %for.end.loopexit \\]"
+      },
+      {"br label %for.body",  // unconditional back-edge: the loop no longer has an exit
+       "",
+       "!dx.entryPoints = !\\{!\\1\\}\n!dx.unused = !\\{!\\1\\}",
+       "[ 0.000000e+00, %for.end.loopexit ]"
+      },
+      {"Loop must have break",
+       "Named metadata 'dx.unused' is unknown",
+      },
+      /*bRegex*/true);
 }
 TEST_F(ValidationTest, EvalFail) {
-  TestCheck(L"dxil_validation\\Eval.ll");
+  RewriteAssemblyCheckMsg(  // Regex mode; changes the i8 field after the captured metadata reference from 2 to 0.
+      L"..\\CodeGenHLSL\\Eval.hlsl", "ps_6_0",
+      "!\"A\", i8 9, i8 0, !([0-9]+), i8 2, i32 1, i8 4",
+      "!\"A\", i8 9, i8 0, !\\1, i8 0, i32 1, i8 4",
+      "Interpolation mode on A used with eval_\\* instruction must be ",  // '*' is escaped because the message is matched as a regex too
+      /*bRegex*/true);
 }
 TEST_F(ValidationTest, GetDimCalcLODFail) {
-  TestCheck(L"dxil_validation\\GetDimCalcLOD.ll");
+  RewriteAssemblyCheckMsg(  // Reads Dimensions field 2 instead of 1 and turns a float operand into undef.
+      L"..\\CodeGenHLSL\\GetDimCalcLOD.hlsl", "ps_6_0",
+      {"extractvalue %dx.types.Dimensions %2, 1",
+       "float 1.000000e+00, i1 true"
+      },
+      {"extractvalue %dx.types.Dimensions %2, 2",
+       "float undef, i1 true"
+      },
+      {"GetDimensions used undef dimension z on TextureCube",
+       "coord uninitialized"});
 }
 TEST_F(ValidationTest, HsAttributeFail) {
-  TestCheck(L"dxil_validation\\hsAttribute.ll");
+  RewriteAssemblyCheckMsg(  // One metadata tuple is rewritten; all six diagnostics below are expected from it.
+      L"..\\CodeGenHLSL\\hsAttribute.hlsl", "hs_6_0",
+      {"i32 3, i32 3, i32 2, i32 3, i32 3, float 6.400000e+01"
+      },
+      {"i32 36, i32 36, i32 0, i32 0, i32 0, float 6.500000e+01"
+      },
+      {"HS input control point count must be [1..32].  36 specified",
+       "Invalid Tessellator Domain specified. Must be isoline, tri or quad",
+       "Invalid Tessellator Partitioning specified",
+       "Invalid Tessellator Output Primitive specified",
+       "Hull Shader MaxTessFactor must be [1.000000..64.000000].  65.000000 specified",
+       "output control point count must be [0..32].  36 specified"});
 }
 TEST_F(ValidationTest, InnerCoverageFail) {
-  TestCheck(L"dxil_validation\\InnerCoverage.ll");
+  RewriteAssemblyCheckMsg(  // Adds an innercoverage call and its declaration next to the existing coverage call.
+      L"..\\CodeGenHLSL\\InnerCoverage2.hlsl", "ps_6_0",
+      {"dx.op.coverage.i32(i32 93)",
+       "declare i32 @dx.op.coverage.i32(i32)"
+      },
+      {"dx.op.coverage.i32(i32 93)\n  %inner = call i32 @dx.op.innercoverage.i32(i32 94)",
+       "declare i32 @dx.op.coverage.i32(i32)\n"  // adjacent literals: this and the next line are ONE entry
+       "declare i32 @dx.op.innercoverage.i32(i32)"
+      },
+      "InnerCoverage and Coverage are mutually exclusive.");
 }
 TEST_F(ValidationTest, InterpChangeFail) {
-  TestCheck(L"dxil_validation\\interpChange.ll");
+  RewriteAssemblyCheckMsg(  // NOTE(review): bRegex is on although the look-for reads as plain text -- confirm '}' is taken literally.
+      L"..\\CodeGenHLSL\\interpChange.hlsl", "ps_6_0",
+      "i32 1, i8 0, null}",
+      "i32 0, i8 2, null}",
+      "interpolation mode that differs from another element packed",
+      /*bRegex*/true);
 }
 TEST_F(ValidationTest, InterpOnIntFail) {
-  TestCheck(L"dxil_validation\\interpOnInt.ll");
+    RewriteAssemblyCheckMsg(  // Regex mode; changes the i8 field after the captured metadata reference from 1 to 2.
+      L"..\\CodeGenHLSL\\interpOnInt2.hlsl", "ps_6_0",
+      "!\"A\", i8 5, i8 0, !([0-9]+), i8 1",
+      "!\"A\", i8 5, i8 0, !\\1, i8 2",
+      "signature element A specifies invalid interpolation mode for integer component type",
+      /*bRegex*/true);
 }
 TEST_F(ValidationTest, InvalidSigCompTyFail) {
-  TestCheck(L"dxil_validation\\invalidSigCompTy.ll");
+    RewriteAssemblyCheckMsg(  // Sets the i8 field that follows "A" to 0, which the validator should reject.
+      L"..\\CodeGenHLSL\\abs2.hlsl", "ps_6_0",
+      "!\"A\", i8 4",
+      "!\"A\", i8 0",
+      "A specifies unrecognized or invalid component type");
 }
 TEST_F(ValidationTest, MultiStream2Fail) {
-  TestCheck(L"dxil_validation\\multiStream2.ll");
+  RewriteAssemblyCheckMsg(  // Changes the fourth value of the matched tuple from 1 to 2.
+      L"..\\CodeGenHLSL\\multiStreamGS.hlsl", "gs_6_0",
+      "i32 1, i32 12, i32 7, i32 1, i32 1",
+      "i32 1, i32 12, i32 7, i32 2, i32 1",
+      "Multiple GS output streams are used but 'XXX' is not pointlist");  // NOTE(review): 'XXX' is matched literally -- confirm the validator prints this placeholder
 }
 TEST_F(ValidationTest, PhiTGSMFail) {
-  TestCheck(L"dxil_validation\\phiTGSM.ll");
+  RewriteAssemblyCheckMsg(  // Inserts a phi over two addrspace(3) pointers plus an atomicrmw on it before "ret void".
+      L"..\\CodeGenHLSL\\phiTGSM.hlsl", "cs_6_0",
+      "ret void",
+      "%arrayPhi = phi i32 addrspace(3)* [ %arrayidx, %if.then ], [ %arrayidx2, %if.else ]\n"  // three adjacent literals form ONE replacement string
+      "%phiAtom = atomicrmw add i32 addrspace(3)* %arrayPhi, i32 1 seq_cst\n"
+      "ret void",
+      "TGSM pointers must originate from an unambiguous TGSM global variable");
 }
 TEST_F(ValidationTest, ReducibleFail) {
-  TestCheck(L"dxil_validation\\reducible.ll");
+  RewriteAssemblyCheckMsg(  // Turns two unconditional branches into conditional ones targeting %if.else / %if.then.
+      L"..\\CodeGenHLSL\\reducible.hlsl", "ps_6_0",
+      {"%conv\n"  // each pair of adjacent literals is ONE entry: this list has two entries
+       "  br label %if.end",
+       "to float\n"
+       "  br label %if.end"
+      },
+      {"%conv\n"
+      "  br i1 %cmp.i0, label %if.else, label %if.end",
+       "to float\n"
+       "  br i1 %cmp.i0, label %if.then, label %if.end"
+      },
+      "Execution flow must be reducible");
 }
 TEST_F(ValidationTest, SampleBiasFail) {
-  TestCheck(L"dxil_validation\\sampleBias.ll");
+  RewriteAssemblyCheckMsg(  // Replaces the float -16 constant with 18, outside the range quoted in the expected message.
+      L"..\\CodeGenHLSL\\sampleBias.hlsl", "ps_6_0",
+      {"float -1.600000e+01"
+      },
+      {"float 1.800000e+01"
+      },
+      "bias amount for sample_b must be in the range [-16.000000,15.990000]");
 }
 TEST_F(ValidationTest, SamplerKindFail) {
-  TestCheck(L"dxil_validation\\samplerKind.ll");
+  RewriteAssemblyCheckMsg(  // Swaps the createHandle i8 operand of the UAV and the texture, and sets both samplers' last field to 3.
+      L"..\\CodeGenHLSL\\samplerKind.hlsl", "ps_6_0",
+      {"uav1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1",
+       "g_txDiffuse_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0",
+       "\"g_samLinear\", i32 0, i32 0, i32 1, i32 0",
+       "\"g_samLinearC\", i32 0, i32 1, i32 1, i32 1",
+      },
+      {"uav1_UAV_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 0",
+       "g_txDiffuse_texture_2d = call %dx.types.Handle @dx.op.createHandle(i32 59, i8 1",
+       "\"g_samLinear\", i32 0, i32 0, i32 1, i32 3",
+       "\"g_samLinearC\", i32 0, i32 1, i32 1, i32 3",
+      },
+      {"Invalid sampler mode",
+       "require sampler declared in comparison mode",
+       "requires sampler declared in default mode",
+       "should on srv resource"});
 }
 TEST_F(ValidationTest, SemaOverlapFail) {
-  TestCheck(L"dxil_validation\\semaOverlap.ll");
+  RewriteAssemblyCheckMsg(  // Regex mode; the second "A" element is rewritten to reference \2 (the first element's capture) instead of its own.
+      L"..\\CodeGenHLSL\\semaOverlap1.hlsl", "ps_6_0",
+      {"!([0-9]+) = !\\{i32 0, !\"A\", i8 9, i8 0, !([0-9]+), i8 2, i32 1, i8 4, i32 0, i8 0, null\\}\n"  // three adjacent literals form ONE pattern with five capture groups
+      "!([0-9]+) = !\\{i32 0\\}\n"
+      "!([0-9]+) = !\\{i32 1, !\"A\", i8 9, i8 0, !([0-9]+)",
+      },
+      {"!\\1 = !\\{i32 0, !\"A\", i8 9, i8 0, !\\2, i8 2, i32 1, i8 4, i32 0, i8 0, null\\}\n"
+      "!\\3 = !\\{i32 0\\}\n"
+      "!\\4 = !\\{i32 1, !\"A\", i8 9, i8 0, !\\2",
+      },
+      {"Semantic 'A' overlap at 0"},
+      /*bRegex*/true);
 }
 TEST_F(ValidationTest, SigOutOfRangeFail) {
-  TestCheck(L"dxil_validation\\sigOutOfRange.ll");
+  RewriteAssemblyCheckMsg(  // Rewrites the i32 1 in the matched tuple tail to 8000; the message reports location (8000,0).
+      L"..\\CodeGenHLSL\\semaOverlap1.hlsl", "ps_6_0",
+      {"i32 1, i8 0, null}",
+      },
+      {"i32 8000, i8 0, null}",
+      },
+      {"signature element A at location (8000,0) size (1,4) is out of range"});
 }
 TEST_F(ValidationTest, SigOverlapFail) {
-  TestCheck(L"dxil_validation\\sigOverlap.ll");
+  RewriteAssemblyCheckMsg(  // Rewrites the i32 1 in the matched tuple tail to 0; the message reports an overlap at (0,0).
+      L"..\\CodeGenHLSL\\semaOverlap1.hlsl", "ps_6_0",
+      {"i32 1, i8 0, null}",
+      },
+      {"i32 0, i8 0, null}",
+      },
+      {"signature element A at location (0,0) size (1,4) overlaps another signature element"});
 }
 TEST_F(ValidationTest, SimpleHs1Fail) {
-  TestCheck(L"dxil_validation\\SimpleHs1.ll");
+  RewriteAssemblyCheckMsg(  // Sets the second tuple value to 3000 and renames both tess-factor semantics, changing their trailing i8 to 0.
+      L"..\\CodeGenHLSL\\SimpleHs1.hlsl", "hs_6_0",
+      {"i32 3, i32 3, i32 2, i32 3, i32 3, float 6.400000e+01}",
+       "\"SV_TessFactor\", i8 9, i8 25",
+       "\"SV_InsideTessFactor\", i8 9, i8 26",
+      },
+      {"i32 3, i32 3000, i32 2, i32 3, i32 3, float 6.400000e+01}",
+       "\"TessFactor\", i8 9, i8 0",
+       "\"InsideTessFactor\", i8 9, i8 0",
+      },
+      {"output control point count must be [0..32].  3000 specified",
+       "Required TessFactor for domain not found declared anywhere in Patch Constant data",
+       // TODO: enable this after support pass thru hull shader.
+       //"For pass thru hull shader, input control point count must match output control point count",
+       //"Total number of scalars across all HS output control points must not exceed",
+      });
 }
 TEST_F(ValidationTest, SimpleHs3Fail) {
-  TestCheck(L"dxil_validation\\SimpleHs3.ll");
+  RewriteAssemblyCheckMsg(  // Changes the fifth tuple value from 3 to 2; the expected message concerns the output primitive.
+      L"..\\CodeGenHLSL\\SimpleHs3.hlsl", "hs_6_0",
+      {
+          "i32 3, i32 3, i32 2, i32 3, i32 3, float 6.400000e+01}",
+      },
+      {
+          "i32 3, i32 3, i32 2, i32 3, i32 2, float 6.400000e+01}",
+      },
+      {"Hull Shader declared with Tri Domain must specify output primitive "  // one message split across three adjacent literals
+       "point, triangle_cw or triangle_ccw. Line output is not compatible with "
+       "the Tri domain"});
 }
 TEST_F(ValidationTest, SimpleHs4Fail) {
-  TestCheck(L"dxil_validation\\SimpleHs4.ll");
+  RewriteAssemblyCheckMsg(  // Changes the fifth tuple value from 2 to 3; the expected message concerns the output primitive.
+      L"..\\CodeGenHLSL\\SimpleHs4.hlsl", "hs_6_0",
+      {
+          "i32 2, i32 2, i32 1, i32 3, i32 2, float 6.400000e+01}",
+      },
+      {
+          "i32 2, i32 2, i32 1, i32 3, i32 3, float 6.400000e+01}",
+      },
+      {"Hull Shader declared with IsoLine Domain must specify output primitive "  // one message split across three adjacent literals
+       "point or line. Triangle_cw or triangle_ccw output are not compatible "
+       "with the IsoLine Domain"});
 }
 TEST_F(ValidationTest, SimpleDs1Fail) {
-  TestCheck(L"dxil_validation\\SimpleDs1.ll");
+  RewriteAssemblyCheckMsg(  // Rewrites the {2, 3} metadata pair to {4, 36}; three diagnostics are expected from it.
+      L"..\\CodeGenHLSL\\SimpleDs1.hlsl", "ds_6_0",
+      {"!{i32 2, i32 3}"
+      },
+      {"!{i32 4, i32 36}"
+      },
+      {"DS input control point count must be [0..32].  36 specified",
+       "Invalid Tessellator Domain specified. Must be isoline, tri or quad",
+       "DomainLocation component index out of bounds for the domain"});
 }
 TEST_F(ValidationTest, SimpleGs1Fail) {
-  TestCheck(L"dxil_validation\\SimpleGs1.ll");
+  RewriteAssemblyCheckMsg(  // Rewrites the five-value GS tuple and replaces a trailing null with a new !100 node.
+      L"..\\CodeGenHLSL\\SimpleGs1.hlsl", "gs_6_0",
+      {"!{i32 1, i32 3, i32 1, i32 5, i32 1}",
+       "i8 4, i32 1, i8 4, i32 1, i8 0, null}"
+      },
+      {"!{i32 5, i32 1025, i32 1, i32 0, i32 33}",
+      "i8 4, i32 1, i8 4, i32 1, i8 0, !100}\n"  // adjacent literals: this and the next line are ONE entry defining !100
+      "!100 = !{i32 0, i32 5}"
+      },
+      {"GS output vertex count must be [0..1024].  1025 specified",
+       "GS instance count must be [1..32].  33 specified",
+       "GS output primitive topology unrecognized",
+       "GS input primitive unrecognized",
+       "Stream index (5) must between 0 and 3"});
 }
 TEST_F(ValidationTest, UavBarrierFail) {
-  TestCheck(L"dxil_validation\\uavBarrier.ll");
+  RewriteAssemblyCheckMsg(  // Changes the barrier mode, fills previously-undef textureLoad operands, and alters a store's value and i8 operand.
+      L"..\\CodeGenHLSL\\uavBarrier.hlsl", "ps_6_0",
+      {"dx.op.barrier(i32 82, i32 2)",
+       "textureLoad.f32(i32 68, %dx.types.Handle %uav1_UAV_2d, i32 undef",
+       "i32 undef, i32 undef, i32 undef, i32 undef)",
+       "float %add9.i3, i8 15)",
+      },
+      {"dx.op.barrier(i32 82, i32 9)",
+       "textureLoad.f32(i32 68, %dx.types.Handle %uav1_UAV_2d, i32 1",
+       "i32 1, i32 2, i32 undef, i32 undef)",
+       "float undef, i8 7)",
+      },
+      {"uav load don't support offset",
+       "uav load don't support mipLevel/sampleIndex",
+       "store on typed uav must write to all four components of the UAV",
+       "sync in a non-Compute Shader must only sync UAV (sync_uglobal)"});
 }
 TEST_F(ValidationTest, UndefValueFail) {
-  TestCheck(L"dxil_validation\\UndefValue.ll");
+  RewriteAssemblyCheckMsg(  // Regex mode: a numbered-value fadd operand is replaced with undef.
+      L"..\\CodeGenHLSL\\UndefValue.hlsl", "ps_6_0",
+      {"fadd fast float %([0-9]+)"
+      },
+      {"fadd fast float undef"
+      },
+      {"Instructions should not read uninitialized value"},
+      /*bRegex*/ true);
 }
 TEST_F(ValidationTest, UpdateCounterFail) {
-  TestCheck(L"dxil_validation\\UpdateCounter.ll");
+  RewriteAssemblyCheckMsg(  // Makes %2 pass i8 -1 while %3 keeps i8 1, and adds a counter update on %buf1_texture_buf with an undef increment.
+      L"..\\CodeGenHLSL\\UpdateCounter2.hlsl", "ps_6_0",
+      {"%2 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 1)",
+       "%3 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 1)"
+      },
+      {"%2 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 -1)",
+       "%3 = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf2_UAV_structbuf, i8 1)\n"  // adjacent literals: this and the next line are ONE entry
+       "%srvUpdate = call i32 @dx.op.bufferUpdateCounter(i32 72, %dx.types.Handle %buf1_texture_buf, i8 undef)"
+      },
+      {"BufferUpdateCounter valid only on UAV",
+       "BufferUpdateCounter valid only on structured buffers",
+       "inc of BufferUpdateCounter must be an immediate constant",
+       "RWStructuredBuffers may increment or decrement their counters, but not both"});
 }

 TEST_F(ValidationTest, WhenIncorrectModelThenFail) {
@ -588,8 +878,8 @@ TEST_F(ValidationTest, GsVertexIDOutOfBound) {
 TEST_F(ValidationTest, StreamIDOutOfBound) {
  RewriteAssemblyCheckMsg(
      L"..\\CodeGenHLSL\\SimpleGs1.hlsl", "gs_6_0",
-      "dx.op.emitStream(i32 97, i8 0)",
-      "dx.op.emitStream(i32 97, i8 1)", 
+      "dx.op.emitStream(i32 99, i8 0)",
+      "dx.op.emitStream(i32 99, i8 1)", 
      "expect StreamID between 0 , got 1");
 }

@ -736,32 +1026,32 @@ TEST_F(ValidationTest, StructBufGlobalCoherentAndCounter) {
 TEST_F(ValidationTest, StructBufStrideAlign) {
    RewriteAssemblyCheckMsg(
      L"..\\CodeGenHLSL\\struct_buf1.hlsl", "ps_6_0",
-      "!7 = !{i32 1, i32 52}",
-      "!7 = !{i32 1, i32 50}",
+      "= !{i32 1, i32 52}",
+      "= !{i32 1, i32 50}",
      "structured buffer element size must be a multiple of 4 bytes (actual size 50 bytes)");
 }

 TEST_F(ValidationTest, StructBufStrideOutOfBound) {
    RewriteAssemblyCheckMsg(
      L"..\\CodeGenHLSL\\struct_buf1.hlsl", "ps_6_0",
-      "!7 = !{i32 1, i32 52}",
-      "!7 = !{i32 1, i32 2052}",
+      "= !{i32 1, i32 52}",
+      "= !{i32 1, i32 2052}",
      "structured buffer elements cannot be larger than 2048 bytes (actual size 2052 bytes)");
 }

 TEST_F(ValidationTest, StructBufLoadCoordinates) {
    RewriteAssemblyCheckMsg(
      L"..\\CodeGenHLSL\\struct_buf1.hlsl", "ps_6_0",
-      "bufferLoad.f32(i32 69, %dx.types.Handle %buf1_texture_structbuf, i32 1, i32 8)",
-      "bufferLoad.f32(i32 69, %dx.types.Handle %buf1_texture_structbuf, i32 1, i32 undef)",
+      "bufferLoad.f32(i32 70, %dx.types.Handle %buf1_texture_structbuf, i32 1, i32 8)",
+      "bufferLoad.f32(i32 70, %dx.types.Handle %buf1_texture_structbuf, i32 1, i32 undef)",
      "structured buffer require 2 coordinates");
 }

 TEST_F(ValidationTest, StructBufStoreCoordinates) {
    RewriteAssemblyCheckMsg(
      L"..\\CodeGenHLSL\\struct_buf1.hlsl", "ps_6_0",
-      "bufferStore.f32(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i32 0, i32 0",
-      "bufferStore.f32(i32 70, %dx.types.Handle %buf2_UAV_structbuf, i32 0, i32 undef",
+      "bufferStore.f32(i32 71, %dx.types.Handle %buf2_UAV_structbuf, i32 0, i32 0",
+      "bufferStore.f32(i32 71, %dx.types.Handle %buf2_UAV_structbuf, i32 0, i32 undef",
      "structured buffer require 2 coordinates");
 }

@ -864,17 +1154,19 @@ TEST_F(ValidationTest, PsOutputSemantic) {
 TEST_F(ValidationTest, ArrayOfSVTarget) {
    RewriteAssemblyCheckMsg(
      L"..\\CodeGenHLSL\\targetArray.hlsl", "ps_6_0",
-      "i32 6, !\"SV_Target\", i8 9, i8 16, !32, i8 0, i32 1",
-      "i32 6, !\"SV_Target\", i8 9, i8 16, !32, i8 0, i32 2",
-      "Pixel shader output registers are not indexable.");
+      "i32 6, !\"SV_Target\", i8 9, i8 16, !([0-9]+), i8 0, i32 1",
+      "i32 6, !\"SV_Target\", i8 9, i8 16, !\\1, i8 0, i32 2",
+      "Pixel shader output registers are not indexable.",
+      /*bRegex*/true);
 }

 TEST_F(ValidationTest, InfiniteLog) {
    RewriteAssemblyCheckMsg(
      L"..\\CodeGenHLSL\\intrinsic_val_imm.hlsl", "ps_6_0",
-      "op.unary.f32(i32 22, float %1)",
-      "op.unary.f32(i32 22, float 0x7FF0000000000000)",
-      "No indefinite logarithm");
+      "op.unary.f32\\(i32 23, float %[0-9]+\\)",        // regex: quantifier outside the class so multi-digit SSA names (%10, %123) match too
+      "op.unary.f32(i32 23, float 0x7FF0000000000000)", // replacement: feed +infinity to the unary op
+      "No indefinite logarithm",
+      /*bRegex*/true);
 }

 TEST_F(ValidationTest, InfiniteAsin) {
@ -896,9 +1188,10 @@ TEST_F(ValidationTest, InfiniteAcos) {
 TEST_F(ValidationTest, InfiniteDdxDdy) {
    RewriteAssemblyCheckMsg(
      L"..\\CodeGenHLSL\\intrinsic_val_imm.hlsl", "ps_6_0",
-      "op.unary.f32(i32 86, float %1)",
-      "op.unary.f32(i32 86, float 0x7FF0000000000000)",
-      "No indefinite derivative calculation");
+      "op.unary.f32\\(i32 85, float %[0-9]+\\)",
+      "op.unary.f32(i32 85, float 0x7FF0000000000000)",
+      "No indefinite derivative calculation",
+      /*bRegex*/true);
 }

 TEST_F(ValidationTest, IDivByZero) {
@ -981,9 +1274,19 @@ TEST_F(ValidationTest, MultiDimArray) {

 TEST_F(ValidationTest, NoFunctionParam) {
  RewriteAssemblyCheckMsg(L"..\\CodeGenHLSL\\abs2.hlsl", "ps_6_0",
-                          {"define void @main()", "void ()* @main", "!5 = !{!6}"},
-                          {"define void @main(<4 x i32> %mainArg)", "void (<4 x i32>)* @main", "!5 = !{!6, !6}"},
-                          "with parameter is not permitted");
+    {"define void @main\\(\\)",               "void \\(\\)\\* @main, !([0-9]+)\\}(.*)!\\1 = !\\{!([0-9]+)\\}",  "void \\(\\)\\* @main"},
+    {"define void @main(<4 x i32> %mainArg)", "void (<4 x i32>)* @main, !\\1}\\2!\\1 = !{!\\3, !\\3}",          "void (<4 x i32>)* @main"},
+    "with parameter is not permitted",
+    /*bRegex*/true);
+}
+
+TEST_F(ValidationTest, I8Type) {
+  RewriteAssemblyCheckMsg(L"..\\CodeGenHLSL\\staticGlobals.hlsl", "ps_6_0",
+                          "%([0-9]+) = alloca \\[4 x float\\]",
+                          "%\\1 = alloca [4 x float]\n"
+                          "  %m8 = alloca i8",
+                          "I8 can only used as immediate value for intrinsic",
+    /*bRegex*/true);
 }

 TEST_F(ValidationTest, WhenWaveAffectsGradientThenFail) {
@ -1044,7 +1347,7 @@ HSPerVertexData main( const uint id : SV_OutputControlPointID,\
    ",
      "hs_6_0", 
      "dx.op.storeOutput.f32(i32 5",
-      "dx.op.storePatchConstant.f32(i32 109",
+      "dx.op.storePatchConstant.f32(i32 108",
      "opcode 'StorePatchConstant' should only used in 'PatchConstant function'");
 }

@ -1095,7 +1398,7 @@ HSPerVertexData main( const uint id : SV_OutputControlPointID,\
    ",
      "hs_6_0",
      "dx.op.loadInput.f32(i32 4",
-      "dx.op.loadOutputControlPoint.f32(i32 106",
+      "dx.op.loadOutputControlPoint.f32(i32 105",
      "opcode 'LoadOutputControlPoint' should only used in 'PatchConstant function'");
 }

@ -1146,7 +1449,7 @@ HSPerVertexData main( const uint id : SV_OutputControlPointID,\
    ",
      "hs_6_0",
      "ret void",
-      "call i32 @dx.op.outputControlPointID.i32(i32 110)\n ret void",
+      "call i32 @dx.op.outputControlPointID.i32(i32 109)\n ret void",
      "opcode 'OutputControlPointID' should only used in 'hull function'");
 }

@ -1643,13 +1946,13 @@ void main( \
    ",
    "vs_6_0",

-    "!{i32 1, !\"Array\", i8 5, i8 0, !([0-9]+), i8 1, i32 2, i8 1, i32 1, i8 0, null}\n"
-    "!17 = !{i32 0, i32 1}\n"
-    "!([0-9]+) = !{i32 2, !\"Value\", i8 5, i8 0, !([0-9]+), i8 1, i32 1, i8 3, i32 1, i8 1, null}",
+    {"!{i32 1, !\"Array\", i8 5, i8 0, !([0-9]+), i8 1, i32 2, i8 1, i32 1, i8 0, null}(.*)"
+    "!\\1 = !{i32 0, i32 1}\n",
+    "= !{i32 2, !\"Value\", i8 5, i8 0, !([0-9]+), i8 1, i32 1, i8 3, i32 1, i8 1, null}"},

-    "!{i32 1, !\"Array\", i8 5, i8 0, !\\1, i8 1, i32 2, i8 1, i32 1, i8 1, null}\n"
-    "!17 = !{i32 0, i32 1}\n"
-    "!\\2 = !{i32 2, !\"Value\", i8 5, i8 0, !\\3, i8 1, i32 1, i8 3, i32 2, i8 0, null}",
+    {"!{i32 1, !\"Array\", i8 5, i8 0, !\\1, i8 1, i32 2, i8 1, i32 1, i8 1, null}\\2"
+    "!\\1 = !{i32 0, i32 1}\n",
+    "= !{i32 2, !\"Value\", i8 5, i8 0, !\\1, i8 1, i32 1, i8 3, i32 2, i8 0, null}"},

    "signature element Value at location \\(2,0\\) size \\(1,3\\) overlaps another signature element.",
    /*bRegex*/true);
@ -1657,18 +1960,12 @@ void main( \

 TEST_F(ValidationTest, SemMultiDepth) {
  RewriteAssemblyCheckMsg(" \
-float4 main(float4 f4 : Input, out float d0 : SV_Depth, out float d1 : SV_Target1) : SV_Target \
+float4 main(float4 f4 : Input, out float d0 : SV_Depth, out float d1 : SV_Target) : SV_Target1 \
 { d0 = f4.z; d1 = f4.w; return f4; } \
    ",
    "ps_6_0",
-
-    "!{i32 1, !\"SV_Target\", i8 9, i8 16, !([0-9]+), i8 0, i32 1, i8 1, i32 1, i8 0, null}\n"
-    "!16 = !{i32 1}\n"
-    "!([0-9]+) = !{i32 2, !\"SV_Target\", i8 9, i8 16, !([0-9]+), i8 0, i32 1, i8 4, i32 0, i8 0, null}",
-
-    "!{i32 1, !\"SV_DepthGreaterEqual\", i8 9, i8 19, !\\3, i8 0, i32 1, i8 1, i32 -1, i8 -1, null}\n"
-    "!\\2 = !{i32 2, !\"SV_Target\", i8 9, i8 16, !\\3, i8 0, i32 1, i8 4, i32 0, i8 0, null}",
-
+    {"!{i32 1, !\"SV_Target\", i8 9, i8 16, !([0-9]+), i8 0, i32 1, i8 1, i32 0, i8 0, null}"},
+    {"!{i32 1, !\"SV_DepthGreaterEqual\", i8 9, i8 19, !\\1, i8 0, i32 1, i8 1, i32 -1, i8 -1, null}"},
    "Pixel Shader only allows one type of depth semantic to be declared",
    /*bRegex*/true);
 }
--- a/utils/hct/hctdb.py
+++ b/utils/hct/hctdb.py
@ -246,7 +246,7 @@ class db_dxil(object):
        for i in "ThreadId,GroupId,ThreadIdInGroup,FlattenedThreadIdInGroup".split(","):
            self.name_idx[i].category = "Compute shader"
            self.name_idx[i].shader_models = "c"
-        for i in "EmitStream,CutStream,EmitThenCutStream".split(","):
+        for i in "EmitStream,CutStream,EmitThenCutStream,GSInstanceID".split(","):
            self.name_idx[i].category = "Geometry shader"
            self.name_idx[i].shader_models = "g"
        for i in "LoadOutputControlPoint,LoadPatchConstant".split(","):
@ -262,9 +262,6 @@ class db_dxil(object):
            self.name_idx[i].category = "Double precision"
        for i in "CycleCounterLegacy".split(","):
            self.name_idx[i].category = "Other"
-        for i in "GSInstanceID".split(","):
-            self.name_idx[i].category = "GS"
-            self.name_idx[i].shader_models = "g"
        for i in "LegacyF32ToF16,LegacyF16ToF32".split(","):
            self.name_idx[i].category = "Legacy floating-point"
        for i in self.instr:
@ -360,39 +357,45 @@ class db_dxil(object):
        # overload types are a string of (v)oid, (h)alf, (f)loat, (d)ouble, (1)-bit, (8)-bit, (w)ord, (i)nt, (l)ong
        self.opcode_param = db_dxil_param(1, "i32", "opcode", "DXIL opcode")
        retvoid_param = db_dxil_param(0, "v", "", "no return value")
-        self.add_dxil_op("TempRegLoad", 0, "TempRegLoad", "helper load operation", "hfwi", "ro", [
+        next_op_idx = 0
+        self.add_dxil_op("TempRegLoad", next_op_idx, "TempRegLoad", "helper load operation", "hfwi", "ro", [
            db_dxil_param(0, "$o", "", "register value"),
            db_dxil_param(2, "u32", "index", "linearized register index")])
-        self.add_dxil_op("TempRegStore", 1, "TempRegStore", "helper store operation", "hfwi", "", [
+        next_op_idx += 1
+        self.add_dxil_op("TempRegStore", next_op_idx, "TempRegStore", "helper store operation", "hfwi", "", [
            retvoid_param,
            db_dxil_param(2, "u32", "index", "linearized register index"),
            db_dxil_param(3, "$o", "value", "value to store")])
-        self.add_dxil_op("MinPrecXRegLoad", 2, "MinPrecXRegLoad", "helper load operation for minprecision", "hw", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("MinPrecXRegLoad", next_op_idx, "MinPrecXRegLoad", "helper load operation for minprecision", "hw", "ro", [
            db_dxil_param(0, "$o", "", "register value"),
            db_dxil_param(2, "pf32", "regIndex", "pointer to indexable register"),
            db_dxil_param(3, "i32", "index", "index"),
            db_dxil_param(4, "u8", "component", "component")])
-        self.add_dxil_op("MinPrecXRegStore", 3, "MinPrecXRegStore", "helper store operation for minprecision", "hw", "", [
+        next_op_idx += 1
+        self.add_dxil_op("MinPrecXRegStore", next_op_idx, "MinPrecXRegStore", "helper store operation for minprecision", "hw", "", [
            retvoid_param,
            db_dxil_param(2, "pf32", "regIndex", "pointer to indexable register"),
            db_dxil_param(3, "i32", "index", "index"),
            db_dxil_param(4, "u8", "component", "component"),
            db_dxil_param(5, "$o", "value", "value to store")])
-        self.add_dxil_op("LoadInput", 4, "LoadInput", "loads the value from shader input", "hfwi", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("LoadInput", next_op_idx, "LoadInput", "loads the value from shader input", "hfwi", "rn", [
            db_dxil_param(0, "$o", "", "input value"),
            db_dxil_param(2, "u32", "inputSigId", "input signature element ID"),
            db_dxil_param(3, "u32", "rowIndex", "row index relative to element"),
            db_dxil_param(4, "u8", "colIndex", "column index relative to element"),
            db_dxil_param(5, "i32", "gsVertexAxis", "gsVertexAxis")])
-        self.add_dxil_op("StoreOutput", 5, "StoreOutput", "stores the value to shader output", "hfwi", "", [ # note, cannot store bit even though load supports it
+        next_op_idx += 1
+        self.add_dxil_op("StoreOutput", next_op_idx, "StoreOutput", "stores the value to shader output", "hfwi", "", [ # note, cannot store bit even though load supports it
            retvoid_param,
            db_dxil_param(2, "u32", "outputtSigId", "output signature element ID"),
            db_dxil_param(3, "u32", "rowIndex", "row index relative to element"),
            db_dxil_param(4, "u8", "colIndex", "column index relative to element"),
            db_dxil_param(5, "$o", "value", "value to store")])
+        next_op_idx += 1

        # Unary float operations are regular.
-        next_op_idx = 6
        for i in "FAbs,Saturate".split(","):
            self.add_dxil_op(i, next_op_idx, "Unary", "returns the " + i, "hfd", "rn", [
                db_dxil_param(0, "$o", "", "operation result"),
@ -403,12 +406,11 @@ class db_dxil(object):
                db_dxil_param(0, "i1", "", "operation result"),
                db_dxil_param(2, "$o", "value", "input value")])
            next_op_idx += 1
-        for i in "Cos,Sin,Tan,Acos,Asin,Atan,Hcos,Hsin,Exp,Frc,Log,Sqrt,Rsqrt,Round_ne,Round_ni,Round_pi,Round_z".split(","):
+        for i in "Cos,Sin,Tan,Acos,Asin,Atan,Hcos,Hsin,Htan,Exp,Frc,Log,Sqrt,Rsqrt,Round_ne,Round_ni,Round_pi,Round_z".split(","):
            self.add_dxil_op(i, next_op_idx, "Unary", "returns the " + i, "hf", "rn", [
                db_dxil_param(0, "$o", "", "operation result"),
                db_dxil_param(2, "$o", "value", "input value")])
            next_op_idx += 1
-        # HTan is in this category but is out of order.

        # Unary int operations are regular.
        for i in "Bfrev".split(","):
@ -459,20 +461,20 @@ class db_dxil(object):
            next_op_idx += 1

        # Tertiary float.
-        assert next_op_idx == 47, "next operation index is %d rather than 47 and thus opcodes are broken" % next_op_idx
-        self.add_dxil_op("FMad", 47, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", "hfd", "rn", [
+        self.add_dxil_op("FMad", next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", "hfd", "rn", [
            db_dxil_param(0, "$o", "", "the fused multiply-addition of parameters a * b + c"),
            db_dxil_param(2, "$o", "a", "first value for FMA, the first factor"),
            db_dxil_param(3, "$o", "b", "second value for FMA, the second factor"),
            db_dxil_param(4, "$o", "c", "third value for FMA, the addend")])
-        self.add_dxil_op("Fma", 48, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", "d", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("Fma", next_op_idx, "Tertiary", "performs a fused multiply add (FMA) of the form a * b + c", "d", "rn", [
            db_dxil_param(0, "$o", "", "the double-precision fused multiply-addition of parameters a * b + c, accurate to 0.5 units of least precision (ULP)"),
            db_dxil_param(2, "$o", "a", "first value for FMA, the first factor"),
            db_dxil_param(3, "$o", "b", "second value for FMA, the second factor"),
            db_dxil_param(4, "$o", "c", "third value for FMA, the addend")])
+        next_op_idx += 1

        # Tertiary int.
-        next_op_idx = 49
        for i in "IMad,UMad".split(","):
            self.add_dxil_op(i, next_op_idx, "Tertiary", "performs an integral " + i, "wil", "rn", [
                db_dxil_param(0, "$o", "", "the operation result"),
@ -489,22 +491,23 @@ class db_dxil(object):
            next_op_idx += 1

        # Quaternary
-        assert next_op_idx == 54, "next operation index is %d rather than 54 and thus opcodes are broken" % next_op_idx
-        self.add_dxil_op("Bfi", 54, "Quaternary", "given a bit range from the LSB of a number, places that number of bits in another number at any offset", "i", "rn", [
+        self.add_dxil_op("Bfi", next_op_idx, "Quaternary", "given a bit range from the LSB of a number, places that number of bits in another number at any offset", "i", "rn", [
            db_dxil_param(0, "$o", "", "the operation result"),
            db_dxil_param(2, "$o", "width", "the bitfield width to take from the value"),
            db_dxil_param(3, "$o", "offset", "the bitfield offset to replace in the value"),
            db_dxil_param(4, "$o", "value", "the number the bits are taken from"),
            db_dxil_param(5, "$o", "replaceCount", "the number of bits to be replaced")])
+        next_op_idx += 1

        # Dot.
-        self.add_dxil_op("Dot2", 55, "Dot2", "two-dimensional vector dot-product", "hf", "rn", [
+        self.add_dxil_op("Dot2", next_op_idx, "Dot2", "two-dimensional vector dot-product", "hf", "rn", [
            db_dxil_param(0, "$o", "", "the operation result"),
            db_dxil_param(2, "$o", "ax", "the first component of the first vector"),
            db_dxil_param(3, "$o", "ay", "the second component of the first vector"),
            db_dxil_param(4, "$o", "bx", "the first component of the second vector"),
            db_dxil_param(5, "$o", "by", "the second component of the second vector")])
-        self.add_dxil_op("Dot3", 56, "Dot3", "three-dimensional vector dot-product", "hf", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("Dot3", next_op_idx, "Dot3", "three-dimensional vector dot-product", "hf", "rn", [
            db_dxil_param(0, "$o", "", "the operation result"),
            db_dxil_param(2, "$o", "ax", "the first component of the first vector"),
            db_dxil_param(3, "$o", "ay", "the second component of the first vector"),
@ -512,7 +515,8 @@ class db_dxil(object):
            db_dxil_param(5, "$o", "bx", "the first component of the second vector"),
            db_dxil_param(6, "$o", "by", "the second component of the second vector"),
            db_dxil_param(7, "$o", "bz", "the third component of the second vector")])
-        self.add_dxil_op("Dot4", 57, "Dot4", "four-dimensional vector dot-product", "hf", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("Dot4", next_op_idx, "Dot4", "four-dimensional vector dot-product", "hf", "rn", [
            db_dxil_param(0, "$o", "", "the operation result"),
            db_dxil_param(2, "$o", "ax", "the first component of the first vector"),
            db_dxil_param(3, "$o", "ay", "the second component of the first vector"),
@ -522,24 +526,28 @@ class db_dxil(object):
            db_dxil_param(7, "$o", "by", "the second component of the second vector"),
            db_dxil_param(8, "$o", "bz", "the third component of the second vector"),
            db_dxil_param(9, "$o", "bw", "the fourth component of the second vector")])
+        next_op_idx += 1

        # Resources.
-        self.add_dxil_op("CreateHandle", 58, "CreateHandle", "creates the handle to a resource", "v", "ro", [
+        self.add_dxil_op("CreateHandle", next_op_idx, "CreateHandle", "creates the handle to a resource", "v", "ro", [
            db_dxil_param(0, "res", "", "the handle to the resource"),
            db_dxil_param(2, "i8", "resourceClass", "the class of resource to create (SRV, UAV, CBuffer, Sampler)", is_const=True), # maps to DxilResourceBase::Class
            db_dxil_param(3, "i32", "rangeId", "range identifier for resource"),
            db_dxil_param(4, "i32", "index", "zero-based index into range"),
            db_dxil_param(5, "i1", "nonUniformIndex", "non-uniform resource index", is_const=True)])
-        self.add_dxil_op("CBufferLoad", 59, "CBufferLoad", "loads a value from a constant buffer resource", "hfd8wil", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("CBufferLoad", next_op_idx, "CBufferLoad", "loads a value from a constant buffer resource", "hfd8wil", "ro", [
            db_dxil_param(0, "$o", "", "the value for the constant buffer variable"),
            db_dxil_param(2, "res", "handle", "cbuffer handle"),
            db_dxil_param(3, "u32", "byteOffset", "linear byte offset of value"),
            db_dxil_param(4, "u32", "alignment", "load access alignment", is_const=True)])
-        self.add_dxil_op("CBufferLoadLegacy", 60, "CBufferLoadLegacy", "loads a value from a constant buffer resource", "hfdwi", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("CBufferLoadLegacy", next_op_idx, "CBufferLoadLegacy", "loads a value from a constant buffer resource", "hfdwi", "ro", [
            db_dxil_param(0, "$cb", "", "the value for the constant buffer variable"),
            db_dxil_param(2, "res", "handle", "cbuffer handle"),
            db_dxil_param(3, "u32", "regIndex", "0-based index into cbuffer instance")])
-        self.add_dxil_op("Sample", 61, "Sample", "samples a texture", "hf", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("Sample", next_op_idx, "Sample", "samples a texture", "hf", "ro", [
            db_dxil_param(0, "$r", "", "the sampled value"),
            db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
            db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
@ -551,7 +559,8 @@ class db_dxil(object):
            db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
            db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
            db_dxil_param(11, "f", "clamp", "clamp value")])
-        self.add_dxil_op("SampleBias", 62, "SampleBias", "samples a texture after applying the input bias to the mipmap level", "hf", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("SampleBias", next_op_idx, "SampleBias", "samples a texture after applying the input bias to the mipmap level", "hf", "ro", [
            db_dxil_param(0, "$r", "", "the sampled value"),
            db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
            db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
@ -564,7 +573,8 @@ class db_dxil(object):
            db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
            db_dxil_param(11, "f", "bias", "bias value"),
            db_dxil_param(12, "f", "clamp", "clamp value")])
-        self.add_dxil_op("SampleLevel", 63, "SampleLevel", "samples a texture using a mipmap-level offset", "hf", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("SampleLevel", next_op_idx, "SampleLevel", "samples a texture using a mipmap-level offset", "hf", "ro", [
            db_dxil_param(0, "$r", "", "the sampled value"),
            db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
            db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
@ -576,7 +586,8 @@ class db_dxil(object):
            db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
            db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
            db_dxil_param(11, "f", "LOD", "level of detail, biggest map if less than or equal to zero; fraction used to interpolate across levels")])
-        self.add_dxil_op("SampleGrad", 64, "SampleGrad", "samples a texture using a gradient to influence the way the sample location is calculated", "hf", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("SampleGrad", next_op_idx, "SampleGrad", "samples a texture using a gradient to influence the way the sample location is calculated", "hf", "ro", [
            db_dxil_param(0, "$r", "", "the sampled value"),
            db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
            db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
@ -594,7 +605,8 @@ class db_dxil(object):
            db_dxil_param(15, "f", "ddy1", "rate of change of the texture coordinate in the y direction"),
            db_dxil_param(16, "f", "ddy2", "rate of change of the texture coordinate in the y direction"),
            db_dxil_param(17, "f", "clamp", "clamp value")])
-        self.add_dxil_op("SampleCmp", 65, "SampleCmp", "samples a texture and compares a single component against the specified comparison value", "hf", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("SampleCmp", next_op_idx, "SampleCmp", "samples a texture and compares a single component against the specified comparison value", "hf", "ro", [
            db_dxil_param(0, "$r", "", "the value for the constant buffer variable"),
            db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
            db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
@ -607,7 +619,8 @@ class db_dxil(object):
            db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
            db_dxil_param(11, "f", "compareValue", "the value to compare with"),
            db_dxil_param(12, "f", "clamp", "clamp value")])
-        self.add_dxil_op("SampleCmpLevelZero", 66, "SampleCmpLevelZero", "samples a texture and compares a single component against the specified comparison value", "hf", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("SampleCmpLevelZero", next_op_idx, "SampleCmpLevelZero", "samples a texture and compares a single component against the specified comparison value", "hf", "ro", [
            db_dxil_param(0, "$r", "", "the value for the constant buffer variable"),
            db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
            db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
@ -619,7 +632,8 @@ class db_dxil(object):
            db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
            db_dxil_param(10, "i32", "offset2", "optional offset, applicable to Texture3D"),
            db_dxil_param(11, "f", "compareValue", "the value to compare with")])
-        self.add_dxil_op("TextureLoad", 67, "TextureLoad", "reads texel data without any filtering or sampling", "hfwi", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("TextureLoad", next_op_idx, "TextureLoad", "reads texel data without any filtering or sampling", "hfwi", "ro", [
            db_dxil_param(0, "$r", "", "the loaded value"),
            db_dxil_param(2, "res", "srv", "handle of SRV or UAV to sample"),
            db_dxil_param(3, "i32", "mipLevelOrSampleCount", "sample count for Texture2DMS, mip level otherwise"),
@ -629,7 +643,8 @@ class db_dxil(object):
            db_dxil_param(7, "i32", "offset0", "optional offset"),
            db_dxil_param(8, "i32", "offset1", "optional offset"),
            db_dxil_param(9, "i32", "offset2", "optional offset")])
-        self.add_dxil_op("TextureStore", 68, "TextureStore", "reads texel data without any filtering or sampling", "hfwi", "", [
+        next_op_idx += 1
+        self.add_dxil_op("TextureStore", next_op_idx, "TextureStore", "reads texel data without any filtering or sampling", "hfwi", "", [
            db_dxil_param(0, "v", "", ""),
            db_dxil_param(2, "res", "srv", "handle of UAV to store to"),
            db_dxil_param(3, "i32", "coord0", "coordinate"),
@ -640,12 +655,14 @@ class db_dxil(object):
            db_dxil_param(8, "$o", "value2", "value"),
            db_dxil_param(9, "$o", "value3", "value"),
            db_dxil_param(10,"i8", "mask", "written value mask")])
-        self.add_dxil_op("BufferLoad", 69, "BufferLoad", "reads from a TypedBuffer", "hfwil", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("BufferLoad", next_op_idx, "BufferLoad", "reads from a TypedBuffer", "hfwil", "ro", [
            db_dxil_param(0, "$r", "", "the loaded value"),
            db_dxil_param(2, "res", "srv", "handle of TypedBuffer SRV to sample"),
            db_dxil_param(3, "i32", "index", "element index"),
            db_dxil_param(4, "i32", "wot", "coordinate")])
-        self.add_dxil_op("BufferStore", 70, "BufferStore", "writes to a RWTypedBuffer", "hfwil", "", [
+        next_op_idx += 1
+        self.add_dxil_op("BufferStore", next_op_idx, "BufferStore", "writes to a RWTypedBuffer", "hfwil", "", [
            db_dxil_param(0, "v", "", ""),
            db_dxil_param(2, "res", "uav", "handle of UAV to store to"),
            db_dxil_param(3, "i32", "coord0", "coordinate in elements"),
@ -655,18 +672,22 @@ class db_dxil(object):
            db_dxil_param(7, "$o", "value2", "value"),
            db_dxil_param(8, "$o", "value3", "value"),
            db_dxil_param(9, "i8", "mask", "written value mask")])
-        self.add_dxil_op("BufferUpdateCounter", 71, "BufferUpdateCounter", "atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV", "v", "", [
+        next_op_idx += 1
+        self.add_dxil_op("BufferUpdateCounter", next_op_idx, "BufferUpdateCounter", "atomically increments/decrements the hidden 32-bit counter stored with a Count or Append UAV", "v", "", [
            db_dxil_param(0, "i32", "", "the new value in the buffer"),
            db_dxil_param(2, "res", "uav", "handle to a structured buffer UAV with the count or append flag"),
            db_dxil_param(3, "i8", "inc", "1 to increase, 0 to decrease")])
-        self.add_dxil_op("CheckAccessFullyMapped", 72, "CheckAccessFullyMapped", "determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource", "i", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("CheckAccessFullyMapped", next_op_idx, "CheckAccessFullyMapped", "determines whether all values from a Sample, Gather, or Load operation accessed mapped tiles in a tiled resource", "i", "ro", [
            db_dxil_param(0, "i1", "", "nonzero if all values accessed mapped tiles in a tiled resource"),
            db_dxil_param(2, "u32", "status", "status result from the Sample, Gather or Load operation")])
-        self.add_dxil_op("GetDimensions", 73, "GetDimensions", "gets texture size information", "v", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("GetDimensions", next_op_idx, "GetDimensions", "gets texture size information", "v", "ro", [
            db_dxil_param(0, "dims", "", "dimension information for texture"),
            db_dxil_param(2, "res", "handle", "resource handle to query"),
            db_dxil_param(3, "i32", "mipLevel", "mip level to query")])
-        self.add_dxil_op("TextureGather", 74, "TextureGather", "gathers the four texels that would be used in a bi-linear filtering operation", "fi", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("TextureGather", next_op_idx, "TextureGather", "gathers the four texels that would be used in a bi-linear filtering operation", "fi", "ro", [
            db_dxil_param(0, "$r", "", "dimension information for texture"),
            db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
            db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
@ -677,7 +698,8 @@ class db_dxil(object):
            db_dxil_param(8, "i32", "offset0", "optional offset, applicable to Texture1D, Texture1DArray, and as part of offset1"),
            db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
            db_dxil_param(10, "i32", "channel", "channel to sample")])
-        self.add_dxil_op("TextureGatherCmp", 75, "TextureGatherCmp", "same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp", "fi", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("TextureGatherCmp", next_op_idx, "TextureGatherCmp", "same as TextureGather, except this instrution performs comparison on texels, similar to SampleCmp", "fi", "ro", [
            db_dxil_param(0, "$r", "", "gathered texels"),
            db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
            db_dxil_param(3, "res", "sampler", "handle of sampler to use"),
@ -689,20 +711,23 @@ class db_dxil(object):
            db_dxil_param(9, "i32", "offset1", "optional offset, applicable to Texture2D, Texture2DArray, and as part of offset2"),
            db_dxil_param(10, "i32", "channel", "channel to sample"),
            db_dxil_param(11, "f", "compareVale", "value to compare with")])
-        self.add_dxil_op_reserved("ToDelete5", 76)
-        self.add_dxil_op_reserved("ToDelete6", 77)
-        self.add_dxil_op("Texture2DMSGetSamplePosition", 78, "Texture2DMSGetSamplePosition", "gets the position of the specified sample", "v", "ro", [
+        next_op_idx += 1
+
+        self.add_dxil_op("Texture2DMSGetSamplePosition", next_op_idx, "Texture2DMSGetSamplePosition", "gets the position of the specified sample", "v", "ro", [
            db_dxil_param(0, "SamplePos", "", "sample position"),
            db_dxil_param(2, "res", "srv", "handle of SRV to sample"),
            db_dxil_param(3, "i32", "index", "zero-based sample index")])
-        self.add_dxil_op("RenderTargetGetSamplePosition", 79, "RenderTargetGetSamplePosition", "gets the position of the specified sample", "v", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("RenderTargetGetSamplePosition", next_op_idx, "RenderTargetGetSamplePosition", "gets the position of the specified sample", "v", "ro", [
            db_dxil_param(0, "SamplePos", "", "sample position"),
            db_dxil_param(2, "i32", "index", "zero-based sample index")])
-        self.add_dxil_op("RenderTargetGetSampleCount", 80, "RenderTargetGetSampleCount", "gets the number of samples for a render target", "v", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("RenderTargetGetSampleCount", next_op_idx, "RenderTargetGetSampleCount", "gets the number of samples for a render target", "v", "ro", [
            db_dxil_param(0, "u32", "", "number of sampling locations for a render target")])
+        next_op_idx += 1

        # Atomics. Note that on TGSM, atomics are performed with LLVM instructions.
-        self.add_dxil_op("AtomicBinOp", 81, "AtomicBinOp", "performs an atomic operation on two operands", "i", "", [
+        self.add_dxil_op("AtomicBinOp", next_op_idx, "AtomicBinOp", "performs an atomic operation on two operands", "i", "", [
            db_dxil_param(0, "i32", "", "the original value in the location updated"),
            db_dxil_param(2, "res", "handle", "typed int or uint UAV handle"),
            db_dxil_param(3, "i32", "atomicOp", "atomic operation as per DXIL::AtomicBinOpCode"),
@ -710,7 +735,8 @@ class db_dxil(object):
            db_dxil_param(5, "i32", "offset1", "offset"),
            db_dxil_param(6, "i32", "offset2", "offset"),
            db_dxil_param(7, "i32", "newValue", "new value")])
-        self.add_dxil_op("AtomicCompareExchange", 82, "AtomicCompareExchange", "atomic compare and exchange to memory", "i", "", [
+        next_op_idx += 1
+        self.add_dxil_op("AtomicCompareExchange", next_op_idx, "AtomicCompareExchange", "atomic compare and exchange to memory", "i", "", [
            db_dxil_param(0, "i32", "", "the original value in the location updated"),
            db_dxil_param(2, "res", "handle", "typed int or uint UAV handle"),
            db_dxil_param(3, "i32", "offset0", "offset in elements"),
@ -718,14 +744,16 @@ class db_dxil(object):
            db_dxil_param(5, "i32", "offset2", "offset"),
            db_dxil_param(6, "i32", "compareValue", "value to compare for exchange"),
            db_dxil_param(7, "i32", "newValue", "new value")])
+        next_op_idx += 1

        # Synchronization.
-        self.add_dxil_op("Barrier", 83, "Barrier", "inserts a memory barrier in the shader", "v", "", [
+        self.add_dxil_op("Barrier", next_op_idx, "Barrier", "inserts a memory barrier in the shader", "v", "", [
            retvoid_param,
            db_dxil_param(2, "i32", "barrierMode", "a mask of DXIL::BarrierMode values", is_const=True)])
+        next_op_idx += 1

        # Pixel shader
-        self.add_dxil_op("CalculateLOD", 84, "CalculateLOD", "calculates the level of detail", "f", "ro", [
+        self.add_dxil_op("CalculateLOD", next_op_idx, "CalculateLOD", "calculates the level of detail", "f", "ro", [
            db_dxil_param(0, "f", "", "level of detail"),
            db_dxil_param(2, "res", "handle", "resource handle"),
            db_dxil_param(3, "res", "sampler", "sampler handle"),
@ -733,147 +761,184 @@ class db_dxil(object):
            db_dxil_param(5, "f", "coord1", "coordinate"),
            db_dxil_param(6, "f", "coord2", "coordinate"),
            db_dxil_param(7, "i1", "clamped", "1 if clampled LOD should be calculated, 0 for unclamped")])
-        self.add_dxil_op("Discard", 85, "Discard", "discard the current pixel", "v", "", [
+        next_op_idx += 1
+        self.add_dxil_op("Discard", next_op_idx, "Discard", "discard the current pixel", "v", "", [
            retvoid_param,
            db_dxil_param(2, "i1", "condition", "condition for conditional discard")])
-        self.add_dxil_op("DerivCoarseX", 86, "Unary", "computes the rate of change of components per stamp", "hf", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("DerivCoarseX", next_op_idx, "Unary", "computes the rate of change of components per stamp", "hf", "rn", [
            db_dxil_param(0, "$o", "", "rate of change in value with regards to RenderTarget x direction"),
            db_dxil_param(2, "$o", "value", "input to rate of change")])
-        self.add_dxil_op("DerivCoarseY", 87, "Unary", "computes the rate of change of components per stamp", "hf", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("DerivCoarseY", next_op_idx, "Unary", "computes the rate of change of components per stamp", "hf", "rn", [
            db_dxil_param(0, "$o", "", "rate of change in value with regards to RenderTarget y direction"),
            db_dxil_param(2, "$o", "value", "input to rate of change")])
-        self.add_dxil_op("DerivFineX", 88, "Unary", "computes the rate of change of components per pixel", "hf", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("DerivFineX", next_op_idx, "Unary", "computes the rate of change of components per pixel", "hf", "rn", [
            db_dxil_param(0, "$o", "", "rate of change in value with regards to RenderTarget x direction"),
            db_dxil_param(2, "$o", "value", "input to rate of change")])
-        self.add_dxil_op("DerivFineY", 89, "Unary", "computes the rate of change of components per pixel", "hf", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("DerivFineY", next_op_idx, "Unary", "computes the rate of change of components per pixel", "hf", "rn", [
            db_dxil_param(0, "$o", "", "rate of change in value with regards to RenderTarget y direction"),
            db_dxil_param(2, "$o", "value", "input to rate of change")])
-        self.add_dxil_op("EvalSnapped", 90, "EvalSnapped", "evaluates an input attribute at pixel center with an offset", "hf", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("EvalSnapped", next_op_idx, "EvalSnapped", "evaluates an input attribute at pixel center with an offset", "hf", "rn", [
            db_dxil_param(0, "$o", "", "result"),
            db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
            db_dxil_param(3, "i32", "inputRowIndex", "row index of an input attribute"),
            db_dxil_param(4, "i8",  "inputColIndex", "column index of an input attribute"),
            db_dxil_param(5, "i32", "offsetX", "2D offset from the pixel center using a 16x16 grid"),
            db_dxil_param(6, "i32", "offsetY", "2D offset from the pixel center using a 16x16 grid")])
-        self.add_dxil_op("EvalSampleIndex", 91, "EvalSampleIndex", "evaluates an input attribute at a sample location", "hf", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("EvalSampleIndex", next_op_idx, "EvalSampleIndex", "evaluates an input attribute at a sample location", "hf", "rn", [
            db_dxil_param(0, "$o", "", "result"),
            db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
            db_dxil_param(3, "i32", "inputRowIndex", "row index of an input attribute"),
            db_dxil_param(4, "i8",  "inputColIndex", "column index of an input attribute"),
            db_dxil_param(5, "i32", "sampleIndex", "sample location")])
-        self.add_dxil_op("EvalCentroid", 92, "EvalCentroid", "evaluates an input attribute at pixel center", "hf", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("EvalCentroid", next_op_idx, "EvalCentroid", "evaluates an input attribute at pixel center", "hf", "rn", [
            db_dxil_param(0, "$o", "", "result"),
            db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
            db_dxil_param(3, "i32", "inputRowIndex", "row index of an input attribute"),
            db_dxil_param(4, "i8",  "inputColIndex", "column index of an input attribute")])
+        next_op_idx += 1
+        self.add_dxil_op("SampleIndex", next_op_idx, "SampleIndex", "returns the sample index in a sample-frequency pixel shader", "i", "rn", [
+            db_dxil_param(0, "i32", "", "result")])
+        next_op_idx += 1
+        self.add_dxil_op("Coverage", next_op_idx, "Coverage", "returns the coverage mask input in a pixel shader", "i", "rn", [
+            db_dxil_param(0, "i32", "", "result")])
+        next_op_idx += 1
+        self.add_dxil_op("InnerCoverage", next_op_idx, "InnerCoverage", "returns underestimated coverage input from conservative rasterization in a pixel shader", "i", "rn", [
+            db_dxil_param(0, "i32", "", "result")])
+        next_op_idx += 1

        # Compute shader.
-        self.add_dxil_op("ThreadId", 93, "ThreadId", "reads the thread ID", "i", "rn", [
+        self.add_dxil_op("ThreadId", next_op_idx, "ThreadId", "reads the thread ID", "i", "rn", [
            db_dxil_param(0, "i32", "", "thread ID component"),
            db_dxil_param(2, "i32", "component", "component to read (x,y,z)")])
-        self.add_dxil_op("GroupId", 94, "GroupId", "reads the group ID (SV_GroupID)", "i", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("GroupId", next_op_idx, "GroupId", "reads the group ID (SV_GroupID)", "i", "rn", [
            db_dxil_param(0, "i32", "", "group ID component"),
            db_dxil_param(2, "i32", "component", "component to read")])
-        self.add_dxil_op("ThreadIdInGroup", 95, "ThreadIdInGroup", "reads the thread ID within the group (SV_GroupThreadID)", "i", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("ThreadIdInGroup", next_op_idx, "ThreadIdInGroup", "reads the thread ID within the group (SV_GroupThreadID)", "i", "rn", [
            db_dxil_param(0, "i32", "", "thread ID in group component"),
            db_dxil_param(2, "i32", "component", "component to read (x,y,z)")])
-        self.add_dxil_op("FlattenedThreadIdInGroup", 96, "FlattenedThreadIdInGroup", "provides a flattened index for a given thread within a given group (SV_GroupIndex)", "i", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("FlattenedThreadIdInGroup", next_op_idx, "FlattenedThreadIdInGroup", "provides a flattened index for a given thread within a given group (SV_GroupIndex)", "i", "rn", [
            db_dxil_param(0, "i32", "", "result")])
+        next_op_idx += 1

        # Geometry shader
-        self.add_dxil_op("EmitStream", 97, "EmitStream", "emits a vertex to a given stream", "v", "", [
+        self.add_dxil_op("EmitStream", next_op_idx, "EmitStream", "emits a vertex to a given stream", "v", "", [
            retvoid_param,
            db_dxil_param(2, "i8", "streamId", "target stream ID for operation")])
-        self.add_dxil_op("CutStream", 98, "CutStream", "completes the current primitive topology at the specified stream", "v", "", [
+        next_op_idx += 1
+        self.add_dxil_op("CutStream", next_op_idx, "CutStream", "completes the current primitive topology at the specified stream", "v", "", [
            retvoid_param,
            db_dxil_param(2, "i8", "streamId", "target stream ID for operation")])
-        self.add_dxil_op("EmitThenCutStream", 99, "EmitThenCutStream", "equivalent to an EmitStream followed by a CutStream", "v", "", [
+        next_op_idx += 1
+        self.add_dxil_op("EmitThenCutStream", next_op_idx, "EmitThenCutStream", "equivalent to an EmitStream followed by a CutStream", "v", "", [
            retvoid_param,
            db_dxil_param(2, "i8", "streamId", "target stream ID for operation")])
+        next_op_idx += 1
+        self.add_dxil_op("GSInstanceID", next_op_idx, "GSInstanceID", "GSInstanceID", "i", "rn", [
+            db_dxil_param(0, "i32", "", "result")])
+        next_op_idx += 1

        # Double precision
-        self.add_dxil_op("MakeDouble", 100, "MakeDouble", "creates a double value", "d", "rn", [
+        self.add_dxil_op("MakeDouble", next_op_idx, "MakeDouble", "creates a double value", "d", "rn", [
            db_dxil_param(0, "d", "", "result"),
            db_dxil_param(2, "i32", "lo", "low part of double"),
            db_dxil_param(3, "i32", "hi", "high part of double")])
-        self.add_dxil_op_reserved("ToDelete1", 101)
-        self.add_dxil_op_reserved("ToDelete2", 102)
-        self.add_dxil_op("SplitDouble", 103, "SplitDouble", "splits a double into low and high parts", "d", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("SplitDouble", next_op_idx, "SplitDouble", "splits a double into low and high parts", "d", "rn", [
            db_dxil_param(0, "splitdouble", "", "result"),
            db_dxil_param(2, "d", "value", "value to split")])
-        self.add_dxil_op_reserved("ToDelete3", 104)
-        self.add_dxil_op_reserved("ToDelete4", 105)
+        next_op_idx += 1

        # Domain & Hull shader.
-        self.add_dxil_op("LoadOutputControlPoint", 106, "LoadOutputControlPoint", "LoadOutputControlPoint", "hfwi", "rn", [
+        self.add_dxil_op("LoadOutputControlPoint", next_op_idx, "LoadOutputControlPoint", "LoadOutputControlPoint", "hfwi", "rn", [
            db_dxil_param(0, "$o", "", "result"),
            db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
            db_dxil_param(3, "i32", "row", "row, relative to the element"),
            db_dxil_param(4, "i8", "col", "column, relative to the element"),
            db_dxil_param(5, "i32", "index", "vertex/point index")])
-        self.add_dxil_op("LoadPatchConstant", 107, "LoadPatchConstant", "LoadPatchConstant", "hfwi", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("LoadPatchConstant", next_op_idx, "LoadPatchConstant", "LoadPatchConstant", "hfwi", "rn", [
            db_dxil_param(0, "$o", "", "result"),
            db_dxil_param(2, "i32", "inputSigId", "input signature element ID"),
            db_dxil_param(3, "i32", "row", "row, relative to the element"),
            db_dxil_param(4, "i8", "col", "column, relative to the element")])
+        next_op_idx += 1

        # Domain shader.
-        self.add_dxil_op("DomainLocation", 108, "DomainLocation", "DomainLocation", "f", "rn", [
+        self.add_dxil_op("DomainLocation", next_op_idx, "DomainLocation", "DomainLocation", "f", "rn", [
            db_dxil_param(0, "f", "", "result"),
            db_dxil_param(2, "i8", "component", "input", is_const=True)])
+        next_op_idx += 1

        # Hull shader.
-        self.add_dxil_op("StorePatchConstant", 109, "StorePatchConstant", "StorePatchConstant", "hfwi", "", [
+        self.add_dxil_op("StorePatchConstant", next_op_idx, "StorePatchConstant", "StorePatchConstant", "hfwi", "", [
            retvoid_param,
            db_dxil_param(2, "i32", "outputSigID", "output signature element ID"),
            db_dxil_param(3, "i32", "row", "row, relative to the element"),
            db_dxil_param(4, "i8", "col", "column, relative to the element"),
            db_dxil_param(5, "$o", "value", "value to store")])
-        self.add_dxil_op("OutputControlPointID", 110, "OutputControlPointID", "OutputControlPointID", "i", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("OutputControlPointID", next_op_idx, "OutputControlPointID", "OutputControlPointID", "i", "rn", [
            db_dxil_param(0, "i32", "", "result")])
-        self.add_dxil_op("PrimitiveID", 111, "PrimitiveID", "PrimitiveID", "i", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("PrimitiveID", next_op_idx, "PrimitiveID", "PrimitiveID", "i", "rn", [
            db_dxil_param(0, "i32", "", "result")])
+        next_op_idx += 1

-        self.add_dxil_op("CycleCounterLegacy", 112, "CycleCounterLegacy", "CycleCounterLegacy", "v", "rn", [
+        self.add_dxil_op("CycleCounterLegacy", next_op_idx, "CycleCounterLegacy", "CycleCounterLegacy", "v", "rn", [
            db_dxil_param(0, "twoi32", "", "result")])
+        next_op_idx += 1
            
-        self.add_dxil_op("Htan", 113, "Unary", "returns the hyperbolic tangent of the specified value", "hf", "rn", [
-            db_dxil_param(0, "$o", "", "operation result"),
-            db_dxil_param(2, "$o", "value", "input value in radians")])
-
        # Add wave intrinsics.
-        self.add_dxil_op_reserved("WaveCaptureReserved", 114)
-        self.add_dxil_op("WaveIsFirstLane", 115, "WaveIsFirstLane", "returns 1 for the first lane in the wave", "v", "ro", [
+        self.add_dxil_op("WaveIsFirstLane", next_op_idx, "WaveIsFirstLane", "returns 1 for the first lane in the wave", "v", "ro", [
            db_dxil_param(0, "i1", "", "operation result")])
-        self.add_dxil_op("WaveGetLaneIndex", 116, "WaveGetLaneIndex", "returns the index of the current lane in the wave", "v", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("WaveGetLaneIndex", next_op_idx, "WaveGetLaneIndex", "returns the index of the current lane in the wave", "v", "ro", [
            db_dxil_param(0, "i32", "", "operation result")])
-        self.add_dxil_op("WaveGetLaneCount", 117, "WaveGetLaneCount", "returns the number of lanes in the wave", "v", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("WaveGetLaneCount", next_op_idx, "WaveGetLaneCount", "returns the number of lanes in the wave", "v", "ro", [
            db_dxil_param(0, "i32", "", "operation result")])
-        self.add_dxil_op_reserved("WaveIsHelperLaneReserved", 118)
-        self.add_dxil_op("WaveAnyTrue", 119, "WaveAnyTrue", "returns 1 if any of the lane evaluates the value to true", "v", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("WaveAnyTrue", next_op_idx, "WaveAnyTrue", "returns 1 if any of the lane evaluates the value to true", "v", "ro", [
            db_dxil_param(0, "i1", "", "operation result"),
            db_dxil_param(2, "i1", "cond", "condition to test")])
-        self.add_dxil_op("WaveAllTrue", 120, "WaveAllTrue", "returns 1 if all the lanes evaluate the value to true", "v", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("WaveAllTrue", next_op_idx, "WaveAllTrue", "returns 1 if all the lanes evaluate the value to true", "v", "ro", [
            db_dxil_param(0, "i1", "", "operation result"),
            db_dxil_param(2, "i1", "cond", "condition to test")])
-        self.add_dxil_op("WaveActiveAllEqual", 121, "WaveActiveAllEqual", "returns 1 if all the lanes have the same value", "hfd18wil", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("WaveActiveAllEqual", next_op_idx, "WaveActiveAllEqual", "returns 1 if all the lanes have the same value", "hfd18wil", "ro", [
            db_dxil_param(0, "i1", "", "operation result"),
            db_dxil_param(2, "$o", "value", "value to compare")])
-        self.add_dxil_op("WaveActiveBallot", 122, "WaveActiveBallot", "returns a struct with a bit set for each lane where the condition is true", "v", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("WaveActiveBallot", next_op_idx, "WaveActiveBallot", "returns a struct with a bit set for each lane where the condition is true", "v", "ro", [
            db_dxil_param(0, "$u4", "", "operation result"),
            db_dxil_param(2, "i1", "cond", "condition to ballot on")])
-        self.add_dxil_op("WaveReadLaneAt", 123, "WaveReadLaneAt", "returns the value from the specified lane", "hfd18wil", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("WaveReadLaneAt", next_op_idx, "WaveReadLaneAt", "returns the value from the specified lane", "hfd18wil", "ro", [
            db_dxil_param(0, "$o", "", "operation result"),
            db_dxil_param(2, "$o", "value", "value to read"),
            db_dxil_param(3, "i32", "lane", "lane index")])
-        self.add_dxil_op("WaveReadLaneFirst", 124, "WaveReadLaneFirst", "returns the value from the first lane", "hf18wil", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("WaveReadLaneFirst", next_op_idx, "WaveReadLaneFirst", "returns the value from the first lane", "hf18wil", "ro", [
            db_dxil_param(0, "$o", "", "operation result"),
            db_dxil_param(2, "$o", "value", "value to read")])
-        self.add_dxil_op("WaveActiveOp", 125, "WaveActiveOp", "returns the result the operation across waves", "hfd18wil", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("WaveActiveOp", next_op_idx, "WaveActiveOp", "returns the result the operation across waves", "hfd18wil", "ro", [
            db_dxil_param(0, "$o", "", "operation result"),
            db_dxil_param(2, "$o", "value", "input value"),
            db_dxil_param(3, "i8", "op", "kind of operation to perform", enum_name="WaveOpKind", is_const=True),
            db_dxil_param(4, "i8", "sop", "sign of operands", enum_name="SignedOpKind", is_const=True)])
+        next_op_idx += 1
        self.add_enum_type("SignedOpKind", "Sign vs. unsigned operands for operation", [
            (0, "Signed", "signed integer or floating-point operands"),
            (1, "Unsigned", "unsigned integer operands")])
@ -882,90 +947,97 @@ class db_dxil(object):
            (1, "Product", "product of values"), 
            (2, "Min", "minimum value"), 
            (3, "Max", "maximum value")])
-        self.add_dxil_op("WaveActiveBit", 126, "WaveActiveBit", "returns the result of the operation across all lanes", "8wil", "ro", [
+        self.add_dxil_op("WaveActiveBit", next_op_idx, "WaveActiveBit", "returns the result of the operation across all lanes", "8wil", "ro", [
            db_dxil_param(0, "$o", "", "operation result"),
            db_dxil_param(2, "$o", "value", "input value"),
            db_dxil_param(3, "i8", "op", "kind of operation to perform", enum_name="WaveBitOpKind", is_const=True)])
+        next_op_idx += 1
        self.add_enum_type("WaveBitOpKind", "Kind of bitwise cross-lane operation", [
            (0, "And", "bitwise and of values"), 
            (1, "Or", "bitwise or of values"), 
            (2, "Xor", "bitwise xor of values")])
-        self.add_dxil_op("WavePrefixOp", 127, "WavePrefixOp", "returns the result of the operation on prior lanes", "hfd8wil", "ro", [
+        self.add_dxil_op("WavePrefixOp", next_op_idx, "WavePrefixOp", "returns the result of the operation on prior lanes", "hfd8wil", "ro", [
            db_dxil_param(0, "$o", "", "operation result"),
            db_dxil_param(2, "$o", "value", "input value"),
            db_dxil_param(3, "i8", "op", "0=sum,1=product", enum_name="WaveOpKind", is_const=True),
            db_dxil_param(4, "i8", "sop", "sign of operands", enum_name="SignedOpKind", is_const=True)])
-        self.add_dxil_op_reserved("WaveGetOrderedIndex", 128)
-        self.add_dxil_op_reserved("GlobalOrderedCountIncReserved", 129)
-        self.add_dxil_op("QuadReadLaneAt", 130, "QuadReadLaneAt", "reads from a lane in the quad", "hfd18wil", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("QuadReadLaneAt", next_op_idx, "QuadReadLaneAt", "reads from a lane in the quad", "hfd18wil", "ro", [
            db_dxil_param(0, "$o", "", "operation result"),
            db_dxil_param(2, "$o", "value", "value to read"),
            db_dxil_param(3, "u32", "quadLane", "lane to read from (0-4)", max_value = 3, is_const=True)])
+        next_op_idx += 1
        self.add_enum_type("QuadOpKind", "Kind of quad-level operation", [
            (0, "ReadAcrossX", "returns the value from the other lane in the quad in the horizontal direction"), 
            (1, "ReadAcrossY", "returns the value from the other lane in the quad in the vertical direction"),
            (2, "ReadAcrossDiagonal", "returns the value from the lane across the quad in horizontal and vertical direction")])
-        self.add_dxil_op("QuadOp", 131, "QuadOp", "returns the result of a quad-level operation", "hfd8wil", "ro", [
+        self.add_dxil_op("QuadOp", next_op_idx, "QuadOp", "returns the result of a quad-level operation", "hfd8wil", "ro", [
            db_dxil_param(0, "$o", "", "operation result"),
            db_dxil_param(2, "$o", "value", "value for operation"),
            db_dxil_param(3, "i8", "op", "operation", enum_name = "QuadOpKind", is_const=True)])
+        next_op_idx += 1

        # Add bitcasts
-        self.add_dxil_op("BitcastI16toF16", 132, "BitcastI16toF16", "bitcast between different sizes", "v", "rn", [
+        self.add_dxil_op("BitcastI16toF16", next_op_idx, "BitcastI16toF16", "bitcast between different sizes", "v", "rn", [
            db_dxil_param(0, "h", "", "operation result"),
            db_dxil_param(2, "i16", "value", "input value")])
-        self.add_dxil_op("BitcastF16toI16", 133, "BitcastF16toI16", "bitcast between different sizes", "v", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("BitcastF16toI16", next_op_idx, "BitcastF16toI16", "bitcast between different sizes", "v", "rn", [
            db_dxil_param(0, "i16", "", "operation result"),
            db_dxil_param(2, "h", "value", "input value")])
-        self.add_dxil_op("BitcastI32toF32", 134, "BitcastI32toF32", "bitcast between different sizes", "v", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("BitcastI32toF32", next_op_idx, "BitcastI32toF32", "bitcast between different sizes", "v", "rn", [
            db_dxil_param(0, "f", "", "operation result"),
            db_dxil_param(2, "i32", "value", "input value")])
-        self.add_dxil_op("BitcastF32toI32", 135, "BitcastF32toI32", "bitcast between different sizes", "v", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("BitcastF32toI32", next_op_idx, "BitcastF32toI32", "bitcast between different sizes", "v", "rn", [
            db_dxil_param(0, "i32", "", "operation result"),
            db_dxil_param(2, "f", "value", "input value")])
-        self.add_dxil_op("BitcastI64toF64", 136, "BitcastI64toF64", "bitcast between different sizes", "v", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("BitcastI64toF64", next_op_idx, "BitcastI64toF64", "bitcast between different sizes", "v", "rn", [
            db_dxil_param(0, "d", "", "operation result"),
            db_dxil_param(2, "i64", "value", "input value")])
-        self.add_dxil_op("BitcastF64toI64", 137, "BitcastF64toI64", "bitcast between different sizes", "v", "rn", [
+        next_op_idx += 1
+        self.add_dxil_op("BitcastF64toI64", next_op_idx, "BitcastF64toI64", "bitcast between different sizes", "v", "rn", [
            db_dxil_param(0, "i64", "", "operation result"),
            db_dxil_param(2, "d", "value", "input value")])
+        next_op_idx += 1
        
-        self.add_dxil_op("GSInstanceID", 138, "GSInstanceID", "GSInstanceID", "i", "rn", [
-            db_dxil_param(0, "i32", "", "result")])
-
-        self.add_dxil_op("LegacyF32ToF16", 139, "LegacyF32ToF16", "legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)", "v", "rn", [
+        self.add_dxil_op("LegacyF32ToF16", next_op_idx, "LegacyF32ToF16", "legacy fuction to convert float (f32) to half (f16) (this is not related to min-precision)", "v", "rn", [
            db_dxil_param(0, "i32", "", "low 16 bits - half value, high 16 bits - zeroes"),
            db_dxil_param(2, "f", "value", "float value to convert")])
+        next_op_idx += 1

-        self.add_dxil_op("LegacyF16ToF32", 140, "LegacyF16ToF32", "legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)", "v", "rn", [
+        self.add_dxil_op("LegacyF16ToF32", next_op_idx, "LegacyF16ToF32", "legacy fuction to convert half (f16) to float (f32) (this is not related to min-precision)", "v", "rn", [
            db_dxil_param(0, "f", "", "converted float value"),
            db_dxil_param(2, "i32", "value", "half value to convert")])
+        next_op_idx += 1

-        self.add_dxil_op("LegacyDoubleToFloat", 141, "LegacyDoubleToFloat", "legacy fuction to convert double to float", "v", "rn", [
+        self.add_dxil_op("LegacyDoubleToFloat", next_op_idx, "LegacyDoubleToFloat", "legacy fuction to convert double to float", "v", "rn", [
            db_dxil_param(0, "f", "", "float value"),
            db_dxil_param(2, "d", "value", "double value to convert")])
+        next_op_idx += 1

-        self.add_dxil_op("LegacyDoubleToSInt32", 142, "LegacyDoubleToSInt32", "legacy fuction to convert double to int32", "v", "rn", [
+        self.add_dxil_op("LegacyDoubleToSInt32", next_op_idx, "LegacyDoubleToSInt32", "legacy fuction to convert double to int32", "v", "rn", [
            db_dxil_param(0, "i32", "", "i32 value"),
            db_dxil_param(2, "d", "value", "double value to convert")])
+        next_op_idx += 1

-        self.add_dxil_op("LegacyDoubleToUInt32", 143, "LegacyDoubleToUInt32", "legacy fuction to convert double to uint32", "v", "rn", [
+        self.add_dxil_op("LegacyDoubleToUInt32", next_op_idx, "LegacyDoubleToUInt32", "legacy fuction to convert double to uint32", "v", "rn", [
            db_dxil_param(0, "i32", "", "i32 value"),
            db_dxil_param(2, "d", "value", "double value to convert")])
+        next_op_idx += 1

-        self.add_dxil_op("WaveAllBitCount", 144, "WaveAllOp", "returns the count of bits set to 1 across the wave", "v", "ro", [
+        self.add_dxil_op("WaveAllBitCount", next_op_idx, "WaveAllOp", "returns the count of bits set to 1 across the wave", "v", "ro", [
            db_dxil_param(0, "i32", "", "operation result"),
            db_dxil_param(2, "i1", "value", "input value")])
-        self.add_dxil_op("WavePrefixBitCount", 145, "WavePrefixOp", "returns the count of bits set to 1 on prior lanes", "v", "ro", [
+        next_op_idx += 1
+        self.add_dxil_op("WavePrefixBitCount", next_op_idx, "WavePrefixOp", "returns the count of bits set to 1 on prior lanes", "v", "ro", [
            db_dxil_param(0, "i32", "", "operation result"),
            db_dxil_param(2, "i1", "value", "input value")])
+        next_op_idx += 1

-        self.add_dxil_op("SampleIndex", 146, "SampleIndex", "returns the sample index in a sample-frequency pixel shader", "i", "rn", [
-            db_dxil_param(0, "i32", "", "result")])
-        self.add_dxil_op("Coverage", 147, "Coverage", "returns the coverage mask input in a pixel shader", "i", "rn", [
-            db_dxil_param(0, "i32", "", "result")])
-        self.add_dxil_op("InnerCoverage", 148, "InnerCoverage", "returns underestimated coverage input from conservative rasterization in a pixel shader", "i", "rn", [
-            db_dxil_param(0, "i32", "", "result")])
+        assert next_op_idx == 139, "next operation index is %d rather than 139 and thus opcodes are broken" % next_op_idx

        # Set interesting properties.
        self.build_indices()
@ -1502,6 +1574,7 @@ class db_dxil(object):
        self.add_valrule_msg("Types.Defined", "Type must be defined based on DXIL primitives", "Type '%0' is not defined on DXIL primitives")
        self.add_valrule_msg("Types.IntWidth", "Int type must be of valid width", "Int type '%0' has an invalid width")
        self.add_valrule("Types.NoMultiDim", "Only one dimension allowed for array type")
+        self.add_valrule("Types.I8", "I8 can only used as immediate value for intrinsic")

        self.add_valrule_msg("Sm.Name", "Target shader model name must be known", "Unknown shader model '%0'")
        self.add_valrule("Sm.Opcode", "Opcode must be defined in target shader model")
--- a/utils/hct/hctdb_instrhelp.py
+++ b/utils/hct/hctdb_instrhelp.py
@ -183,6 +183,7 @@ class db_instrhelp_gen:
            "i32": "int32_t",
            "u32": "uint32_t"
            }
+        self.IsDxilOpFuncCallInst = "hlsl::OP::IsDxilOpFuncCallInst"

    def print_content(self):
        self.print_header()
@ -238,7 +239,7 @@ class db_instrhelp_gen:
            print("  operator bool() const {")
            if i.is_dxil_op:
                op_name = i.fully_qualified_name()
-                print("    return hlsl::OP::IsDxilOpFuncCallInst(Instr, %s);" % op_name)
+                print("    return %s(Instr, %s);" % (self.IsDxilOpFuncCallInst, op_name))
            else:
                print("    return Instr->getOpcode() == llvm::Instruction::%s;" % i.name)
            print("  }")
--- a/utils/hct/hcttest-samples.py
+++ b/utils/hct/hcttest-samples.py
@ -130,11 +130,8 @@ def ActionCopySDKLayers(args, name, dxil):
        ], args.symbols)
 def ActionCopyWarp12(args, name, dxil):
    CopyBins(args, name, dxil, [
-            'd3d12warp.dll',
+            'd3d10warp.dll',
        ], args.symbols)
-def MakeD3D12WarpCopy(bin_path):
-    # Copy d3d10warp.dll to d3d12warp.dll
-    shutil.copy2(os.path.join(bin_path, 'd3d10warp.dll'), os.path.join(bin_path, 'd3d12warp.dll'))

 def PathSplitAll(p):
    s = filter(None, os.path.split(p))
@ -161,7 +158,7 @@ def ListRuntimeCompilePaths(args):
        'D3D11_3SDKLayers.dll',
        'D3D12SDKLayers.dll',
        'DXGIDebug.dll',
-        'd3d12warp.dll',
+        'd3d10warp.dll',
    ]]

 def CheckEnvironment(args):